From 57df858a46f0a4cc104716e0ec88864e5c386ca4 Mon Sep 17 00:00:00 2001 From: Jassi Brar Date: Mon, 9 Feb 2026 17:36:19 -0600 Subject: mailbox: add API to query available TX queue slots Clients sometimes need to know whether the mailbox TX queue has room before posting a new message. Rather than exposing internal queue state through a struct field, provide a proper accessor function that returns the number of available slots for a given channel. This lets clients choose to back off when the queue is full instead of hitting the -ENOBUFS error path and the misleading "Try increasing MBOX_TX_QUEUE_LEN" warning. Tested-by: Tanmay Shah Signed-off-by: Jassi Brar --- drivers/mailbox/mailbox.c | 23 +++++++++++++++++++++++ include/linux/mailbox_client.h | 1 + 2 files changed, 24 insertions(+) diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 617ba505691d..354434cd1209 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -218,6 +218,29 @@ bool mbox_client_peek_data(struct mbox_chan *chan) } EXPORT_SYMBOL_GPL(mbox_client_peek_data); +/** + * mbox_chan_tx_slots_available - Query the number of available TX queue slots. + * @chan: Mailbox channel to query. + * + * Clients may call this to check how many messages can be queued via + * mbox_send_message() before the channel's TX queue is full. This helps + * clients avoid the -ENOBUFS error without needing to increase + * MBOX_TX_QUEUE_LEN. + * This can be called from atomic context. + * + * Return: Number of available slots in the channel's TX queue. + */ +unsigned int mbox_chan_tx_slots_available(struct mbox_chan *chan) +{ + unsigned int ret; + + guard(spinlock_irqsave)(&chan->lock); + ret = MBOX_TX_QUEUE_LEN - chan->msg_count; + + return ret; +} +EXPORT_SYMBOL_GPL(mbox_chan_tx_slots_available); + /** * mbox_send_message - For client to submit a message to be * sent to the remote. diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h index c6eea9afb943..e5997120f45c 100644 --- a/include/linux/mailbox_client.h +++ b/include/linux/mailbox_client.h @@ -45,6 +45,7 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg); int mbox_flush(struct mbox_chan *chan, unsigned long timeout); void mbox_client_txdone(struct mbox_chan *chan, int r); /* atomic */ bool mbox_client_peek_data(struct mbox_chan *chan); /* atomic */ +unsigned int mbox_chan_tx_slots_available(struct mbox_chan *chan); /* atomic */ void mbox_free_channel(struct mbox_chan *chan); /* may sleep */ #endif /* __MAILBOX_CLIENT_H */ -- cgit v1.2.3 From f9f0df23193a8afee3bfb5fc34970c93792d7163 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Tue, 10 Mar 2026 17:37:44 -0700 Subject: mailbox: rockchip: kzalloc + kcalloc to kzalloc Use a flexible array member to reduce allocations. Signed-off-by: Rosen Penev Signed-off-by: Jassi Brar --- drivers/mailbox/rockchip-mailbox.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/mailbox/rockchip-mailbox.c b/drivers/mailbox/rockchip-mailbox.c index 4d966cb2ed03..a1a7dee64356 100644 --- a/drivers/mailbox/rockchip-mailbox.c +++ b/drivers/mailbox/rockchip-mailbox.c @@ -46,7 +46,7 @@ struct rockchip_mbox { /* The maximum size of buf for each channel */ u32 buf_size; - struct rockchip_mbox_chan *chans; + struct rockchip_mbox_chan chans[]; }; static int rockchip_mbox_send_data(struct mbox_chan *chan, void *data) @@ -173,15 +173,10 @@ static int rockchip_mbox_probe(struct platform_device *pdev) drv_data = (const struct rockchip_mbox_data *) device_get_match_data(&pdev->dev); - mb = devm_kzalloc(&pdev->dev, sizeof(*mb), GFP_KERNEL); + mb = devm_kzalloc(&pdev->dev, struct_size(mb, chans, drv_data->num_chans), GFP_KERNEL); if (!mb) return -ENOMEM; - mb->chans = devm_kcalloc(&pdev->dev, drv_data->num_chans, - sizeof(*mb->chans), GFP_KERNEL); - if (!mb->chans) - return -ENOMEM; - mb->mbox.chans = devm_kcalloc(&pdev->dev, drv_data->num_chans, sizeof(*mb->mbox.chans), GFP_KERNEL); if (!mb->mbox.chans) -- cgit v1.2.3 From df1de2abf907ab4fef991eaddab1981c1a9354cf Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Tue, 10 Mar 2026 17:35:59 -0700 Subject: mailbox: hi6220: kzalloc + kcalloc to kzalloc Reduce allocations to a single one by using a flexible array member. Allows using __counted_by for extra runtime analysis. Signed-off-by: Rosen Penev Signed-off-by: Jassi Brar --- drivers/mailbox/hi6220-mailbox.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/mailbox/hi6220-mailbox.c b/drivers/mailbox/hi6220-mailbox.c index f77741ce42e7..69d15b6283e9 100644 --- a/drivers/mailbox/hi6220-mailbox.c +++ b/drivers/mailbox/hi6220-mailbox.c @@ -79,12 +79,12 @@ struct hi6220_mbox { /* region for mailbox */ void __iomem *base; - unsigned int chan_num; - struct hi6220_mbox_chan *mchan; - void *irq_map_chan[MBOX_CHAN_MAX]; struct mbox_chan *chan; struct mbox_controller controller; + + unsigned int chan_num; + struct hi6220_mbox_chan mchan[] __counted_by(chan_num); }; static void mbox_set_state(struct hi6220_mbox *mbox, @@ -267,16 +267,12 @@ static int hi6220_mbox_probe(struct platform_device *pdev) struct hi6220_mbox *mbox; int i, err; - mbox = devm_kzalloc(dev, sizeof(*mbox), GFP_KERNEL); + mbox = devm_kzalloc(dev, struct_size(mbox, mchan, MBOX_CHAN_MAX), GFP_KERNEL); if (!mbox) return -ENOMEM; - mbox->dev = dev; mbox->chan_num = MBOX_CHAN_MAX; - mbox->mchan = devm_kcalloc(dev, - mbox->chan_num, sizeof(*mbox->mchan), GFP_KERNEL); - if (!mbox->mchan) - return -ENOMEM; + mbox->dev = dev; mbox->chan = devm_kcalloc(dev, mbox->chan_num, sizeof(*mbox->chan), GFP_KERNEL); -- cgit v1.2.3 From 1e0ec9719f58d53da61adf830e81f4af892e4582 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Thu, 26 Feb 2026 00:33:24 +0800 Subject: mailbox: mtk-vcp-mailbox: Fix the return value in mtk_vcp_mbox_xlate() The return value of mtk_vcp_mbox_xlate() is checked by IS_ERR(), so return NULL is incorrect and could lead to a NULL pointer dereference. Fixes: b562abd95672 ("mailbox: mediatek: Add mtk-vcp-mailbox driver") Signed-off-by: Felix Gu Signed-off-by: Jassi Brar --- drivers/mailbox/mtk-vcp-mailbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mailbox/mtk-vcp-mailbox.c b/drivers/mailbox/mtk-vcp-mailbox.c index cedad575528f..1b291b8ea15a 100644 --- a/drivers/mailbox/mtk-vcp-mailbox.c +++ b/drivers/mailbox/mtk-vcp-mailbox.c @@ -50,7 +50,7 @@ static struct mbox_chan *mtk_vcp_mbox_xlate(struct mbox_controller *mbox, const struct of_phandle_args *sp) { if (sp->args_count) - return NULL; + return ERR_PTR(-EINVAL); return &mbox->chans[0]; } -- cgit v1.2.3 From d2591db9c8ef19fbb4d24ed15e0c6edfa6bc7917 Mon Sep 17 00:00:00 2001 From: Jason-JH Lin Date: Mon, 23 Mar 2026 17:07:11 +0800 Subject: mailbox: mtk-cmdq: Fix CURR and END addr for task insert case Fix CURR and END address calculation for inserting a cmdq task into the task list by using cmdq_reg_shift_addr() for proper address converting. This ensures both CURR and END addresses are set correctly when enabling the thread. Fixes: a195c7ccfb7a ("mailbox: mtk-cmdq: Refine DMA address handling for the command buffer") Signed-off-by: Jason-JH Lin Signed-off-by: Jassi Brar --- drivers/mailbox/mtk-cmdq-mailbox.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index d7c6b38888a3..547a10a8fad3 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -493,14 +493,14 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data) if (curr_pa == end_pa - CMDQ_INST_SIZE || curr_pa == end_pa) { /* set to this task directly */ - writel(task->pa_base >> cmdq->pdata->shift, - thread->base + CMDQ_THR_CURR_ADDR); + gce_addr = cmdq_convert_gce_addr(task->pa_base, cmdq->pdata); + writel(gce_addr, thread->base + CMDQ_THR_CURR_ADDR); } else { cmdq_task_insert_into_thread(task); smp_mb(); /* modify jump before enable thread */ } - writel((task->pa_base + pkt->cmd_buf_size) >> cmdq->pdata->shift, - thread->base + CMDQ_THR_END_ADDR); + gce_addr = cmdq_convert_gce_addr(task->pa_base + pkt->cmd_buf_size, cmdq->pdata); + writel(gce_addr, thread->base + CMDQ_THR_END_ADDR); cmdq_thread_resume(thread); } list_move_tail(&task->list_entry, &thread->task_busy_list); -- cgit v1.2.3 From 8a19c5aa2f04c38926318d128f57f0c350bab4c6 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 27 Mar 2026 16:12:46 +0100 Subject: mailbox: exynos: drop superfluous mbox setting per channel The core initializes the 'mbox' field exactly like this, so don't duplicate it in the driver. Signed-off-by: Wolfram Sang Reviewed-by: Tudor Ambarus Tested-by: Tudor Ambarus Signed-off-by: Jassi Brar --- drivers/mailbox/exynos-mailbox.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/mailbox/exynos-mailbox.c b/drivers/mailbox/exynos-mailbox.c index 5f2d3b81c1db..d2355b128ba4 100644 --- a/drivers/mailbox/exynos-mailbox.c +++ b/drivers/mailbox/exynos-mailbox.c @@ -99,7 +99,6 @@ static int exynos_mbox_probe(struct platform_device *pdev) struct mbox_controller *mbox; struct mbox_chan *chans; struct clk *pclk; - int i; exynos_mbox = devm_kzalloc(dev, sizeof(*exynos_mbox), GFP_KERNEL); if (!exynos_mbox) @@ -129,9 +128,6 @@ static int exynos_mbox_probe(struct platform_device *pdev) mbox->ops = &exynos_mbox_chan_ops; mbox->of_xlate = exynos_mbox_of_xlate; - for (i = 0; i < EXYNOS_MBOX_CHAN_COUNT; i++) - chans[i].mbox = mbox; - exynos_mbox->mbox = mbox; platform_set_drvdata(pdev, exynos_mbox); -- cgit v1.2.3 From 9efbbf810ee3e50360daa83cb8e0cc5ab998cef3 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 27 Mar 2026 16:11:44 +0100 Subject: mailbox: test: really ignore optional memory resources Memory resources are optional but if the resource is empty devm_platform_get_and_ioremap_resource() prints an error nonetheless. Refactor the code to check the resources locally first and process them only if they are present. The -EBUSY error message of ioremap_resource() is still kept because it is correct. The comment which explains that a plain ioremap() is tried as a workaround is turned into a info message. So, a user will be informed about it, too. Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Signed-off-by: Jassi Brar --- drivers/mailbox/mailbox-test.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c index 3a28ab5c42e5..058c0fe4b9c2 100644 --- a/drivers/mailbox/mailbox-test.c +++ b/drivers/mailbox/mailbox-test.c @@ -355,11 +355,27 @@ mbox_test_request_channel(struct platform_device *pdev, const char *name) return channel; } +static void __iomem *mbox_test_ioremap(struct platform_device *pdev, unsigned int res_num) +{ + struct resource *res; + void __iomem *mmio; + + res = platform_get_resource(pdev, IORESOURCE_MEM, res_num); + if (!res) + return NULL; + + mmio = devm_ioremap_resource(&pdev->dev, res); + if (PTR_ERR(mmio) == -EBUSY) { + dev_info(&pdev->dev, "trying workaround with plain ioremap\n"); + return devm_ioremap(&pdev->dev, res->start, resource_size(res)); + } + + return IS_ERR(mmio) ? NULL : mmio; +} + static int mbox_test_probe(struct platform_device *pdev) { struct mbox_test_device *tdev; - struct resource *res; - resource_size_t size; int ret; tdev = devm_kzalloc(&pdev->dev, sizeof(*tdev), GFP_KERNEL); @@ -367,23 +383,12 @@ static int mbox_test_probe(struct platform_device *pdev) return -ENOMEM; /* It's okay for MMIO to be NULL */ - tdev->tx_mmio = devm_platform_get_and_ioremap_resource(pdev, 0, &res); - if (PTR_ERR(tdev->tx_mmio) == -EBUSY) { - /* if reserved area in SRAM, try just ioremap */ - size = resource_size(res); - tdev->tx_mmio = devm_ioremap(&pdev->dev, res->start, size); - } else if (IS_ERR(tdev->tx_mmio)) { - tdev->tx_mmio = NULL; - } + tdev->tx_mmio = mbox_test_ioremap(pdev, 0); /* If specified, second reg entry is Rx MMIO */ - tdev->rx_mmio = devm_platform_get_and_ioremap_resource(pdev, 1, &res); - if (PTR_ERR(tdev->rx_mmio) == -EBUSY) { - size = resource_size(res); - tdev->rx_mmio = devm_ioremap(&pdev->dev, res->start, size); - } else if (IS_ERR(tdev->rx_mmio)) { + tdev->rx_mmio = mbox_test_ioremap(pdev, 1); + if (!tdev->rx_mmio) tdev->rx_mmio = tdev->tx_mmio; - } tdev->tx_channel = mbox_test_request_channel(pdev, "tx"); tdev->rx_channel = mbox_test_request_channel(pdev, "rx"); -- cgit v1.2.3 From d81e6703b8f12bbb885967933d1730600bce02c7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 23 Feb 2026 13:21:33 +0100 Subject: mailbox: correct kdoc title for mbox_bind_client "Request" is wrong, there is a separate function for requesting. This functions binds, so describe this. Signed-off-by: Wolfram Sang Signed-off-by: Jassi Brar --- drivers/mailbox/mailbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 354434cd1209..03473ae41ed1 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -364,7 +364,7 @@ static int __mbox_bind_client(struct mbox_chan *chan, struct mbox_client *cl) } /** - * mbox_bind_client - Request a mailbox channel. + * mbox_bind_client - Bind client to a mailbox channel. * @chan: The mailbox channel to bind the client to. * @cl: Identity of the client requesting the channel. * -- cgit v1.2.3 From 89e5d7d616009e5fada5da081b1d79cdd59150ab Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 27 Mar 2026 16:10:21 +0100 Subject: mailbox: remove superfluous internal header Quite some controller drivers use the defines from the internal header already. This prevents controller drivers outside the mailbox directory. Move the defines to the public controller header to allow this again as the defines are not strictly internal anyhow. Signed-off-by: Wolfram Sang Reviewed-by: Sudeep Holla Reviewed-by: Daniel Baluta Signed-off-by: Jassi Brar --- drivers/mailbox/cix-mailbox.c | 2 -- drivers/mailbox/hi3660-mailbox.c | 2 -- drivers/mailbox/imx-mailbox.c | 2 -- drivers/mailbox/mailbox-sti.c | 2 -- drivers/mailbox/mailbox.c | 2 -- drivers/mailbox/mailbox.h | 12 ------------ drivers/mailbox/omap-mailbox.c | 2 -- drivers/mailbox/pcc.c | 2 -- drivers/mailbox/tegra-hsp.c | 2 -- include/linux/mailbox_controller.h | 5 +++++ 10 files changed, 5 insertions(+), 28 deletions(-) delete mode 100644 drivers/mailbox/mailbox.h diff --git a/drivers/mailbox/cix-mailbox.c b/drivers/mailbox/cix-mailbox.c index 443620e8ae37..864f98f21fc3 100644 --- a/drivers/mailbox/cix-mailbox.c +++ b/drivers/mailbox/cix-mailbox.c @@ -12,8 +12,6 @@ #include #include -#include "mailbox.h" - /* * The maximum transmission size is 32 words or 128 bytes. */ diff --git a/drivers/mailbox/hi3660-mailbox.c b/drivers/mailbox/hi3660-mailbox.c index 17c29e960fbf..9b727a2b54a5 100644 --- a/drivers/mailbox/hi3660-mailbox.c +++ b/drivers/mailbox/hi3660-mailbox.c @@ -15,8 +15,6 @@ #include #include -#include "mailbox.h" - #define MBOX_CHAN_MAX 32 #define MBOX_RX 0x0 diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c index 003f9236c35e..22331b579489 100644 --- a/drivers/mailbox/imx-mailbox.c +++ b/drivers/mailbox/imx-mailbox.c @@ -23,8 +23,6 @@ #include #include -#include "mailbox.h" - #define IMX_MU_CHANS 24 /* TX0/RX0/RXDB[0-3] */ #define IMX_MU_SCU_CHANS 6 diff --git a/drivers/mailbox/mailbox-sti.c b/drivers/mailbox/mailbox-sti.c index b4b5bdd503cf..b6c9ecbbc8ec 100644 --- a/drivers/mailbox/mailbox-sti.c +++ b/drivers/mailbox/mailbox-sti.c @@ -21,8 +21,6 @@ #include #include -#include "mailbox.h" - #define STI_MBOX_INST_MAX 4 /* RAM saving: Max supported instances */ #define STI_MBOX_CHAN_MAX 20 /* RAM saving: Max supported channels */ diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 03473ae41ed1..13de3d047853 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -18,8 +18,6 @@ #include #include -#include "mailbox.h" - static LIST_HEAD(mbox_cons); static DEFINE_MUTEX(con_mutex); diff --git a/drivers/mailbox/mailbox.h b/drivers/mailbox/mailbox.h deleted file mode 100644 index e1ec4efab693..000000000000 --- a/drivers/mailbox/mailbox.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ - -#ifndef __MAILBOX_H -#define __MAILBOX_H - -#include - -#define TXDONE_BY_IRQ BIT(0) /* controller has remote RTR irq */ -#define TXDONE_BY_POLL BIT(1) /* controller can read status of last TX */ -#define TXDONE_BY_ACK BIT(2) /* S/W ACK received by Client ticks the TX */ - -#endif /* __MAILBOX_H */ diff --git a/drivers/mailbox/omap-mailbox.c b/drivers/mailbox/omap-mailbox.c index d9f100c18895..5772c6b9886a 100644 --- a/drivers/mailbox/omap-mailbox.c +++ b/drivers/mailbox/omap-mailbox.c @@ -22,8 +22,6 @@ #include #include -#include "mailbox.h" - #define MAILBOX_REVISION 0x000 #define MAILBOX_MESSAGE(m) (0x040 + 4 * (m)) #define MAILBOX_FIFOSTATUS(m) (0x080 + 4 * (m)) diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index 22e70af1ae5d..636879ae1db7 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -59,8 +59,6 @@ #include #include -#include "mailbox.h" - #define MBOX_IRQ_NAME "pcc-mbox" /** diff --git a/drivers/mailbox/tegra-hsp.c b/drivers/mailbox/tegra-hsp.c index ed9a0bb2bcd8..2231050bb5a9 100644 --- a/drivers/mailbox/tegra-hsp.c +++ b/drivers/mailbox/tegra-hsp.c @@ -16,8 +16,6 @@ #include -#include "mailbox.h" - #define HSP_INT_IE(x) (0x100 + ((x) * 4)) #define HSP_INT_IV 0x300 #define HSP_INT_IR 0x304 diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index 80a427c7ca29..16fef421c30c 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -3,6 +3,7 @@ #ifndef __MAILBOX_CONTROLLER_H #define __MAILBOX_CONTROLLER_H +#include #include #include #include @@ -11,6 +12,10 @@ struct mbox_chan; +#define TXDONE_BY_IRQ BIT(0) /* controller has remote RTR irq */ +#define TXDONE_BY_POLL BIT(1) /* controller can read status of last TX */ +#define TXDONE_BY_ACK BIT(2) /* S/W ACK received by Client ticks the TX */ + /** * struct mbox_chan_ops - methods to control mailbox channels * @send_data: The API asks the MBOX controller driver, in atomic -- cgit v1.2.3 From c58e9456e30c7098cbcd9f04571992be8a2e4e63 Mon Sep 17 00:00:00 2001 From: Jassi Brar Date: Fri, 27 Mar 2026 17:00:40 -0500 Subject: mailbox: Fix NULL message support in mbox_send_message() The active_req field serves double duty as both the "is a TX in flight" flag (NULL means idle) and the storage for the in-flight message pointer. When a client sends NULL via mbox_send_message(), active_req is set to NULL, which the framework misinterprets as "no active request". This breaks the TX state machine by: - tx_tick() short-circuits on (!mssg), skipping the tx_done callback and the tx_complete completion - txdone_hrtimer() skips the channel entirely since active_req is NULL, so poll-based TX-done detection never fires. Fix this by introducing a MBOX_NO_MSG sentinel value that means "no active request," freeing NULL to be valid message data. The sentinel is defined in the subsystem-internal mailbox.h so that controller drivers within drivers/mailbox/ can reference it, but it is not exposed to clients outside the subsystem. Fifteen in-tree callers send NULL (doorbell-style IPCs on Qualcomm, Tegra, TI, Xilinx, i.MX, SCMI, and PCC platforms). All were audited for regression: - Most already work around the bug via knows_txdone=true with a manual mbox_client_txdone() call, making the framework's tracking irrelevant. These are unaffected. - Poll-based callers (Xilinx zynqmp/r5) are strictly better off: the poll timer now correctly detects NULL-active channels instead of silently skipping them. - irq-qcom-mpm.c was a pre-existing bug -- the only Qualcomm caller that omitted the knows_txdone + mbox_client_txdone() pattern. Fixed in a companion commit ("irqchip/qcom-mpm: Fix missing mailbox TX done acknowledgment"). - No caller sets both a tx_done callback and sends NULL, nor combines tx_block=true with NULL sends, so the newly reachable callback/completion paths are never exercised. Also update tegra-hsp's flush callback, which directly inspects active_req to wait for the channel to drain: the old "!= NULL" check becomes "!= MBOX_NO_MSG", otherwise flush spins until timeout since the sentinel is non-NULL. The only tradeoff is that 'MBOX_NO_MSG' can not be used as a message by clients. Reported-by: Joonwon Kang Reviewed-by: Douglas Anderson Signed-off-by: Jassi Brar --- drivers/mailbox/mailbox.c | 15 ++++++++------- drivers/mailbox/tegra-hsp.c | 2 +- include/linux/mailbox_controller.h | 3 +++ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 13de3d047853..138ffbcd4fde 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -50,7 +50,7 @@ static void msg_submit(struct mbox_chan *chan) int err = -EBUSY; scoped_guard(spinlock_irqsave, &chan->lock) { - if (!chan->msg_count || chan->active_req) + if (!chan->msg_count || chan->active_req != MBOX_NO_MSG) break; count = chan->msg_count; @@ -85,13 +85,13 @@ static void tx_tick(struct mbox_chan *chan, int r) scoped_guard(spinlock_irqsave, &chan->lock) { mssg = chan->active_req; - chan->active_req = NULL; + chan->active_req = MBOX_NO_MSG; } /* Submit next message */ msg_submit(chan); - if (!mssg) + if (mssg == MBOX_NO_MSG) return; /* Notify the client */ @@ -112,7 +112,7 @@ static enum hrtimer_restart txdone_hrtimer(struct hrtimer *hrtimer) for (i = 0; i < mbox->num_chans; i++) { struct mbox_chan *chan = &mbox->chans[i]; - if (chan->active_req && chan->cl) { + if (chan->active_req != MBOX_NO_MSG && chan->cl) { txdone = chan->mbox->ops->last_tx_done(chan); if (txdone) tx_tick(chan, 0); @@ -267,7 +267,7 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg) { int t; - if (!chan || !chan->cl) + if (!chan || !chan->cl || mssg == MBOX_NO_MSG) return -EINVAL; t = add_to_rbuf(chan, mssg); @@ -340,7 +340,7 @@ static int __mbox_bind_client(struct mbox_chan *chan, struct mbox_client *cl) scoped_guard(spinlock_irqsave, &chan->lock) { chan->msg_free = 0; chan->msg_count = 0; - chan->active_req = NULL; + chan->active_req = MBOX_NO_MSG; chan->cl = cl; init_completion(&chan->tx_complete); @@ -498,7 +498,7 @@ void mbox_free_channel(struct mbox_chan *chan) /* The queued TX requests are simply aborted, no callbacks are made */ scoped_guard(spinlock_irqsave, &chan->lock) { chan->cl = NULL; - chan->active_req = NULL; + chan->active_req = MBOX_NO_MSG; if (chan->txdone_method == TXDONE_BY_ACK) chan->txdone_method = TXDONE_BY_POLL; } @@ -553,6 +553,7 @@ int mbox_controller_register(struct mbox_controller *mbox) chan->cl = NULL; chan->mbox = mbox; + chan->active_req = MBOX_NO_MSG; chan->txdone_method = txdone; spin_lock_init(&chan->lock); } diff --git a/drivers/mailbox/tegra-hsp.c b/drivers/mailbox/tegra-hsp.c index 2231050bb5a9..7b1e1b83ea29 100644 --- a/drivers/mailbox/tegra-hsp.c +++ b/drivers/mailbox/tegra-hsp.c @@ -495,7 +495,7 @@ static int tegra_hsp_mailbox_flush(struct mbox_chan *chan, mbox_chan_txdone(chan, 0); /* Wait until channel is empty */ - if (chan->active_req != NULL) + if (chan->active_req != MBOX_NO_MSG) continue; return 0; diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index 16fef421c30c..e3896b08f22e 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -12,6 +12,9 @@ struct mbox_chan; +/* Sentinel value distinguishing "no active request" from "NULL message data" */ +#define MBOX_NO_MSG ((void *)-1) + #define TXDONE_BY_IRQ BIT(0) /* controller has remote RTR irq */ #define TXDONE_BY_POLL BIT(1) /* controller can read status of last TX */ #define TXDONE_BY_ACK BIT(2) /* S/W ACK received by Client ticks the TX */ -- cgit v1.2.3 From 80784b427970219ebc338a6fb4118cde67a6c317 Mon Sep 17 00:00:00 2001 From: Dylan Wu Date: Mon, 9 Feb 2026 16:34:52 +0800 Subject: mailbox: cix: Add IRQF_NO_SUSPEND to mailbox interrupt During the system suspend process, device interrupts are masked in the noirq phase. However, SCMI often needs to exchange final messages with the firmware to complete the power-down transition. Without the IRQF_NO_SUSPEND flag, the mailbox ISR cannot run during this late stage, leading to SCMI communication timeouts and error messages like "SCMI protocol wait for resp timeout" during suspend. Add the IRQF_NO_SUSPEND flag to the interrupt request to ensure the mailbox can continue to handle responses during the noirq stages of suspend and resume, thereby ensuring a reliable power state transition. Signed-off-by: Dylan Wu Signed-off-by: Jassi Brar --- drivers/mailbox/cix-mailbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mailbox/cix-mailbox.c b/drivers/mailbox/cix-mailbox.c index 864f98f21fc3..8cfaa91b75bd 100644 --- a/drivers/mailbox/cix-mailbox.c +++ b/drivers/mailbox/cix-mailbox.c @@ -403,7 +403,7 @@ static int cix_mbox_startup(struct mbox_chan *chan) int index = cp->index, ret; u32 val; - ret = request_irq(priv->irq, cix_mbox_isr, 0, + ret = request_irq(priv->irq, cix_mbox_isr, IRQF_NO_SUSPEND, dev_name(priv->dev), chan); if (ret) { dev_err(priv->dev, "Unable to acquire IRQ %d\n", priv->irq); -- cgit v1.2.3 From 220045247712ddfda1fcedfa61e91dae24e63bcf Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Fri, 27 Mar 2026 14:36:00 +0200 Subject: dt-bindings: mailbox: qcom-ipcc: Document the Eliza Inter-Processor Communication Controller Document the Inter-Processor Communication Controller (IPCC) found in the Qualcomm Eliza SoC. It is used to route interrupts across various subsystems. Signed-off-by: Abel Vesa Acked-by: Manivannan Sadhasivam Reviewed-by: Konrad Dybcio Signed-off-by: Jassi Brar --- Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml index 7c4d6170491d..f5c584cf2146 100644 --- a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml +++ b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml @@ -24,6 +24,7 @@ properties: compatible: items: - enum: + - qcom,eliza-ipcc - qcom,glymur-ipcc - qcom,kaanapali-ipcc - qcom,milos-ipcc -- cgit v1.2.3 From 4606467a75cfc16721937272ed29462a750b60c8 Mon Sep 17 00:00:00 2001 From: Shivam Kumar Date: Wed, 18 Mar 2026 18:56:58 -0400 Subject: nvmet-tcp: check INIT_FAILED before nvmet_req_uninit in digest error path In nvmet_tcp_try_recv_ddgst(), when a data digest mismatch is detected, nvmet_req_uninit() is called unconditionally. However, if the command arrived via the nvmet_tcp_handle_req_failure() path, nvmet_req_init() had returned false and percpu_ref_tryget_live() was never executed. The unconditional percpu_ref_put() inside nvmet_req_uninit() then causes a refcount underflow, leading to a WARNING in percpu_ref_switch_to_atomic_rcu, a use-after-free diagnostic, and eventually a permanent workqueue deadlock. Check cmd->flags & NVMET_TCP_F_INIT_FAILED before calling nvmet_req_uninit(), matching the existing pattern in nvmet_tcp_execute_request(). Reviewed-by: Christoph Hellwig Signed-off-by: Shivam Kumar Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 4b8b02341ddc..69e971b179ae 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1310,7 +1310,8 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) queue->idx, cmd->req.cmd->common.command_id, queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst), le32_to_cpu(cmd->exp_ddgst)); - nvmet_req_uninit(&cmd->req); + if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED)) + nvmet_req_uninit(&cmd->req); nvmet_tcp_free_cmd_buffers(cmd); nvmet_tcp_fatal_error(queue); ret = -EPROTO; -- cgit v1.2.3 From 723277b15ed97185ce6f75abbf19f06e00f0a6f5 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 31 Mar 2026 16:17:31 +0800 Subject: nvme: add missing MODULE_ALIAS for fabrics transports The generic fabrics layer uses request_module("nvme-%s", opts->transport) to auto-load transport modules. Currently, the nvme-tcp, nvme-rdma, and nvme-fc modules lack MODULE_ALIAS entries for these names, which prevents the kernel from automatically finding and loading them when requested. Reviewed-by: Christoph Hellwig Signed-off-by: Geliang Tang Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 1 + drivers/nvme/host/rdma.c | 1 + drivers/nvme/host/tcp.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e1bb4707183c..e4f4528fe2a2 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3968,3 +3968,4 @@ module_exit(nvme_fc_exit_module); MODULE_DESCRIPTION("NVMe host FC transport driver"); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("nvme-fc"); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 57111139e84f..1ec6e867aedb 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2432,3 +2432,4 @@ module_exit(nvme_rdma_cleanup_module); MODULE_DESCRIPTION("NVMe host RDMA transport driver"); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("nvme-rdma"); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 243dab830dc8..02c95c32b07e 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -3071,3 +3071,4 @@ module_exit(nvme_tcp_cleanup_module); MODULE_DESCRIPTION("NVMe host TCP transport driver"); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("nvme-tcp"); -- cgit v1.2.3 From 0a5a94648627a438067fc8a6dd178187ceb112fb Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Wed, 8 Apr 2026 09:02:26 +0000 Subject: nvmet: introduce new mdts configuration entry Using this port configuration, one will be able to set the Maximum Data Transfer Size (MDTS) for any controller that will be associated to the configured port. The default value remains 0 (no limit). Signed-off-by: Max Gurtovoy Signed-off-by: Aurelien Aptel Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/admin-cmd.c | 8 ++------ drivers/nvme/target/configfs.c | 27 +++++++++++++++++++++++++++ drivers/nvme/target/core.c | 8 ++++++++ drivers/nvme/target/nvmet.h | 13 +++++++++++++ drivers/nvme/target/zns.c | 6 +----- 5 files changed, 51 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 3794ef258556..3842087209da 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -687,12 +687,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->cmic = NVME_CTRL_CMIC_MULTI_PORT | NVME_CTRL_CMIC_MULTI_CTRL | NVME_CTRL_CMIC_ANA; - /* Limit MDTS according to transport capability */ - if (ctrl->ops->get_mdts) - id->mdts = ctrl->ops->get_mdts(ctrl); - else - id->mdts = 0; - + /* Limit MDTS according to port config or transport capability */ + id->mdts = nvmet_ctrl_mdts(req); id->cntlid = cpu_to_le16(ctrl->cntlid); id->ver = cpu_to_le32(ctrl->subsys->ver); diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 463348c7f097..b88f897f06e2 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -301,6 +301,31 @@ static ssize_t nvmet_param_max_queue_size_store(struct config_item *item, CONFIGFS_ATTR(nvmet_, param_max_queue_size); +static ssize_t nvmet_param_mdts_show(struct config_item *item, char *page) +{ + struct nvmet_port *port = to_nvmet_port(item); + + return snprintf(page, PAGE_SIZE, "%d\n", port->mdts); +} + +static ssize_t nvmet_param_mdts_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_port *port = to_nvmet_port(item); + int ret; + + if (nvmet_is_port_enabled(port, __func__)) + return -EACCES; + ret = kstrtoint(page, 0, &port->mdts); + if (ret) { + pr_err("Invalid value '%s' for mdts\n", page); + return -EINVAL; + } + return count; +} + +CONFIGFS_ATTR(nvmet_, param_mdts); + #ifdef CONFIG_BLK_DEV_INTEGRITY static ssize_t nvmet_param_pi_enable_show(struct config_item *item, char *page) @@ -1995,6 +2020,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = { &nvmet_attr_addr_tsas, &nvmet_attr_param_inline_data_size, &nvmet_attr_param_max_queue_size, + &nvmet_attr_param_mdts, #ifdef CONFIG_BLK_DEV_INTEGRITY &nvmet_attr_param_pi_enable, #endif @@ -2053,6 +2079,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group, INIT_LIST_HEAD(&port->referrals); port->inline_data_size = -1; /* < 0 == let the transport choose */ port->max_queue_size = -1; /* < 0 == let the transport choose */ + port->mdts = -1; /* < 0 == let the transport choose */ port->disc_addr.trtype = NVMF_TRTYPE_MAX; port->disc_addr.portid = cpu_to_le16(portid); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 03cc7d5f9683..33db6c5534e2 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -368,6 +368,14 @@ int nvmet_enable_port(struct nvmet_port *port) NVMET_MIN_QUEUE_SIZE, NVMET_MAX_QUEUE_SIZE); + /* + * If the transport didn't set the mdts properly, then clamp it to the + * target limits. Also set default values in case the transport didn't + * set it at all. + */ + if (port->mdts < 0 || port->mdts > NVMET_MAX_MDTS) + port->mdts = 0; + port->enabled = true; port->tr_ops = ops; return 0; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 5db8f0d6e3f2..64af6bf569bf 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -214,6 +214,7 @@ struct nvmet_port { bool enabled; int inline_data_size; int max_queue_size; + int mdts; const struct nvmet_fabrics_ops *tr_ops; bool pi_enable; }; @@ -672,6 +673,7 @@ void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, #define NVMET_MAX_QUEUE_SIZE 1024 #define NVMET_NR_QUEUES 128 #define NVMET_MAX_CMD(ctrl) (NVME_CAP_MQES(ctrl->cap) + 1) +#define NVMET_MAX_MDTS 255 /* * Nice round number that makes a list of nsids fit into a page. @@ -760,6 +762,17 @@ static inline bool nvmet_is_pci_ctrl(struct nvmet_ctrl *ctrl) return ctrl->port->disc_addr.trtype == NVMF_TRTYPE_PCI; } +/* Limit MDTS according to port config or transport capability */ +static inline u8 nvmet_ctrl_mdts(struct nvmet_req *req) +{ + struct nvmet_ctrl *ctrl = req->sq->ctrl; + u8 mdts = req->port->mdts; + + if (!ctrl->ops->get_mdts) + return mdts; + return min_not_zero(ctrl->ops->get_mdts(ctrl), mdts); +} + #ifdef CONFIG_NVME_TARGET_PASSTHRU void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys); int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys); diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index aeaf73b54c3a..f00921931eb6 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -69,7 +69,6 @@ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns) void nvmet_execute_identify_ctrl_zns(struct nvmet_req *req) { u8 zasl = req->sq->ctrl->subsys->zasl; - struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvme_id_ctrl_zns *id; u16 status; @@ -79,10 +78,7 @@ void nvmet_execute_identify_ctrl_zns(struct nvmet_req *req) goto out; } - if (ctrl->ops->get_mdts) - id->zasl = min_t(u8, ctrl->ops->get_mdts(ctrl), zasl); - else - id->zasl = zasl; + id->zasl = min_not_zero(nvmet_ctrl_mdts(req), zasl); status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); -- cgit v1.2.3 From 23528aa3320a74b028e990b5a939fed32a8afc2f Mon Sep 17 00:00:00 2001 From: Shivaji Kant Date: Mon, 6 Apr 2026 09:21:32 +0000 Subject: nvme: enable PCI P2PDMA support for RDMA transport Enable BLK_FEAT_PCI_P2PDMA on the NVMe when the underlying RDMA controller supports it. Suggested-by: Pranjal Shrivastava Reviewed-by: Pranjal Shrivastava Reviewed-by: Henrique Carvalho Reviewed-by: Christoph Hellwig Signed-off-by: Shivaji Kant Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 1ec6e867aedb..f77c960f7632 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2189,6 +2189,13 @@ out_fail: nvme_rdma_reconnect_or_remove(ctrl, ret); } +static bool nvme_rdma_supports_pci_p2pdma(struct nvme_ctrl *ctrl) +{ + struct nvme_rdma_ctrl *r_ctrl = to_rdma_ctrl(ctrl); + + return ib_dma_pci_p2p_dma_supported(r_ctrl->device->dev); +} + static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .name = "rdma", .module = THIS_MODULE, @@ -2203,6 +2210,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .get_address = nvmf_get_address, .stop_ctrl = nvme_rdma_stop_ctrl, .get_virt_boundary = nvme_get_virt_boundary, + .supports_pci_p2pdma = nvme_rdma_supports_pci_p2pdma, }; /* -- cgit v1.2.3 From ea8e356acb165cb1fd75537a52e1f66e5e76c538 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 16 Mar 2026 15:39:35 +0100 Subject: nvmet-tcp: propagate nvmet_tcp_build_pdu_iovec() errors to its callers Currently, when nvmet_tcp_build_pdu_iovec() detects an out-of-bounds PDU length or offset, it triggers nvmet_tcp_fatal_error(cmd->queue) and returns early. However, because the function returns void, the callers are entirely unaware that a fatal error has occurred and that the cmd->recv_msg.msg_iter was left uninitialized. Callers such as nvmet_tcp_handle_h2c_data_pdu() proceed to blindly overwrite the queue state with queue->rcv_state = NVMET_TCP_RECV_DATA Consequently, the socket receiving loop may attempt to read incoming network data into the uninitialized iterator. Fix this by shifting the error handling responsibility to the callers. Fixes: 52a0a9854934 ("nvmet-tcp: add bounds checks in nvmet_tcp_build_pdu_iovec") Reviewed-by: Hannes Reinecke Reviewed-by: Yunje Shin Reviewed-by: Chaitanya Kulkarni Signed-off-by: Maurizio Lombardi Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 51 +++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 69e971b179ae..3ade734c8bcf 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -351,7 +351,7 @@ static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd) static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue); -static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) +static int nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) { struct bio_vec *iov = cmd->iov; struct scatterlist *sg; @@ -364,22 +364,19 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) offset = cmd->rbytes_done; cmd->sg_idx = offset / PAGE_SIZE; sg_offset = offset % PAGE_SIZE; - if (!cmd->req.sg_cnt || cmd->sg_idx >= cmd->req.sg_cnt) { - nvmet_tcp_fatal_error(cmd->queue); - return; - } + if (!cmd->req.sg_cnt || cmd->sg_idx >= cmd->req.sg_cnt) + return -EPROTO; + sg = &cmd->req.sg[cmd->sg_idx]; sg_remaining = cmd->req.sg_cnt - cmd->sg_idx; while (length) { - if (!sg_remaining) { - nvmet_tcp_fatal_error(cmd->queue); - return; - } - if (!sg->length || sg->length <= sg_offset) { - nvmet_tcp_fatal_error(cmd->queue); - return; - } + if (!sg_remaining) + return -EPROTO; + + if (!sg->length || sg->length <= sg_offset) + return -EPROTO; + u32 iov_len = min_t(u32, length, sg->length - sg_offset); bvec_set_page(iov, sg_page(sg), iov_len, @@ -394,6 +391,7 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) iov_iter_bvec(&cmd->recv_msg.msg_iter, ITER_DEST, cmd->iov, nr_pages, cmd->pdu_len); + return 0; } static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) @@ -931,7 +929,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) return 0; } -static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, +static int nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, struct nvmet_tcp_cmd *cmd, struct nvmet_req *req) { size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length); @@ -947,19 +945,23 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, if (!nvme_is_write(cmd->req.cmd) || !data_len || data_len > cmd->req.port->inline_data_size) { nvmet_prepare_receive_pdu(queue); - return; + return 0; } ret = nvmet_tcp_map_data(cmd); if (unlikely(ret)) { pr_err("queue %d: failed to map data\n", queue->idx); nvmet_tcp_fatal_error(queue); - return; + return -EPROTO; } queue->rcv_state = NVMET_TCP_RECV_DATA; - nvmet_tcp_build_pdu_iovec(cmd); cmd->flags |= NVMET_TCP_F_INIT_FAILED; + ret = nvmet_tcp_build_pdu_iovec(cmd); + if (unlikely(ret)) + pr_err("queue %d: failed to build PDU iovec\n", queue->idx); + + return ret; } static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) @@ -1011,7 +1013,10 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) goto err_proto; } cmd->pdu_recv = 0; - nvmet_tcp_build_pdu_iovec(cmd); + if (unlikely(nvmet_tcp_build_pdu_iovec(cmd))) { + pr_err("queue %d: failed to build PDU iovec\n", queue->idx); + goto err_proto; + } queue->cmd = cmd; queue->rcv_state = NVMET_TCP_RECV_DATA; @@ -1074,8 +1079,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) le32_to_cpu(req->cmd->common.dptr.sgl.length), le16_to_cpu(req->cqe->status)); - nvmet_tcp_handle_req_failure(queue, queue->cmd, req); - return 0; + return nvmet_tcp_handle_req_failure(queue, queue->cmd, req); } ret = nvmet_tcp_map_data(queue->cmd); @@ -1092,8 +1096,11 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) if (nvmet_tcp_need_data_in(queue->cmd)) { if (nvmet_tcp_has_inline_data(queue->cmd)) { queue->rcv_state = NVMET_TCP_RECV_DATA; - nvmet_tcp_build_pdu_iovec(queue->cmd); - return 0; + ret = nvmet_tcp_build_pdu_iovec(queue->cmd); + if (unlikely(ret)) + pr_err("queue %d: failed to build PDU iovec\n", + queue->idx); + return ret; } /* send back R2T */ nvmet_tcp_queue_response(&queue->cmd->req); -- cgit v1.2.3 From bad44c9c312f07b590ad7be892a95693baba976e Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 16 Mar 2026 15:39:36 +0100 Subject: nvmet-tcp: remove redundant calls to nvmet_tcp_fatal_error() Executing nvmet_tcp_fatal_error() is generally the responsibility of the caller (nvmet_tcp_try_recv); all other functions should just return the error code. Remove the nvmet_tcp_fatal_error() function, it's not needed anymore. Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Maurizio Lombardi Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 37 +++++++------------------------------ 1 file changed, 7 insertions(+), 30 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 3ade734c8bcf..a4c3c62e33f5 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -349,8 +349,6 @@ static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd) cmd->req.sg = NULL; } -static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue); - static int nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) { struct bio_vec *iov = cmd->iov; @@ -394,22 +392,13 @@ static int nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) return 0; } -static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) -{ - queue->rcv_state = NVMET_TCP_RECV_ERR; - if (queue->nvme_sq.ctrl) - nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl); - else - kernel_sock_shutdown(queue->sock, SHUT_RDWR); -} - static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status) { queue->rcv_state = NVMET_TCP_RECV_ERR; - if (status == -EPIPE || status == -ECONNRESET) + if (status == -EPIPE || status == -ECONNRESET || !queue->nvme_sq.ctrl) kernel_sock_shutdown(queue->sock, SHUT_RDWR); else - nvmet_tcp_fatal_error(queue); + nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl); } static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) @@ -885,7 +874,6 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) if (le32_to_cpu(icreq->hdr.plen) != sizeof(struct nvme_tcp_icreq_pdu)) { pr_err("bad nvme-tcp pdu length (%d)\n", le32_to_cpu(icreq->hdr.plen)); - nvmet_tcp_fatal_error(queue); return -EPROTO; } @@ -951,7 +939,6 @@ static int nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, ret = nvmet_tcp_map_data(cmd); if (unlikely(ret)) { pr_err("queue %d: failed to map data\n", queue->idx); - nvmet_tcp_fatal_error(queue); return -EPROTO; } @@ -1024,7 +1011,6 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) err_proto: /* FIXME: use proper transport errors */ - nvmet_tcp_fatal_error(queue); return -EPROTO; } @@ -1039,7 +1025,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) if (hdr->type != nvme_tcp_icreq) { pr_err("unexpected pdu type (%d) before icreq\n", hdr->type); - nvmet_tcp_fatal_error(queue); return -EPROTO; } return nvmet_tcp_handle_icreq(queue); @@ -1048,7 +1033,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) if (unlikely(hdr->type == nvme_tcp_icreq)) { pr_err("queue %d: received icreq pdu in state %d\n", queue->idx, queue->state); - nvmet_tcp_fatal_error(queue); return -EPROTO; } @@ -1065,7 +1049,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) pr_err("queue %d: out of commands (%d) send_list_len: %d, opcode: %d", queue->idx, queue->nr_cmds, queue->send_list_len, nvme_cmd->common.opcode); - nvmet_tcp_fatal_error(queue); return -ENOMEM; } @@ -1086,9 +1069,9 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) if (unlikely(ret)) { pr_err("queue %d: failed to map data\n", queue->idx); if (nvmet_tcp_has_inline_data(queue->cmd)) - nvmet_tcp_fatal_error(queue); - else - nvmet_req_complete(req, ret); + return -EPROTO; + + nvmet_req_complete(req, ret); ret = -EAGAIN; goto out; } @@ -1211,7 +1194,6 @@ recv: if (unlikely(!nvmet_tcp_pdu_valid(hdr->type))) { pr_err("unexpected pdu type %d\n", hdr->type); - nvmet_tcp_fatal_error(queue); return -EIO; } @@ -1225,16 +1207,12 @@ recv: } if (queue->hdr_digest && - nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) { - nvmet_tcp_fatal_error(queue); /* fatal */ + nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) return -EPROTO; - } if (queue->data_digest && - nvmet_tcp_check_ddgst(queue, &queue->pdu)) { - nvmet_tcp_fatal_error(queue); /* fatal */ + nvmet_tcp_check_ddgst(queue, &queue->pdu)) return -EPROTO; - } return nvmet_tcp_done_recv_pdu(queue); } @@ -1320,7 +1298,6 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED)) nvmet_req_uninit(&cmd->req); nvmet_tcp_free_cmd_buffers(cmd); - nvmet_tcp_fatal_error(queue); ret = -EPROTO; goto out; } -- cgit v1.2.3 From 5293a8882c549fab4a878bc76b0b6c951f980a61 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 8 Apr 2026 00:51:31 -0700 Subject: nvmet-tcp: fix race between ICReq handling and queue teardown nvmet_tcp_handle_icreq() updates queue->state after sending an Initialization Connection Response (ICResp), but it does so without serializing against target-side queue teardown. If an NVMe/TCP host sends an Initialization Connection Request (ICReq) and immediately closes the connection, target-side teardown may start in softirq context before io_work drains the already buffered ICReq. In that case, nvmet_tcp_schedule_release_queue() sets queue->state to NVMET_TCP_Q_DISCONNECTING and drops the queue reference under state_lock. If io_work later processes that ICReq, nvmet_tcp_handle_icreq() can still overwrite the state back to NVMET_TCP_Q_LIVE. That defeats the DISCONNECTING-state guard in nvmet_tcp_schedule_release_queue() and allows a later socket state change to re-enter teardown and issue a second kref_put() on an already released queue. The ICResp send failure path has the same problem. If teardown has already moved the queue to DISCONNECTING, a send error can still overwrite the state with NVMET_TCP_Q_FAILED, again reopening the window for a second teardown path to drop the queue reference. Fix this by serializing both post-send state transitions with state_lock and bailing out if teardown has already started. Use -ESHUTDOWN as an internal sentinel for that bail-out path rather than propagating it as a transport error like -ECONNRESET. Keep nvmet_tcp_socket_error() setting rcv_state to NVMET_TCP_RECV_ERR before honoring that sentinel so receive-side parsing stays quiesced until the existing release path completes. Fixes: c46a6465bac2 ("nvmet-tcp: add NVMe over TCP target driver") Cc: stable@vger.kernel.org Reported-by: Shivam Kumar Tested-by: Shivam Kumar Signed-off-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index a4c3c62e33f5..164a564ba3b4 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -394,6 +394,19 @@ static int nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status) { + /* + * Keep rcv_state at RECV_ERR even for the internal -ESHUTDOWN path. + * nvmet_tcp_handle_icreq() can return -ESHUTDOWN after the ICReq has + * already been consumed and queue teardown has started. + * + * If nvmet_tcp_data_ready() or nvmet_tcp_write_space() queues + * nvmet_tcp_io_work() again before nvmet_tcp_release_queue_work() + * cancels it, the queue must not keep that old receive state. + * Otherwise the next nvmet_tcp_io_work() run can reach + * nvmet_tcp_done_recv_pdu() and try to handle the same ICReq again. + * + * That is why queue->rcv_state needs to be updated before we return. + */ queue->rcv_state = NVMET_TCP_RECV_ERR; if (status == -EPIPE || status == -ECONNRESET || !queue->nvme_sq.ctrl) kernel_sock_shutdown(queue->sock, SHUT_RDWR); @@ -908,11 +921,24 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) iov.iov_len = sizeof(*icresp); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (ret < 0) { + spin_lock_bh(&queue->state_lock); + if (queue->state == NVMET_TCP_Q_DISCONNECTING) { + spin_unlock_bh(&queue->state_lock); + return -ESHUTDOWN; + } queue->state = NVMET_TCP_Q_FAILED; + spin_unlock_bh(&queue->state_lock); return ret; /* queue removal will cleanup */ } + spin_lock_bh(&queue->state_lock); + if (queue->state == NVMET_TCP_Q_DISCONNECTING) { + spin_unlock_bh(&queue->state_lock); + /* Tell nvmet_tcp_socket_error() teardown is in progress. */ + return -ESHUTDOWN; + } queue->state = NVMET_TCP_Q_LIVE; + spin_unlock_bh(&queue->state_lock); nvmet_prepare_receive_pdu(queue); return 0; } -- cgit v1.2.3 From 7f991e3f9b8f044640bcb5fa8570350a68932843 Mon Sep 17 00:00:00 2001 From: Alan Cui Date: Thu, 9 Apr 2026 16:15:25 +0800 Subject: nvme: add quirk NVME_QUIRK_IGNORE_DEV_SUBNQN for 144d:a808 (Samsung PM981/983/970 EVO Plus ) The firmware for Samsung 970 Evo Plus / PM981 / PM983 does not support SUBNQN. Make quirks to suppress warnings. # nvme id-ctrl /dev/nvme1n1 NVME Identify Controller: vid : 0x144d ssvid : 0x144d sn : *** mn : Samsung SSD 970 EVO Plus 500GB fr : 2B2QEXM7 mcdqpc : 0 subnqn : ioccsz : 0 Signed-off-by: Alan Cui Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9aa19255b041..c693308c6dd6 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -4102,6 +4102,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x144d, 0xa808), /* Samsung PM981/983 */ + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ -- cgit v1.2.3 From 7d435caacd91d23ebba281c4aac859196e1e2938 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 8 Apr 2026 08:03:57 +0000 Subject: nvme-multipath: drop head pointer check in nvme_mpath_clear_current_path() A NS will always have a head pointer, so drop the check. As proof in practice, all the nvme_mpath_clear_current_path() callers also dereference ns->head. This check has endured since the original changes to support multipath. Reviewed-by: Christoph Hellwig Signed-off-by: John Garry Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index ba00f0b72b85..263161cb8ac0 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -231,16 +231,12 @@ bool nvme_mpath_clear_current_path(struct nvme_ns *ns) bool changed = false; int node; - if (!head) - goto out; - for_each_node(node) { if (ns == rcu_access_pointer(head->current_path[node])) { rcu_assign_pointer(head->current_path[node], NULL); changed = true; } } -out: return changed; } -- cgit v1.2.3 From 0bd75b7abafb3ed199df830c539c57ef9b62c2a2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 10 Apr 2026 14:49:12 +0200 Subject: mailbox: prefix new constants with MBOX_ Commit 89e5d7d61600 ("mailbox: remove superfluous internal header") moved some constants to a public header but forgot to add a mailbox specific prefix. Add this now to prevent future collisions on a too generic naming. Link: https://sashiko.dev/#/patchset/20260327151112.5202-2-wsa%2Brenesas%40sang-engineering.com Signed-off-by: Wolfram Sang Reviewed-by: Sudeep Holla Signed-off-by: Jassi Brar --- drivers/mailbox/cix-mailbox.c | 2 +- drivers/mailbox/imx-mailbox.c | 2 +- drivers/mailbox/mailbox.c | 22 +++++++++++----------- drivers/mailbox/mtk-cmdq-mailbox.c | 2 +- drivers/mailbox/omap-mailbox.c | 2 +- drivers/mailbox/tegra-hsp.c | 2 +- include/linux/mailbox_controller.h | 6 +++--- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/mailbox/cix-mailbox.c b/drivers/mailbox/cix-mailbox.c index 8cfaa91b75bd..43c76cdab24a 100644 --- a/drivers/mailbox/cix-mailbox.c +++ b/drivers/mailbox/cix-mailbox.c @@ -413,7 +413,7 @@ static int cix_mbox_startup(struct mbox_chan *chan) switch (cp->type) { case CIX_MBOX_TYPE_DB: /* Overwrite txdone_method for DB channel */ - chan->txdone_method = TXDONE_BY_ACK; + chan->txdone_method = MBOX_TXDONE_BY_ACK; fallthrough; case CIX_MBOX_TYPE_REG: if (priv->dir == CIX_MBOX_TX) { diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c index 22331b579489..246a9a9e3952 100644 --- a/drivers/mailbox/imx-mailbox.c +++ b/drivers/mailbox/imx-mailbox.c @@ -732,7 +732,7 @@ static struct mbox_chan * imx_mu_xlate(struct mbox_controller *mbox, p_chan = &mbox->chans[chan]; if (type == IMX_MU_TYPE_TXDB_V2) - p_chan->txdone_method = TXDONE_BY_ACK; + p_chan->txdone_method = MBOX_TXDONE_BY_ACK; return p_chan; } diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 138ffbcd4fde..30eafdf3a91e 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -72,7 +72,7 @@ static void msg_submit(struct mbox_chan *chan) } } - if (!err && (chan->txdone_method & TXDONE_BY_POLL)) { + if (!err && (chan->txdone_method & MBOX_TXDONE_BY_POLL)) { /* kick start the timer immediately to avoid delays */ scoped_guard(spinlock_irqsave, &chan->mbox->poll_hrt_lock) hrtimer_start(&chan->mbox->poll_hrt, 0, HRTIMER_MODE_REL); @@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(mbox_chan_received_data); */ void mbox_chan_txdone(struct mbox_chan *chan, int r) { - if (unlikely(!(chan->txdone_method & TXDONE_BY_IRQ))) { + if (unlikely(!(chan->txdone_method & MBOX_TXDONE_BY_IRQ))) { dev_err(chan->mbox->dev, "Controller can't run the TX ticker\n"); return; @@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(mbox_chan_txdone); */ void mbox_client_txdone(struct mbox_chan *chan, int r) { - if (unlikely(!(chan->txdone_method & TXDONE_BY_ACK))) { + if (unlikely(!(chan->txdone_method & MBOX_TXDONE_BY_ACK))) { dev_err(chan->mbox->dev, "Client can't run the TX ticker\n"); return; } @@ -344,8 +344,8 @@ static int __mbox_bind_client(struct mbox_chan *chan, struct mbox_client *cl) chan->cl = cl; init_completion(&chan->tx_complete); - if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone) - chan->txdone_method = TXDONE_BY_ACK; + if (chan->txdone_method == MBOX_TXDONE_BY_POLL && cl->knows_txdone) + chan->txdone_method = MBOX_TXDONE_BY_ACK; } if (chan->mbox->ops->startup) { @@ -499,8 +499,8 @@ void mbox_free_channel(struct mbox_chan *chan) scoped_guard(spinlock_irqsave, &chan->lock) { chan->cl = NULL; chan->active_req = MBOX_NO_MSG; - if (chan->txdone_method == TXDONE_BY_ACK) - chan->txdone_method = TXDONE_BY_POLL; + if (chan->txdone_method == MBOX_TXDONE_BY_ACK) + chan->txdone_method = MBOX_TXDONE_BY_POLL; } module_put(chan->mbox->dev->driver->owner); @@ -531,13 +531,13 @@ int mbox_controller_register(struct mbox_controller *mbox) return -EINVAL; if (mbox->txdone_irq) - txdone = TXDONE_BY_IRQ; + txdone = MBOX_TXDONE_BY_IRQ; else if (mbox->txdone_poll) - txdone = TXDONE_BY_POLL; + txdone = MBOX_TXDONE_BY_POLL; else /* It has to be ACK then */ - txdone = TXDONE_BY_ACK; + txdone = MBOX_TXDONE_BY_ACK; - if (txdone == TXDONE_BY_POLL) { + if (txdone == MBOX_TXDONE_BY_POLL) { if (!mbox->ops->last_tx_done) { dev_err(mbox->dev, "last_tx_done method is absent\n"); diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index 547a10a8fad3..e523c84b4808 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -728,7 +728,7 @@ static int cmdq_probe(struct platform_device *pdev) cmdq->mbox.ops = &cmdq_mbox_chan_ops; cmdq->mbox.of_xlate = cmdq_xlate; - /* make use of TXDONE_BY_ACK */ + /* make use of MBOX_TXDONE_BY_ACK */ cmdq->mbox.txdone_irq = false; cmdq->mbox.txdone_poll = false; diff --git a/drivers/mailbox/omap-mailbox.c b/drivers/mailbox/omap-mailbox.c index 5772c6b9886a..535ca8020877 100644 --- a/drivers/mailbox/omap-mailbox.c +++ b/drivers/mailbox/omap-mailbox.c @@ -238,7 +238,7 @@ static int omap_mbox_startup(struct omap_mbox *mbox) } if (mbox->send_no_irq) - mbox->chan->txdone_method = TXDONE_BY_ACK; + mbox->chan->txdone_method = MBOX_TXDONE_BY_ACK; omap_mbox_enable_irq(mbox, IRQ_RX); diff --git a/drivers/mailbox/tegra-hsp.c b/drivers/mailbox/tegra-hsp.c index 7b1e1b83ea29..500fa77c7d53 100644 --- a/drivers/mailbox/tegra-hsp.c +++ b/drivers/mailbox/tegra-hsp.c @@ -514,7 +514,7 @@ static int tegra_hsp_mailbox_startup(struct mbox_chan *chan) struct tegra_hsp *hsp = mb->channel.hsp; unsigned long flags; - chan->txdone_method = TXDONE_BY_IRQ; + chan->txdone_method = MBOX_TXDONE_BY_IRQ; /* * Shared mailboxes start out as consumers by default. FULL and EMPTY diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index e3896b08f22e..a49ee687d4cf 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -15,9 +15,9 @@ struct mbox_chan; /* Sentinel value distinguishing "no active request" from "NULL message data" */ #define MBOX_NO_MSG ((void *)-1) -#define TXDONE_BY_IRQ BIT(0) /* controller has remote RTR irq */ -#define TXDONE_BY_POLL BIT(1) /* controller can read status of last TX */ -#define TXDONE_BY_ACK BIT(2) /* S/W ACK received by Client ticks the TX */ +#define MBOX_TXDONE_BY_IRQ BIT(0) /* controller has remote RTR irq */ +#define MBOX_TXDONE_BY_POLL BIT(1) /* controller can read status of last TX */ +#define MBOX_TXDONE_BY_ACK BIT(2) /* S/W ACK received by Client ticks the TX */ /** * struct mbox_chan_ops - methods to control mailbox channels -- cgit v1.2.3 From c02053a9055d5fdfd32432287cca8958db1d5bc5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 10 Apr 2026 14:53:00 +0200 Subject: mailbox: mailbox-test: free channels on probe error On probe error, free the previously obtained channels. This not only prevents a leak, but also UAF scenarios because the client structure will be removed nonetheless because it was allocated with devm. Link: https://sashiko.dev/#/patchset/20260327151217.5327-2-wsa%2Brenesas%40sang-engineering.com Fixes: 8ea4484d0c2b ("mailbox: Add generic mechanism for testing Mailbox Controllers") Signed-off-by: Wolfram Sang Signed-off-by: Jassi Brar --- drivers/mailbox/mailbox-test.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c index 058c0fe4b9c2..5e68f708205c 100644 --- a/drivers/mailbox/mailbox-test.c +++ b/drivers/mailbox/mailbox-test.c @@ -409,18 +409,27 @@ static int mbox_test_probe(struct platform_device *pdev) if (tdev->rx_channel) { tdev->rx_buffer = devm_kzalloc(&pdev->dev, MBOX_MAX_MSG_LEN, GFP_KERNEL); - if (!tdev->rx_buffer) - return -ENOMEM; + if (!tdev->rx_buffer) { + ret = -ENOMEM; + goto err_free_chans; + } } ret = mbox_test_add_debugfs(pdev, tdev); if (ret) - return ret; + goto err_free_chans; init_waitqueue_head(&tdev->waitq); dev_info(&pdev->dev, "Successfully registered\n"); return 0; + +err_free_chans: + if (tdev->tx_channel) + mbox_free_channel(tdev->tx_channel); + if (tdev->rx_channel) + mbox_free_channel(tdev->rx_channel); + return ret; } static void mbox_test_remove(struct platform_device *pdev) -- cgit v1.2.3 From 772a896a56e0e3ef9424a025cec9176f9d8f4552 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 11 Apr 2026 14:06:00 +0200 Subject: scsi: target: configfs: Bound snprintf() return in tg_pt_gp_members_show() target_tg_pt_gp_members_show() formats LUN paths with snprintf() into a 256-byte stack buffer, then will memcpy() cur_len bytes from that buffer. snprintf() returns the length the output would have had, which can exceed the buffer size when the fabric WWN is long because iSCSI IQN names can be up to 223 bytes. The check at the memcpy() site only guards the destination page write, not the source read, so memcpy() will read past the stack buffer and copy adjacent stack contents to the sysfs reader, which when CONFIG_FORTIFY_SOURCE is enabled, fortify_panic() will be triggered. Commit 27e06650a5ea ("scsi: target: target_core_configfs: Add length check to avoid buffer overflow") added the same bound to the target_lu_gp_members_show() but the tg_pt_gp variant was missed so resolve that here. Cc: Martin K. Petersen Fixes: c66ac9db8d4a ("[SCSI] target: Add LIO target core v4.0.0-rc6") Assisted-by: gregkh_clanker_t1000 Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026041159-garter-theft-3be0@gregkh Signed-off-by: Martin K. Petersen --- drivers/target/target_core_configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index d93773b3227c..2b19a956007b 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -3249,7 +3249,7 @@ static ssize_t target_tg_pt_gp_members_show(struct config_item *item, config_item_name(&lun->lun_group.cg_item)); cur_len++; /* Extra byte for NULL terminator */ - if ((cur_len + len) > PAGE_SIZE) { + if (cur_len > TG_PT_GROUP_NAME_BUF || (cur_len + len) > PAGE_SIZE) { pr_warn("Ran out of lu_gp_show_attr" "_members buffer\n"); break; -- cgit v1.2.3 From 7746e3bd4cc19b5092e00d32d676e329bfcb6900 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 10 Apr 2026 16:49:47 +0200 Subject: fanotify: fix false positive on permission events fsnotify_get_mark_safe() may return false for a mark on an unrelated group, which results in bypassing the permission check. Fix by skipping over detached marks that are not in the current group. CC: stable@vger.kernel.org Fixes: abc77577a669 ("fsnotify: Provide framework for dropping SRCU lock in ->handle_event") Signed-off-by: Miklos Szeredi Link: https://patch.msgid.link/20260410144950.156160-1-mszeredi@redhat.com Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 2 +- fs/notify/mark.c | 18 +++++++++++------- include/linux/fsnotify_backend.h | 1 + 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 9995de1710e5..b646a861a84c 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -388,7 +388,7 @@ static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector return hlist_entry_safe(node, struct fsnotify_mark, obj_list); } -static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) +struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) { struct hlist_node *node = NULL; diff --git a/fs/notify/mark.c b/fs/notify/mark.c index c2ed5b11b0fe..622f05977f86 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -457,9 +457,6 @@ EXPORT_SYMBOL_GPL(fsnotify_put_mark); */ static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) { - if (!mark) - return true; - if (refcount_inc_not_zero(&mark->refcnt)) { spin_lock(&mark->lock); if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) { @@ -500,15 +497,22 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) int type; fsnotify_foreach_iter_type(type) { + struct fsnotify_mark *mark = iter_info->marks[type]; + /* This can fail if mark is being removed */ - if (!fsnotify_get_mark_safe(iter_info->marks[type])) { - __release(&fsnotify_mark_srcu); - goto fail; + while (mark && !fsnotify_get_mark_safe(mark)) { + if (mark->group == iter_info->current_group) { + __release(&fsnotify_mark_srcu); + goto fail; + } + /* This is a mark in an unrelated group, skip */ + mark = fsnotify_next_mark(mark); + iter_info->marks[type] = mark; } } /* - * Now that both marks are pinned by refcount in the inode / vfsmount + * Now that all marks are pinned by refcount in the inode / vfsmount / etc * lists, we can drop SRCU lock, and safely resume the list iteration * once userspace returns. */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 95985400d3d8..e5cde39d6e85 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -915,6 +915,7 @@ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int obj_type); extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); +struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); -- cgit v1.2.3 From aade8abd8b868b6ffa9697aadaea28ec7f65bee6 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 8 Apr 2026 17:56:47 -0700 Subject: nvmet: avoid recursive nvmet-wq flush in nvmet_ctrl_free nvmet_tcp_release_queue_work() runs on nvmet-wq and can drop the final controller reference through nvmet_cq_put(). If that triggers nvmet_ctrl_free(), the teardown path flushes ctrl->async_event_work on the same nvmet-wq. Call chain: nvmet_tcp_schedule_release_queue() kref_put(&queue->kref, nvmet_tcp_release_queue) nvmet_tcp_release_queue() queue_work(nvmet_wq, &queue->release_work) <--- nvmet_wq process_one_work() nvmet_tcp_release_queue_work() nvmet_cq_put(&queue->nvme_cq) nvmet_cq_destroy() nvmet_ctrl_put(cq->ctrl) nvmet_ctrl_free() flush_work(&ctrl->async_event_work) <--- nvmet_wq Previously Scheduled by :- nvmet_add_async_event queue_work(nvmet_wq, &ctrl->async_event_work); This trips lockdep with a possible recursive locking warning. [ 5223.015876] run blktests nvme/003 at 2026-04-07 20:53:55 [ 5223.061801] loop0: detected capacity change from 0 to 2097152 [ 5223.072206] nvmet: adding nsid 1 to subsystem blktests-subsystem-1 [ 5223.088368] nvmet_tcp: enabling port 0 (127.0.0.1:4420) [ 5223.126086] nvmet: Created discovery controller 1 for subsystem nqn.2014-08.org.nvmexpress.discovery for NQN nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349. [ 5223.128453] nvme nvme1: new ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery", addr 127.0.0.1:4420, hostnqn: nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349 [ 5233.199447] nvme nvme1: Removing ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery" [ 5233.227718] ============================================ [ 5233.231283] WARNING: possible recursive locking detected [ 5233.234696] 7.0.0-rc3nvme+ #20 Tainted: G O N [ 5233.238434] -------------------------------------------- [ 5233.241852] kworker/u192:6/2413 is trying to acquire lock: [ 5233.245429] ffff888111632548 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: touch_wq_lockdep_map+0x26/0x90 [ 5233.251438] but task is already holding lock: [ 5233.255254] ffff888111632548 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: process_one_work+0x5cc/0x6e0 [ 5233.261125] other info that might help us debug this: [ 5233.265333] Possible unsafe locking scenario: [ 5233.269217] CPU0 [ 5233.270795] ---- [ 5233.272436] lock((wq_completion)nvmet-wq); [ 5233.275241] lock((wq_completion)nvmet-wq); [ 5233.278020] *** DEADLOCK *** [ 5233.281793] May be due to missing lock nesting notation [ 5233.286195] 3 locks held by kworker/u192:6/2413: [ 5233.289192] #0: ffff888111632548 ((wq_completion)nvmet-wq){+.+.}-{0:0}, at: process_one_work+0x5cc/0x6e0 [ 5233.294569] #1: ffffc9000e2a7e40 ((work_completion)(&queue->release_work)){+.+.}-{0:0}, at: process_one_work+0x1c5/0x6e0 [ 5233.300128] #2: ffffffff82d7dc40 (rcu_read_lock){....}-{1:3}, at: __flush_work+0x62/0x530 [ 5233.304290] stack backtrace: [ 5233.306520] CPU: 4 UID: 0 PID: 2413 Comm: kworker/u192:6 Tainted: G O N 7.0.0-rc3nvme+ #20 PREEMPT(full) [ 5233.306524] Tainted: [O]=OOT_MODULE, [N]=TEST [ 5233.306525] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014 [ 5233.306527] Workqueue: nvmet-wq nvmet_tcp_release_queue_work [nvmet_tcp] [ 5233.306532] Call Trace: [ 5233.306534] [ 5233.306536] dump_stack_lvl+0x73/0xb0 [ 5233.306552] print_deadlock_bug+0x225/0x2f0 [ 5233.306556] __lock_acquire+0x13f0/0x2290 [ 5233.306563] lock_acquire+0xd0/0x300 [ 5233.306565] ? touch_wq_lockdep_map+0x26/0x90 [ 5233.306571] ? __flush_work+0x20b/0x530 [ 5233.306573] ? touch_wq_lockdep_map+0x26/0x90 [ 5233.306577] touch_wq_lockdep_map+0x3b/0x90 [ 5233.306580] ? touch_wq_lockdep_map+0x26/0x90 [ 5233.306583] ? __flush_work+0x20b/0x530 [ 5233.306585] __flush_work+0x268/0x530 [ 5233.306588] ? __pfx_wq_barrier_func+0x10/0x10 [ 5233.306594] ? xen_error_entry+0x30/0x60 [ 5233.306600] nvmet_ctrl_free+0x140/0x310 [nvmet] [ 5233.306617] nvmet_cq_put+0x74/0x90 [nvmet] [ 5233.306629] nvmet_tcp_release_queue_work+0x19f/0x360 [nvmet_tcp] [ 5233.306634] process_one_work+0x206/0x6e0 [ 5233.306640] worker_thread+0x184/0x320 [ 5233.306643] ? __pfx_worker_thread+0x10/0x10 [ 5233.306646] kthread+0xf1/0x130 [ 5233.306648] ? __pfx_kthread+0x10/0x10 [ 5233.306651] ret_from_fork+0x355/0x450 [ 5233.306653] ? __pfx_kthread+0x10/0x10 [ 5233.306656] ret_from_fork_asm+0x1a/0x30 [ 5233.306664] There is also no need to flush async_event_work from controller teardown. The admin queue teardown already fails outstanding AER requests before the final controller put :- nvmet_sq_destroy(admin sq) nvmet_async_events_failall(ctrl) The controller has already been removed from the subsystem list before nvmet_ctrl_free() quiesces outstanding work. Replace flush_work() with cancel_work_sync() so a pending async_event_work item is canceled and a running instance is waited on without recursing into the same workqueue. Fixes: 06406d81a2d7 ("nvmet: cancel fatal error and flush async work before free controller") Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 33db6c5534e2..a87567f40c91 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1749,7 +1749,7 @@ static void nvmet_ctrl_free(struct kref *ref) nvmet_stop_keep_alive_timer(ctrl); - flush_work(&ctrl->async_event_work); + cancel_work_sync(&ctrl->async_event_work); cancel_work_sync(&ctrl->fatal_err_work); nvmet_destroy_auth(ctrl); -- cgit v1.2.3 From e80e39f25567310c1c7392eed886890b5c6788ba Mon Sep 17 00:00:00 2001 From: Flavio Suligoi Date: Wed, 8 Apr 2026 14:45:22 +0200 Subject: nvme-core: fix parameter name in comment In the declaration of the structure "core_quirks[]", in the comment referred to the devices "Kioxia CD6-V Series / HPE PE8030", the parameter "default_ps_max_latency_us" is reported in a wrong way: nvme_core.default_ps_max_latency=0 The correct form is, instead: nvme_core.default_ps_max_latency_us=0 Reviewed-by: Christoph Hellwig Signed-off-by: Flavio Suligoi Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b42d8768d297..0d623476e36f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3044,7 +3044,7 @@ static const struct nvme_core_quirk_entry core_quirks[] = { * * The device is left in a state where it is also not possible * to use "nvme set-feature" to disable APST, but booting with - * nvme_core.default_ps_max_latency=0 works. + * nvme_core.default_ps_max_latency_us=0 works. */ .vid = 0x1e0f, .mn = "KCD6XVUL6T40", -- cgit v1.2.3 From ba9d308ccd6732dd97ed8080d834a4a89e758e14 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 8 Apr 2026 17:18:14 +0300 Subject: nvme-apple: drop invalid put of admin queue reference count Commit 03b3bcd319b3 ("nvme: fix admin request_queue lifetime") moved the admin queue reference ->put call into nvme_free_ctrl() - a controller device release callback performed for every nvme driver doing nvme_init_ctrl(). nvme-apple sets refcount of the admin queue to 1 at allocation during the probe function and then puts it twice now: nvme_free_ctrl() blk_put_queue(ctrl->admin_q) // #1 ->free_ctrl() apple_nvme_free_ctrl() blk_put_queue(anv->ctrl.admin_q) // #2 Note that there is a commit 941f7298c70c ("nvme-apple: remove an extra queue reference") which intended to drop taking an extra admin queue reference. Looks like at that moment it accidentally fixed a refcount leak, which existed since the driver's introduction. There were two ->get calls at driver's probe function and a single ->put inside apple_nvme_free_ctrl(). However now after commit 03b3bcd319b3 ("nvme: fix admin request_queue lifetime") the refcount is imbalanced again. Fix it by removing extra ->put call from apple_nvme_free_ctrl(). anv->dev and ctrl->dev point to the same device, so use ctrl->dev directly for simplification. Compile tested only. Found by Linux Verification Center (linuxtesting.org). Fixes: 03b3bcd319b3 ("nvme: fix admin request_queue lifetime") Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Fedor Pchelkin Signed-off-by: Keith Busch --- drivers/nvme/host/apple.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index ed61b97fde59..423c9c628e7b 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1267,11 +1267,7 @@ static int apple_nvme_get_address(struct nvme_ctrl *ctrl, char *buf, int size) static void apple_nvme_free_ctrl(struct nvme_ctrl *ctrl) { - struct apple_nvme *anv = ctrl_to_apple_nvme(ctrl); - - if (anv->ctrl.admin_q) - blk_put_queue(anv->ctrl.admin_q); - put_device(anv->dev); + put_device(ctrl->dev); } static const struct nvme_ctrl_ops nvme_ctrl_ops = { -- cgit v1.2.3 From 20925812de7bf5e6fdc133c691ef52b33f700fbc Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 8 Apr 2026 18:19:56 +0200 Subject: nvme: expose TLS mode It is not possible to determine the active TLS mode from the presence or absence of sysfs attributes like tls_key, tls_configured_key, or dhchap_secret. With the introduction of the concat mode and optional DH-CHAP authentication, different configurations can result in identical sysfs state. This makes user space detection unreliable. Expose the TLS mode explicitly to allow user space to unambiguously identify the active configuration and avoid fragile heuristics in nvme-cli. Reviewed-by: Chris Leech Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/sysfs.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 7bf2e972126b..e59758616f27 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -883,10 +883,26 @@ static ssize_t tls_keyring_show(struct device *dev, } static DEVICE_ATTR_RO(tls_keyring); +static ssize_t tls_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + const char *mode; + + if (ctrl->opts->tls) + mode = "tls"; + else + mode = "concat"; + + return sysfs_emit(buf, "%s\n", mode); +} +static DEVICE_ATTR_RO(tls_mode); + static struct attribute *nvme_tls_attrs[] = { &dev_attr_tls_key.attr, &dev_attr_tls_configured_key.attr, &dev_attr_tls_keyring.attr, + &dev_attr_tls_mode.attr, NULL, }; @@ -908,6 +924,9 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj, if (a == &dev_attr_tls_keyring.attr && !ctrl->opts->keyring) return 0; + if (a == &dev_attr_tls_mode.attr && + !ctrl->opts->tls && !ctrl->opts->concat) + return 0; return a->mode; } -- cgit v1.2.3 From 3f150f0f010f234f34a67897344f18e68fe803f7 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 15 Apr 2026 15:53:58 +0000 Subject: nvme-multipath: put module reference when delayed removal work is canceled The delayed disk removal work is canceled when a NS (re)appears. However, we do not put the module reference grabbed in nvme_mpath_remove_disk(), so fix that. Reviewed-by: Christoph Hellwig Reviewed-by: Nilay Shroff Reviewed-by: Chaitanya Kulkarni Signed-off-by: John Garry Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0d623476e36f..fead3b7cd4be 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4083,7 +4083,8 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info) mutex_unlock(&ctrl->subsys->lock); #ifdef CONFIG_NVME_MULTIPATH - cancel_delayed_work(&head->remove_work); + if (cancel_delayed_work(&head->remove_work)) + module_put(THIS_MODULE); #endif return 0; -- cgit v1.2.3 From cf92d78a4aa2adbc2b1e687776aabe63c5b97f3f Mon Sep 17 00:00:00 2001 From: Tao Jiang Date: Thu, 16 Apr 2026 01:27:15 +0800 Subject: nvme-pci: add quirk for Memblaze Pblaze5 (0x1c5f:0x0555) The Memblaze Pblaze5 NVMe device (PCI ID 0x1c5f:0x0555) is detected as a controller on recent kernels (tested on 5.15.85 and 6.8.4), but no namespace is exposed. Tools like lsblk and fdisk do not report any block device. dmesg shows: nvme nvme0: missing or invalid SUBNQN field. The device works correctly on older kernels (e.g. 4.19), suggesting a compatibility issue with newer namespace handling. This indicates the device does not properly support the Namespace Descriptor List feature. Applying NVME_QUIRK_NO_NS_DESC_LIST allows the namespace to be discovered correctly. Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Tao Jiang Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c693308c6dd6..bcf68c198f74 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -4102,6 +4102,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x1c5f, 0x0555), /* Memblaze Pblaze5 adapter */ + .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, }, { PCI_DEVICE(0x144d, 0xa808), /* Samsung PM981/983 */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ -- cgit v1.2.3 From bc0fcb9823cd0894934cf968b525c575833d7078 Mon Sep 17 00:00:00 2001 From: Yilin Zhu Date: Sun, 12 Apr 2026 13:07:54 +0800 Subject: ipv6: xfrm6: release dst on error in xfrm6_rcv_encap() xfrm6_rcv_encap() performs an IPv6 route lookup when the skb does not already have a dst attached. ip6_route_input_lookup() returns a referenced dst entry even when the lookup resolves to an error route. If dst->error is set, xfrm6_rcv_encap() drops the skb without attaching the dst to the skb and without releasing the reference returned by the lookup. Repeated packets hitting this path therefore leak dst entries. Release the dst before jumping to the drop path. Fixes: 0146dca70b87 ("xfrm: add support for UDPv6 encapsulation of ESP") Cc: stable@kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Ruide Cao Signed-off-by: Yilin Zhu Signed-off-by: Ren Wei Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_protocol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index ea2f805d3b01..9b586fcec485 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -88,8 +88,10 @@ int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6, skb, flags); - if (dst->error) + if (dst->error) { + dst_release(dst); goto drop; + } skb_dst_set(skb, dst); } -- cgit v1.2.3 From 16d990a15491cf76cd6eef0846e1b4100e63261a Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Wed, 15 Apr 2026 17:26:55 +0800 Subject: KVM: s390: pci: fix GAIT table indexing due to double-scaling pointer arithmetic kvm_s390_pci_aif_enable(), kvm_s390_pci_aif_disable(), and aen_host_forward() index the GAIT by manually multiplying the index with sizeof(struct zpci_gaite). Since aift->gait is already a struct zpci_gaite pointer, this double-scales the offset, accessing element aisb*16 instead of aisb. This causes out-of-bounds accesses when aisb >= 32 (with ZPCI_NR_DEVICES=512) Fix by removing the erroneous sizeof multiplication. Fixes: 3c5a1b6f0a18 ("KVM: s390: pci: provide routines for enabling/disabling interrupt forwarding") Fixes: 73f91b004321 ("KVM: s390: pci: enable host forwarding of Adapter Event Notifications") Reported-by: Yuhao Jiang Cc: stable@vger.kernel.org Signed-off-by: Junrui Luo Reviewed-by: Christian Borntraeger Reviewed-by: Matthew Rosato Tested-by: Matthew Rosato Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 3 +-- arch/s390/kvm/pci.c | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 7cb8ce833b62..f48f25c7dc8f 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3307,8 +3307,7 @@ static void aen_host_forward(unsigned long si) struct zpci_gaite *gaite; struct kvm *kvm; - gaite = (struct zpci_gaite *)aift->gait + - (si * sizeof(struct zpci_gaite)); + gaite = aift->gait + si; if (gaite->count == 0) return; if (gaite->aisb != 0) diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index 86d93e8dddae..eed45af1a92d 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -290,8 +290,7 @@ static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib, phys_to_virt(fib->fmt0.aibv)); spin_lock_irq(&aift->gait_lock); - gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb * - sizeof(struct zpci_gaite)); + gaite = aift->gait + zdev->aisb; /* If assist not requested, host will get all alerts */ if (assist) @@ -357,8 +356,7 @@ static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force) if (zdev->kzdev->fib.fmt0.aibv == 0) goto out; spin_lock_irq(&aift->gait_lock); - gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb * - sizeof(struct zpci_gaite)); + gaite = aift->gait + zdev->aisb; isc = gaite->gisc; gaite->count--; if (gaite->count == 0) { -- cgit v1.2.3 From ccab51d69b1478b549ad0bbb38f556ab3bfb47ab Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Tue, 14 Apr 2026 11:02:31 +0100 Subject: KVM: arm64: Re-allow hyp tracing HVCs for [nh]VHE The introduction of __KVM_HOST_SMCCC_FUNC_MAX_NO_PKVM excluded hyp tracing HVCs from the common [nh]VHE/pKVM list. Re-allow them. Signed-off-by: Vincent Donnefort Link: https://patch.msgid.link/20260414100231.1859687-1-vdonnefort@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 16 ++++++++-------- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 37414440cee7..11dcdf434971 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -72,6 +72,14 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range, __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, + __KVM_HOST_SMCCC_FUNC___tracing_load, + __KVM_HOST_SMCCC_FUNC___tracing_unload, + __KVM_HOST_SMCCC_FUNC___tracing_enable, + __KVM_HOST_SMCCC_FUNC___tracing_swap_reader, + __KVM_HOST_SMCCC_FUNC___tracing_update_clock, + __KVM_HOST_SMCCC_FUNC___tracing_reset, + __KVM_HOST_SMCCC_FUNC___tracing_enable_event, + __KVM_HOST_SMCCC_FUNC___tracing_write_event, __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v5_save_apr, @@ -100,14 +108,6 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load, __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put, __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid, - __KVM_HOST_SMCCC_FUNC___tracing_load, - __KVM_HOST_SMCCC_FUNC___tracing_unload, - __KVM_HOST_SMCCC_FUNC___tracing_enable, - __KVM_HOST_SMCCC_FUNC___tracing_swap_reader, - __KVM_HOST_SMCCC_FUNC___tracing_update_clock, - __KVM_HOST_SMCCC_FUNC___tracing_reset, - __KVM_HOST_SMCCC_FUNC___tracing_enable_event, - __KVM_HOST_SMCCC_FUNC___tracing_write_event, }; #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 73f2e0221e70..8f7582d57ab5 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -709,6 +709,14 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_tlb_flush_vmid_range), HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), + HANDLE_FUNC(__tracing_load), + HANDLE_FUNC(__tracing_unload), + HANDLE_FUNC(__tracing_enable), + HANDLE_FUNC(__tracing_swap_reader), + HANDLE_FUNC(__tracing_update_clock), + HANDLE_FUNC(__tracing_reset), + HANDLE_FUNC(__tracing_enable_event), + HANDLE_FUNC(__tracing_write_event), HANDLE_FUNC(__vgic_v3_save_aprs), HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs), HANDLE_FUNC(__vgic_v5_save_apr), @@ -735,14 +743,6 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__pkvm_vcpu_load), HANDLE_FUNC(__pkvm_vcpu_put), HANDLE_FUNC(__pkvm_tlb_flush_vmid), - HANDLE_FUNC(__tracing_load), - HANDLE_FUNC(__tracing_unload), - HANDLE_FUNC(__tracing_enable), - HANDLE_FUNC(__tracing_swap_reader), - HANDLE_FUNC(__tracing_update_clock), - HANDLE_FUNC(__tracing_reset), - HANDLE_FUNC(__tracing_enable_event), - HANDLE_FUNC(__tracing_write_event), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) -- cgit v1.2.3 From a5b98009f16d8a5fb4a8ff9a193f5735515c38fa Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Tue, 14 Apr 2026 14:15:43 +0800 Subject: sched/psi: fix race between file release and pressure write A potential race condition exists between pressure write and cgroup file release regarding the priv member of struct kernfs_open_file, which triggers the uaf reported in [1]. Consider the following scenario involving execution on two separate CPUs: CPU0 CPU1 ==== ==== vfs_rmdir() kernfs_iop_rmdir() cgroup_rmdir() cgroup_kn_lock_live() cgroup_destroy_locked() cgroup_addrm_files() cgroup_rm_file() kernfs_remove_by_name() kernfs_remove_by_name_ns() vfs_write() __kernfs_remove() new_sync_write() kernfs_drain() kernfs_fop_write_iter() kernfs_drain_open_files() cgroup_file_write() kernfs_release_file() pressure_write() cgroup_file_release() ctx = of->priv; kfree(ctx); of->priv = NULL; cgroup_kn_unlock() cgroup_kn_lock_live() cgroup_get(cgrp) cgroup_kn_unlock() if (ctx->psi.trigger) // here, trigger uaf for ctx, that is of->priv The cgroup_rmdir() is protected by the cgroup_mutex, it also safeguards the memory deallocation of of->priv performed within cgroup_file_release(). However, the operations involving of->priv executed within pressure_write() are not entirely covered by the protection of cgroup_mutex. Consequently, if the code in pressure_write(), specifically the section handling the ctx variable executes after cgroup_file_release() has completed, a uaf vulnerability involving of->priv is triggered. Therefore, the issue can be resolved by extending the scope of the cgroup_mutex lock within pressure_write() to encompass all code paths involving of->priv, thereby properly synchronizing the race condition occurring between cgroup_file_release() and pressure_write(). And, if an live kn lock can be successfully acquired while executing the pressure write operation, it indicates that the cgroup deletion process has not yet reached its final stage; consequently, the priv pointer within open_file cannot be NULL. Therefore, the operation to retrieve the ctx value must be moved to a point *after* the live kn lock has been successfully acquired. In another situation, specifically after entering cgroup_kn_lock_live() but before acquiring cgroup_mutex, there exists a different class of race condition: CPU0: write memory.pressure CPU1: write cgroup.pressure=0 =========================== ============================= kernfs_fop_write_iter() kernfs_get_active_of(of) pressure_write() cgroup_kn_lock_live(memory.pressure) cgroup_tryget(cgrp) kernfs_break_active_protection(kn) ... blocks on cgroup_mutex cgroup_pressure_write() cgroup_kn_lock_live(cgroup.pressure) cgroup_file_show(memory.pressure, false) kernfs_show(false) kernfs_drain_open_files() cgroup_file_release(of) kfree(ctx) of->priv = NULL cgroup_kn_unlock() ... acquires cgroup_mutex ctx = of->priv; // may now be NULL if (ctx->psi.trigger) // NULL dereference Consequently, there is a possibility that of->priv is NULL, the pressure write needs to check for this. Now that the scope of the cgroup_mutex has been expanded, the original explicit cgroup_get/put operations are no longer necessary, this is because acquiring/releasing the live kn lock inherently executes a cgroup get/put operation. [1] BUG: KASAN: slab-use-after-free in pressure_write+0xa4/0x210 kernel/cgroup/cgroup.c:4011 Call Trace: pressure_write+0xa4/0x210 kernel/cgroup/cgroup.c:4011 cgroup_file_write+0x36f/0x790 kernel/cgroup/cgroup.c:4311 kernfs_fop_write_iter+0x3b0/0x540 fs/kernfs/file.c:352 Allocated by task 9352: cgroup_file_open+0x90/0x3a0 kernel/cgroup/cgroup.c:4256 kernfs_fop_open+0x9eb/0xcb0 fs/kernfs/file.c:724 do_dentry_open+0x83d/0x13e0 fs/open.c:949 Freed by task 9353: cgroup_file_release+0xd6/0x100 kernel/cgroup/cgroup.c:4283 kernfs_release_file fs/kernfs/file.c:764 [inline] kernfs_drain_open_files+0x392/0x720 fs/kernfs/file.c:834 kernfs_drain+0x470/0x600 fs/kernfs/dir.c:525 Fixes: 0e94682b73bf ("psi: introduce psi monitor") Reported-by: syzbot+33e571025d88efd1312c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=33e571025d88efd1312c Tested-by: syzbot+33e571025d88efd1312c@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Reviewed-by: Chen Ridong Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 1f084ee71443..3243c2087ee3 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3934,33 +3934,41 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) static ssize_t pressure_write(struct kernfs_open_file *of, char *buf, size_t nbytes, enum psi_res res) { - struct cgroup_file_ctx *ctx = of->priv; + struct cgroup_file_ctx *ctx; struct psi_trigger *new; struct cgroup *cgrp; struct psi_group *psi; + ssize_t ret = 0; cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENODEV; - cgroup_get(cgrp); - cgroup_kn_unlock(of->kn); + ctx = of->priv; + if (!ctx) { + ret = -ENODEV; + goto out_unlock; + } /* Allow only one trigger per file descriptor */ if (ctx->psi.trigger) { - cgroup_put(cgrp); - return -EBUSY; + ret = -EBUSY; + goto out_unlock; } psi = cgroup_psi(cgrp); new = psi_trigger_create(psi, buf, res, of->file, of); if (IS_ERR(new)) { - cgroup_put(cgrp); - return PTR_ERR(new); + ret = PTR_ERR(new); + goto out_unlock; } smp_store_release(&ctx->psi.trigger, new); - cgroup_put(cgrp); + +out_unlock: + cgroup_kn_unlock(of->kn); + if (ret) + return ret; return nbytes; } -- cgit v1.2.3 From c802f460dd485c1332b5a35e7adcfb2bc22536a2 Mon Sep 17 00:00:00 2001 From: cuitao Date: Tue, 14 Apr 2026 09:53:27 +0800 Subject: cgroup/rdma: fix integer overflow in rdmacg_try_charge() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The expression `rpool->resources[index].usage + 1` is computed in int arithmetic before being assigned to s64 variable `new`. When usage equals INT_MAX (the default "max" value), the addition overflows to INT_MIN. This negative value then passes the `new > max` check incorrectly, allowing a charge that should be rejected and corrupting usage to negative. Fix by casting usage to s64 before the addition so the arithmetic is done in 64-bit. Fixes: 39d3e7584a68 ("rdmacg: Added rdma cgroup controller") Signed-off-by: cuitao Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c index 9967fb25c563..4fdab4cf49e0 100644 --- a/kernel/cgroup/rdma.c +++ b/kernel/cgroup/rdma.c @@ -283,7 +283,7 @@ int rdmacg_try_charge(struct rdma_cgroup **rdmacg, ret = PTR_ERR(rpool); goto err; } else { - new = rpool->resources[index].usage + 1; + new = (s64)rpool->resources[index].usage + 1; if (new > rpool->resources[index].max) { ret = -EAGAIN; goto err; -- cgit v1.2.3 From 41d701ddc36d5301b44ea79529f3cf03c541c1e1 Mon Sep 17 00:00:00 2001 From: Guopeng Zhang Date: Fri, 17 Apr 2026 11:37:41 +0800 Subject: cgroup/cpuset: record DL BW alloc CPU for attach rollback cpuset_can_attach() allocates DL bandwidth only when migrating deadline tasks to a disjoint CPU mask, but cpuset_cancel_attach() rolls back based only on nr_migrate_dl_tasks. This makes the DL bandwidth alloc/free paths asymmetric: rollback can call dl_bw_free() even when no dl_bw_alloc() was done. Rollback also needs to undo the reservation against the same CPU/root domain that was charged. Record the CPU used by dl_bw_alloc() and use that state in cpuset_cancel_attach(). If no allocation happened, dl_bw_cpu stays at -1 and rollback skips dl_bw_free(). If allocation did happen, bandwidth is returned to the same CPU/root domain. Successful attach paths are unchanged. This only fixes failed attach rollback accounting. Fixes: 2ef269ef1ac0 ("cgroup/cpuset: Free DL BW in case can_attach() fails") Signed-off-by: Guopeng Zhang Reviewed-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset-internal.h | 5 +++++ kernel/cgroup/cpuset.c | 13 +++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/kernel/cgroup/cpuset-internal.h b/kernel/cgroup/cpuset-internal.h index fd7d19842ded..bb4e692bea30 100644 --- a/kernel/cgroup/cpuset-internal.h +++ b/kernel/cgroup/cpuset-internal.h @@ -168,6 +168,11 @@ struct cpuset { int nr_deadline_tasks; int nr_migrate_dl_tasks; u64 sum_migrate_dl_bw; + /* + * CPU used for temporary DL bandwidth allocation during attach; + * -1 if no DL bandwidth was allocated in the current attach. + */ + int dl_bw_cpu; /* Invalid partition error code, not lock protected */ enum prs_errcode prs_err; diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 1335e437098e..e3a081a07c6d 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -288,6 +288,7 @@ struct cpuset top_cpuset = { .flags = BIT(CS_CPU_EXCLUSIVE) | BIT(CS_MEM_EXCLUSIVE) | BIT(CS_SCHED_LOAD_BALANCE), .partition_root_state = PRS_ROOT, + .dl_bw_cpu = -1, }; /** @@ -579,6 +580,8 @@ static struct cpuset *dup_or_alloc_cpuset(struct cpuset *cs) if (!trial) return NULL; + trial->dl_bw_cpu = -1; + /* Setup cpumask pointer array */ cpumask_var_t *pmask[4] = { &trial->cpus_allowed, @@ -2980,6 +2983,7 @@ static void reset_migrate_dl_data(struct cpuset *cs) { cs->nr_migrate_dl_tasks = 0; cs->sum_migrate_dl_bw = 0; + cs->dl_bw_cpu = -1; } /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */ @@ -3056,6 +3060,8 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) reset_migrate_dl_data(cs); goto out_unlock; } + + cs->dl_bw_cpu = cpu; } out_success: @@ -3080,12 +3086,11 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset) mutex_lock(&cpuset_mutex); dec_attach_in_progress_locked(cs); - if (cs->nr_migrate_dl_tasks) { - int cpu = cpumask_any(cs->effective_cpus); + if (cs->dl_bw_cpu >= 0) + dl_bw_free(cs->dl_bw_cpu, cs->sum_migrate_dl_bw); - dl_bw_free(cpu, cs->sum_migrate_dl_bw); + if (cs->nr_migrate_dl_tasks) reset_migrate_dl_data(cs); - } mutex_unlock(&cpuset_mutex); } -- cgit v1.2.3 From f05799491d6a2a29d8e15f4451e685c4a6e13d8f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Apr 2026 17:05:28 +0100 Subject: KVM: arm64: pkvm: Adopt MARKER() to define host hypercall ranges The EL2 code defines ranges of host hypercalls that are either enabled at boot-time only, used by [nh]VHE KVM, or reserved to pKVM. The way these ranges are delineated is error prone, as the enum symbols defining the limits are expressed in terms of actual function symbols. This means that should a new function be added, special care must be taken to also update the limit symbol. Improve this by reusing the mechanism introduced for the vcpu_sysreg enum, which uses a MARKER() macro and some extra trickery to make the limit symbol standalone. Crucially, the limit symbol has the same value as the *following* symbol. The handle_host_hcall() function is then updated to make use of the new limit definitions and get rid of the brittle default upper limit. This allows for some more strict checks at build time, and the removal of an comparison at run time. Tested-by: Fuad Tabba Reviewed-by: Fuad Tabba Link: https://patch.msgid.link/20260414160528.2218858-1-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 12 ++++++++++-- arch/arm64/include/asm/kvm_host.h | 3 --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 10 +++++----- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 11dcdf434971..043495f7fc78 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -50,6 +50,9 @@ #include +#define MARKER(m) \ + m, __after_##m = m - 1 + enum __kvm_host_smccc_func { /* Hypercalls that are unavailable once pKVM has finalised. */ /* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */ @@ -59,8 +62,10 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs, __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs, __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config, + + MARKER(__KVM_HOST_SMCCC_FUNC_MIN_PKVM), + __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize, - __KVM_HOST_SMCCC_FUNC_MIN_PKVM = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize, /* Hypercalls that are always available and common to [nh]VHE/pKVM. */ __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc, @@ -84,7 +89,8 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v5_save_apr, __KVM_HOST_SMCCC_FUNC___vgic_v5_restore_vmcr_apr, - __KVM_HOST_SMCCC_FUNC_MAX_NO_PKVM = __KVM_HOST_SMCCC_FUNC___vgic_v5_restore_vmcr_apr, + + MARKER(__KVM_HOST_SMCCC_FUNC_PKVM_ONLY), /* Hypercalls that are available only when pKVM has finalised. */ __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp, @@ -108,6 +114,8 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load, __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put, __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid, + + MARKER(__KVM_HOST_SMCCC_FUNC_MAX) }; #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 851f6171751c..44211e86f5eb 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -450,9 +450,6 @@ struct kvm_vcpu_fault_info { r = __VNCR_START__ + ((VNCR_ ## r) / 8), \ __after_##r = __MAX__(__before_##r - 1, r) -#define MARKER(m) \ - m, __after_##m = m - 1 - enum vcpu_sysreg { __INVALID_SYSREG__, /* 0 is reserved as an invalid value */ MPIDR_EL1, /* MultiProcessor Affinity Register */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 8f7582d57ab5..1de9c70599c6 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -748,9 +748,11 @@ static const hcall_t host_hcall[] = { static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(unsigned long, id, host_ctxt, 0); - unsigned long hcall_min = 0, hcall_max = -1; + unsigned long hcall_min = 0, hcall_max = __KVM_HOST_SMCCC_FUNC_MAX; hcall_t hfn; + BUILD_BUG_ON(ARRAY_SIZE(host_hcall) != __KVM_HOST_SMCCC_FUNC_MAX); + /* * If pKVM has been initialised then reject any calls to the * early "privileged" hypercalls. Note that we cannot reject @@ -763,16 +765,14 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) if (static_branch_unlikely(&kvm_protected_mode_initialized)) { hcall_min = __KVM_HOST_SMCCC_FUNC_MIN_PKVM; } else { - hcall_max = __KVM_HOST_SMCCC_FUNC_MAX_NO_PKVM; + hcall_max = __KVM_HOST_SMCCC_FUNC_PKVM_ONLY; } id &= ~ARM_SMCCC_CALL_HINTS; id -= KVM_HOST_SMCCC_ID(0); - if (unlikely(id < hcall_min || id > hcall_max || - id >= ARRAY_SIZE(host_hcall))) { + if (unlikely(id < hcall_min || id >= hcall_max)) goto inval; - } hfn = host_hcall[id]; if (unlikely(!hfn)) -- cgit v1.2.3 From c1aad75595fb67edc7fda8af249d3b886efa1be9 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 13 Apr 2026 12:42:38 +0200 Subject: mailbox: add sanity check for channel array Fail gracefully if there is no channel array attached to the mailbox controller. Otherwise the later dereference will cause an OOPS which might not be seen because mailbox controllers might instantiate very early. Remove the comment explaining the obvious while here. Fixes: 2b6d83e2b8b7 ("mailbox: Introduce framework for mailbox") Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Signed-off-by: Jassi Brar --- drivers/mailbox/mailbox.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 30eafdf3a91e..bbc9fd75a95f 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -526,8 +526,7 @@ int mbox_controller_register(struct mbox_controller *mbox) { int i, txdone; - /* Sanity check */ - if (!mbox || !mbox->dev || !mbox->ops || !mbox->num_chans) + if (!mbox || !mbox->dev || !mbox->ops || !mbox->chans || !mbox->num_chans) return -EINVAL; if (mbox->txdone_irq) -- cgit v1.2.3 From a068c4d42c035c63b26ff91c394e6dc2cb7dc5d0 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 13 Apr 2026 12:42:39 +0200 Subject: mailbox: update kdoc for struct mbox_controller Add field for missing lock around the hrtimer. Add 'Required' where the core checks for valid entries. Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Signed-off-by: Jassi Brar --- include/linux/mailbox_controller.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index a49ee687d4cf..dc93287a2a01 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -62,10 +62,10 @@ struct mbox_chan_ops { /** * struct mbox_controller - Controller of a class of communication channels - * @dev: Device backing this controller - * @ops: Operators that work on each communication chan - * @chans: Array of channels - * @num_chans: Number of channels in the 'chans' array. + * @dev: Device backing this controller. Required. + * @ops: Operators that work on each communication chan. Required. + * @chans: Array of channels. Required. + * @num_chans: Number of channels in the 'chans' array. Required. * @txdone_irq: Indicates if the controller can report to API when * the last transmitted data was read by the remote. * Eg, if it has some TX ACK irq. @@ -78,6 +78,7 @@ struct mbox_chan_ops { * @of_xlate: Controller driver specific mapping of channel via DT * @poll_hrt: API private. hrtimer used to poll for TXDONE on all * channels. + * @poll_hrt_lock: API private. Lock protecting access to poll_hrt. * @node: API private. To hook into list of controllers. */ struct mbox_controller { -- cgit v1.2.3 From dd9aa1f269000d679f4ec12b32abacfc8d921413 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 17 Apr 2026 09:42:33 +0200 Subject: mailbox: mailbox-test: handle channel errors consistently mbox_test_request_channel() returns either an ERR_PTR or NULL. The callers, however, mostly checked for non-NULL which allows for bogus code paths when an ERR_PTR is treated like a valid channel. A later commit tried to fix it in one place but missed the other ones. Because the ERR_PTR is only used for -ENOMEM once and is converted to -EPROBE_DEFER anyhow, convert the callee to only return NULL which simplifies handling a lot and makes it less error prone. Fixes: 8ea4484d0c2b ("mailbox: Add generic mechanism for testing Mailbox Controllers") Fixes: 9b63a810c6f9 ("mailbox: mailbox-test: Fix an error check in mbox_test_probe()") Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Signed-off-by: Jassi Brar --- drivers/mailbox/mailbox-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c index 5e68f708205c..daf4b6f27d11 100644 --- a/drivers/mailbox/mailbox-test.c +++ b/drivers/mailbox/mailbox-test.c @@ -336,7 +336,7 @@ mbox_test_request_channel(struct platform_device *pdev, const char *name) client = devm_kzalloc(&pdev->dev, sizeof(*client), GFP_KERNEL); if (!client) - return ERR_PTR(-ENOMEM); + return NULL; client->dev = &pdev->dev; client->rx_callback = mbox_test_receive_message; @@ -393,7 +393,7 @@ static int mbox_test_probe(struct platform_device *pdev) tdev->tx_channel = mbox_test_request_channel(pdev, "tx"); tdev->rx_channel = mbox_test_request_channel(pdev, "rx"); - if (IS_ERR_OR_NULL(tdev->tx_channel) && IS_ERR_OR_NULL(tdev->rx_channel)) + if (!tdev->tx_channel && !tdev->rx_channel) return -EPROBE_DEFER; /* If Rx is not specified but has Rx MMIO, then Rx = Tx */ -- cgit v1.2.3 From 88ebadbf0deefdaccdab868b44ff70a0a257f473 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 17 Apr 2026 09:42:34 +0200 Subject: mailbox: mailbox-test: don't free the reused channel The RX channel can be aliased to the TX channel if it has a different MMIO. This special case needs to be handled when freeing the channels otherwise a double-free occurs. Fixes: 8ea4484d0c2b ("mailbox: Add generic mechanism for testing Mailbox Controllers") Signed-off-by: Wolfram Sang Signed-off-by: Jassi Brar --- drivers/mailbox/mailbox-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c index daf4b6f27d11..0a56b593fcac 100644 --- a/drivers/mailbox/mailbox-test.c +++ b/drivers/mailbox/mailbox-test.c @@ -427,7 +427,7 @@ static int mbox_test_probe(struct platform_device *pdev) err_free_chans: if (tdev->tx_channel) mbox_free_channel(tdev->tx_channel); - if (tdev->rx_channel) + if (tdev->rx_channel && tdev->rx_channel != tdev->tx_channel) mbox_free_channel(tdev->rx_channel); return ret; } @@ -440,7 +440,7 @@ static void mbox_test_remove(struct platform_device *pdev) if (tdev->tx_channel) mbox_free_channel(tdev->tx_channel); - if (tdev->rx_channel) + if (tdev->rx_channel && tdev->rx_channel != tdev->tx_channel) mbox_free_channel(tdev->rx_channel); } -- cgit v1.2.3 From bbcf9af68bfedb3d9cc3c7eae62f5c844d8b78b9 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 17 Apr 2026 09:42:35 +0200 Subject: mailbox: mailbox-test: initialize struct earlier The waitqueue must be initialized before the debugfs files are created because from that time, requests from userspace can already be made. Similarily, drvdata and spinlock needs to be initialized before we request the channel, otherwise dangling irqs might run into problems like a NULL pointer exception. Fixes: 8ea4484d0c2b ("mailbox: Add generic mechanism for testing Mailbox Controllers") Signed-off-by: Wolfram Sang Signed-off-by: Jassi Brar --- drivers/mailbox/mailbox-test.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c index 0a56b593fcac..ec591616fe46 100644 --- a/drivers/mailbox/mailbox-test.c +++ b/drivers/mailbox/mailbox-test.c @@ -382,6 +382,12 @@ static int mbox_test_probe(struct platform_device *pdev) if (!tdev) return -ENOMEM; + tdev->dev = &pdev->dev; + spin_lock_init(&tdev->lock); + mutex_init(&tdev->mutex); + init_waitqueue_head(&tdev->waitq); + platform_set_drvdata(pdev, tdev); + /* It's okay for MMIO to be NULL */ tdev->tx_mmio = mbox_test_ioremap(pdev, 0); @@ -400,12 +406,6 @@ static int mbox_test_probe(struct platform_device *pdev) if (!tdev->rx_channel && (tdev->rx_mmio != tdev->tx_mmio)) tdev->rx_channel = tdev->tx_channel; - tdev->dev = &pdev->dev; - platform_set_drvdata(pdev, tdev); - - spin_lock_init(&tdev->lock); - mutex_init(&tdev->mutex); - if (tdev->rx_channel) { tdev->rx_buffer = devm_kzalloc(&pdev->dev, MBOX_MAX_MSG_LEN, GFP_KERNEL); @@ -419,7 +419,6 @@ static int mbox_test_probe(struct platform_device *pdev) if (ret) goto err_free_chans; - init_waitqueue_head(&tdev->waitq); dev_info(&pdev->dev, "Successfully registered\n"); return 0; -- cgit v1.2.3 From 6e937f4e769e60947909e3525965f0137b9039e8 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 17 Apr 2026 09:42:36 +0200 Subject: mailbox: mailbox-test: make data_ready a per-instance variable While not the default case, multiple tests can be run simultaneously. Then, data_ready being a global variable will be overwritten and the per-instance lock will not help. Turn the global variable into a per-instance one to avoid this problem. Fixes: e339c80af95e ("mailbox: mailbox-test: don't rely on rx_buffer content to signal data ready") Signed-off-by: Wolfram Sang Signed-off-by: Jassi Brar --- drivers/mailbox/mailbox-test.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c index ec591616fe46..7b6ef033e77a 100644 --- a/drivers/mailbox/mailbox-test.c +++ b/drivers/mailbox/mailbox-test.c @@ -28,8 +28,6 @@ #define MBOX_HEXDUMP_MAX_LEN (MBOX_HEXDUMP_LINE_LEN * \ (MBOX_MAX_MSG_LEN / MBOX_BYTES_PER_LINE)) -static bool mbox_data_ready; - struct mbox_test_device { struct device *dev; void __iomem *tx_mmio; @@ -42,6 +40,7 @@ struct mbox_test_device { spinlock_t lock; struct mutex mutex; wait_queue_head_t waitq; + bool data_ready; struct fasync_struct *async_queue; struct dentry *root_debugfs_dir; }; @@ -162,7 +161,7 @@ static bool mbox_test_message_data_ready(struct mbox_test_device *tdev) unsigned long flags; spin_lock_irqsave(&tdev->lock, flags); - data_ready = mbox_data_ready; + data_ready = tdev->data_ready; spin_unlock_irqrestore(&tdev->lock, flags); return data_ready; @@ -227,7 +226,7 @@ static ssize_t mbox_test_message_read(struct file *filp, char __user *userbuf, *(touser + l) = '\0'; memset(tdev->rx_buffer, 0, MBOX_MAX_MSG_LEN); - mbox_data_ready = false; + tdev->data_ready = false; spin_unlock_irqrestore(&tdev->lock, flags); @@ -297,7 +296,7 @@ static void mbox_test_receive_message(struct mbox_client *client, void *message) message, MBOX_MAX_MSG_LEN); memcpy(tdev->rx_buffer, message, MBOX_MAX_MSG_LEN); } - mbox_data_ready = true; + tdev->data_ready = true; spin_unlock_irqrestore(&tdev->lock, flags); wake_up_interruptible(&tdev->waitq); -- cgit v1.2.3 From 73bd1227787bfe73eea3d04c63a89cb55db9c23e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 18 Apr 2026 09:41:21 +0800 Subject: rhashtable: Restore insecure_elasticity toggle Some users of rhashtable cannot handle insertion failures, and are happy to accept the consequences of a hash table that having very long chains. Restore the insecure_elasticity toggle for these users. In addition to disabling the chain length checks, this also removes the emergency resize that would otherwise occur when the hash table occupancy hits 100% (an async resize is still scheduled at 75%). Signed-off-by: Herbert Xu Signed-off-by: Tejun Heo --- include/linux/rhashtable-types.h | 2 ++ include/linux/rhashtable.h | 5 +++-- lib/rhashtable.c | 5 +++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h index 015c8298bebc..72082428d6c6 100644 --- a/include/linux/rhashtable-types.h +++ b/include/linux/rhashtable-types.h @@ -49,6 +49,7 @@ typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, * @head_offset: Offset of rhash_head in struct to be hashed * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking + * @insecure_elasticity: Set to true to disable chain length checks * @automatic_shrinking: Enable automatic shrinking of tables * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object @@ -61,6 +62,7 @@ struct rhashtable_params { u16 head_offset; unsigned int max_size; u16 min_size; + bool insecure_elasticity; bool automatic_shrinking; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 0480509a6339..7def3f0f556b 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -821,14 +821,15 @@ slow_path: goto out; } - if (elasticity <= 0) + if (elasticity <= 0 && !params.insecure_elasticity) goto slow_path; data = ERR_PTR(-E2BIG); if (unlikely(rht_grow_above_max(ht, tbl))) goto out_unlock; - if (unlikely(rht_grow_above_100(ht, tbl))) + if (unlikely(rht_grow_above_100(ht, tbl)) && + !params.insecure_elasticity) goto slow_path; /* Inserting at head of list makes unlocking free. */ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 6074ed5f66f3..fb2b7bc137ba 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -538,7 +538,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, return NULL; } - if (elasticity <= 0) + if (elasticity <= 0 && !ht->p.insecure_elasticity) return ERR_PTR(-EAGAIN); return ERR_PTR(-ENOENT); @@ -568,7 +568,8 @@ static struct bucket_table *rhashtable_insert_one( if (unlikely(rht_grow_above_max(ht, tbl))) return ERR_PTR(-E2BIG); - if (unlikely(rht_grow_above_100(ht, tbl))) + if (unlikely(rht_grow_above_100(ht, tbl)) && + !ht->p.insecure_elasticity) return ERR_PTR(-EAGAIN); head = rht_ptr(bkt, tbl, hash); -- cgit v1.2.3 From 87019cb6c26178cef8fb9f9265b6ab7c4bda5262 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2026 05:33:41 -1000 Subject: sched_ext: Mark scx_sched_hash insecure_elasticity scx_sched_hash is inserted into under scx_sched_lock (raw_spinlock_irq) in scx_link_sched(). rhashtable's sync grow path calls get_random_u32() and does a GFP_ATOMIC allocation; both acquire regular spinlocks, which is unsafe under raw_spinlock_t. Set insecure_elasticity to skip the sync grow. v2: - Dropped dsq_hash changes. Insertion is not under raw_spin_lock. - Switched from no_sync_grow flag to insecure_elasticity. Fixes: 25037af712eb ("sched_ext: Add rhashtable lookup for sub-schedulers") Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 012ca8bd70fb..7edd46f3ac43 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -32,6 +32,7 @@ static const struct rhashtable_params scx_sched_hash_params = { .key_len = sizeof_field(struct scx_sched, ops.sub_cgroup_id), .key_offset = offsetof(struct scx_sched, ops.sub_cgroup_id), .head_offset = offsetof(struct scx_sched, hash_node), + .insecure_elasticity = true, /* inserted under scx_sched_lock */ }; static struct rhashtable scx_sched_hash; -- cgit v1.2.3 From ec54093e6a8f87e800bb6aa15eb7fc1e33faa524 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 19 Apr 2026 18:35:42 -0400 Subject: xfrm: ah: account for ESN high bits in async callbacks AH allocates its temporary auth/ICV layout differently when ESN is enabled: the async ahash setup appends a 4-byte seqhi slot before the ICV or auth_data area, but the async completion callbacks still reconstruct the temporary layout as if seqhi were absent. With an async AH implementation selected, that makes AH copy or compare the wrong bytes on both the IPv4 and IPv6 paths. In UML repro on IPv4 AH with ESN and forced async hmac(sha1), ping fails with 100% packet loss, and the callback logs show the pre-fix drift: ah4 output_done: esn=1 err=0 icv_off=20 expected_off=24 ah4 input_done: esn=1 auth_off=20 expected_auth_off=24 icv_off=32 expected_icv_off=36 Reconstruct the callback-side layout the same way the setup path built it by skipping the ESN seqhi slot before locating the saved auth_data or ICV. Per RFC 4302, the ESN high-order 32 bits participate in the AH ICV computation, so the async callbacks must account for the seqhi slot. Post-fix, the same IPv4 AH+ESN+forced-async-hmac(sha1) UML repro shows the corrected offset (ah4 output_done: esn=1 err=0 icv_off=24 expected_off=24) and ping succeeds; net/ipv4/ah4.o and net/ipv6/ah6.o build clean at W=1. IPv6 AH+ESN was not exercised at runtime, and the change has not been tested against a real async hardware AH engine. Fixes: d4d573d0334d ("{IPv4,xfrm} Add ESN support for AH egress part") Fixes: d8b2a8600b0e ("{IPv4,xfrm} Add ESN support for AH ingress part") Fixes: 26dd70c3fad3 ("{IPv6,xfrm} Add ESN support for AH egress part") Fixes: 8d6da6f32557 ("{IPv6,xfrm} Add ESN support for AH ingress part") Cc: stable@vger.kernel.org Assisted-by: Codex:gpt-5-4 Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Signed-off-by: Steffen Klassert --- net/ipv4/ah4.c | 14 ++++++++++++-- net/ipv6/ah6.c | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 5fb812443a08..4366cbac3f06 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -124,9 +124,14 @@ static void ah_output_done(void *data, int err) struct iphdr *top_iph = ip_hdr(skb); struct ip_auth_hdr *ah = ip_auth_hdr(skb); int ihl = ip_hdrlen(skb); + int seqhi_len = 0; + __be32 *seqhi; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); iph = AH_SKB_CB(skb)->tmp; - icv = ah_tmp_icv(iph, ihl); + seqhi = (__be32 *)((char *)iph + ihl); + icv = ah_tmp_icv(seqhi, seqhi_len); memcpy(ah->auth_data, icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; @@ -270,12 +275,17 @@ static void ah_input_done(void *data, int err) struct ip_auth_hdr *ah = ip_auth_hdr(skb); int ihl = ip_hdrlen(skb); int ah_hlen = (ah->hdrlen + 2) << 2; + int seqhi_len = 0; + __be32 *seqhi; if (err) goto out; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); work_iph = AH_SKB_CB(skb)->tmp; - auth_data = ah_tmp_auth(work_iph, ihl); + seqhi = (__be32 *)((char *)work_iph + ihl); + auth_data = ah_tmp_auth(seqhi, seqhi_len); icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index cb26beea4398..de1e68199a01 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -317,14 +317,19 @@ static void ah6_output_done(void *data, int err) struct ipv6hdr *top_iph = ipv6_hdr(skb); struct ip_auth_hdr *ah = ip_auth_hdr(skb); struct tmp_ext *iph_ext; + int seqhi_len = 0; + __be32 *seqhi; extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr); if (extlen) extlen += sizeof(*iph_ext); + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); iph_base = AH_SKB_CB(skb)->tmp; iph_ext = ah_tmp_ext(iph_base); - icv = ah_tmp_icv(iph_ext, extlen); + seqhi = (__be32 *)((char *)iph_ext + extlen); + icv = ah_tmp_icv(seqhi, seqhi_len); memcpy(ah->auth_data, icv, ahp->icv_trunc_len); memcpy(top_iph, iph_base, IPV6HDR_BASELEN); @@ -471,13 +476,18 @@ static void ah6_input_done(void *data, int err) struct ip_auth_hdr *ah = ip_auth_hdr(skb); int hdr_len = skb_network_header_len(skb); int ah_hlen = ipv6_authlen(ah); + int seqhi_len = 0; + __be32 *seqhi; if (err) goto out; + if (x->props.flags & XFRM_STATE_ESN) + seqhi_len = sizeof(*seqhi); work_iph = AH_SKB_CB(skb)->tmp; auth_data = ah_tmp_auth(work_iph, hdr_len); - icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); + seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len); + icv = ah_tmp_icv(seqhi, seqhi_len); err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; if (err) -- cgit v1.2.3 From ae974ca6f0f3138a835d0ed38bedc87dec85b3b2 Mon Sep 17 00:00:00 2001 From: Ethan Carter Edwards Date: Sat, 18 Apr 2026 20:42:30 -0400 Subject: fanotify: Fix spelling mistake "enforecement" -> "enforcement" There is a spelling mistake in a comment. Fix it. Signed-off-by: Ethan Carter Edwards Link: https://patch.msgid.link/20260418-fanotify-typo-v1-1-03ea48cb44ba@ethancedwards.com Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index bfe884d624e7..38290b9c07f7 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -457,7 +457,7 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, /* * Unlike file_handle, type and len of struct fanotify_fh are u8. * Traditionally, filesystem return handle_type < 0xff, but there - * is no enforecement for that in vfs. + * is no enforcement for that in vfs. */ BUILD_BUG_ON(MAX_HANDLE_SZ > 0xff || FILEID_INVALID > 0xff); if (type <= 0 || type >= FILEID_INVALID || fh_len != dwords << 2) -- cgit v1.2.3 From a36d990f591320e9dd379ab30063ebfe91d47e1f Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 19 Apr 2026 17:21:54 -0400 Subject: isofs: validate Rock Ridge CE continuation extent against volume size rock_continue() reads rs->cont_extent verbatim from the Rock Ridge CE record and passes it to sb_bread() without checking that the block number is within the mounted ISO 9660 volume. commit e595447e177b ("[PATCH] rock.c: handle corrupted directories") added cont_offset and cont_size rejection for the CE continuation but did not validate the extent block number itself. commit f54e18f1b831 ("isofs: Fix infinite looping over CE entries") later capped the CE chain length at RR_MAX_CE_ENTRIES = 32 but again left the block number unchecked. With a crafted ISO mounted via udisks2 (desktop optical auto-mount) or via CAP_SYS_ADMIN mount, rs->cont_extent can therefore point at an out-of-range block or at blocks belonging to an adjacent filesystem on the same block device. sb_bread() on an out-of-range block returns NULL cleanly via the block layer EIO path, so there is no memory-safety violation. For in-range reads of adjacent- filesystem data, the CE buffer is parsed as Rock Ridge records and only the text of SL sub-records reaches userspace through readlink(), which makes the info-leak channel narrow and difficult to exploit; still, rejecting the malformed CE outright matches the rejection shape already present in the same function for cont_offset and cont_size. Add an ISOFS_SB(sb)->s_nzones bounds check to rock_continue() next to the existing offset/size rejection, printing the same corrupted-directory-entry notice. Fixes: f54e18f1b831 ("isofs: Fix infinite looping over CE entries") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260419212155.2169382-2-michael.bommarito@gmail.com Signed-off-by: Jan Kara --- fs/isofs/rock.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 6fe6dbd0c740..1232fab59a4e 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -101,6 +101,15 @@ static int rock_continue(struct rock_state *rs) goto out; } + if ((unsigned)rs->cont_extent >= ISOFS_SB(rs->inode->i_sb)->s_nzones) { + printk(KERN_NOTICE "rock: corrupted directory entry. " + "extent=%u out of volume (nzones=%lu)\n", + (unsigned)rs->cont_extent, + ISOFS_SB(rs->inode->i_sb)->s_nzones); + ret = -EIO; + goto out; + } + if (rs->cont_extent) { struct buffer_head *bh; -- cgit v1.2.3 From 24376458138387fb251e782e624c7776e9826796 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 19 Apr 2026 17:21:55 -0400 Subject: isofs: validate block number from NFS file handle in isofs_export_iget isofs_fh_to_dentry() and isofs_fh_to_parent() pass an attacker- controlled block number (ifid->block or ifid->parent_block) from the NFS file handle to isofs_export_iget(), which only rejects block == 0 before calling isofs_iget() and ultimately sb_bread(). A crafted file handle with fh_len sufficient to pass the check added by commit 0405d4b63d08 ("isofs: Prevent the use of too small fid") can still drive the server to read any in-range block on the backing device as if it were an iso_directory_record. That earlier fix was assigned CVE-2025-37780. sb_bread() on an out-of-range block returns NULL cleanly via the EIO path, so there is no memory-safety violation. For in-range reads of adjacent-partition data on the same block device, the unrelated bytes end up in iso_inode_info fields that reach the NFS client as dentry metadata. The deployment surface (isofs exported over NFS from loop-mounted images) is narrow and requires an authenticated NFS peer, but the malformed-file-handle class is reportable as hardening next to the existing CVE-2025-37780 fix. Reject block >= ISOFS_SB(sb)->s_nzones in isofs_export_iget() so the check covers both isofs_fh_to_dentry() and isofs_fh_to_parent() call sites with a single line. Fixes: 0405d4b63d08 ("isofs: Prevent the use of too small fid") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260419212155.2169382-3-michael.bommarito@gmail.com Signed-off-by: Jan Kara --- fs/isofs/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/isofs/export.c b/fs/isofs/export.c index 421d247fae52..78f80c1a5c54 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -24,7 +24,7 @@ isofs_export_iget(struct super_block *sb, { struct inode *inode; - if (block == 0) + if (block == 0 || block >= ISOFS_SB(sb)->s_nzones) return ERR_PTR(-ESTALE); inode = isofs_iget(sb, block, offset); if (IS_ERR(inode)) -- cgit v1.2.3 From cc85e337278001c325afecfbc9a739a3b1b205f2 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 20 Apr 2026 12:25:46 +0200 Subject: isofs: use QSTR_LEN() in isofs_cmp Use QSTR_LEN() and inline the code in isofs_cmp(). Remove the stale function comment while at it. Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20260420102544.8924-3-thorsten.blum@linux.dev Signed-off-by: Jan Kara --- fs/isofs/namei.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 8dd3911717e0..3ace3d6a55e7 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -10,20 +10,13 @@ #include #include "isofs.h" -/* - * ok, we cannot use strncmp, as the name is not in our data space. - * Thus we'll have to use isofs_match. No big problem. Match also makes - * some sanity tests. - */ static int isofs_cmp(struct dentry *dentry, const char *compare, int dlen) { - struct qstr qstr; - qstr.name = compare; - qstr.len = dlen; if (likely(!dentry->d_op)) return dentry->d_name.len != dlen || memcmp(dentry->d_name.name, compare, dlen); - return dentry->d_op->d_compare(NULL, dentry->d_name.len, dentry->d_name.name, &qstr); + return dentry->d_op->d_compare(NULL, dentry->d_name.len, dentry->d_name.name, + &QSTR_LEN(compare, dlen)); } /* -- cgit v1.2.3 From 4aca914ac152f5d055ddcb36704d1e539ac08977 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 20 Apr 2026 14:58:00 +0200 Subject: fsnotify: fix inode reference leak in fsnotify_recalc_mask() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fsnotify_recalc_mask() fails to handle the return value of __fsnotify_recalc_mask(), which may return an inode pointer that needs to be released via fsnotify_drop_object() when the connector's HAS_IREF flag transitions from set to cleared. This manifests as a hung task with the following call trace: INFO: task umount:1234 blocked for more than 120 seconds. Call Trace: __schedule schedule fsnotify_sb_delete generic_shutdown_super kill_anon_super cleanup_mnt task_work_run do_exit do_group_exit The race window that triggers the iref leak: Thread A (adding mark) Thread B (removing mark) ────────────────────── ──────────────────────── fsnotify_add_mark_locked(): fsnotify_add_mark_list(): spin_lock(conn->lock) add mark_B(evictable) to list spin_unlock(conn->lock) return /* ---- gap: no lock held ---- */ fsnotify_detach_mark(mark_A): spin_lock(mark_A->lock) clear ATTACHED flag on mark_A spin_unlock(mark_A->lock) fsnotify_put_mark(mark_A) fsnotify_recalc_mask(): spin_lock(conn->lock) __fsnotify_recalc_mask(): /* mark_A skipped: ATTACHED cleared */ /* only mark_B(evictable) remains */ want_iref = false has_iref = true /* not yet cleared */ -> HAS_IREF transitions true -> false -> returns inode pointer spin_unlock(conn->lock) /* BUG: return value discarded! * iput() and fsnotify_put_sb_watched_objects() * are never called */ Fix this by deferring the transition true -> false of HAS_IREF flag from fsnotify_recalc_mask() (Thread A) to fsnotify_put_mark() (thread B). Fixes: c3638b5b1374 ("fsnotify: allow adding an inode mark without pinning inode") Signed-off-by: Xin Yin Signed-off-by: Amir Goldstein Link: https://patch.msgid.link/CAOQ4uxiPsbHb0o5voUKyPFMvBsDkG914FYDcs4C5UpBMNm0Vcg@mail.gmail.com Signed-off-by: Jan Kara --- fs/notify/mark.c | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 622f05977f86..e256b420100d 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -238,7 +238,12 @@ static struct inode *fsnotify_update_iref(struct fsnotify_mark_connector *conn, return inode; } -static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) +/* + * Calculate mask of events for a list of marks. + * + * Return true if any of the attached marks want to hold an inode reference. + */ +static bool __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { u32 new_mask = 0; bool want_iref = false; @@ -262,6 +267,34 @@ static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) */ WRITE_ONCE(*fsnotify_conn_mask_p(conn), new_mask); + return want_iref; +} + +/* + * Calculate mask of events for a list of marks after attach/modify mark + * and get an inode reference for the connector if needed. + * + * A concurrent add of evictable mark and detach of non-evictable mark can + * lead to __fsnotify_recalc_mask() returning false want_iref, but in this + * case we defer clearing iref to fsnotify_recalc_mask_clear_iref() called + * from fsnotify_put_mark(). + */ +static void fsnotify_recalc_mask_set_iref(struct fsnotify_mark_connector *conn) +{ + bool has_iref = conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF; + bool want_iref = __fsnotify_recalc_mask(conn) || has_iref; + + (void) fsnotify_update_iref(conn, want_iref); +} + +/* + * Calculate mask of events for a list of marks after detach mark + * and return the inode object if its reference is no longer needed. + */ +static void *fsnotify_recalc_mask_clear_iref(struct fsnotify_mark_connector *conn) +{ + bool want_iref = __fsnotify_recalc_mask(conn); + return fsnotify_update_iref(conn, want_iref); } @@ -298,7 +331,7 @@ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) spin_lock(&conn->lock); update_children = !fsnotify_conn_watches_children(conn); - __fsnotify_recalc_mask(conn); + fsnotify_recalc_mask_set_iref(conn); update_children &= fsnotify_conn_watches_children(conn); spin_unlock(&conn->lock); /* @@ -419,7 +452,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) /* Update watched objects after detaching mark */ if (sb) fsnotify_update_sb_watchers(sb, conn); - objp = __fsnotify_recalc_mask(conn); + objp = fsnotify_recalc_mask_clear_iref(conn); type = conn->type; } WRITE_ONCE(mark->connector, NULL); -- cgit v1.2.3 From 09a65adc7d8bbfce06392cb6d375468e2728ead5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 20 Apr 2026 19:56:44 +0200 Subject: dm-thin: fix metadata refcount underflow There's a bug in dm-thin in the function rebalance_children. If the internal btree node has one entry, the code tries to copy all btree entries from the node's child to the node itself and then decrement the child's reference count. If the child node is shared (it has reference count > 1), we won't free it, so there would be two pointers to each of the grandchildren nodes. But the reference counts of the grandchildren is not increased, thus the reference count doesn't match the number of pointers that point to the grandchildren. This results in "device mapper: space map common: unable to decrement block" errors. Fix this bug by incrementing reference counts on the grandchildren if the btree node is shared. Signed-off-by: Mikulas Patocka Fixes: 3241b1d3e0aa ("dm: add persistent data library") Cc: stable@vger.kernel.org --- drivers/md/persistent-data/dm-btree-remove.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index 942cd47eb52d..aeec5b9a1dd5 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -490,12 +490,20 @@ static int rebalance_children(struct shadow_spine *s, if (le32_to_cpu(n->header.nr_entries) == 1) { struct dm_block *child; + int is_shared; dm_block_t b = value64(n, 0); + r = dm_tm_block_is_shared(info->tm, b, &is_shared); + if (r) + return r; + r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child); if (r) return r; + if (is_shared) + inc_children(info->tm, dm_block_data(child), vt); + memcpy(n, dm_block_data(child), dm_bm_block_size(dm_tm_get_bm(info->tm))); -- cgit v1.2.3 From 2d2b026c3ea792a0c91d4acf4430d8b65bedf271 Mon Sep 17 00:00:00 2001 From: Cheng-Yang Chou Date: Mon, 20 Apr 2026 17:28:47 +0800 Subject: sched_ext: Deny SCX kfuncs to non-SCX struct_ops programs scx_kfunc_context_filter() currently allows non-SCX struct_ops programs (e.g. tcp_congestion_ops) to call SCX unlocked kfuncs. This is wrong for two reasons: - It is semantically incorrect: a TCP congestion control program has no business calling SCX kfuncs such as scx_bpf_kick_cpu(). - With CONFIG_EXT_SUB_SCHED=y, kfuncs like scx_bpf_kick_cpu() call scx_prog_sched(aux), which invokes bpf_prog_get_assoc_struct_ops(aux) and casts the result to struct sched_ext_ops * before reading ops->priv. For a non-SCX struct_ops program the returned pointer is the kdata of that struct_ops type, which is far smaller than sched_ext_ops, making the read an out-of-bounds access (confirmed with KASAN). Extend the filter to cover scx_kfunc_set_any and scx_kfunc_set_idle as well, and deny all SCX kfuncs for any struct_ops program that is not the SCX struct_ops. This addresses both issues: the semantic contract is enforced at the verifier level, and the runtime out-of-bounds access becomes unreachable. Fixes: d1d3c1c6ae36 ("sched_ext: Add verifier-time kfunc context filter") Suggested-by: Tejun Heo Signed-off-by: Cheng-Yang Chou Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 32 ++++++++++++++++++-------------- kernel/sched/ext_idle.c | 1 + kernel/sched/ext_idle.h | 1 + 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 7edd46f3ac43..d66fea57ee69 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -9480,6 +9480,7 @@ BTF_KFUNCS_END(scx_kfunc_ids_any) static const struct btf_kfunc_id_set scx_kfunc_set_any = { .owner = THIS_MODULE, .set = &scx_kfunc_ids_any, + .filter = scx_kfunc_context_filter, }; /* @@ -9527,13 +9528,12 @@ static const u32 scx_kf_allow_flags[] = { }; /* - * Verifier-time filter for context-sensitive SCX kfuncs. Registered via the - * .filter field on each per-group btf_kfunc_id_set. The BPF core invokes this - * for every kfunc call in the registered hook (BPF_PROG_TYPE_STRUCT_OPS or + * Verifier-time filter for SCX kfuncs. Registered via the .filter field on + * each per-group btf_kfunc_id_set. The BPF core invokes this for every kfunc + * call in the registered hook (BPF_PROG_TYPE_STRUCT_OPS or * BPF_PROG_TYPE_SYSCALL), regardless of which set originally introduced the - * kfunc - so the filter must short-circuit on kfuncs it doesn't govern (e.g. - * scx_kfunc_ids_any) by falling through to "allow" when none of the - * context-sensitive sets contain the kfunc. + * kfunc - so the filter must short-circuit on kfuncs it doesn't govern by + * falling through to "allow" when none of the SCX sets contain the kfunc. */ int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id) { @@ -9542,18 +9542,21 @@ int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id) bool in_enqueue = btf_id_set8_contains(&scx_kfunc_ids_enqueue_dispatch, kfunc_id); bool in_dispatch = btf_id_set8_contains(&scx_kfunc_ids_dispatch, kfunc_id); bool in_cpu_release = btf_id_set8_contains(&scx_kfunc_ids_cpu_release, kfunc_id); + bool in_idle = btf_id_set8_contains(&scx_kfunc_ids_idle, kfunc_id); + bool in_any = btf_id_set8_contains(&scx_kfunc_ids_any, kfunc_id); u32 moff, flags; - /* Not a context-sensitive kfunc (e.g. from scx_kfunc_ids_any) - allow. */ - if (!(in_unlocked || in_select_cpu || in_enqueue || in_dispatch || in_cpu_release)) + /* Not an SCX kfunc - allow. */ + if (!(in_unlocked || in_select_cpu || in_enqueue || in_dispatch || + in_cpu_release || in_idle || in_any)) return 0; /* SYSCALL progs (e.g. BPF test_run()) may call unlocked and select_cpu kfuncs. */ if (prog->type == BPF_PROG_TYPE_SYSCALL) - return (in_unlocked || in_select_cpu) ? 0 : -EACCES; + return (in_unlocked || in_select_cpu || in_idle || in_any) ? 0 : -EACCES; if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) - return -EACCES; + return (in_any || in_idle) ? 0 : -EACCES; /* * add_subprog_and_kfunc() collects all kfunc calls, including dead code @@ -9566,14 +9569,15 @@ int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id) return 0; /* - * Non-SCX struct_ops: only unlocked kfuncs are safe. The other - * context-sensitive kfuncs assume the rq lock is held by the SCX - * dispatch path, which doesn't apply to other struct_ops users. + * Non-SCX struct_ops: SCX kfuncs are not permitted. */ if (prog->aux->st_ops != &bpf_sched_ext_ops) - return in_unlocked ? 0 : -EACCES; + return -EACCES; /* SCX struct_ops: check the per-op allow list. */ + if (in_any || in_idle) + return 0; + moff = prog->aux->attach_st_ops_member_off; flags = scx_kf_allow_flags[SCX_MOFF_IDX(moff)]; diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index 443d12a3df67..c43d62d90e40 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -1467,6 +1467,7 @@ BTF_KFUNCS_END(scx_kfunc_ids_idle) static const struct btf_kfunc_id_set scx_kfunc_set_idle = { .owner = THIS_MODULE, .set = &scx_kfunc_ids_idle, + .filter = scx_kfunc_context_filter, }; /* diff --git a/kernel/sched/ext_idle.h b/kernel/sched/ext_idle.h index dc35f850481e..8d169d3bbdf9 100644 --- a/kernel/sched/ext_idle.h +++ b/kernel/sched/ext_idle.h @@ -12,6 +12,7 @@ struct sched_ext_ops; +extern struct btf_id_set8 scx_kfunc_ids_idle; extern struct btf_id_set8 scx_kfunc_ids_select_cpu; void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops); -- cgit v1.2.3 From 5897ca15d2c444af95eaae5f0a384401765afa00 Mon Sep 17 00:00:00 2001 From: Cheng-Yang Chou Date: Mon, 20 Apr 2026 17:28:48 +0800 Subject: selftests/sched_ext: Add non_scx_kfunc_deny test Verify that the BPF verifier rejects a non-SCX struct_ops program (tcp_congestion_ops) that attempts to call an SCX kfunc (scx_bpf_kick_cpu). The test expects the load to fail with -EACCES from scx_kfunc_context_filter. Signed-off-by: Cheng-Yang Chou Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/Makefile | 1 + .../selftests/sched_ext/non_scx_kfunc_deny.bpf.c | 44 ++++++++++++++++++++ .../selftests/sched_ext/non_scx_kfunc_deny.c | 47 ++++++++++++++++++++++ 3 files changed, 92 insertions(+) create mode 100644 tools/testing/selftests/sched_ext/non_scx_kfunc_deny.bpf.c create mode 100644 tools/testing/selftests/sched_ext/non_scx_kfunc_deny.c diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index 789037be44c7..5d2dffca0e91 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile @@ -175,6 +175,7 @@ auto-test-targets := \ maximal \ maybe_null \ minimal \ + non_scx_kfunc_deny \ numa \ allowed_cpus \ peek_dsq \ diff --git a/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.bpf.c b/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.bpf.c new file mode 100644 index 000000000000..9f16d39255e7 --- /dev/null +++ b/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.bpf.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Verify that context-sensitive SCX kfuncs (even "unlocked" ones) are + * restricted to only SCX struct_ops programs. Non-SCX struct_ops programs, + * such as TCP congestion control programs, should be rejected by the BPF + * verifier when attempting to call these kfuncs. + * + * Copyright (C) 2026 Ching-Chun (Jim) Huang + * Copyright (C) 2026 Cheng-Yang Chou + */ + +#include +#include +#include + +/* SCX kfunc from scx_kfunc_ids_any set */ +void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym; + +SEC("struct_ops/ssthresh") +__u32 BPF_PROG(tcp_ca_ssthresh, struct sock *sk) +{ + /* + * This call should be rejected by the verifier because this is a + * TCP congestion control program (non-SCX struct_ops). + */ + scx_bpf_kick_cpu(0, 0); + return 2; +} + +SEC("struct_ops/cong_avoid") +void BPF_PROG(tcp_ca_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) {} + +SEC("struct_ops/undo_cwnd") +__u32 BPF_PROG(tcp_ca_undo_cwnd, struct sock *sk) { return 2; } + +SEC(".struct_ops") +struct tcp_congestion_ops tcp_non_scx_ca = { + .ssthresh = (void *)tcp_ca_ssthresh, + .cong_avoid = (void *)tcp_ca_cong_avoid, + .undo_cwnd = (void *)tcp_ca_undo_cwnd, + .name = "tcp_kfunc_deny", +}; + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.c b/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.c new file mode 100644 index 000000000000..1c031575fb87 --- /dev/null +++ b/tools/testing/selftests/sched_ext/non_scx_kfunc_deny.c @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Verify that context-sensitive SCX kfuncs (even "unlocked" ones) are + * restricted to only SCX struct_ops programs. Non-SCX struct_ops programs, + * such as TCP congestion control programs, should be rejected by the BPF + * verifier when attempting to call these kfuncs. + * + * Copyright (C) 2026 Ching-Chun (Jim) Huang + * Copyright (C) 2026 Cheng-Yang Chou + */ + +#include +#include +#include +#include +#include +#include "non_scx_kfunc_deny.bpf.skel.h" +#include "scx_test.h" + +static enum scx_test_status run(void *ctx) +{ + struct non_scx_kfunc_deny *skel; + int err; + + skel = non_scx_kfunc_deny__open(); + if (!skel) { + SCX_ERR("Failed to open skel"); + return SCX_TEST_FAIL; + } + + err = non_scx_kfunc_deny__load(skel); + non_scx_kfunc_deny__destroy(skel); + + if (err == 0) { + SCX_ERR("non-SCX BPF program loaded when it should have been rejected"); + return SCX_TEST_FAIL; + } + + return SCX_TEST_PASS; +} + +struct scx_test non_scx_kfunc_deny = { + .name = "non_scx_kfunc_deny", + .description = "Verify that non-SCX struct_ops programs cannot call SCX kfuncs", + .run = run, +}; +REGISTER_SCX_TEST(&non_scx_kfunc_deny) -- cgit v1.2.3 From 5567fc9dcd7ed46678cd68e6ca0662331d42f0ac Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 20 Apr 2026 14:19:46 -0700 Subject: KVM: selftests: Use gva_t instead of vm_vaddr_t Replace all occurrences of vm_vaddr_t with gva_t to align with KVM code and with the conversion helpers (e.g. addr_gva2hva()). This commit was generated with the following command: git ls-files tools/testing/selftests/kvm | xargs sed -i 's/vm_vaddr_/gva_/g' Then by manually adjusting whitespace to make checkpatch.pl happy, and dropping renames of functions that allocate memory within a given VM. No functional change intended. Signed-off-by: David Matlack [sean: drop renames of allocator APIs] Link: https://patch.msgid.link/20260420212004.3938325-2-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/arm64/vgic_irq.c | 6 ++-- .../selftests/kvm/include/arm64/processor.h | 4 +-- tools/testing/selftests/kvm/include/arm64/ucall.h | 4 +-- tools/testing/selftests/kvm/include/kvm_util.h | 32 ++++++++++------------ .../testing/selftests/kvm/include/kvm_util_types.h | 2 +- .../selftests/kvm/include/loongarch/ucall.h | 4 +-- tools/testing/selftests/kvm/include/riscv/ucall.h | 2 +- tools/testing/selftests/kvm/include/s390/ucall.h | 2 +- tools/testing/selftests/kvm/include/ucall_common.h | 4 +-- tools/testing/selftests/kvm/include/x86/hyperv.h | 10 +++---- .../selftests/kvm/include/x86/kvm_util_arch.h | 6 ++-- tools/testing/selftests/kvm/include/x86/svm_util.h | 2 +- tools/testing/selftests/kvm/include/x86/vmx.h | 2 +- tools/testing/selftests/kvm/kvm_page_table_test.c | 2 +- tools/testing/selftests/kvm/lib/arm64/processor.c | 18 ++++++------ tools/testing/selftests/kvm/lib/arm64/ucall.c | 6 ++-- tools/testing/selftests/kvm/lib/elf.c | 6 ++-- tools/testing/selftests/kvm/lib/kvm_util.c | 32 ++++++++++------------ .../selftests/kvm/lib/loongarch/processor.c | 8 +++--- tools/testing/selftests/kvm/lib/loongarch/ucall.c | 6 ++-- tools/testing/selftests/kvm/lib/riscv/processor.c | 8 +++--- tools/testing/selftests/kvm/lib/s390/processor.c | 2 +- tools/testing/selftests/kvm/lib/ucall_common.c | 8 +++--- tools/testing/selftests/kvm/lib/x86/hyperv.c | 4 +-- tools/testing/selftests/kvm/lib/x86/memstress.c | 2 +- tools/testing/selftests/kvm/lib/x86/processor.c | 14 +++++----- tools/testing/selftests/kvm/lib/x86/svm.c | 4 +-- tools/testing/selftests/kvm/lib/x86/ucall.c | 2 +- tools/testing/selftests/kvm/lib/x86/vmx.c | 4 +-- tools/testing/selftests/kvm/riscv/sbi_pmu_test.c | 2 +- tools/testing/selftests/kvm/s390/memop.c | 6 ++-- tools/testing/selftests/kvm/s390/tprot.c | 6 ++-- tools/testing/selftests/kvm/steal_time.c | 2 +- tools/testing/selftests/kvm/x86/amx_test.c | 2 +- tools/testing/selftests/kvm/x86/aperfmperf_test.c | 2 +- tools/testing/selftests/kvm/x86/cpuid_test.c | 6 ++-- .../selftests/kvm/x86/evmcs_smm_controls_test.c | 2 +- tools/testing/selftests/kvm/x86/hyperv_clock.c | 2 +- tools/testing/selftests/kvm/x86/hyperv_evmcs.c | 6 ++-- .../selftests/kvm/x86/hyperv_extended_hypercalls.c | 6 ++-- tools/testing/selftests/kvm/x86/hyperv_features.c | 6 ++-- tools/testing/selftests/kvm/x86/hyperv_ipi.c | 8 +++--- tools/testing/selftests/kvm/x86/hyperv_svm_test.c | 6 ++-- tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c | 12 ++++---- tools/testing/selftests/kvm/x86/kvm_buslock_test.c | 2 +- tools/testing/selftests/kvm/x86/kvm_clock_test.c | 2 +- .../selftests/kvm/x86/nested_close_kvm_test.c | 2 +- .../selftests/kvm/x86/nested_dirty_log_test.c | 10 +++---- .../selftests/kvm/x86/nested_emulation_test.c | 2 +- .../selftests/kvm/x86/nested_exceptions_test.c | 2 +- .../selftests/kvm/x86/nested_invalid_cr3_test.c | 2 +- .../selftests/kvm/x86/nested_tsc_adjust_test.c | 2 +- .../selftests/kvm/x86/nested_tsc_scaling_test.c | 2 +- .../selftests/kvm/x86/nested_vmsave_vmload_test.c | 2 +- tools/testing/selftests/kvm/x86/sev_smoke_test.c | 2 +- tools/testing/selftests/kvm/x86/smm_test.c | 2 +- tools/testing/selftests/kvm/x86/state_test.c | 2 +- tools/testing/selftests/kvm/x86/svm_int_ctl_test.c | 2 +- .../selftests/kvm/x86/svm_lbr_nested_state.c | 2 +- .../selftests/kvm/x86/svm_nested_clear_efer_svme.c | 2 +- .../selftests/kvm/x86/svm_nested_shutdown_test.c | 2 +- .../kvm/x86/svm_nested_soft_inject_test.c | 4 +-- .../selftests/kvm/x86/svm_nested_vmcb12_gpa.c | 6 ++-- tools/testing/selftests/kvm/x86/svm_vmcall_test.c | 2 +- .../selftests/kvm/x86/triple_fault_event_test.c | 4 +-- .../selftests/kvm/x86/vmx_apic_access_test.c | 2 +- .../selftests/kvm/x86/vmx_apicv_updates_test.c | 2 +- .../kvm/x86/vmx_invalid_nested_guest_state.c | 2 +- .../selftests/kvm/x86/vmx_nested_la57_state_test.c | 2 +- .../selftests/kvm/x86/vmx_preemption_timer_test.c | 2 +- tools/testing/selftests/kvm/x86/xapic_ipi_test.c | 2 +- 71 files changed, 172 insertions(+), 178 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index 2fb2c7939fe9..da87d049d246 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -731,7 +731,7 @@ static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc, struct kvm_inject_args *args) { struct kvm_inject_args *kvm_args_hva; - vm_vaddr_t kvm_args_gva; + gva_t kvm_args_gva; kvm_args_gva = uc->args[1]; kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva); @@ -752,7 +752,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_inject_args inject_args; - vm_vaddr_t args_gva; + gva_t args_gva; struct test_args args = { .nr_irqs = nr_irqs, @@ -986,7 +986,7 @@ static void test_vgic_two_cpus(void *gcode) struct kvm_vcpu *vcpus[2]; struct test_args args = {}; struct kvm_vm *vm; - vm_vaddr_t args_gva; + gva_t args_gva; int gic_fd, ret; vm = vm_create_with_vcpus(2, gcode, vcpus); diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index ac97a1c436fc..5b18ffe68789 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -179,8 +179,8 @@ void vm_install_exception_handler(struct kvm_vm *vm, void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, handler_fn handler); -uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level); -uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva); +uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, gva_t gva, int level); +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, gva_t gva); static inline void cpu_relax(void) { diff --git a/tools/testing/selftests/kvm/include/arm64/ucall.h b/tools/testing/selftests/kvm/include/arm64/ucall.h index 4ec801f37f00..2210d3d94c40 100644 --- a/tools/testing/selftests/kvm/include/arm64/ucall.h +++ b/tools/testing/selftests/kvm/include/arm64/ucall.h @@ -10,9 +10,9 @@ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each * VM), it must not be accessed from host code. */ -extern vm_vaddr_t *ucall_exit_mmio_addr; +extern gva_t *ucall_exit_mmio_addr; -static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +static inline void ucall_arch_do_ucall(gva_t uc) { WRITE_ONCE(*ucall_exit_mmio_addr, uc); } diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index f861242b4ae8..2378dd42c988 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -112,7 +112,7 @@ struct kvm_vm { struct sparsebit *vpages_mapped; bool has_irqchip; vm_paddr_t ucall_mmio_addr; - vm_vaddr_t handlers; + gva_t handlers; uint32_t dirty_ring_size; uint64_t gpa_tag_mask; @@ -716,22 +716,20 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); void vm_populate_vaddr_bitmap(struct kvm_vm *vm); -vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); -vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, +gva_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, gva_t vaddr_min); +gva_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min); +gva_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, + enum kvm_mem_region_type type); +gva_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, enum kvm_mem_region_type type); -vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type); -vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); -vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, - enum kvm_mem_region_type type); -vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); +gva_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); +gva_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type); +gva_t vm_vaddr_alloc_page(struct kvm_vm *vm); void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, unsigned int npages); void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); -void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); +void *addr_gva2hva(struct kvm_vm *vm, gva_t gva); vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa); @@ -1131,12 +1129,12 @@ vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) } #define sync_global_to_guest(vm, g) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + typeof(g) *_p = addr_gva2hva(vm, (gva_t)&(g)); \ memcpy(_p, &(g), sizeof(g)); \ }) #define sync_global_from_guest(vm, g) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + typeof(g) *_p = addr_gva2hva(vm, (gva_t)&(g)); \ memcpy(&(g), _p, sizeof(g)); \ }) @@ -1147,7 +1145,7 @@ vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) * undesirable to change the host's copy of the global. */ #define write_guest_global(vm, g, val) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + typeof(g) *_p = addr_gva2hva(vm, (gva_t)&(g)); \ typeof(g) _val = val; \ \ memcpy(_p, &(_val), sizeof(g)); \ @@ -1242,9 +1240,9 @@ static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr * Returns the VM physical address of the translated VM virtual * address given by @gva. */ -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva); -static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, gva_t gva) { return addr_arch_gva2gpa(vm, gva); } diff --git a/tools/testing/selftests/kvm/include/kvm_util_types.h b/tools/testing/selftests/kvm/include/kvm_util_types.h index 0366e9bce7f9..f27bd035ea10 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_types.h +++ b/tools/testing/selftests/kvm/include/kvm_util_types.h @@ -15,7 +15,7 @@ #define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr) typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ -typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ +typedef uint64_t gva_t; /* Virtual Machine (Guest) virtual address */ #define INVALID_GPA (~(uint64_t)0) diff --git a/tools/testing/selftests/kvm/include/loongarch/ucall.h b/tools/testing/selftests/kvm/include/loongarch/ucall.h index 4ec801f37f00..2210d3d94c40 100644 --- a/tools/testing/selftests/kvm/include/loongarch/ucall.h +++ b/tools/testing/selftests/kvm/include/loongarch/ucall.h @@ -10,9 +10,9 @@ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each * VM), it must not be accessed from host code. */ -extern vm_vaddr_t *ucall_exit_mmio_addr; +extern gva_t *ucall_exit_mmio_addr; -static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +static inline void ucall_arch_do_ucall(gva_t uc) { WRITE_ONCE(*ucall_exit_mmio_addr, uc); } diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h index a695ae36f3e0..41d56254968e 100644 --- a/tools/testing/selftests/kvm/include/riscv/ucall.h +++ b/tools/testing/selftests/kvm/include/riscv/ucall.h @@ -11,7 +11,7 @@ static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } -static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +static inline void ucall_arch_do_ucall(gva_t uc) { sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, KVM_RISCV_SELFTESTS_SBI_UCALL, diff --git a/tools/testing/selftests/kvm/include/s390/ucall.h b/tools/testing/selftests/kvm/include/s390/ucall.h index 8035a872a351..befee84c4609 100644 --- a/tools/testing/selftests/kvm/include/s390/ucall.h +++ b/tools/testing/selftests/kvm/include/s390/ucall.h @@ -10,7 +10,7 @@ static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } -static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +static inline void ucall_arch_do_ucall(gva_t uc) { /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index d9d6581b8d4f..e5499f170834 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -30,7 +30,7 @@ struct ucall { }; void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); -void ucall_arch_do_ucall(vm_vaddr_t uc); +void ucall_arch_do_ucall(gva_t uc); void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); void ucall(uint64_t cmd, int nargs, ...); @@ -48,7 +48,7 @@ int ucall_nr_pages_required(uint64_t page_size); * the full ucall() are problematic and/or unwanted. Note, this will come out * as UCALL_NONE on the backend. */ -#define GUEST_UCALL_NONE() ucall_arch_do_ucall((vm_vaddr_t)NULL) +#define GUEST_UCALL_NONE() ucall_arch_do_ucall((gva_t)NULL) #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) diff --git a/tools/testing/selftests/kvm/include/x86/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h index f13e532be240..eedfff3cf102 100644 --- a/tools/testing/selftests/kvm/include/x86/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86/hyperv.h @@ -254,8 +254,8 @@ * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status' * is set to the hypercall status (if no exception occurred). */ -static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address, - vm_vaddr_t output_address, +static inline uint8_t __hyperv_hypercall(u64 control, gva_t input_address, + gva_t output_address, uint64_t *hv_status) { uint64_t error_code; @@ -274,8 +274,8 @@ static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address, } /* Issue a Hyper-V hypercall and assert that it succeeded. */ -static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address, - vm_vaddr_t output_address) +static inline void hyperv_hypercall(u64 control, gva_t input_address, + gva_t output_address) { uint64_t hv_status; uint8_t vector; @@ -347,7 +347,7 @@ struct hyperv_test_pages { }; struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, - vm_vaddr_t *p_hv_pages_gva); + gva_t *p_hv_pages_gva); /* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */ #define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0) diff --git a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h index be35d26bb320..4c605f624956 100644 --- a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h @@ -33,9 +33,9 @@ struct kvm_mmu_arch { struct kvm_mmu; struct kvm_vm_arch { - vm_vaddr_t gdt; - vm_vaddr_t tss; - vm_vaddr_t idt; + gva_t gdt; + gva_t tss; + gva_t idt; uint64_t c_bit; uint64_t s_bit; diff --git a/tools/testing/selftests/kvm/include/x86/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h index 5d7c42534bc4..a25b83e2c233 100644 --- a/tools/testing/selftests/kvm/include/x86/svm_util.h +++ b/tools/testing/selftests/kvm/include/x86/svm_util.h @@ -56,7 +56,7 @@ static inline void vmmcall(void) "clgi\n" \ ) -struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva); +struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, gva_t *p_svm_gva); void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp); void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa); diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index 92b918700d24..f194723da3d0 100644 --- a/tools/testing/selftests/kvm/include/x86/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -550,7 +550,7 @@ union vmx_ctrl_msr { }; }; -struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); +struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, gva_t *p_vmx_gva); bool prepare_for_vmx_operation(struct vmx_pages *vmx); void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); bool load_vmcs(struct vmx_pages *vmx); diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index c60a24a92829..61915fc89c17 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -292,7 +292,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) ret = sem_init(&test_stage_completed, 0, 0); TEST_ASSERT(ret == 0, "Error in sem_init"); - current_stage = addr_gva2hva(vm, (vm_vaddr_t)(&guest_test_stage)); + current_stage = addr_gva2hva(vm, (gva_t)(&guest_test_stage)); *current_stage = NUM_TEST_STAGES; pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 43ea40edc533..3645acae09ce 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -19,9 +19,9 @@ #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 -static vm_vaddr_t exception_handlers; +static gva_t exception_handlers; -static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva) +static uint64_t pgd_index(struct kvm_vm *vm, gva_t gva) { unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; uint64_t mask = (1UL << (vm->va_bits - shift)) - 1; @@ -29,7 +29,7 @@ static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva) return (gva >> shift) & mask; } -static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva) +static uint64_t pud_index(struct kvm_vm *vm, gva_t gva) { unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift; uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; @@ -40,7 +40,7 @@ static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva) return (gva >> shift) & mask; } -static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva) +static uint64_t pmd_index(struct kvm_vm *vm, gva_t gva) { unsigned int shift = (vm->page_shift - 3) + vm->page_shift; uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; @@ -51,7 +51,7 @@ static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva) return (gva >> shift) & mask; } -static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva) +static uint64_t pte_index(struct kvm_vm *vm, gva_t gva) { uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; return (gva >> vm->page_shift) & mask; @@ -181,7 +181,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) _virt_pg_map(vm, vaddr, paddr, attr_idx); } -uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level) +uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, gva_t gva, int level) { uint64_t *ptep; @@ -225,12 +225,12 @@ unmapped_gva: exit(EXIT_FAILURE); } -uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, gva_t gva) { return virt_get_pte_hva_at_level(vm, gva, 3); } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { uint64_t *ptep = virt_get_pte_hva(vm, gva); @@ -539,7 +539,7 @@ void vm_init_descriptor_tables(struct kvm_vm *vm) vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), vm->page_size, MEM_REGION_DATA); - *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; + *(gva_t *)addr_gva2hva(vm, (gva_t)(&exception_handlers)) = vm->handlers; } void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, diff --git a/tools/testing/selftests/kvm/lib/arm64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c index ddab0ce89d4d..9ea747982d00 100644 --- a/tools/testing/selftests/kvm/lib/arm64/ucall.c +++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c @@ -6,17 +6,17 @@ */ #include "kvm_util.h" -vm_vaddr_t *ucall_exit_mmio_addr; +gva_t *ucall_exit_mmio_addr; void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { - vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); + gva_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); virt_map(vm, mmio_gva, mmio_gpa, 1); vm->ucall_mmio_addr = mmio_gpa; - write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); + write_guest_global(vm, ucall_exit_mmio_addr, (gva_t *)mmio_gva); } void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index f34d926d9735..ff90fba3a5c6 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -157,12 +157,12 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) "memsize of 0,\n" " phdr index: %u p_memsz: 0x%" PRIx64, n1, (uint64_t) phdr.p_memsz); - vm_vaddr_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size); - vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1; + gva_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size); + gva_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1; seg_vend |= vm->page_size - 1; size_t seg_size = seg_vend - seg_vstart + 1; - vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart, + gva_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart, MEM_REGION_CODE); TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate " "virtual memory for segment at requested min addr,\n" diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index f5e076591c64..04a59603e93e 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1386,8 +1386,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) * TEST_ASSERT failure occurs for invalid input or no area of at least * sz unallocated bytes >= vaddr_min is available. */ -vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min) +gva_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, gva_t vaddr_min) { uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; @@ -1452,10 +1451,8 @@ va_found: return pgidx_start * vm->page_size; } -static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type, - bool protected) +static gva_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, + enum kvm_mem_region_type type, bool protected) { uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); @@ -1468,10 +1465,10 @@ static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, * Find an unused range of virtual page addresses of at least * pages in length. */ - vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); + gva_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); /* Map the virtual pages. */ - for (vm_vaddr_t vaddr = vaddr_start; pages > 0; + for (gva_t vaddr = vaddr_start; pages > 0; pages--, vaddr += vm->page_size, paddr += vm->page_size) { virt_pg_map(vm, vaddr, paddr); @@ -1480,16 +1477,15 @@ static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, return vaddr_start; } -vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type) +gva_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, + enum kvm_mem_region_type type) { return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, vm_arch_has_protected_memory(vm)); } -vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type) +gva_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, + enum kvm_mem_region_type type) { return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, false); } @@ -1513,7 +1509,7 @@ vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, * a unique set of pages, with the minimum real allocation being at least * a page. The allocated physical space comes from the TEST_DATA memory region. */ -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) +gva_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min) { return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA); } @@ -1532,12 +1528,12 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) * Allocates at least N system pages worth of bytes within the virtual address * space of the vm. */ -vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) +gva_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) { return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); } -vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type) +gva_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type) { return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type); } @@ -1556,7 +1552,7 @@ vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type typ * Allocates at least one system page worth of bytes within the virtual address * space of the vm. */ -vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm) +gva_t vm_vaddr_alloc_page(struct kvm_vm *vm) { return vm_vaddr_alloc_pages(vm, 1); } @@ -2161,7 +2157,7 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) * Return: * Equivalent host virtual address */ -void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) +void *addr_gva2hva(struct kvm_vm *vm, gva_t gva) { return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); } diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c index ee4ad3b1d2a4..3b67720fbbe1 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/processor.c +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -13,9 +13,9 @@ #define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000 static vm_paddr_t invalid_pgtable[4]; -static vm_vaddr_t exception_handlers; +static gva_t exception_handlers; -static uint64_t virt_pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) +static uint64_t virt_pte_index(struct kvm_vm *vm, gva_t gva, int level) { unsigned int shift; uint64_t mask; @@ -72,7 +72,7 @@ static int virt_pte_none(uint64_t *ptep, int level) return *ptep == invalid_pgtable[level]; } -static uint64_t *virt_populate_pte(struct kvm_vm *vm, vm_vaddr_t gva, int alloc) +static uint64_t *virt_populate_pte(struct kvm_vm *vm, gva_t gva, int alloc) { int level; uint64_t *ptep; @@ -106,7 +106,7 @@ unmapped_gva: exit(EXIT_FAILURE); } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { uint64_t *ptep; diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c index fc6cbb50573f..a5aa568f437b 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/ucall.c +++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c @@ -9,17 +9,17 @@ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each * VM), it must not be accessed from host code. */ -vm_vaddr_t *ucall_exit_mmio_addr; +gva_t *ucall_exit_mmio_addr; void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { - vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); + gva_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); virt_map(vm, mmio_gva, mmio_gpa, 1); vm->ucall_mmio_addr = mmio_gpa; - write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); + write_guest_global(vm, ucall_exit_mmio_addr, (gva_t *)mmio_gva); } void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 067c6b2c15b0..552628dda4a0 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -15,7 +15,7 @@ #define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000 -static vm_vaddr_t exception_handlers; +static gva_t exception_handlers; bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext) { @@ -52,7 +52,7 @@ static uint32_t pte_index_shift[] = { PGTBL_L3_INDEX_SHIFT, }; -static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) +static uint64_t pte_index(struct kvm_vm *vm, gva_t gva, int level) { TEST_ASSERT(level > -1, "Negative page table level (%d) not possible", level); @@ -119,7 +119,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK; } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { uint64_t *ptep; int level = vm->mmu.pgtable_levels - 1; @@ -452,7 +452,7 @@ void vm_init_vector_tables(struct kvm_vm *vm) vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), vm->page_size, MEM_REGION_DATA); - *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; + *(gva_t *)addr_gva2hva(vm, (gva_t)(&exception_handlers)) = vm->handlers; } void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler) diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 6a9a660413a7..e8d3c1d333d5 100644 --- a/tools/testing/selftests/kvm/lib/s390/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -86,7 +86,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) entry[idx] = gpa; } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { int ri, idx; uint64_t *entry; diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index 42151e571953..997444178c78 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -29,7 +29,7 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { struct ucall_header *hdr; struct ucall *uc; - vm_vaddr_t vaddr; + gva_t vaddr; int i; vaddr = vm_vaddr_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, @@ -96,7 +96,7 @@ void ucall_assert(uint64_t cmd, const char *exp, const char *file, guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va); va_end(va); - ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + ucall_arch_do_ucall((gva_t)uc->hva); ucall_free(uc); } @@ -113,7 +113,7 @@ void ucall_fmt(uint64_t cmd, const char *fmt, ...) guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va); va_end(va); - ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + ucall_arch_do_ucall((gva_t)uc->hva); ucall_free(uc); } @@ -135,7 +135,7 @@ void ucall(uint64_t cmd, int nargs, ...) WRITE_ONCE(uc->args[i], va_arg(va, uint64_t)); va_end(va); - ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + ucall_arch_do_ucall((gva_t)uc->hva); ucall_free(uc); } diff --git a/tools/testing/selftests/kvm/lib/x86/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c index 15bc8cd583aa..be8b31572588 100644 --- a/tools/testing/selftests/kvm/lib/x86/hyperv.c +++ b/tools/testing/selftests/kvm/lib/x86/hyperv.c @@ -76,9 +76,9 @@ bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature) } struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, - vm_vaddr_t *p_hv_pages_gva) + gva_t *p_hv_pages_gva) { - vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm); + gva_t hv_pages_gva = vm_vaddr_alloc_page(vm); struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva); /* Setup of a region of guest memory for the VP Assist page. */ diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c index f53414ba7103..73a82730927d 100644 --- a/tools/testing/selftests/kvm/lib/x86/memstress.c +++ b/tools/testing/selftests/kvm/lib/x86/memstress.c @@ -104,7 +104,7 @@ static void memstress_setup_ept_mappings(struct kvm_vm *vm) void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) { struct kvm_regs regs; - vm_vaddr_t nested_gva; + gva_t nested_gva; int vcpu_id; TEST_REQUIRE(kvm_cpu_has_tdp()); diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 01f0f97d4430..7a01f83cab0b 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -21,7 +21,7 @@ #define KERNEL_DS 0x10 #define KERNEL_TSS 0x18 -vm_vaddr_t exception_handlers; +gva_t exception_handlers; bool host_cpu_is_amd; bool host_cpu_is_intel; bool host_cpu_is_hygon; @@ -618,7 +618,7 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp) segp->present = true; } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { int level = PG_LEVEL_NONE; uint64_t *pte = __vm_get_page_table_entry(vm, &vm->mmu, gva, &level); @@ -633,7 +633,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level)); } -static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp) +static void kvm_seg_set_tss_64bit(gva_t base, struct kvm_segment *segp) { memset(segp, 0, sizeof(*segp)); segp->base = base; @@ -755,7 +755,7 @@ static void vm_init_descriptor_tables(struct kvm_vm *vm) for (i = 0; i < NUM_INTERRUPTS; i++) set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS); - *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; + *(gva_t *)addr_gva2hva(vm, (gva_t)(&exception_handlers)) = vm->handlers; kvm_seg_set_kernel_code_64bit(&seg); kvm_seg_fill_gdt_64bit(vm, &seg); @@ -770,9 +770,9 @@ static void vm_init_descriptor_tables(struct kvm_vm *vm) void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)) { - vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); + gva_t *handlers = (gva_t *)addr_gva2hva(vm, vm->handlers); - handlers[vector] = (vm_vaddr_t)handler; + handlers[vector] = (gva_t)handler; } void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) @@ -825,7 +825,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) { struct kvm_mp_state mp_state; struct kvm_regs regs; - vm_vaddr_t stack_vaddr; + gva_t stack_vaddr; struct kvm_vcpu *vcpu; stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), diff --git a/tools/testing/selftests/kvm/lib/x86/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c index eb20b00112c7..4a3b1a2738a2 100644 --- a/tools/testing/selftests/kvm/lib/x86/svm.c +++ b/tools/testing/selftests/kvm/lib/x86/svm.c @@ -28,9 +28,9 @@ u64 rflags; * Pointer to structure with the addresses of the SVM areas. */ struct svm_test_data * -vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva) +vcpu_alloc_svm(struct kvm_vm *vm, gva_t *p_svm_gva) { - vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm); + gva_t svm_gva = vm_vaddr_alloc_page(vm); struct svm_test_data *svm = addr_gva2hva(vm, svm_gva); svm->vmcb = (void *)vm_vaddr_alloc_page(vm); diff --git a/tools/testing/selftests/kvm/lib/x86/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c index 1265cecc7dd1..1af2a6880cdf 100644 --- a/tools/testing/selftests/kvm/lib/x86/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86/ucall.c @@ -8,7 +8,7 @@ #define UCALL_PIO_PORT ((uint16_t)0x1000) -void ucall_arch_do_ucall(vm_vaddr_t uc) +void ucall_arch_do_ucall(gva_t uc) { /* * FIXME: Revert this hack (the entire commit that added it) once nVMX diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index c87b340362a9..a6bb649e62c3 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -79,9 +79,9 @@ void vm_enable_ept(struct kvm_vm *vm) * Pointer to structure with the addresses of the VMX areas. */ struct vmx_pages * -vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) +vcpu_alloc_vmx(struct kvm_vm *vm, gva_t *p_vmx_gva) { - vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm); + gva_t vmx_gva = vm_vaddr_alloc_page(vm); struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva); /* Setup of a region of guest memory for the vmxon region. */ diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index cec1621ace23..8366c11131ff 100644 --- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -610,7 +610,7 @@ static void test_vm_setup_snapshot_mem(struct kvm_vm *vm, struct kvm_vcpu *vcpu) virt_map(vm, PMU_SNAPSHOT_GPA_BASE, PMU_SNAPSHOT_GPA_BASE, 1); snapshot_gva = (void *)(PMU_SNAPSHOT_GPA_BASE); - snapshot_gpa = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)snapshot_gva); + snapshot_gpa = addr_gva2gpa(vcpu->vm, (gva_t)snapshot_gva); sync_global_to_guest(vcpu->vm, snapshot_gva); sync_global_to_guest(vcpu->vm, snapshot_gpa); } diff --git a/tools/testing/selftests/kvm/s390/memop.c b/tools/testing/selftests/kvm/s390/memop.c index 4374b4cd2a80..0e8dc8e5d8bd 100644 --- a/tools/testing/selftests/kvm/s390/memop.c +++ b/tools/testing/selftests/kvm/s390/memop.c @@ -878,7 +878,7 @@ static void guest_copy_key_fetch_prot_override(void) static void test_copy_key_fetch_prot_override(void) { struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); - vm_vaddr_t guest_0_page, guest_last_page; + gva_t guest_0_page, guest_last_page; guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); @@ -917,7 +917,7 @@ out: static void test_errors_key_fetch_prot_override_not_enabled(void) { struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); - vm_vaddr_t guest_0_page, guest_last_page; + gva_t guest_0_page, guest_last_page; guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); @@ -938,7 +938,7 @@ out: static void test_errors_key_fetch_prot_override_enabled(void) { struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); - vm_vaddr_t guest_0_page, guest_last_page; + gva_t guest_0_page, guest_last_page; guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); diff --git a/tools/testing/selftests/kvm/s390/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c index 12d5e1cb62e3..fd8e997de693 100644 --- a/tools/testing/selftests/kvm/s390/tprot.c +++ b/tools/testing/selftests/kvm/s390/tprot.c @@ -207,7 +207,7 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; - vm_vaddr_t guest_0_page; + gva_t guest_0_page; ksft_print_header(); ksft_set_plan(STAGE_END); @@ -216,7 +216,7 @@ int main(int argc, char *argv[]) run = vcpu->run; HOST_SYNC(vcpu, STAGE_INIT_SIMPLE); - mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ); + mprotect(addr_gva2hva(vm, (gva_t)pages), PAGE_SIZE * 2, PROT_READ); HOST_SYNC(vcpu, TEST_SIMPLE); guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0); @@ -229,7 +229,7 @@ int main(int argc, char *argv[]) HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE); } if (guest_0_page == 0) - mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ); + mprotect(addr_gva2hva(vm, (gva_t)0), PAGE_SIZE, PROT_READ); run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; run->kvm_dirty_regs = KVM_SYNC_CRS; HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE); diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index efe56a10d13e..d2a513ec7dd5 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -309,7 +309,7 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) { /* ST_GPA_BASE is identity mapped */ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); - st_gpa[i] = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)st_gva[i]); + st_gpa[i] = addr_gva2gpa(vcpu->vm, (gva_t)st_gva[i]); sync_global_to_guest(vcpu->vm, st_gva[i]); sync_global_to_guest(vcpu->vm, st_gpa[i]); } diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c index 37b166260ee3..6f934732c014 100644 --- a/tools/testing/selftests/kvm/x86/amx_test.c +++ b/tools/testing/selftests/kvm/x86/amx_test.c @@ -236,7 +236,7 @@ int main(int argc, char *argv[]) struct kvm_x86_state *state; struct kvm_x86_state *tile_state = NULL; int xsave_restore_size; - vm_vaddr_t amx_cfg, tiledata, xstate; + gva_t amx_cfg, tiledata, xstate; struct ucall uc; int ret; diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c index 8b15a13df939..2b547fc93ba8 100644 --- a/tools/testing/selftests/kvm/x86/aperfmperf_test.c +++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c @@ -123,7 +123,7 @@ int main(int argc, char *argv[]) { const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX); uint64_t host_aperf_before, host_mperf_before; - vm_vaddr_t nested_test_data_gva; + gva_t nested_test_data_gva; struct kvm_vcpu *vcpu; struct kvm_vm *vm; int msr_fd, cpu, i; diff --git a/tools/testing/selftests/kvm/x86/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c index f9ed14996977..3c45249a42c4 100644 --- a/tools/testing/selftests/kvm/x86/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/cpuid_test.c @@ -140,10 +140,10 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage) } } -struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid) +struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, gva_t *p_gva, struct kvm_cpuid2 *cpuid) { int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]); - vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR); + gva_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR); struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva); memcpy(guest_cpuids, cpuid, size); @@ -217,7 +217,7 @@ static void test_get_cpuid2(struct kvm_vcpu *vcpu) int main(void) { struct kvm_vcpu *vcpu; - vm_vaddr_t cpuid_gva; + gva_t cpuid_gva; struct kvm_vm *vm; int stage; diff --git a/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c index af7c90103396..62cfde273f71 100644 --- a/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c +++ b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c @@ -73,7 +73,7 @@ static void guest_code(struct vmx_pages *vmx_pages, int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0; + gva_t vmx_pages_gva = 0, hv_pages_gva = 0; struct hyperv_test_pages *hv; struct hv_enlightened_vmcs *evmcs; struct kvm_vcpu *vcpu; diff --git a/tools/testing/selftests/kvm/x86/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c index e058bc676cd6..b68844924dc5 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86/hyperv_clock.c @@ -208,7 +208,7 @@ int main(void) struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - vm_vaddr_t tsc_page_gva; + gva_t tsc_page_gva; int stage; TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME)); diff --git a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c index 74cf19661309..c2de5ac799ee 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c @@ -76,7 +76,7 @@ void l2_guest_code(void) } void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages, - vm_vaddr_t hv_hcall_page_gpa) + gva_t hv_hcall_page_gpa) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; @@ -231,8 +231,8 @@ static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm, int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0; - vm_vaddr_t hcall_page; + gva_t vmx_pages_gva = 0, hv_pages_gva = 0; + gva_t hcall_page; struct kvm_vcpu *vcpu; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c index 949e08e98f31..7762c168bbf3 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c +++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c @@ -16,7 +16,7 @@ #define EXT_CAPABILITIES 0xbull static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa, - vm_vaddr_t out_pg_gva) + gva_t out_pg_gva) { uint64_t *output_gva; @@ -35,8 +35,8 @@ static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa, int main(void) { - vm_vaddr_t hcall_out_page; - vm_vaddr_t hcall_in_page; + gva_t hcall_out_page; + gva_t hcall_in_page; struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 130b9ce7e5dd..1059fcc460e3 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c @@ -82,7 +82,7 @@ done: GUEST_DONE(); } -static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) +static void guest_hcall(gva_t pgs_gpa, struct hcall_data *hcall) { u64 res, input, output; uint8_t vector; @@ -134,7 +134,7 @@ static void guest_test_msrs_access(void) struct kvm_vm *vm; struct ucall uc; int stage = 0; - vm_vaddr_t msr_gva; + gva_t msr_gva; struct msr_data *msr; bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC); @@ -523,7 +523,7 @@ static void guest_test_hcalls_access(void) struct kvm_vm *vm; struct ucall uc; int stage = 0; - vm_vaddr_t hcall_page, hcall_params; + gva_t hcall_page, hcall_params; struct hcall_data *hcall; while (true) { diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c index ca61836c4e32..7d648219833c 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c @@ -45,13 +45,13 @@ struct hv_send_ipi_ex { struct hv_vpset vp_set; }; -static inline void hv_init(vm_vaddr_t pgs_gpa) +static inline void hv_init(gva_t pgs_gpa) { wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); } -static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa) +static void receiver_code(void *hcall_page, gva_t pgs_gpa) { u32 vcpu_id; @@ -85,7 +85,7 @@ static inline void nop_loop(void) asm volatile("nop"); } -static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) +static void sender_guest_code(void *hcall_page, gva_t pgs_gpa) { struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page; struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page; @@ -243,7 +243,7 @@ int main(int argc, char *argv[]) { struct kvm_vm *vm; struct kvm_vcpu *vcpu[3]; - vm_vaddr_t hcall_page; + gva_t hcall_page; pthread_t threads[2]; int stage = 1, r; struct ucall uc; diff --git a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c index 0ddb63229bcb..e0caf5ea14bd 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c @@ -67,7 +67,7 @@ void l2_guest_code(void) static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm, struct hyperv_test_pages *hv_pages, - vm_vaddr_t pgs_gpa) + gva_t pgs_gpa) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; struct vmcb *vmcb = svm->vmcb; @@ -149,8 +149,8 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm, int main(int argc, char *argv[]) { - vm_vaddr_t nested_gva = 0, hv_pages_gva = 0; - vm_vaddr_t hcall_page; + gva_t nested_gva = 0, hv_pages_gva = 0; + gva_t hcall_page; struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c index c542cc4762b1..7f58a5efe6d5 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c @@ -61,14 +61,14 @@ struct hv_tlb_flush_ex { * - GVAs of the test pages' PTEs */ struct test_data { - vm_vaddr_t hcall_gva; + gva_t hcall_gva; vm_paddr_t hcall_gpa; - vm_vaddr_t test_pages; - vm_vaddr_t test_pages_pte[NTEST_PAGES]; + gva_t test_pages; + gva_t test_pages_pte[NTEST_PAGES]; }; /* 'Worker' vCPU code checking the contents of the test page */ -static void worker_guest_code(vm_vaddr_t test_data) +static void worker_guest_code(gva_t test_data) { struct test_data *data = (struct test_data *)test_data; u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); @@ -196,7 +196,7 @@ static inline void post_test(struct test_data *data, u64 exp1, u64 exp2) #define TESTVAL2 0x0202020202020202 /* Main vCPU doing the test */ -static void sender_guest_code(vm_vaddr_t test_data) +static void sender_guest_code(gva_t test_data) { struct test_data *data = (struct test_data *)test_data; struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva; @@ -581,7 +581,7 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct kvm_vcpu *vcpu[3]; pthread_t threads[2]; - vm_vaddr_t test_data_page, gva; + gva_t test_data_page, gva; vm_paddr_t gpa; uint64_t *pte; struct test_data *data; diff --git a/tools/testing/selftests/kvm/x86/kvm_buslock_test.c b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c index d88500c118eb..52014a3210c8 100644 --- a/tools/testing/selftests/kvm/x86/kvm_buslock_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c @@ -73,7 +73,7 @@ static void guest_code(void *test_data) int main(int argc, char *argv[]) { const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX); - vm_vaddr_t nested_test_data_gva; + gva_t nested_test_data_gva; struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c index 5bc12222d87a..e14f7330302e 100644 --- a/tools/testing/selftests/kvm/x86/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c @@ -135,7 +135,7 @@ static void enter_guest(struct kvm_vcpu *vcpu) int main(void) { struct kvm_vcpu *vcpu; - vm_vaddr_t pvti_gva; + gva_t pvti_gva; vm_paddr_t pvti_gpa; struct kvm_vm *vm; int flags; diff --git a/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c index f001cb836bfa..761fec293408 100644 --- a/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c +++ b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c @@ -67,7 +67,7 @@ static void l1_guest_code(void *data) int main(int argc, char *argv[]) { - vm_vaddr_t guest_gva; + gva_t guest_gva; struct kvm_vcpu *vcpu; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c index 619229bbd693..0e67cce83570 100644 --- a/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c @@ -47,10 +47,10 @@ #define TEST_SYNC_WRITE_FAULT BIT(1) #define TEST_SYNC_NO_FAULT BIT(2) -static void l2_guest_code(vm_vaddr_t base) +static void l2_guest_code(gva_t base) { - vm_vaddr_t page0 = TEST_GUEST_ADDR(base, 0); - vm_vaddr_t page1 = TEST_GUEST_ADDR(base, 1); + gva_t page0 = TEST_GUEST_ADDR(base, 0); + gva_t page1 = TEST_GUEST_ADDR(base, 1); READ_ONCE(*(u64 *)page0); GUEST_SYNC(page0 | TEST_SYNC_READ_FAULT); @@ -143,7 +143,7 @@ static void l1_guest_code(void *data) static void test_handle_ucall_sync(struct kvm_vm *vm, u64 arg, unsigned long *bmap) { - vm_vaddr_t gva = arg & ~(PAGE_SIZE - 1); + gva_t gva = arg & ~(PAGE_SIZE - 1); int page_nr, i; /* @@ -198,7 +198,7 @@ static void test_handle_ucall_sync(struct kvm_vm *vm, u64 arg, static void test_dirty_log(bool nested_tdp) { - vm_vaddr_t nested_gva = 0; + gva_t nested_gva = 0; unsigned long *bmap; struct kvm_vcpu *vcpu; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c index abc824dba04f..d398add21e4c 100644 --- a/tools/testing/selftests/kvm/x86/nested_emulation_test.c +++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c @@ -122,7 +122,7 @@ static void guest_code(void *test_data) int main(int argc, char *argv[]) { - vm_vaddr_t nested_test_data_gva; + gva_t nested_test_data_gva; struct kvm_vcpu *vcpu; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c index 3641a42934ac..646cfb0022b3 100644 --- a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c @@ -216,7 +216,7 @@ static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject) */ int main(int argc, char *argv[]) { - vm_vaddr_t nested_test_data_gva; + gva_t nested_test_data_gva; struct kvm_vcpu_events events; struct kvm_vcpu *vcpu; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c index a6b6da9cf7fe..11fd2467d823 100644 --- a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c +++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c @@ -78,7 +78,7 @@ int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm_vaddr_t guest_gva = 0; + gva_t guest_gva = 0; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || kvm_cpu_has(X86_FEATURE_SVM)); diff --git a/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c index 2839f650e5c9..d9238116d30d 100644 --- a/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c @@ -125,7 +125,7 @@ static void report(int64_t val) int main(int argc, char *argv[]) { - vm_vaddr_t nested_gva; + gva_t nested_gva; struct kvm_vcpu *vcpu; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || diff --git a/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c index 4260c9e4f489..b76f29e1e775 100644 --- a/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c @@ -152,7 +152,7 @@ int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm_vaddr_t guest_gva = 0; + gva_t guest_gva = 0; uint64_t tsc_start, tsc_end; uint64_t tsc_khz; diff --git a/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c b/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c index 71717118d692..85d3f4cc76f3 100644 --- a/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c +++ b/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c @@ -128,7 +128,7 @@ static void l1_guest_code(struct svm_test_data *svm) int main(int argc, char *argv[]) { - vm_vaddr_t nested_gva = 0; + gva_t nested_gva = 0; struct vmcb *test_vmcb[2]; struct kvm_vcpu *vcpu; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index 8bd37a476f15..dcb3aee699b9 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -108,7 +108,7 @@ static void test_sync_vmsa(uint32_t type, uint64_t policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm_vaddr_t gva; + gva_t gva; void *hva; double x87val = M_PI; diff --git a/tools/testing/selftests/kvm/x86/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c index ade8412bf94a..3ede3ed8ae5c 100644 --- a/tools/testing/selftests/kvm/x86/smm_test.c +++ b/tools/testing/selftests/kvm/x86/smm_test.c @@ -113,7 +113,7 @@ static void guest_code(void *arg) int main(int argc, char *argv[]) { - vm_vaddr_t nested_gva = 0; + gva_t nested_gva = 0; struct kvm_vcpu *vcpu; struct kvm_regs regs; diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c index 992a52504a4a..6797da4bd9d9 100644 --- a/tools/testing/selftests/kvm/x86/state_test.c +++ b/tools/testing/selftests/kvm/x86/state_test.c @@ -258,7 +258,7 @@ void check_nested_state(int stage, struct kvm_x86_state *state) int main(int argc, char *argv[]) { uint64_t *xstate_bv, saved_xstate_bv; - vm_vaddr_t nested_gva = 0; + gva_t nested_gva = 0; struct kvm_cpuid2 empty_cpuid = {}; struct kvm_regs regs1, regs2; struct kvm_vcpu *vcpu, *vcpuN; diff --git a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c index 917b6066cfc1..d3cc5e4f7883 100644 --- a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c +++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c @@ -82,7 +82,7 @@ static void l1_guest_code(struct svm_test_data *svm) int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; - vm_vaddr_t svm_gva; + gva_t svm_gva; struct kvm_vm *vm; struct ucall uc; diff --git a/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c b/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c index ff99438824d3..7fbfaa054c95 100644 --- a/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c +++ b/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c @@ -97,9 +97,9 @@ void test_lbrv_nested_state(bool nested_lbrv) { struct kvm_x86_state *state = NULL; struct kvm_vcpu *vcpu; - vm_vaddr_t svm_gva; struct kvm_vm *vm; struct ucall uc; + gva_t svm_gva; pr_info("Testing with nested LBRV %s\n", nested_lbrv ? "enabled" : "disabled"); diff --git a/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c b/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c index a521a9eed061..6a89eaffc657 100644 --- a/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c @@ -38,7 +38,7 @@ int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm_vaddr_t nested_gva = 0; + gva_t nested_gva = 0; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); diff --git a/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c index 00135cbba35e..c6ea3d609a62 100644 --- a/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c @@ -42,7 +42,7 @@ static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt) int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; - vm_vaddr_t svm_gva; + gva_t svm_gva; struct kvm_vm *vm; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c index 4bd1655f9e6d..c739d071d3b3 100644 --- a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c @@ -144,8 +144,8 @@ static void run_test(bool is_nmi) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm_vaddr_t svm_gva; - vm_vaddr_t idt_alt_vm; + gva_t svm_gva; + gva_t idt_alt_vm; struct kvm_guest_debug debug; pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int"); diff --git a/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c b/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c index 569869bed20b..ae8a10913af7 100644 --- a/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c @@ -74,7 +74,7 @@ static u64 unmappable_gpa(struct kvm_vcpu *vcpu) static void test_invalid_vmcb12(struct kvm_vcpu *vcpu) { - vm_vaddr_t nested_gva = 0; + gva_t nested_gva = 0; struct ucall uc; @@ -90,7 +90,7 @@ static void test_invalid_vmcb12(struct kvm_vcpu *vcpu) static void test_unmappable_vmcb12(struct kvm_vcpu *vcpu) { - vm_vaddr_t nested_gva = 0; + gva_t nested_gva = 0; vcpu_alloc_svm(vcpu->vm, &nested_gva); vcpu_args_set(vcpu, 2, nested_gva, unmappable_gpa(vcpu)); @@ -103,7 +103,7 @@ static void test_unmappable_vmcb12(struct kvm_vcpu *vcpu) static void test_unmappable_vmcb12_vmexit(struct kvm_vcpu *vcpu) { struct kvm_x86_state *state; - vm_vaddr_t nested_gva = 0; + gva_t nested_gva = 0; struct ucall uc; /* diff --git a/tools/testing/selftests/kvm/x86/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c index 8a62cca28cfb..b1887242f3b8 100644 --- a/tools/testing/selftests/kvm/x86/svm_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c @@ -36,7 +36,7 @@ static void l1_guest_code(struct svm_test_data *svm) int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; - vm_vaddr_t svm_gva; + gva_t svm_gva; struct kvm_vm *vm; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); diff --git a/tools/testing/selftests/kvm/x86/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c index 56306a19144a..f1c488e0d497 100644 --- a/tools/testing/selftests/kvm/x86/triple_fault_event_test.c +++ b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c @@ -72,13 +72,13 @@ int main(void) if (has_vmx) { - vm_vaddr_t vmx_pages_gva; + gva_t vmx_pages_gva; vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx); vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vcpu, 1, vmx_pages_gva); } else { - vm_vaddr_t svm_gva; + gva_t svm_gva; vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm); vcpu_alloc_svm(vm, &svm_gva); diff --git a/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c index a81a24761aac..dc5c3d1db346 100644 --- a/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c @@ -72,7 +72,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa) int main(int argc, char *argv[]) { unsigned long apic_access_addr = ~0ul; - vm_vaddr_t vmx_pages_gva; + gva_t vmx_pages_gva; unsigned long high_gpa; struct vmx_pages *vmx; bool done = false; diff --git a/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c index 337c53fddeff..7f84cc92feaf 100644 --- a/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c @@ -110,7 +110,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva; + gva_t vmx_pages_gva; struct vmx_pages *vmx; struct kvm_vcpu *vcpu; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c index a100ee5f0009..a2eaceed9ad5 100644 --- a/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c +++ b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c @@ -52,7 +52,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva; + gva_t vmx_pages_gva; struct kvm_sregs sregs; struct kvm_vcpu *vcpu; struct kvm_run *run; diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c index 915c42001dba..4ffa11a6bcd8 100644 --- a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c @@ -73,7 +73,7 @@ void guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva = 0; + gva_t vmx_pages_gva = 0; struct kvm_vm *vm; struct kvm_vcpu *vcpu; struct kvm_x86_state *state; diff --git a/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c index 00dd2ac07a61..1b7b6ba23de7 100644 --- a/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c @@ -152,7 +152,7 @@ void guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva = 0; + gva_t vmx_pages_gva = 0; struct kvm_regs regs1, regs2; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c index ae4a4b6c05ca..0b10dfbfa3ea 100644 --- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c @@ -393,7 +393,7 @@ int main(int argc, char *argv[]) int run_secs = 0; int delay_usecs = 0; struct test_data_page *data; - vm_vaddr_t test_data_page_vaddr; + gva_t test_data_page_vaddr; bool migrate = false; pthread_t threads[2]; struct thread_params params[2]; -- cgit v1.2.3 From 97dcda3fdce5f4f0d689a097f1ff13e1f76f8f49 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 20 Apr 2026 14:19:47 -0700 Subject: KVM: selftests: Use gpa_t instead of vm_paddr_t Replace all occurrences of vm_paddr_t with gpa_t to align with KVM code and with the conversion helpers (e.g. addr_hva2gpa()). This commit was generated with the following command: git ls-files tools/testing/selftests/kvm | xargs sed -i 's/vm_paddr_/gpa_/g' Then by manually adjusting whitespace to make checkpatch.pl happy. No functional change intended. Signed-off-by: David Matlack [sean: drop bogus changelog blurb about renaming functions] Link: https://patch.msgid.link/20260420212004.3938325-3-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/arm64/sea_to_user.c | 4 +-- .../testing/selftests/kvm/arm64/vgic_lpi_stress.c | 20 +++++++------- tools/testing/selftests/kvm/dirty_log_test.c | 2 +- tools/testing/selftests/kvm/include/arm64/gic.h | 4 +-- .../selftests/kvm/include/arm64/gic_v3_its.h | 7 +++-- tools/testing/selftests/kvm/include/kvm_util.h | 31 +++++++++++----------- .../testing/selftests/kvm/include/kvm_util_types.h | 2 +- tools/testing/selftests/kvm/include/riscv/ucall.h | 2 +- tools/testing/selftests/kvm/include/s390/ucall.h | 2 +- tools/testing/selftests/kvm/include/ucall_common.h | 4 +-- tools/testing/selftests/kvm/include/x86/sev.h | 4 +-- tools/testing/selftests/kvm/include/x86/ucall.h | 2 +- tools/testing/selftests/kvm/kvm_page_table_test.c | 2 +- tools/testing/selftests/kvm/lib/arm64/gic_v3.c | 4 +-- tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c | 11 ++++---- tools/testing/selftests/kvm/lib/arm64/processor.c | 2 +- tools/testing/selftests/kvm/lib/arm64/ucall.c | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 27 +++++++++---------- .../selftests/kvm/lib/loongarch/processor.c | 10 +++---- tools/testing/selftests/kvm/lib/loongarch/ucall.c | 2 +- tools/testing/selftests/kvm/lib/memstress.c | 2 +- tools/testing/selftests/kvm/lib/riscv/processor.c | 2 +- tools/testing/selftests/kvm/lib/s390/processor.c | 4 +-- tools/testing/selftests/kvm/lib/ucall_common.c | 2 +- tools/testing/selftests/kvm/lib/x86/processor.c | 2 +- tools/testing/selftests/kvm/lib/x86/sev.c | 2 +- tools/testing/selftests/kvm/riscv/sbi_pmu_test.c | 4 +-- tools/testing/selftests/kvm/s390/irq_routing.c | 2 +- tools/testing/selftests/kvm/s390/ucontrol_test.c | 2 +- tools/testing/selftests/kvm/steal_time.c | 4 +-- tools/testing/selftests/kvm/x86/hyperv_clock.c | 2 +- .../selftests/kvm/x86/hyperv_extended_hypercalls.c | 2 +- tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c | 8 +++--- tools/testing/selftests/kvm/x86/kvm_clock_test.c | 4 +-- .../selftests/kvm/x86/vmx_nested_la57_state_test.c | 2 +- 35 files changed, 92 insertions(+), 96 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c index 573dd790aeb8..f41987dc726a 100644 --- a/tools/testing/selftests/kvm/arm64/sea_to_user.c +++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c @@ -51,7 +51,7 @@ #define EINJ_OFFSET 0x01234badUL #define EINJ_GVA ((START_GVA) + (EINJ_OFFSET)) -static vm_paddr_t einj_gpa; +static gpa_t einj_gpa; static void *einj_hva; static uint64_t einj_hpa; static bool far_invalid; @@ -254,7 +254,7 @@ static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu) size_t guest_page_size; size_t alignment; uint64_t num_guest_pages; - vm_paddr_t start_gpa; + gpa_t start_gpa; enum vm_mem_backing_src_type src_type = VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c index e857a605f577..d64d434d3f06 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c @@ -23,7 +23,7 @@ #define GIC_LPI_OFFSET 8192 static size_t nr_iterations = 1000; -static vm_paddr_t gpa_base; +static gpa_t gpa_base; static struct kvm_vm *vm; static struct kvm_vcpu **vcpus; @@ -35,14 +35,14 @@ static struct test_data { u32 nr_devices; u32 nr_event_ids; - vm_paddr_t device_table; - vm_paddr_t collection_table; - vm_paddr_t cmdq_base; + gpa_t device_table; + gpa_t collection_table; + gpa_t cmdq_base; void *cmdq_base_va; - vm_paddr_t itt_tables; + gpa_t itt_tables; - vm_paddr_t lpi_prop_table; - vm_paddr_t lpi_pend_tables; + gpa_t lpi_prop_table; + gpa_t lpi_pend_tables; } test_data = { .nr_cpus = 1, .nr_devices = 1, @@ -73,7 +73,7 @@ static void guest_setup_its_mappings(void) /* Round-robin the LPIs to all of the vCPUs in the VM */ coll_id = 0; for (device_id = 0; device_id < nr_devices; device_id++) { - vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K); + gpa_t itt_base = test_data.itt_tables + (device_id * SZ_64K); its_send_mapd_cmd(test_data.cmdq_base_va, device_id, itt_base, SZ_64K, true); @@ -188,7 +188,7 @@ static void setup_test_data(void) size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K); u32 nr_devices = test_data.nr_devices; u32 nr_cpus = test_data.nr_cpus; - vm_paddr_t cmdq_base; + gpa_t cmdq_base; test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base, @@ -224,7 +224,7 @@ static void setup_gic(void) static void signal_lpi(u32 device_id, u32 event_id) { - vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER; + gpa_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER; struct kvm_msi msi = { .address_lo = db_addr, diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 7627b328f18a..9b6b9a597175 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -667,7 +667,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages); /* Cache the HVA pointer of the region */ - host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); + host_test_mem = addr_gpa2hva(vm, (gpa_t)guest_test_phys_mem); /* Export the shared variables to the guest */ sync_global_to_guest(vm, host_page_size); diff --git a/tools/testing/selftests/kvm/include/arm64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h index cc7a7f34ed37..6408f952cb64 100644 --- a/tools/testing/selftests/kvm/include/arm64/gic.h +++ b/tools/testing/selftests/kvm/include/arm64/gic.h @@ -59,7 +59,7 @@ bool gic_irq_get_pending(unsigned int intid); void gic_irq_set_config(unsigned int intid, bool is_edge); void gic_irq_set_group(unsigned int intid, bool group); -void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, - vm_paddr_t pend_table); +void gic_rdist_enable_lpis(gpa_t cfg_table, size_t cfg_table_size, + gpa_t pend_table); #endif /* SELFTEST_KVM_GIC_H */ diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h index 58feef3eb386..a43a407e2d5c 100644 --- a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h +++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h @@ -5,11 +5,10 @@ #include -void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, - vm_paddr_t device_tbl, size_t device_tbl_sz, - vm_paddr_t cmdq, size_t cmdq_size); +void its_init(gpa_t coll_tbl, size_t coll_tbl_sz, gpa_t device_tbl, + size_t device_tbl_sz, gpa_t cmdq, size_t cmdq_size); -void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, +void its_send_mapd_cmd(void *cmdq_base, u32 device_id, gpa_t itt_base, size_t itt_size, bool valid); void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid); void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 2378dd42c988..9f602c73fbb4 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -111,7 +111,7 @@ struct kvm_vm { struct sparsebit *vpages_valid; struct sparsebit *vpages_mapped; bool has_irqchip; - vm_paddr_t ucall_mmio_addr; + gpa_t ucall_mmio_addr; gva_t handlers; uint32_t dirty_ring_size; uint64_t gpa_tag_mask; @@ -728,16 +728,16 @@ gva_t vm_vaddr_alloc_page(struct kvm_vm *vm); void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, unsigned int npages); -void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); +void *addr_gpa2hva(struct kvm_vm *vm, gpa_t gpa); void *addr_gva2hva(struct kvm_vm *vm, gva_t gva); -vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); -void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa); +gpa_t addr_hva2gpa(struct kvm_vm *vm, void *hva); +void *addr_gpa2alias(struct kvm_vm *vm, gpa_t gpa); #ifndef vcpu_arch_put_guest #define vcpu_arch_put_guest(mem, val) do { (mem) = (val); } while (0) #endif -static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa) +static inline gpa_t vm_untag_gpa(struct kvm_vm *vm, gpa_t gpa) { return gpa & ~vm->gpa_tag_mask; } @@ -988,15 +988,14 @@ void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); const char *exit_reason_str(unsigned int exit_reason); -vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, - uint32_t memslot); -vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - vm_paddr_t paddr_min, uint32_t memslot, - bool protected); -vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); +gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t paddr_min, uint32_t memslot); +gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + gpa_t paddr_min, uint32_t memslot, + bool protected); +gpa_t vm_alloc_page_table(struct kvm_vm *vm); -static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - vm_paddr_t paddr_min, uint32_t memslot) +static inline gpa_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + gpa_t paddr_min, uint32_t memslot) { /* * By default, allocate memory as protected for VMs that support @@ -1240,9 +1239,9 @@ static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr * Returns the VM physical address of the translated VM virtual * address given by @gva. */ -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva); +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva); -static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, gva_t gva) +static inline gpa_t addr_gva2gpa(struct kvm_vm *vm, gva_t gva) { return addr_arch_gva2gpa(vm, gva); } @@ -1291,7 +1290,7 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus); void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm); void kvm_arch_vm_release(struct kvm_vm *vm); -bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr); +bool vm_is_gpa_protected(struct kvm_vm *vm, gpa_t paddr); uint32_t guest_get_vcpuid(void); diff --git a/tools/testing/selftests/kvm/include/kvm_util_types.h b/tools/testing/selftests/kvm/include/kvm_util_types.h index f27bd035ea10..1d9eedb4885e 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_types.h +++ b/tools/testing/selftests/kvm/include/kvm_util_types.h @@ -14,7 +14,7 @@ #define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg) #define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr) -typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ +typedef uint64_t gpa_t; /* Virtual Machine (Guest) physical address */ typedef uint64_t gva_t; /* Virtual Machine (Guest) virtual address */ #define INVALID_GPA (~(uint64_t)0) diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h index 41d56254968e..2de7c6a36096 100644 --- a/tools/testing/selftests/kvm/include/riscv/ucall.h +++ b/tools/testing/selftests/kvm/include/riscv/ucall.h @@ -7,7 +7,7 @@ #define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI -static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +static inline void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa) { } diff --git a/tools/testing/selftests/kvm/include/s390/ucall.h b/tools/testing/selftests/kvm/include/s390/ucall.h index befee84c4609..3907d629304f 100644 --- a/tools/testing/selftests/kvm/include/s390/ucall.h +++ b/tools/testing/selftests/kvm/include/s390/ucall.h @@ -6,7 +6,7 @@ #define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC -static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +static inline void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa) { } diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index e5499f170834..1db399c00d02 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -29,7 +29,7 @@ struct ucall { struct ucall *hva; }; -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); +void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa); void ucall_arch_do_ucall(gva_t uc); void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); @@ -39,7 +39,7 @@ __printf(5, 6) void ucall_assert(uint64_t cmd, const char *exp, const char *file, unsigned int line, const char *fmt, ...); uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); -void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); +void ucall_init(struct kvm_vm *vm, gpa_t mmio_gpa); int ucall_nr_pages_required(uint64_t page_size); /* diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h index 008b4169f5e2..289ff5b3f10c 100644 --- a/tools/testing/selftests/kvm/include/x86/sev.h +++ b/tools/testing/selftests/kvm/include/x86/sev.h @@ -120,7 +120,7 @@ static inline void sev_register_encrypted_memory(struct kvm_vm *vm, vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range); } -static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, +static inline void sev_launch_update_data(struct kvm_vm *vm, gpa_t gpa, uint64_t size) { struct kvm_sev_launch_update_data update_data = { @@ -131,7 +131,7 @@ static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data); } -static inline void snp_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, +static inline void snp_launch_update_data(struct kvm_vm *vm, gpa_t gpa, uint64_t hva, uint64_t size, uint8_t type) { struct kvm_sev_snp_launch_update update_data = { diff --git a/tools/testing/selftests/kvm/include/x86/ucall.h b/tools/testing/selftests/kvm/include/x86/ucall.h index d3825dcc3cd9..0e4950041e3e 100644 --- a/tools/testing/selftests/kvm/include/x86/ucall.h +++ b/tools/testing/selftests/kvm/include/x86/ucall.h @@ -6,7 +6,7 @@ #define UCALL_EXIT_REASON KVM_EXIT_IO -static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +static inline void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa) { } diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index 61915fc89c17..e8a60d5ccbe6 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -281,7 +281,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages); /* Cache the HVA pointer of the region */ - host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); + host_test_mem = addr_gpa2hva(vm, (gpa_t)guest_test_phys_mem); /* Export shared structure test_args to guest */ sync_global_to_guest(vm, test_args); diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c index 50754a27f493..ae3959f3bb11 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c @@ -424,8 +424,8 @@ const struct gic_common_ops gicv3_ops = { .gic_irq_set_group = gicv3_set_group, }; -void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, - vm_paddr_t pend_table) +void gic_rdist_enable_lpis(gpa_t cfg_table, size_t cfg_table_size, + gpa_t pend_table) { volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid()); diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c index 7f9fdcf42ae6..1188b578121d 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c @@ -54,7 +54,7 @@ static unsigned long its_find_baser(unsigned int type) return -1; } -static void its_install_table(unsigned int type, vm_paddr_t base, size_t size) +static void its_install_table(unsigned int type, gpa_t base, size_t size) { unsigned long offset = its_find_baser(type); u64 baser; @@ -69,7 +69,7 @@ static void its_install_table(unsigned int type, vm_paddr_t base, size_t size) its_write_u64(offset, baser); } -static void its_install_cmdq(vm_paddr_t base, size_t size) +static void its_install_cmdq(gpa_t base, size_t size) { u64 cbaser; @@ -82,9 +82,8 @@ static void its_install_cmdq(vm_paddr_t base, size_t size) its_write_u64(GITS_CBASER, cbaser); } -void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, - vm_paddr_t device_tbl, size_t device_tbl_sz, - vm_paddr_t cmdq, size_t cmdq_size) +void its_init(gpa_t coll_tbl, size_t coll_tbl_sz, gpa_t device_tbl, + size_t device_tbl_sz, gpa_t cmdq, size_t cmdq_size) { u32 ctlr; @@ -204,7 +203,7 @@ static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) } } -void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, +void its_send_mapd_cmd(void *cmdq_base, u32 device_id, gpa_t itt_base, size_t itt_size, bool valid) { struct its_cmd_block cmd = {}; diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 3645acae09ce..0e8603788134 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -230,7 +230,7 @@ uint64_t *virt_get_pte_hva(struct kvm_vm *vm, gva_t gva) return virt_get_pte_hva_at_level(vm, gva, 3); } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { uint64_t *ptep = virt_get_pte_hva(vm, gva); diff --git a/tools/testing/selftests/kvm/lib/arm64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c index 9ea747982d00..5f85fa7a9449 100644 --- a/tools/testing/selftests/kvm/lib/arm64/ucall.c +++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c @@ -8,7 +8,7 @@ gva_t *ucall_exit_mmio_addr; -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa) { gva_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 04a59603e93e..89c4e6f01739 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1457,9 +1457,9 @@ static gva_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); virt_pgd_alloc(vm); - vm_paddr_t paddr = __vm_phy_pages_alloc(vm, pages, - KVM_UTIL_MIN_PFN * vm->page_size, - vm->memslots[type], protected); + gpa_t paddr = __vm_phy_pages_alloc(vm, pages, + KVM_UTIL_MIN_PFN * vm->page_size, + vm->memslots[type], protected); /* * Find an unused range of virtual page addresses of at least @@ -1607,7 +1607,7 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, * address providing the memory to the vm physical address is returned. * A TEST_ASSERT failure occurs if no region containing gpa exists. */ -void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) +void *addr_gpa2hva(struct kvm_vm *vm, gpa_t gpa) { struct userspace_mem_region *region; @@ -1640,7 +1640,7 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) * VM physical address is returned. A TEST_ASSERT failure occurs if no * region containing hva exists. */ -vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) +gpa_t addr_hva2gpa(struct kvm_vm *vm, void *hva) { struct rb_node *node; @@ -1651,7 +1651,7 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) if (hva >= region->host_mem) { if (hva <= (region->host_mem + region->region.memory_size - 1)) - return (vm_paddr_t)((uintptr_t) + return (gpa_t)((uintptr_t) region->region.guest_phys_addr + (hva - (uintptr_t)region->host_mem)); @@ -1683,7 +1683,7 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) * memory without mapping said memory in the guest's address space. And, for * userfaultfd-based demand paging, to do so without triggering userfaults. */ -void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) +void *addr_gpa2alias(struct kvm_vm *vm, gpa_t gpa) { struct userspace_mem_region *region; uintptr_t offset; @@ -2087,9 +2087,9 @@ const char *exit_reason_str(unsigned int exit_reason) * and their base address is returned. A TEST_ASSERT failure occurs if * not enough pages are available at or above paddr_min. */ -vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - vm_paddr_t paddr_min, uint32_t memslot, - bool protected) +gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + gpa_t paddr_min, uint32_t memslot, + bool protected) { struct userspace_mem_region *region; sparsebit_idx_t pg, base; @@ -2133,13 +2133,12 @@ vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, return base * vm->page_size; } -vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, - uint32_t memslot) +gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t paddr_min, uint32_t memslot) { return vm_phy_pages_alloc(vm, 1, paddr_min, memslot); } -vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) +gpa_t vm_alloc_page_table(struct kvm_vm *vm) { return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, vm->memslots[MEM_REGION_PT]); @@ -2353,7 +2352,7 @@ void __attribute((constructor)) kvm_selftest_init(void) kvm_selftest_arch_init(); } -bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr) +bool vm_is_gpa_protected(struct kvm_vm *vm, gpa_t paddr) { sparsebit_idx_t pg = 0; struct userspace_mem_region *region; diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c index 3b67720fbbe1..28a384e9704f 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/processor.c +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -12,7 +12,7 @@ #define LOONGARCH_PAGE_TABLE_PHYS_MIN 0x200000 #define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000 -static vm_paddr_t invalid_pgtable[4]; +static gpa_t invalid_pgtable[4]; static gva_t exception_handlers; static uint64_t virt_pte_index(struct kvm_vm *vm, gva_t gva, int level) @@ -35,7 +35,7 @@ static uint64_t ptrs_per_pte(struct kvm_vm *vm) return 1 << (vm->page_shift - 3); } -static void virt_set_pgtable(struct kvm_vm *vm, vm_paddr_t table, vm_paddr_t child) +static void virt_set_pgtable(struct kvm_vm *vm, gpa_t table, gpa_t child) { uint64_t *ptep; int i, ptrs_per_pte; @@ -49,7 +49,7 @@ static void virt_set_pgtable(struct kvm_vm *vm, vm_paddr_t table, vm_paddr_t chi void virt_arch_pgd_alloc(struct kvm_vm *vm) { int i; - vm_paddr_t child, table; + gpa_t child, table; if (vm->mmu.pgd_created) return; @@ -76,7 +76,7 @@ static uint64_t *virt_populate_pte(struct kvm_vm *vm, gva_t gva, int alloc) { int level; uint64_t *ptep; - vm_paddr_t child; + gpa_t child; if (!vm->mmu.pgd_created) goto unmapped_gva; @@ -106,7 +106,7 @@ unmapped_gva: exit(EXIT_FAILURE); } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { uint64_t *ptep; diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c index a5aa568f437b..2c8abe9f5382 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/ucall.c +++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c @@ -11,7 +11,7 @@ */ gva_t *ucall_exit_mmio_addr; -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa) { gva_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index 1ea735d66e15..b7bfeade85f7 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -203,7 +203,7 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, /* Add extra memory slots for testing */ for (i = 0; i < slots; i++) { uint64_t region_pages = guest_num_pages / slots; - vm_paddr_t region_start = args->gpa + region_pages * args->guest_page_size * i; + gpa_t region_start = args->gpa + region_pages * args->guest_page_size * i; vm_userspace_mem_region_add(vm, backing_src, region_start, MEMSTRESS_MEM_SLOT_INDEX + i, diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 552628dda4a0..25749439fdbf 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -119,7 +119,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK; } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { uint64_t *ptep; int level = vm->mmu.pgtable_levels - 1; diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index e8d3c1d333d5..153cef5c2328 100644 --- a/tools/testing/selftests/kvm/lib/s390/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -12,7 +12,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) { - vm_paddr_t paddr; + gpa_t paddr; TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); @@ -86,7 +86,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) entry[idx] = gpa; } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { int ri, idx; uint64_t *entry; diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index 997444178c78..9afcae844d72 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -25,7 +25,7 @@ int ucall_nr_pages_required(uint64_t page_size) */ static struct ucall_header *ucall_pool; -void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +void ucall_init(struct kvm_vm *vm, gpa_t mmio_gpa) { struct ucall_header *hdr; struct ucall *uc; diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 7a01f83cab0b..d1de157fedff 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -618,7 +618,7 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp) segp->present = true; } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { int level = PG_LEVEL_NONE; uint64_t *pte = __vm_get_page_table_entry(vm, &vm->mmu, gva, &level); diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c index c3a9838f4806..aecef6048ff1 100644 --- a/tools/testing/selftests/kvm/lib/x86/sev.c +++ b/tools/testing/selftests/kvm/lib/x86/sev.c @@ -18,7 +18,7 @@ static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *regio uint8_t page_type, bool private) { const struct sparsebit *protected_phy_pages = region->protected_phy_pages; - const vm_paddr_t gpa_base = region->region.guest_phys_addr; + const gpa_t gpa_base = region->region.guest_phys_addr; const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift; sparsebit_idx_t i, j; diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index 8366c11131ff..207dc5cd36f0 100644 --- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -24,7 +24,7 @@ union sbi_pmu_ctr_info ctrinfo_arr[RISCV_MAX_PMU_COUNTERS]; /* Snapshot shared memory data */ #define PMU_SNAPSHOT_GPA_BASE BIT(30) static void *snapshot_gva; -static vm_paddr_t snapshot_gpa; +static gpa_t snapshot_gpa; static int vcpu_shared_irq_count; static int counter_in_use; @@ -259,7 +259,7 @@ static inline void verify_sbi_requirement_assert(void) __GUEST_ASSERT(0, "SBI implementation version doesn't support PMU Snapshot"); } -static void snapshot_set_shmem(vm_paddr_t gpa, unsigned long flags) +static void snapshot_set_shmem(gpa_t gpa, unsigned long flags) { unsigned long lo = (unsigned long)gpa; #if __riscv_xlen == 32 diff --git a/tools/testing/selftests/kvm/s390/irq_routing.c b/tools/testing/selftests/kvm/s390/irq_routing.c index 7819a0af19a8..f3839284ac08 100644 --- a/tools/testing/selftests/kvm/s390/irq_routing.c +++ b/tools/testing/selftests/kvm/s390/irq_routing.c @@ -27,7 +27,7 @@ static void test(void) struct kvm_irq_routing *routing; struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm_paddr_t mem; + gpa_t mem; int ret; struct kvm_irq_routing_entry ue = { diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c index 50bc1c38225a..f773ba0f4641 100644 --- a/tools/testing/selftests/kvm/s390/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -111,7 +111,7 @@ FIXTURE(uc_kvm) uintptr_t base_hva; uintptr_t code_hva; int kvm_run_size; - vm_paddr_t pgd; + gpa_t pgd; void *vm_mem; int vcpu_fd; int kvm_fd; diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index d2a513ec7dd5..f461eb7a0f6e 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -239,7 +239,7 @@ static void check_steal_time_uapi(void) /* SBI STA shmem must have 64-byte alignment */ #define STEAL_TIME_SIZE ((sizeof(struct sta_struct) + 63) & ~63) -static vm_paddr_t st_gpa[NR_VCPUS]; +static gpa_t st_gpa[NR_VCPUS]; struct sta_struct { uint32_t sequence; @@ -249,7 +249,7 @@ struct sta_struct { uint8_t pad[47]; } __packed; -static void sta_set_shmem(vm_paddr_t gpa, unsigned long flags) +static void sta_set_shmem(gpa_t gpa, unsigned long flags) { unsigned long lo = (unsigned long)gpa; #if __riscv_xlen == 32 diff --git a/tools/testing/selftests/kvm/x86/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c index b68844924dc5..6bb1ca11256f 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86/hyperv_clock.c @@ -98,7 +98,7 @@ static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page) GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000); } -static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa) +static void guest_main(struct ms_hyperv_tsc_page *tsc_page, gpa_t tsc_page_gpa) { u64 tsc_scale, tsc_offset; diff --git a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c index 7762c168bbf3..5f561fcda55a 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c +++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c @@ -15,7 +15,7 @@ /* Any value is fine */ #define EXT_CAPABILITIES 0xbull -static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa, +static void guest_code(gpa_t in_pg_gpa, gpa_t out_pg_gpa, gva_t out_pg_gva) { uint64_t *output_gva; diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c index 7f58a5efe6d5..2de01da9d11d 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c @@ -62,7 +62,7 @@ struct hv_tlb_flush_ex { */ struct test_data { gva_t hcall_gva; - vm_paddr_t hcall_gpa; + gpa_t hcall_gpa; gva_t test_pages; gva_t test_pages_pte[NTEST_PAGES]; }; @@ -133,7 +133,7 @@ static void set_expected_val(void *addr, u64 val, int vcpu_id) * Update PTEs swapping two test pages. * TODO: use swap()/xchg() when these are provided. */ -static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2) +static void swap_two_test_pages(gpa_t pte_gva1, gpa_t pte_gva2) { uint64_t tmp = *(uint64_t *)pte_gva1; @@ -201,7 +201,7 @@ static void sender_guest_code(gva_t test_data) struct test_data *data = (struct test_data *)test_data; struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva; struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva; - vm_paddr_t hcall_gpa = data->hcall_gpa; + gpa_t hcall_gpa = data->hcall_gpa; int i, stage = 1; wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); @@ -582,7 +582,7 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu[3]; pthread_t threads[2]; gva_t test_data_page, gva; - vm_paddr_t gpa; + gpa_t gpa; uint64_t *pte; struct test_data *data; struct ucall uc; diff --git a/tools/testing/selftests/kvm/x86/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c index e14f7330302e..5721e035e38c 100644 --- a/tools/testing/selftests/kvm/x86/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c @@ -31,7 +31,7 @@ static struct test_case test_cases[] = { #define GUEST_SYNC_CLOCK(__stage, __val) \ GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0) -static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti) +static void guest_main(gpa_t pvti_pa, struct pvclock_vcpu_time_info *pvti) { int i; @@ -136,7 +136,7 @@ int main(void) { struct kvm_vcpu *vcpu; gva_t pvti_gva; - vm_paddr_t pvti_gpa; + gpa_t pvti_gpa; struct kvm_vm *vm; int flags; diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c index 4ffa11a6bcd8..f13dee317383 100644 --- a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c @@ -30,7 +30,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; u64 guest_cr4; - vm_paddr_t pml5_pa, pml4_pa; + gpa_t pml5_pa, pml4_pa; u64 *pml5; u64 exit_reason; -- cgit v1.2.3 From 6d3494255ac0180d3047ea632367718e0625bd2c Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 20 Apr 2026 14:19:48 -0700 Subject: KVM: selftests: Use gpa_t for GPAs in Hyper-V selftests Fix various Hyper-V selftests to use gpa_t for variables that contain guest physical addresses, rather than gva_t. In practice, the bugs are benign as both gva_t and gpa_t are u64 typedefs, i.e. gpa_t and gva_t are interchangeable from a functional perspective, the code is just confusing. No functional change intended. Signed-off-by: David Matlack [sean: call out that both are u64 typedefs] Link: https://patch.msgid.link/20260420212004.3938325-4-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/hyperv_evmcs.c | 2 +- tools/testing/selftests/kvm/x86/hyperv_features.c | 2 +- tools/testing/selftests/kvm/x86/hyperv_ipi.c | 6 +++--- tools/testing/selftests/kvm/x86/hyperv_svm_test.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c index c2de5ac799ee..2d1733f9303a 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c @@ -76,7 +76,7 @@ void l2_guest_code(void) } void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages, - gva_t hv_hcall_page_gpa) + gpa_t hv_hcall_page_gpa) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 1059fcc460e3..0360fa5915c0 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c @@ -82,7 +82,7 @@ done: GUEST_DONE(); } -static void guest_hcall(gva_t pgs_gpa, struct hcall_data *hcall) +static void guest_hcall(gpa_t pgs_gpa, struct hcall_data *hcall) { u64 res, input, output; uint8_t vector; diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c index 7d648219833c..5369867efac3 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c @@ -45,13 +45,13 @@ struct hv_send_ipi_ex { struct hv_vpset vp_set; }; -static inline void hv_init(gva_t pgs_gpa) +static inline void hv_init(gpa_t pgs_gpa) { wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); } -static void receiver_code(void *hcall_page, gva_t pgs_gpa) +static void receiver_code(void *hcall_page, gpa_t pgs_gpa) { u32 vcpu_id; @@ -85,7 +85,7 @@ static inline void nop_loop(void) asm volatile("nop"); } -static void sender_guest_code(void *hcall_page, gva_t pgs_gpa) +static void sender_guest_code(void *hcall_page, gpa_t pgs_gpa) { struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page; struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page; diff --git a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c index e0caf5ea14bd..54a1a6dad4d5 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c @@ -67,7 +67,7 @@ void l2_guest_code(void) static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm, struct hyperv_test_pages *hv_pages, - gva_t pgs_gpa) + gpa_t pgs_gpa) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; struct vmcb *vmcb = svm->vmcb; -- cgit v1.2.3 From 26f8453288d4c1fb8c96802eae15ddc988f5e068 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 20 Apr 2026 14:19:49 -0700 Subject: KVM: selftests: Use u64 instead of uint64_t Use u64 instead of uint64_t to make the KVM selftests code more concise and more similar to the kernel (since selftests are primarily developed by kernel developers). This commit was generated with the following command: git ls-files tools/testing/selftests/kvm | xargs sed -i 's/uint64_t/u64/g' Then by manually adjusting whitespace to make checkpatch.pl happy. Include in include/kvm_util_types.h, iinclude/test_util.h, and include/x86/pmu.h to pick up the tools-defined u64. Arguably, all headers (especially kvm_util_types.h) should have already been including stdint.h to get uint64_t from the libc headers, but the missing dependency only rears its head once KVM uses u64 instead of uint64_t. No functional change intended. Signed-off-by: David Matlack [sean: rename pread_uint64() => pread_u64, expand on types.h include] Link: https://patch.msgid.link/20260420212004.3938325-5-seanjc@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/access_tracking_perf_test.c | 44 +++--- .../testing/selftests/kvm/arm64/aarch32_id_regs.c | 14 +- tools/testing/selftests/kvm/arm64/arch_timer.c | 2 +- .../selftests/kvm/arm64/arch_timer_edge_cases.c | 119 ++++++++-------- .../testing/selftests/kvm/arm64/debug-exceptions.c | 54 ++++---- tools/testing/selftests/kvm/arm64/hypercalls.c | 18 +-- tools/testing/selftests/kvm/arm64/idreg-idst.c | 4 +- tools/testing/selftests/kvm/arm64/no-vgic.c | 8 +- .../testing/selftests/kvm/arm64/page_fault_test.c | 74 +++++----- tools/testing/selftests/kvm/arm64/psci_test.c | 26 ++-- tools/testing/selftests/kvm/arm64/sea_to_user.c | 26 ++-- tools/testing/selftests/kvm/arm64/set_id_regs.c | 56 ++++---- tools/testing/selftests/kvm/arm64/vgic_init.c | 26 ++-- tools/testing/selftests/kvm/arm64/vgic_irq.c | 36 ++--- tools/testing/selftests/kvm/arm64/vgic_v5.c | 2 +- .../selftests/kvm/arm64/vpmu_counter_access.c | 54 ++++---- tools/testing/selftests/kvm/coalesced_io_test.c | 14 +- tools/testing/selftests/kvm/demand_paging_test.c | 10 +- tools/testing/selftests/kvm/dirty_log_perf_test.c | 12 +- tools/testing/selftests/kvm/dirty_log_test.c | 44 +++--- tools/testing/selftests/kvm/guest_memfd_test.c | 16 +-- tools/testing/selftests/kvm/guest_print_test.c | 14 +- .../selftests/kvm/include/arm64/arch_timer.h | 16 +-- tools/testing/selftests/kvm/include/arm64/delay.h | 4 +- tools/testing/selftests/kvm/include/arm64/gic.h | 2 +- .../selftests/kvm/include/arm64/processor.h | 16 +-- tools/testing/selftests/kvm/include/arm64/vgic.h | 6 +- tools/testing/selftests/kvm/include/kvm_util.h | 152 +++++++++++---------- .../testing/selftests/kvm/include/kvm_util_types.h | 8 +- .../selftests/kvm/include/loongarch/arch_timer.h | 4 +- tools/testing/selftests/kvm/include/memstress.h | 20 +-- .../selftests/kvm/include/riscv/arch_timer.h | 20 +-- .../selftests/kvm/include/riscv/processor.h | 9 +- .../kvm/include/s390/diag318_test_handler.h | 2 +- .../testing/selftests/kvm/include/s390/facility.h | 4 +- tools/testing/selftests/kvm/include/sparsebit.h | 6 +- tools/testing/selftests/kvm/include/test_util.h | 14 +- tools/testing/selftests/kvm/include/timer_test.h | 6 +- tools/testing/selftests/kvm/include/ucall_common.h | 14 +- .../selftests/kvm/include/userfaultfd_util.h | 6 +- tools/testing/selftests/kvm/include/x86/apic.h | 8 +- tools/testing/selftests/kvm/include/x86/evmcs.h | 18 +-- tools/testing/selftests/kvm/include/x86/hyperv.h | 14 +- .../selftests/kvm/include/x86/kvm_util_arch.h | 30 ++-- tools/testing/selftests/kvm/include/x86/pmu.h | 9 +- .../testing/selftests/kvm/include/x86/processor.h | 137 +++++++++---------- tools/testing/selftests/kvm/include/x86/sev.h | 10 +- tools/testing/selftests/kvm/include/x86/smm.h | 3 +- tools/testing/selftests/kvm/include/x86/svm_util.h | 10 +- tools/testing/selftests/kvm/include/x86/vmx.h | 50 +++---- tools/testing/selftests/kvm/kvm_page_table_test.c | 48 +++---- tools/testing/selftests/kvm/lib/arm64/gic.c | 4 +- .../testing/selftests/kvm/lib/arm64/gic_private.h | 4 +- tools/testing/selftests/kvm/lib/arm64/gic_v3.c | 30 ++-- tools/testing/selftests/kvm/lib/arm64/processor.c | 84 ++++++------ tools/testing/selftests/kvm/lib/arm64/ucall.c | 4 +- tools/testing/selftests/kvm/lib/arm64/vgic.c | 18 +-- tools/testing/selftests/kvm/lib/elf.c | 2 +- tools/testing/selftests/kvm/lib/guest_sprintf.c | 10 +- tools/testing/selftests/kvm/lib/kvm_util.c | 88 ++++++------ .../selftests/kvm/lib/loongarch/processor.c | 46 +++---- tools/testing/selftests/kvm/lib/loongarch/ucall.c | 4 +- tools/testing/selftests/kvm/lib/memstress.c | 32 ++--- tools/testing/selftests/kvm/lib/riscv/processor.c | 32 ++--- .../selftests/kvm/lib/s390/diag318_test_handler.c | 12 +- tools/testing/selftests/kvm/lib/s390/facility.c | 2 +- tools/testing/selftests/kvm/lib/s390/processor.c | 22 +-- tools/testing/selftests/kvm/lib/sparsebit.c | 12 +- tools/testing/selftests/kvm/lib/test_util.c | 2 +- tools/testing/selftests/kvm/lib/ucall_common.c | 16 +-- tools/testing/selftests/kvm/lib/userfaultfd_util.c | 12 +- tools/testing/selftests/kvm/lib/x86/apic.c | 2 +- tools/testing/selftests/kvm/lib/x86/hyperv.c | 4 +- tools/testing/selftests/kvm/lib/x86/memstress.c | 12 +- tools/testing/selftests/kvm/lib/x86/pmu.c | 8 +- tools/testing/selftests/kvm/lib/x86/processor.c | 124 +++++++++-------- tools/testing/selftests/kvm/lib/x86/sev.c | 10 +- tools/testing/selftests/kvm/lib/x86/svm.c | 6 +- tools/testing/selftests/kvm/lib/x86/vmx.c | 14 +- tools/testing/selftests/kvm/loongarch/arch_timer.c | 8 +- tools/testing/selftests/kvm/loongarch/pmu_test.c | 4 +- .../kvm/memslot_modification_stress_test.c | 10 +- tools/testing/selftests/kvm/memslot_perf_test.c | 114 ++++++++-------- tools/testing/selftests/kvm/mmu_stress_test.c | 26 ++-- .../testing/selftests/kvm/pre_fault_memory_test.c | 8 +- tools/testing/selftests/kvm/riscv/arch_timer.c | 2 +- tools/testing/selftests/kvm/riscv/ebreak_test.c | 6 +- tools/testing/selftests/kvm/riscv/get-reg-list.c | 4 +- tools/testing/selftests/kvm/riscv/sbi_pmu_test.c | 2 +- tools/testing/selftests/kvm/s390/debug_test.c | 8 +- tools/testing/selftests/kvm/s390/memop.c | 32 ++--- tools/testing/selftests/kvm/s390/resets.c | 4 +- tools/testing/selftests/kvm/s390/tprot.c | 2 +- .../testing/selftests/kvm/set_memory_region_test.c | 30 ++-- tools/testing/selftests/kvm/steal_time.c | 32 ++--- .../selftests/kvm/system_counter_offset_test.c | 12 +- tools/testing/selftests/kvm/x86/amx_test.c | 2 +- tools/testing/selftests/kvm/x86/aperfmperf_test.c | 12 +- .../selftests/kvm/x86/apic_bus_clock_test.c | 12 +- tools/testing/selftests/kvm/x86/debug_regs.c | 2 +- .../kvm/x86/dirty_log_page_splitting_test.c | 16 +-- .../selftests/kvm/x86/evmcs_smm_controls_test.c | 2 +- tools/testing/selftests/kvm/x86/fastops_test.c | 38 +++--- .../testing/selftests/kvm/x86/feature_msrs_test.c | 4 +- .../testing/selftests/kvm/x86/fix_hypercall_test.c | 10 +- tools/testing/selftests/kvm/x86/flds_emulation.h | 4 +- tools/testing/selftests/kvm/x86/hwcr_msr_test.c | 10 +- .../selftests/kvm/x86/hyperv_extended_hypercalls.c | 8 +- tools/testing/selftests/kvm/x86/hyperv_features.c | 6 +- tools/testing/selftests/kvm/x86/hyperv_ipi.c | 2 +- tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c | 8 +- tools/testing/selftests/kvm/x86/kvm_clock_test.c | 4 +- tools/testing/selftests/kvm/x86/kvm_pv_test.c | 6 +- .../testing/selftests/kvm/x86/monitor_mwait_test.c | 2 +- .../selftests/kvm/x86/nested_set_state_test.c | 4 +- .../selftests/kvm/x86/nested_tsc_adjust_test.c | 2 +- .../selftests/kvm/x86/nested_tsc_scaling_test.c | 20 +-- .../testing/selftests/kvm/x86/nx_huge_pages_test.c | 18 +-- .../testing/selftests/kvm/x86/platform_info_test.c | 4 +- .../testing/selftests/kvm/x86/pmu_counters_test.c | 32 ++--- .../selftests/kvm/x86/pmu_event_filter_test.c | 78 +++++------ .../kvm/x86/private_mem_conversions_test.c | 50 +++---- .../selftests/kvm/x86/private_mem_kvm_exits_test.c | 8 +- tools/testing/selftests/kvm/x86/set_sregs_test.c | 6 +- tools/testing/selftests/kvm/x86/sev_init2_tests.c | 4 +- tools/testing/selftests/kvm/x86/sev_smoke_test.c | 14 +- .../kvm/x86/smaller_maxphyaddr_emulation_test.c | 8 +- tools/testing/selftests/kvm/x86/smm_test.c | 4 +- tools/testing/selftests/kvm/x86/state_test.c | 8 +- .../kvm/x86/svm_nested_soft_inject_test.c | 4 +- tools/testing/selftests/kvm/x86/tsc_msrs_test.c | 2 +- tools/testing/selftests/kvm/x86/tsc_scaling_sync.c | 4 +- .../selftests/kvm/x86/ucna_injection_test.c | 43 +++--- .../selftests/kvm/x86/userspace_msr_exit_test.c | 24 ++-- tools/testing/selftests/kvm/x86/vmx_msrs_test.c | 18 +-- .../testing/selftests/kvm/x86/vmx_pmu_caps_test.c | 10 +- tools/testing/selftests/kvm/x86/xapic_ipi_test.c | 38 +++--- tools/testing/selftests/kvm/x86/xapic_state_test.c | 16 +-- tools/testing/selftests/kvm/x86/xapic_tpr_test.c | 2 +- tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c | 8 +- tools/testing/selftests/kvm/x86/xen_shinfo_test.c | 12 +- tools/testing/selftests/kvm/x86/xss_msr_test.c | 2 +- 142 files changed, 1415 insertions(+), 1415 deletions(-) diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index b058f27b2141..4479aed94625 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -101,15 +101,15 @@ struct test_params { enum vm_mem_backing_src_type backing_src; /* The amount of memory to allocate for each vCPU. */ - uint64_t vcpu_memory_bytes; + u64 vcpu_memory_bytes; /* The number of vCPUs to create in the VM. */ int nr_vcpus; }; -static uint64_t pread_uint64(int fd, const char *filename, uint64_t index) +static u64 pread_u64(int fd, const char *filename, u64 index) { - uint64_t value; + u64 value; off_t offset = index * sizeof(value); TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value), @@ -123,13 +123,13 @@ static uint64_t pread_uint64(int fd, const char *filename, uint64_t index) #define PAGEMAP_PRESENT (1ULL << 63) #define PAGEMAP_PFN_MASK ((1ULL << 55) - 1) -static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva) +static u64 lookup_pfn(int pagemap_fd, struct kvm_vm *vm, u64 gva) { - uint64_t hva = (uint64_t) addr_gva2hva(vm, gva); - uint64_t entry; - uint64_t pfn; + u64 hva = (u64)addr_gva2hva(vm, gva); + u64 entry; + u64 pfn; - entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize()); + entry = pread_u64(pagemap_fd, "pagemap", hva / getpagesize()); if (!(entry & PAGEMAP_PRESENT)) return 0; @@ -139,16 +139,16 @@ static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva) return pfn; } -static bool is_page_idle(int page_idle_fd, uint64_t pfn) +static bool is_page_idle(int page_idle_fd, u64 pfn) { - uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64); + u64 bits = pread_u64(page_idle_fd, "page_idle", pfn / 64); return !!((bits >> (pfn % 64)) & 1); } -static void mark_page_idle(int page_idle_fd, uint64_t pfn) +static void mark_page_idle(int page_idle_fd, u64 pfn) { - uint64_t bits = 1ULL << (pfn % 64); + u64 bits = 1ULL << (pfn % 64); TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8, "Set page_idle bits for PFN 0x%" PRIx64, pfn); @@ -174,11 +174,11 @@ static void pageidle_mark_vcpu_memory_idle(struct kvm_vm *vm, struct memstress_vcpu_args *vcpu_args) { int vcpu_idx = vcpu_args->vcpu_idx; - uint64_t base_gva = vcpu_args->gva; - uint64_t pages = vcpu_args->pages; - uint64_t page; - uint64_t still_idle = 0; - uint64_t no_pfn = 0; + u64 base_gva = vcpu_args->gva; + u64 pages = vcpu_args->pages; + u64 page; + u64 still_idle = 0; + u64 no_pfn = 0; int page_idle_fd; int pagemap_fd; @@ -193,8 +193,8 @@ static void pageidle_mark_vcpu_memory_idle(struct kvm_vm *vm, TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap."); for (page = 0; page < pages; page++) { - uint64_t gva = base_gva + page * memstress_args.guest_page_size; - uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva); + u64 gva = base_gva + page * memstress_args.guest_page_size; + u64 pfn = lookup_pfn(pagemap_fd, vm, gva); if (!pfn) { no_pfn++; @@ -297,10 +297,10 @@ static void lru_gen_mark_memory_idle(struct kvm_vm *vm) lru_gen_last_gen = new_gen; } -static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall) +static void assert_ucall(struct kvm_vcpu *vcpu, u64 expected_ucall) { struct ucall uc; - uint64_t actual_ucall = get_ucall(vcpu, &uc); + u64 actual_ucall = get_ucall(vcpu, &uc); TEST_ASSERT(expected_ucall == actual_ucall, "Guest exited unexpectedly (expected ucall %" PRIu64 @@ -417,7 +417,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) */ test_pages = params->nr_vcpus * params->vcpu_memory_bytes / max(memstress_args.guest_page_size, - (uint64_t)getpagesize()); + (u64)getpagesize()); memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main); diff --git a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c index 713005b6f508..8a019cbaf4c4 100644 --- a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c @@ -66,7 +66,7 @@ static void test_guest_raz(struct kvm_vcpu *vcpu) } } -static uint64_t raz_wi_reg_ids[] = { +static u64 raz_wi_reg_ids[] = { KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1), KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1), KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1), @@ -94,8 +94,8 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) int i; for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) { - uint64_t reg_id = raz_wi_reg_ids[i]; - uint64_t val; + u64 reg_id = raz_wi_reg_ids[i]; + u64 val; val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); @@ -111,7 +111,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) } } -static uint64_t raz_invariant_reg_ids[] = { +static u64 raz_invariant_reg_ids[] = { KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1), KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)), KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1), @@ -123,8 +123,8 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu) int i, r; for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) { - uint64_t reg_id = raz_invariant_reg_ids[i]; - uint64_t val; + u64 reg_id = raz_invariant_reg_ids[i]; + u64 val; val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); @@ -142,7 +142,7 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu) static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) { - uint64_t val, el0; + u64 val, el0; val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); diff --git a/tools/testing/selftests/kvm/arm64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c index d592a4515399..3e5f32bd2352 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer.c @@ -56,7 +56,7 @@ static void guest_validate_irq(unsigned int intid, struct test_vcpu_shared_data *shared_data) { enum guest_stage stage = shared_data->guest_stage; - uint64_t xcnt = 0, xcnt_diff_us, cval = 0; + u64 xcnt = 0, xcnt_diff_us, cval = 0; unsigned long xctl = 0; unsigned int timer_irq = 0; unsigned int accessor; diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index 993c9e38e729..7aed5dd2a347 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -23,7 +23,7 @@ #include "vgic.h" /* Depends on counter width. */ -static uint64_t CVAL_MAX; +static u64 CVAL_MAX; /* tval is a signed 32-bit int. */ static const int32_t TVAL_MAX = INT32_MAX; static const int32_t TVAL_MIN = INT32_MIN; @@ -32,7 +32,7 @@ static const int32_t TVAL_MIN = INT32_MIN; static const uint32_t TIMEOUT_NO_IRQ_US = 50000; /* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2 */ -static uint64_t DEF_CNT; +static u64 DEF_CNT; /* Number of runs. */ static const uint32_t NR_TEST_ITERS_DEF = 5; @@ -53,9 +53,9 @@ struct test_args { /* Virtual or physical timer and counter tests. */ enum arch_timer timer; /* Delay used for most timer tests. */ - uint64_t wait_ms; + u64 wait_ms; /* Delay used in the test_long_timer_delays test. */ - uint64_t long_wait_ms; + u64 long_wait_ms; /* Number of iterations. */ int iterations; /* Whether to test the physical timer. */ @@ -82,12 +82,12 @@ enum sync_cmd { NO_USERSPACE_CMD, }; -typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec); +typedef void (*sleep_method_t)(enum arch_timer timer, u64 usec); -static void sleep_poll(enum arch_timer timer, uint64_t usec); -static void sleep_sched_poll(enum arch_timer timer, uint64_t usec); -static void sleep_in_userspace(enum arch_timer timer, uint64_t usec); -static void sleep_migrate(enum arch_timer timer, uint64_t usec); +static void sleep_poll(enum arch_timer timer, u64 usec); +static void sleep_sched_poll(enum arch_timer timer, u64 usec); +static void sleep_in_userspace(enum arch_timer timer, u64 usec); +static void sleep_migrate(enum arch_timer timer, u64 usec); sleep_method_t sleep_method[] = { sleep_poll, @@ -122,7 +122,7 @@ static void assert_irqs_handled(uint32_t n) __GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n); } -static void userspace_cmd(uint64_t cmd) +static void userspace_cmd(u64 cmd) { GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0); } @@ -132,12 +132,12 @@ static void userspace_migrate_vcpu(void) userspace_cmd(USERSPACE_MIGRATE_SELF); } -static void userspace_sleep(uint64_t usecs) +static void userspace_sleep(u64 usecs) { GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0); } -static void set_counter(enum arch_timer timer, uint64_t counter) +static void set_counter(enum arch_timer timer, u64 counter) { GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0); } @@ -146,7 +146,7 @@ static void guest_irq_handler(struct ex_regs *regs) { unsigned int intid = gic_get_and_ack_irq(); enum arch_timer timer; - uint64_t cnt, cval; + u64 cnt, cval; uint32_t ctl; bool timer_condition, istatus; @@ -178,7 +178,7 @@ out: gic_set_eoi(intid); } -static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles, +static void set_cval_irq(enum arch_timer timer, u64 cval_cycles, uint32_t ctl) { atomic_set(&shared_data.handled, 0); @@ -187,7 +187,7 @@ static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles, timer_set_ctl(timer, ctl); } -static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles, +static void set_tval_irq(enum arch_timer timer, u64 tval_cycles, uint32_t ctl) { atomic_set(&shared_data.handled, 0); @@ -196,7 +196,7 @@ static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles, timer_set_ctl(timer, ctl); } -static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl, +static void set_xval_irq(enum arch_timer timer, u64 xval, uint32_t ctl, enum timer_view tv) { switch (tv) { @@ -275,13 +275,13 @@ static void wait_migrate_poll_for_irq(void) * Sleep for usec microseconds by polling in the guest or in * userspace (e.g. userspace_cmd=USERSPACE_SCHEDULE). */ -static void guest_poll(enum arch_timer test_timer, uint64_t usec, +static void guest_poll(enum arch_timer test_timer, u64 usec, enum sync_cmd usp_cmd) { - uint64_t cycles = usec_to_cycles(usec); + u64 cycles = usec_to_cycles(usec); /* Whichever timer we are testing with, sleep with the other. */ enum arch_timer sleep_timer = 1 - test_timer; - uint64_t start = timer_get_cntct(sleep_timer); + u64 start = timer_get_cntct(sleep_timer); while ((timer_get_cntct(sleep_timer) - start) < cycles) { if (usp_cmd == NO_USERSPACE_CMD) @@ -291,22 +291,22 @@ static void guest_poll(enum arch_timer test_timer, uint64_t usec, } } -static void sleep_poll(enum arch_timer timer, uint64_t usec) +static void sleep_poll(enum arch_timer timer, u64 usec) { guest_poll(timer, usec, NO_USERSPACE_CMD); } -static void sleep_sched_poll(enum arch_timer timer, uint64_t usec) +static void sleep_sched_poll(enum arch_timer timer, u64 usec) { guest_poll(timer, usec, USERSPACE_SCHED_YIELD); } -static void sleep_migrate(enum arch_timer timer, uint64_t usec) +static void sleep_migrate(enum arch_timer timer, u64 usec) { guest_poll(timer, usec, USERSPACE_MIGRATE_SELF); } -static void sleep_in_userspace(enum arch_timer timer, uint64_t usec) +static void sleep_in_userspace(enum arch_timer timer, u64 usec) { userspace_sleep(usec); } @@ -315,15 +315,15 @@ static void sleep_in_userspace(enum arch_timer timer, uint64_t usec) * Reset the timer state to some nice values like the counter not being close * to the edge, and the control register masked and disabled. */ -static void reset_timer_state(enum arch_timer timer, uint64_t cnt) +static void reset_timer_state(enum arch_timer timer, u64 cnt) { set_counter(timer, cnt); timer_set_ctl(timer, CTL_IMASK); } -static void test_timer_xval(enum arch_timer timer, uint64_t xval, +static void test_timer_xval(enum arch_timer timer, u64 xval, enum timer_view tv, irq_wait_method_t wm, bool reset_state, - uint64_t reset_cnt) + u64 reset_cnt) { local_irq_disable(); @@ -348,23 +348,23 @@ static void test_timer_xval(enum arch_timer timer, uint64_t xval, * the "runner", like: tools/testing/selftests/kselftest/runner.sh. */ -static void test_timer_cval(enum arch_timer timer, uint64_t cval, +static void test_timer_cval(enum arch_timer timer, u64 cval, irq_wait_method_t wm, bool reset_state, - uint64_t reset_cnt) + u64 reset_cnt) { test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt); } static void test_timer_tval(enum arch_timer timer, int32_t tval, irq_wait_method_t wm, bool reset_state, - uint64_t reset_cnt) + u64 reset_cnt) { - test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state, + test_timer_xval(timer, (u64)tval, TIMER_TVAL, wm, reset_state, reset_cnt); } -static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval, - uint64_t usec, enum timer_view timer_view, +static void test_xval_check_no_irq(enum arch_timer timer, u64 xval, + u64 usec, enum timer_view timer_view, sleep_method_t guest_sleep) { local_irq_disable(); @@ -379,17 +379,17 @@ static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval, assert_irqs_handled(0); } -static void test_cval_no_irq(enum arch_timer timer, uint64_t cval, - uint64_t usec, sleep_method_t wm) +static void test_cval_no_irq(enum arch_timer timer, u64 cval, + u64 usec, sleep_method_t wm) { test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm); } -static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec, +static void test_tval_no_irq(enum arch_timer timer, int32_t tval, u64 usec, sleep_method_t wm) { /* tval will be cast to an int32_t in test_xval_check_no_irq */ - test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm); + test_xval_check_no_irq(timer, (u64)tval, usec, TIMER_TVAL, wm); } /* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */ @@ -488,7 +488,7 @@ static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm static void test_reprogram_timers(enum arch_timer timer) { int i; - uint64_t base_wait = test_args.wait_ms; + u64 base_wait = test_args.wait_ms; for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { /* @@ -505,7 +505,7 @@ static void test_reprogram_timers(enum arch_timer timer) static void test_basic_functionality(enum arch_timer timer) { int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms); - uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms); + u64 cval = DEF_CNT + msec_to_cycles(test_args.wait_ms); int i; for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { @@ -593,7 +593,7 @@ static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t reset_timer_state(timer, DEF_CNT); set_cval_irq(timer, - (uint64_t) TVAL_MAX + + (u64)TVAL_MAX + msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE); set_counter(timer, TVAL_MAX); @@ -608,7 +608,7 @@ static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t /* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */ static void test_timers_above_tval_max(enum arch_timer timer) { - uint64_t cval; + u64 cval; int i; /* @@ -638,8 +638,8 @@ static void test_timers_above_tval_max(enum arch_timer timer) * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and * then waits for an IRQ. */ -static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1, - uint64_t xval, uint64_t cnt_2, +static void test_set_cnt_after_xval(enum arch_timer timer, u64 cnt_1, + u64 xval, u64 cnt_2, irq_wait_method_t wm, enum timer_view tv) { local_irq_disable(); @@ -662,8 +662,8 @@ static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1, * then waits for an IRQ. */ static void test_set_cnt_after_xval_no_irq(enum arch_timer timer, - uint64_t cnt_1, uint64_t xval, - uint64_t cnt_2, + u64 cnt_1, u64 xval, + u64 cnt_2, sleep_method_t guest_sleep, enum timer_view tv) { @@ -684,31 +684,31 @@ static void test_set_cnt_after_xval_no_irq(enum arch_timer timer, timer_set_ctl(timer, CTL_IMASK); } -static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1, - int32_t tval, uint64_t cnt_2, +static void test_set_cnt_after_tval(enum arch_timer timer, u64 cnt_1, + int32_t tval, u64 cnt_2, irq_wait_method_t wm) { test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL); } -static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1, - uint64_t cval, uint64_t cnt_2, +static void test_set_cnt_after_cval(enum arch_timer timer, u64 cnt_1, + u64 cval, u64 cnt_2, irq_wait_method_t wm) { test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL); } static void test_set_cnt_after_tval_no_irq(enum arch_timer timer, - uint64_t cnt_1, int32_t tval, - uint64_t cnt_2, sleep_method_t wm) + u64 cnt_1, int32_t tval, + u64 cnt_2, sleep_method_t wm) { test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL); } static void test_set_cnt_after_cval_no_irq(enum arch_timer timer, - uint64_t cnt_1, uint64_t cval, - uint64_t cnt_2, sleep_method_t wm) + u64 cnt_1, u64 cval, + u64 cnt_2, sleep_method_t wm) { test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL); @@ -730,8 +730,7 @@ static void test_move_counters_ahead_of_timers(enum arch_timer timer) test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm); test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm); tval = TVAL_MAX; - test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1, - wm); + test_set_cnt_after_tval(timer, 0, tval, (u64)tval + 1, wm); } } @@ -755,7 +754,7 @@ static void test_move_counters_behind_timers(enum arch_timer timer) static void test_timers_in_the_past(enum arch_timer timer) { int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms); - uint64_t cval; + u64 cval; int i; for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { @@ -791,7 +790,7 @@ static void test_timers_in_the_past(enum arch_timer timer) static void test_long_timer_delays(enum arch_timer timer) { int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms); - uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms); + u64 cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms); int i; for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { @@ -862,7 +861,7 @@ static uint32_t next_pcpu(void) return next; } -static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, +static void kvm_set_cntxct(struct kvm_vcpu *vcpu, u64 cnt, enum arch_timer timer) { if (timer == PHYSICAL) @@ -874,7 +873,7 @@ static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) { enum sync_cmd cmd = uc->args[1]; - uint64_t val = uc->args[2]; + u64 val = uc->args[2]; enum arch_timer timer = uc->args[3]; switch (cmd) { @@ -1018,8 +1017,8 @@ static bool parse_args(int argc, char *argv[]) static void set_counter_defaults(void) { - const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600; - uint64_t freq = read_sysreg(CNTFRQ_EL0); + const u64 MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600; + u64 freq = read_sysreg(CNTFRQ_EL0); int width = ilog2(MIN_ROLLOVER_SECS * freq); width = clamp(width, 56, 64); diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index 1d431de8729c..c19f143bed25 100644 --- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -31,14 +31,14 @@ extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx; extern unsigned char iter_ss_begin, iter_ss_end; -static volatile uint64_t sw_bp_addr, hw_bp_addr; -static volatile uint64_t wp_addr, wp_data_addr; -static volatile uint64_t svc_addr; -static volatile uint64_t ss_addr[4], ss_idx; -#define PC(v) ((uint64_t)&(v)) +static volatile u64 sw_bp_addr, hw_bp_addr; +static volatile u64 wp_addr, wp_data_addr; +static volatile u64 svc_addr; +static volatile u64 ss_addr[4], ss_idx; +#define PC(v) ((u64)&(v)) #define GEN_DEBUG_WRITE_REG(reg_name) \ -static void write_##reg_name(int num, uint64_t val) \ +static void write_##reg_name(int num, u64 val) \ { \ switch (num) { \ case 0: \ @@ -103,7 +103,7 @@ GEN_DEBUG_WRITE_REG(dbgwvr) static void reset_debug_state(void) { uint8_t brps, wrps, i; - uint64_t dfr0; + u64 dfr0; asm volatile("msr daifset, #8"); @@ -140,7 +140,7 @@ static void enable_os_lock(void) static void enable_monitor_debug_exceptions(void) { - uint64_t mdscr; + u64 mdscr; asm volatile("msr daifclr, #8"); @@ -149,7 +149,7 @@ static void enable_monitor_debug_exceptions(void) isb(); } -static void install_wp(uint8_t wpn, uint64_t addr) +static void install_wp(uint8_t wpn, u64 addr) { uint32_t wcr; @@ -162,7 +162,7 @@ static void install_wp(uint8_t wpn, uint64_t addr) enable_monitor_debug_exceptions(); } -static void install_hw_bp(uint8_t bpn, uint64_t addr) +static void install_hw_bp(uint8_t bpn, u64 addr) { uint32_t bcr; @@ -174,11 +174,11 @@ static void install_hw_bp(uint8_t bpn, uint64_t addr) enable_monitor_debug_exceptions(); } -static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr, - uint64_t ctx) +static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, u64 addr, + u64 ctx) { uint32_t wcr; - uint64_t ctx_bcr; + u64 ctx_bcr; /* Setup a context-aware breakpoint for Linked Context ID Match */ ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | @@ -196,8 +196,8 @@ static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr, enable_monitor_debug_exceptions(); } -void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, - uint64_t ctx) +void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, u64 addr, + u64 ctx) { uint32_t addr_bcr, ctx_bcr; @@ -223,7 +223,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, static void install_ss(void) { - uint64_t mdscr; + u64 mdscr; asm volatile("msr daifclr, #8"); @@ -236,7 +236,7 @@ static volatile char write_data; static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) { - uint64_t ctx = 0xabcdef; /* a random context number */ + u64 ctx = 0xabcdef; /* a random context number */ /* Software-breakpoint */ reset_debug_state(); @@ -377,8 +377,8 @@ static void guest_svc_handler(struct ex_regs *regs) static void guest_code_ss(int test_cnt) { - uint64_t i; - uint64_t bvr, wvr, w_bvr, w_wvr; + u64 i; + u64 bvr, wvr, w_bvr, w_wvr; for (i = 0; i < test_cnt; i++) { /* Bits [1:0] of dbg{b,w}vr are RES0 */ @@ -416,7 +416,7 @@ static void guest_code_ss(int test_cnt) GUEST_DONE(); } -static int debug_version(uint64_t id_aa64dfr0) +static int debug_version(u64 id_aa64dfr0) { return FIELD_GET(ID_AA64DFR0_EL1_DebugVer, id_aa64dfr0); } @@ -468,8 +468,8 @@ void test_single_step_from_userspace(int test_cnt) struct kvm_vm *vm; struct ucall uc; struct kvm_run *run; - uint64_t pc, cmd; - uint64_t test_pc = 0; + u64 pc, cmd; + u64 test_pc = 0; bool ss_enable = false; struct kvm_guest_debug debug = {}; @@ -506,7 +506,7 @@ void test_single_step_from_userspace(int test_cnt) "Unexpected pc 0x%lx (expected 0x%lx)", pc, test_pc); - if ((pc + 4) == (uint64_t)&iter_ss_end) { + if ((pc + 4) == (u64)&iter_ss_end) { test_pc = 0; debug.control = KVM_GUESTDBG_ENABLE; ss_enable = false; @@ -519,8 +519,8 @@ void test_single_step_from_userspace(int test_cnt) * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should * be the current pc + 4. */ - if ((pc >= (uint64_t)&iter_ss_begin) && - (pc < (uint64_t)&iter_ss_end)) + if ((pc >= (u64)&iter_ss_begin) && + (pc < (u64)&iter_ss_end)) test_pc = pc + 4; else test_pc = 0; @@ -533,7 +533,7 @@ void test_single_step_from_userspace(int test_cnt) * Run debug testing using the various breakpoint#, watchpoint# and * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration. */ -void test_guest_debug_exceptions_all(uint64_t aa64dfr0) +void test_guest_debug_exceptions_all(u64 aa64dfr0) { uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base; int b, w, c; @@ -580,7 +580,7 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; int opt; int ss_iteration = 10000; - uint64_t aa64dfr0; + u64 aa64dfr0; vm = vm_create_with_one_vcpu(&vcpu, guest_code); aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); diff --git a/tools/testing/selftests/kvm/arm64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c index bf038a0371f4..4f24105baaf3 100644 --- a/tools/testing/selftests/kvm/arm64/hypercalls.c +++ b/tools/testing/selftests/kvm/arm64/hypercalls.c @@ -29,9 +29,9 @@ #define KVM_REG_ARM_VENDOR_HYP_BMAP_2_RESET_VAL 0 struct kvm_fw_reg_info { - uint64_t reg; /* Register definition */ - uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */ - uint64_t reset_val; /* Reset value for the register */ + u64 reg; /* Register definition */ + u64 max_feat_bit; /* Bit that represents the upper limit of the feature-map */ + u64 reset_val; /* Reset value for the register */ }; #define FW_REG_INFO(r) \ @@ -60,7 +60,7 @@ static int stage = TEST_STAGE_REG_IFACE; struct test_hvc_info { uint32_t func_id; - uint64_t arg1; + u64 arg1; }; #define TEST_HVC_INFO(f, a1) \ @@ -154,7 +154,7 @@ static void guest_code(void) struct st_time { uint32_t rev; uint32_t attr; - uint64_t st_time; + u64 st_time; }; #define STEAL_TIME_SIZE ((sizeof(struct st_time) + 63) & ~63) @@ -162,7 +162,7 @@ struct st_time { static void steal_time_init(struct kvm_vcpu *vcpu) { - uint64_t st_ipa = (ulong)ST_GPA_BASE; + u64 st_ipa = (ulong)ST_GPA_BASE; unsigned int gpages; gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE); @@ -174,13 +174,13 @@ static void steal_time_init(struct kvm_vcpu *vcpu) static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) { - uint64_t val; + u64 val; unsigned int i; int ret; for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) { const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; - uint64_t set_val; + u64 set_val; /* First 'read' should be the reset value for the reg */ val = vcpu_get_reg(vcpu, reg_info->reg); @@ -229,7 +229,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) { - uint64_t val; + u64 val; unsigned int i; int ret; diff --git a/tools/testing/selftests/kvm/arm64/idreg-idst.c b/tools/testing/selftests/kvm/arm64/idreg-idst.c index 9ca9f125abdb..a3e84701d814 100644 --- a/tools/testing/selftests/kvm/arm64/idreg-idst.c +++ b/tools/testing/selftests/kvm/arm64/idreg-idst.c @@ -13,7 +13,7 @@ static volatile bool sys64, undef; #define __check_sr_read(r) \ ({ \ - uint64_t val; \ + u64 val; \ \ sys64 = false; \ undef = false; \ @@ -101,7 +101,7 @@ int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - uint64_t mmfr2; + u64 mmfr2; test_disable_default_vgic(); diff --git a/tools/testing/selftests/kvm/arm64/no-vgic.c b/tools/testing/selftests/kvm/arm64/no-vgic.c index b14686ef17d1..25b2e3222f68 100644 --- a/tools/testing/selftests/kvm/arm64/no-vgic.c +++ b/tools/testing/selftests/kvm/arm64/no-vgic.c @@ -15,7 +15,7 @@ static volatile bool handled; #define __check_sr_read(r) \ ({ \ - uint64_t val; \ + u64 val; \ \ handled = false; \ dsb(sy); \ @@ -33,7 +33,7 @@ static volatile bool handled; #define __check_gicv5_gicr_op(r) \ ({ \ - uint64_t val; \ + u64 val; \ \ handled = false; \ dsb(sy); \ @@ -82,7 +82,7 @@ static volatile bool handled; static void guest_code_gicv3(void) { - uint64_t val; + u64 val; /* * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having @@ -262,7 +262,7 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; struct kvm_vm *vm; bool has_v3, has_v5; - uint64_t pfr; + u64 pfr; test_disable_default_vgic(); diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index 4ccbd389d133..5d629ea95c4d 100644 --- a/tools/testing/selftests/kvm/arm64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -23,7 +23,7 @@ #define TEST_PTE_GVA 0xb0000000 #define TEST_DATA 0x0123456789ABCDEF -static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA; +static u64 *guest_test_memory = (u64 *)TEST_GVA; #define CMD_NONE (0) #define CMD_SKIP_TEST (1ULL << 1) @@ -48,7 +48,7 @@ static struct event_cnt { struct test_desc { const char *name; - uint64_t mem_mark_cmd; + u64 mem_mark_cmd; /* Skip the test if any prepare function returns false */ bool (*guest_prepare[PREPARE_FN_NR])(void); void (*guest_test)(void); @@ -70,9 +70,9 @@ struct test_params { struct test_desc *test_desc; }; -static inline void flush_tlb_page(uint64_t vaddr) +static inline void flush_tlb_page(u64 vaddr) { - uint64_t page = vaddr >> 12; + u64 page = vaddr >> 12; dsb(ishst); asm volatile("tlbi vaae1is, %0" :: "r" (page)); @@ -82,7 +82,7 @@ static inline void flush_tlb_page(uint64_t vaddr) static void guest_write64(void) { - uint64_t val; + u64 val; WRITE_ONCE(*guest_test_memory, TEST_DATA); val = READ_ONCE(*guest_test_memory); @@ -92,8 +92,8 @@ static void guest_write64(void) /* Check the system for atomic instructions. */ static bool guest_check_lse(void) { - uint64_t isar0 = read_sysreg(id_aa64isar0_el1); - uint64_t atomic; + u64 isar0 = read_sysreg(id_aa64isar0_el1); + u64 atomic; atomic = FIELD_GET(ID_AA64ISAR0_EL1_ATOMIC, isar0); return atomic >= 2; @@ -101,8 +101,8 @@ static bool guest_check_lse(void) static bool guest_check_dc_zva(void) { - uint64_t dczid = read_sysreg(dczid_el0); - uint64_t dzp = FIELD_GET(DCZID_EL0_DZP, dczid); + u64 dczid = read_sysreg(dczid_el0); + u64 dzp = FIELD_GET(DCZID_EL0_DZP, dczid); return dzp == 0; } @@ -110,7 +110,7 @@ static bool guest_check_dc_zva(void) /* Compare and swap instruction. */ static void guest_cas(void) { - uint64_t val; + u64 val; GUEST_ASSERT(guest_check_lse()); asm volatile(".arch_extension lse\n" @@ -122,7 +122,7 @@ static void guest_cas(void) static void guest_read64(void) { - uint64_t val; + u64 val; val = READ_ONCE(*guest_test_memory); GUEST_ASSERT_EQ(val, 0); @@ -131,7 +131,7 @@ static void guest_read64(void) /* Address translation instruction */ static void guest_at(void) { - uint64_t par; + u64 par; asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); isb(); @@ -164,8 +164,8 @@ static void guest_dc_zva(void) */ static void guest_ld_preidx(void) { - uint64_t val; - uint64_t addr = TEST_GVA - 8; + u64 val; + u64 addr = TEST_GVA - 8; /* * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is @@ -179,8 +179,8 @@ static void guest_ld_preidx(void) static void guest_st_preidx(void) { - uint64_t val = TEST_DATA; - uint64_t addr = TEST_GVA - 8; + u64 val = TEST_DATA; + u64 addr = TEST_GVA - 8; asm volatile("str %0, [%1, #8]!" : "+r" (val), "+r" (addr)); @@ -191,8 +191,8 @@ static void guest_st_preidx(void) static bool guest_set_ha(void) { - uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1); - uint64_t hadbs, tcr; + u64 mmfr1 = read_sysreg(id_aa64mmfr1_el1); + u64 hadbs, tcr; /* Skip if HA is not supported. */ hadbs = FIELD_GET(ID_AA64MMFR1_EL1_HAFDBS, mmfr1); @@ -208,7 +208,7 @@ static bool guest_set_ha(void) static bool guest_clear_pte_af(void) { - *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF; + *((u64 *)TEST_PTE_GVA) &= ~PTE_AF; flush_tlb_page(TEST_GVA); return true; @@ -217,7 +217,7 @@ static bool guest_clear_pte_af(void) static void guest_check_pte_af(void) { dsb(ish); - GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF); + GUEST_ASSERT_EQ(*((u64 *)TEST_PTE_GVA) & PTE_AF, PTE_AF); } static void guest_check_write_in_dirty_log(void) @@ -302,26 +302,26 @@ static void no_iabt_handler(struct ex_regs *regs) static struct uffd_args { char *copy; void *hva; - uint64_t paging_size; + u64 paging_size; } pt_args, data_args; /* Returns true to continue the test, and false if it should be skipped. */ static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, struct uffd_args *args) { - uint64_t addr = msg->arg.pagefault.address; - uint64_t flags = msg->arg.pagefault.flags; + u64 addr = msg->arg.pagefault.address; + u64 flags = msg->arg.pagefault.flags; struct uffdio_copy copy; int ret; TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING, "The only expected UFFD mode is MISSING"); - TEST_ASSERT_EQ(addr, (uint64_t)args->hva); + TEST_ASSERT_EQ(addr, (u64)args->hva); pr_debug("uffd fault: addr=%p write=%d\n", (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE)); - copy.src = (uint64_t)args->copy; + copy.src = (u64)args->copy; copy.dst = addr; copy.len = args->paging_size; copy.mode = 0; @@ -407,7 +407,7 @@ static bool punch_hole_in_backing_store(struct kvm_vm *vm, struct userspace_mem_region *region) { void *hva = (void *)region->region.userspace_addr; - uint64_t paging_size = region->region.memory_size; + u64 paging_size = region->region.memory_size; int ret, fd = region->fd; if (fd != -1) { @@ -438,7 +438,7 @@ static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run) static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run) { - uint64_t data; + u64 data; memcpy(&data, run->mmio.data, sizeof(data)); pr_debug("addr=%lld len=%d w=%d data=%lx\n", @@ -449,11 +449,11 @@ static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run) static bool check_write_in_dirty_log(struct kvm_vm *vm, struct userspace_mem_region *region, - uint64_t host_pg_nr) + u64 host_pg_nr) { unsigned long *bmap; bool first_page_dirty; - uint64_t size = region->region.memory_size; + u64 size = region->region.memory_size; /* getpage_size() is not always equal to vm->page_size */ bmap = bitmap_zalloc(size / getpagesize()); @@ -468,7 +468,7 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd) { struct userspace_mem_region *data_region, *pt_region; bool continue_test = true; - uint64_t pte_gpa, pte_pg; + u64 pte_gpa, pte_pg; data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); pt_region = vm_get_mem_region(vm, MEM_REGION_PT); @@ -525,7 +525,7 @@ noinline void __return_0x77(void) */ static void load_exec_code_for_test(struct kvm_vm *vm) { - uint64_t *code; + u64 *code; struct userspace_mem_region *region; void *hva; @@ -552,7 +552,7 @@ static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, static void setup_gva_maps(struct kvm_vm *vm) { struct userspace_mem_region *region; - uint64_t pte_gpa; + u64 pte_gpa; region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); /* Map TEST_GVA first. This will install a new PTE. */ @@ -574,12 +574,12 @@ enum pf_test_memslots { */ static void setup_memslots(struct kvm_vm *vm, struct test_params *p) { - uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type); - uint64_t guest_page_size = vm->page_size; - uint64_t max_gfn = vm_compute_max_gfn(vm); + u64 backing_src_pagesz = get_backing_src_pagesz(p->src_type); + u64 guest_page_size = vm->page_size; + u64 max_gfn = vm_compute_max_gfn(vm); /* Enough for 2M of code when using 4K guest pages. */ - uint64_t code_npages = 512; - uint64_t pt_size, data_size, data_gpa; + u64 code_npages = 512; + u64 pt_size, data_size, data_gpa; /* * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using diff --git a/tools/testing/selftests/kvm/arm64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c index 98e49f710aef..0a67dc3136d4 100644 --- a/tools/testing/selftests/kvm/arm64/psci_test.c +++ b/tools/testing/selftests/kvm/arm64/psci_test.c @@ -22,8 +22,7 @@ #define CPU_ON_ENTRY_ADDR 0xfeedf00dul #define CPU_ON_CONTEXT_ID 0xdeadc0deul -static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr, - uint64_t context_id) +static u64 psci_cpu_on(u64 target_cpu, u64 entry_addr, u64 context_id) { struct arm_smccc_res res; @@ -33,8 +32,7 @@ static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr, return res.a0; } -static uint64_t psci_affinity_info(uint64_t target_affinity, - uint64_t lowest_affinity_level) +static u64 psci_affinity_info(u64 target_affinity, u64 lowest_affinity_level) { struct arm_smccc_res res; @@ -44,7 +42,7 @@ static uint64_t psci_affinity_info(uint64_t target_affinity, return res.a0; } -static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id) +static u64 psci_system_suspend(u64 entry_addr, u64 context_id) { struct arm_smccc_res res; @@ -54,7 +52,7 @@ static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id) return res.a0; } -static uint64_t psci_system_off2(uint64_t type, uint64_t cookie) +static u64 psci_system_off2(u64 type, u64 cookie) { struct arm_smccc_res res; @@ -63,7 +61,7 @@ static uint64_t psci_system_off2(uint64_t type, uint64_t cookie) return res.a0; } -static uint64_t psci_features(uint32_t func_id) +static u64 psci_features(uint32_t func_id) { struct arm_smccc_res res; @@ -110,7 +108,7 @@ static void enter_guest(struct kvm_vcpu *vcpu) static void assert_vcpu_reset(struct kvm_vcpu *vcpu) { - uint64_t obs_pc, obs_x0; + u64 obs_pc, obs_x0; obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0])); @@ -123,9 +121,9 @@ static void assert_vcpu_reset(struct kvm_vcpu *vcpu) obs_x0, CPU_ON_CONTEXT_ID); } -static void guest_test_cpu_on(uint64_t target_cpu) +static void guest_test_cpu_on(u64 target_cpu) { - uint64_t target_state; + u64 target_state; GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID)); @@ -142,7 +140,7 @@ static void guest_test_cpu_on(uint64_t target_cpu) static void host_test_cpu_on(void) { struct kvm_vcpu *source, *target; - uint64_t target_mpidr; + u64 target_mpidr; struct kvm_vm *vm; struct ucall uc; @@ -166,7 +164,7 @@ static void host_test_cpu_on(void) static void guest_test_system_suspend(void) { - uint64_t ret; + u64 ret; /* assert that SYSTEM_SUSPEND is discoverable */ GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND)); @@ -200,7 +198,7 @@ static void host_test_system_suspend(void) static void guest_test_system_off2(void) { - uint64_t ret; + u64 ret; /* assert that SYSTEM_OFF2 is discoverable */ GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) & @@ -238,7 +236,7 @@ static void host_test_system_off2(void) { struct kvm_vcpu *source, *target; struct kvm_mp_state mps; - uint64_t psci_version = 0; + u64 psci_version = 0; int nr_shutdowns = 0; struct kvm_run *run; struct ucall uc; diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c index f41987dc726a..61954f2221e4 100644 --- a/tools/testing/selftests/kvm/arm64/sea_to_user.c +++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c @@ -53,16 +53,16 @@ static gpa_t einj_gpa; static void *einj_hva; -static uint64_t einj_hpa; +static u64 einj_hpa; static bool far_invalid; -static uint64_t translate_to_host_paddr(unsigned long vaddr) +static u64 translate_to_host_paddr(unsigned long vaddr) { - uint64_t pinfo; + u64 pinfo; int64_t offset = vaddr / getpagesize() * sizeof(pinfo); int fd; - uint64_t page_addr; - uint64_t paddr; + u64 page_addr; + u64 paddr; fd = open("/proc/self/pagemap", O_RDONLY); if (fd < 0) @@ -82,7 +82,7 @@ static uint64_t translate_to_host_paddr(unsigned long vaddr) return paddr; } -static void write_einj_entry(const char *einj_path, uint64_t val) +static void write_einj_entry(const char *einj_path, u64 val) { char cmd[256] = {0}; FILE *cmdfile = NULL; @@ -96,7 +96,7 @@ static void write_einj_entry(const char *einj_path, uint64_t val) ksft_exit_fail_perror("Failed to write EINJ entry"); } -static void inject_uer(uint64_t paddr) +static void inject_uer(u64 paddr) { if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1) ksft_test_result_skip("EINJ table no available in firmware"); @@ -145,10 +145,10 @@ static void setup_sigbus_handler(void) static void guest_code(void) { - uint64_t guest_data; + u64 guest_data; /* Consumes error will cause a SEA. */ - guest_data = *(uint64_t *)EINJ_GVA; + guest_data = *(u64 *)EINJ_GVA; GUEST_FAIL("Poison not protected by SEA: gva=%#lx, guest_data=%#lx\n", EINJ_GVA, guest_data); @@ -253,7 +253,7 @@ static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu) size_t backing_page_size; size_t guest_page_size; size_t alignment; - uint64_t num_guest_pages; + u64 num_guest_pages; gpa_t start_gpa; enum vm_mem_backing_src_type src_type = VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB; struct kvm_vm *vm; @@ -292,14 +292,14 @@ static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu) static void vm_inject_memory_uer(struct kvm_vm *vm) { - uint64_t guest_data; + u64 guest_data; einj_gpa = addr_gva2gpa(vm, EINJ_GVA); einj_hva = addr_gva2hva(vm, EINJ_GVA); /* Populate certain data before injecting UER. */ - *(uint64_t *)einj_hva = 0xBAADCAFE; - guest_data = *(uint64_t *)einj_hva; + *(u64 *)einj_hva = 0xBAADCAFE; + guest_data = *(u64 *)einj_hva; ksft_print_msg("Before EINJect: data=%#lx\n", guest_data); diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 7899d557c70b..9b9c04c963a1 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -31,7 +31,7 @@ struct reg_ftr_bits { bool sign; enum ftr_type type; uint8_t shift; - uint64_t mask; + u64 mask; /* * For FTR_EXACT, safe_val is used as the exact safe value. * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value. @@ -274,9 +274,9 @@ static void guest_code(void) } /* Return a safe value to a given ftr_bits an ftr value */ -uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) +u64 get_safe_value(const struct reg_ftr_bits *ftr_bits, u64 ftr) { - uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift; + u64 ftr_max = ftr_bits->mask >> ftr_bits->shift; TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features"); @@ -328,16 +328,16 @@ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) } /* Return an invalid value to a given ftr_bits an ftr value */ -uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) +u64 get_invalid_value(const struct reg_ftr_bits *ftr_bits, u64 ftr) { - uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift; + u64 ftr_max = ftr_bits->mask >> ftr_bits->shift; TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features"); if (ftr_bits->sign == FTR_UNSIGNED) { switch (ftr_bits->type) { case FTR_EXACT: - ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1); + ftr = max((u64)ftr_bits->safe_val + 1, ftr + 1); break; case FTR_LOWER_SAFE: ftr++; @@ -357,7 +357,7 @@ uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) } else if (ftr != ftr_max) { switch (ftr_bits->type) { case FTR_EXACT: - ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1); + ftr = max((u64)ftr_bits->safe_val + 1, ftr + 1); break; case FTR_LOWER_SAFE: ftr++; @@ -381,12 +381,12 @@ uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) return ftr; } -static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, - const struct reg_ftr_bits *ftr_bits) +static u64 test_reg_set_success(struct kvm_vcpu *vcpu, u64 reg, + const struct reg_ftr_bits *ftr_bits) { uint8_t shift = ftr_bits->shift; - uint64_t mask = ftr_bits->mask; - uint64_t val, new_val, ftr; + u64 mask = ftr_bits->mask; + u64 val, new_val, ftr; val = vcpu_get_reg(vcpu, reg); ftr = (val & mask) >> shift; @@ -404,12 +404,12 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, return new_val; } -static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, +static void test_reg_set_fail(struct kvm_vcpu *vcpu, u64 reg, const struct reg_ftr_bits *ftr_bits) { uint8_t shift = ftr_bits->shift; - uint64_t mask = ftr_bits->mask; - uint64_t val, old_val, ftr; + u64 mask = ftr_bits->mask; + u64 val, old_val, ftr; int r; val = vcpu_get_reg(vcpu, reg); @@ -430,7 +430,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, TEST_ASSERT_EQ(val, old_val); } -static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE]; +static u64 test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE]; #define encoding_to_range_idx(encoding) \ KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding), \ @@ -440,7 +440,7 @@ static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE]; static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) { - uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + u64 masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; struct reg_mask_range range = { .addr = (__u64)masks, }; @@ -458,7 +458,7 @@ static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) for (int i = 0; i < ARRAY_SIZE(test_regs); i++) { const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits; uint32_t reg_id = test_regs[i].reg; - uint64_t reg = KVM_ARM64_SYS_REG(reg_id); + u64 reg = KVM_ARM64_SYS_REG(reg_id); int idx; /* Get the index to masks array for the idreg */ @@ -488,11 +488,11 @@ static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) #define MPAM_IDREG_TEST 6 static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) { - uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + u64 masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; struct reg_mask_range range = { .addr = (__u64)masks, }; - uint64_t val; + u64 val; int idx, err; /* @@ -583,13 +583,13 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) #define MTE_IDREG_TEST 1 static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) { - uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + u64 masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; struct reg_mask_range range = { .addr = (__u64)masks, }; - uint64_t val; - uint64_t mte; - uint64_t mte_frac; + u64 val; + u64 mte; + u64 mte_frac; int idx, err; val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); @@ -643,7 +643,7 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac no longer 0xF\n"); } -static uint64_t reset_mutable_bits(uint32_t id, uint64_t val) +static u64 reset_mutable_bits(uint32_t id, u64 val) { struct test_feature_reg *reg = NULL; @@ -673,7 +673,7 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu) struct ucall uc; while (!done) { - uint64_t val; + u64 val; vcpu_run(vcpu); @@ -706,7 +706,7 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu) static void test_clidr(struct kvm_vcpu *vcpu) { - uint64_t clidr; + u64 clidr; int level; clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1)); @@ -774,7 +774,7 @@ static void test_vcpu_non_ftr_id_regs(struct kvm_vcpu *vcpu) static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding) { size_t idx = encoding_to_range_idx(encoding); - uint64_t observed; + u64 observed; observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding)); TEST_ASSERT_EQ(reset_mutable_bits(encoding, test_reg_vals[idx]), @@ -807,7 +807,7 @@ int main(void) struct kvm_vcpu *vcpu; struct kvm_vm *vm; bool aarch64_only; - uint64_t val, el0; + u64 val, el0; int test_cnt, i, j; TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES)); diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c index 8d6d3a4ae4db..0d8ddb371b89 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_init.c +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c @@ -19,7 +19,7 @@ #define NR_VCPUS 4 -#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset) +#define REG_OFFSET(vcpu, offset) (((u64)vcpu << 32) | offset) #define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2) #define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3) @@ -30,7 +30,7 @@ struct vm_gic { uint32_t gic_dev_type; }; -static uint64_t max_phys_size; +static u64 max_phys_size; /* * Helpers to access a redistributor register and verify the ioctl() failed or @@ -103,9 +103,9 @@ static void vm_gic_destroy(struct vm_gic *v) } struct vgic_region_attr { - uint64_t attr; - uint64_t size; - uint64_t alignment; + u64 attr; + u64 size; + u64 alignment; }; struct vgic_region_attr gic_v3_dist_region = { @@ -143,7 +143,7 @@ struct vgic_region_attr gic_v2_cpu_region = { static void subtest_dist_rdist(struct vm_gic *v) { int ret; - uint64_t addr; + u64 addr; struct vgic_region_attr rdist; /* CPU interface in GICv2*/ struct vgic_region_attr dist; @@ -223,7 +223,7 @@ static void subtest_dist_rdist(struct vm_gic *v) /* Test the new REDIST region API */ static void subtest_v3_redist_regions(struct vm_gic *v) { - uint64_t addr, expected_addr; + u64 addr, expected_addr; int ret; ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, @@ -408,7 +408,7 @@ static void test_v3_new_redist_regions(void) struct kvm_vcpu *vcpus[NR_VCPUS]; void *dummy = NULL; struct vm_gic v; - uint64_t addr; + u64 addr; int ret; v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); @@ -460,7 +460,7 @@ static void test_v3_new_redist_regions(void) static void test_v3_typer_accesses(void) { struct vm_gic v; - uint64_t addr; + u64 addr; int ret, i; v.vm = vm_create(NR_VCPUS); @@ -546,7 +546,7 @@ static void test_v3_last_bit_redist_regions(void) { uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; struct vm_gic v; - uint64_t addr; + u64 addr; v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids); @@ -580,7 +580,7 @@ static void test_v3_last_bit_single_rdist(void) { uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; struct vm_gic v; - uint64_t addr; + u64 addr; v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids); @@ -606,7 +606,7 @@ static void test_v3_redist_ipa_range_check_at_vcpu_run(void) struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; int ret, i; - uint64_t addr; + u64 addr; v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus); @@ -638,7 +638,7 @@ static void test_v3_its_region(void) { struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; - uint64_t addr; + u64 addr; int its_fd, ret; v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index da87d049d246..eae4aeb44926 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -133,7 +133,7 @@ static struct kvm_inject_desc set_active_fns[] = { for_each_supported_inject_fn((args), (t), (f)) /* Shared between the guest main thread and the IRQ handlers. */ -volatile uint64_t irq_handled; +volatile u64 irq_handled; volatile uint32_t irqnr_received[MAX_SPI + 1]; static void reset_stats(void) @@ -145,15 +145,15 @@ static void reset_stats(void) irqnr_received[i] = 0; } -static uint64_t gic_read_ap1r0(void) +static u64 gic_read_ap1r0(void) { - uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1); + u64 reg = read_sysreg_s(SYS_ICC_AP1R0_EL1); dsb(sy); return reg; } -static void gic_write_ap1r0(uint64_t val) +static void gic_write_ap1r0(u64 val) { write_sysreg_s(val, SYS_ICC_AP1R0_EL1); isb(); @@ -578,12 +578,12 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm, { struct kvm_irq_routing *routing; int ret; - uint64_t i; + u64 i; assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES); routing = kvm_gsi_routing_create(); - for (i = intid; i < (uint64_t)intid + num; i++) + for (i = intid; i < (u64)intid + num; i++) kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI); if (!expect_failure) { @@ -591,7 +591,7 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm, } else { ret = _kvm_gsi_routing_write(vm, routing); /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */ - if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS) + if (((u64)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS) TEST_ASSERT(ret != 0 && errno == EINVAL, "Bad intid %u did not cause KVM_SET_GSI_ROUTING " "error: rc: %i errno: %i", intid, ret, errno); @@ -622,9 +622,9 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, bool expect_failure) { int fd[MAX_SPI]; - uint64_t val; + u64 val; int ret, f; - uint64_t i; + u64 i; /* * There is no way to try injecting an SGI or PPI as the interface @@ -643,29 +643,29 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, * that no actual interrupt was injected for those cases. */ - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) + for (f = 0, i = intid; i < (u64)intid + num; i++, f++) fd[f] = kvm_new_eventfd(); - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - assert(i <= (uint64_t)UINT_MAX); + for (f = 0, i = intid; i < (u64)intid + num; i++, f++) { + assert(i <= (u64)UINT_MAX); kvm_assign_irqfd(vm, i - MIN_SPI, fd[f]); } - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { + for (f = 0, i = intid; i < (u64)intid + num; i++, f++) { val = 1; - ret = write(fd[f], &val, sizeof(uint64_t)); - TEST_ASSERT(ret == sizeof(uint64_t), + ret = write(fd[f], &val, sizeof(u64)); + TEST_ASSERT(ret == sizeof(u64), __KVM_SYSCALL_ERROR("write()", ret)); } - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) + for (f = 0, i = intid; i < (u64)intid + num; i++, f++) kvm_close(fd[f]); } /* handles the valid case: intid=0xffffffff num=1 */ #define for_each_intid(first, num, tmp, i) \ for ((tmp) = (i) = (first); \ - (tmp) < (uint64_t)(first) + (uint64_t)(num); \ + (tmp) < (u64)(first) + (u64)(num); \ (tmp)++, (i)++) static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd, @@ -678,7 +678,7 @@ static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd, int level = inject_args->level; bool expect_failure = inject_args->expect_failure; struct kvm_vm *vm = vcpu->vm; - uint64_t tmp; + u64 tmp; uint32_t i; /* handles the valid case: intid=0xffffffff num=1 */ diff --git a/tools/testing/selftests/kvm/arm64/vgic_v5.c b/tools/testing/selftests/kvm/arm64/vgic_v5.c index 3ce6cf37a629..80be71704450 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_v5.c +++ b/tools/testing/selftests/kvm/arm64/vgic_v5.c @@ -20,7 +20,7 @@ struct vm_gic { uint32_t gic_dev_type; }; -static uint64_t max_phys_size; +static u64 max_phys_size; #define GUEST_CMD_IRQ_CDIA 10 #define GUEST_CMD_IRQ_DIEOI 11 diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c index ae36325c022f..4ceab0760447 100644 --- a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c @@ -33,20 +33,20 @@ struct vpmu_vm { static struct vpmu_vm vpmu_vm; struct pmreg_sets { - uint64_t set_reg_id; - uint64_t clr_reg_id; + u64 set_reg_id; + u64 clr_reg_id; }; #define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr} -static uint64_t get_pmcr_n(uint64_t pmcr) +static u64 get_pmcr_n(u64 pmcr) { return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr); } -static uint64_t get_counters_mask(uint64_t n) +static u64 get_counters_mask(u64 n) { - uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX); + u64 mask = BIT(ARMV8_PMU_CYCLE_IDX); if (n) mask |= GENMASK(n - 1, 0); @@ -89,7 +89,7 @@ static inline void write_sel_evtyper(int sel, unsigned long val) static void pmu_disable_reset(void) { - uint64_t pmcr = read_sysreg(pmcr_el0); + u64 pmcr = read_sysreg(pmcr_el0); /* Reset all counters, disabling them */ pmcr &= ~ARMV8_PMU_PMCR_E; @@ -169,7 +169,7 @@ struct pmc_accessor pmc_accessors[] = { #define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected) \ { \ - uint64_t _tval = read_sysreg(regname); \ + u64 _tval = read_sysreg(regname); \ \ if (set_expected) \ __GUEST_ASSERT((_tval & mask), \ @@ -185,7 +185,7 @@ struct pmc_accessor pmc_accessors[] = { * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers * are set or cleared as specified in @set_expected. */ -static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected) +static void check_bitmap_pmu_regs(u64 mask, bool set_expected) { GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected); GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected); @@ -207,7 +207,7 @@ static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected) */ static void test_bitmap_pmu_regs(int pmc_idx, bool set_op) { - uint64_t pmcr_n, test_bit = BIT(pmc_idx); + u64 pmcr_n, test_bit = BIT(pmc_idx); bool set_expected = false; if (set_op) { @@ -232,7 +232,7 @@ static void test_bitmap_pmu_regs(int pmc_idx, bool set_op) */ static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx) { - uint64_t write_data, read_data; + u64 write_data, read_data; /* Disable all PMCs and reset all PMCs to zero. */ pmu_disable_reset(); @@ -287,11 +287,11 @@ static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx) } #define INVALID_EC (-1ul) -uint64_t expected_ec = INVALID_EC; +u64 expected_ec = INVALID_EC; static void guest_sync_handler(struct ex_regs *regs) { - uint64_t esr, ec; + u64 esr, ec; esr = read_sysreg(esr_el1); ec = ESR_ELx_EC(esr); @@ -351,9 +351,9 @@ static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx) * if reading/writing PMU registers for implemented or unimplemented * counters works as expected. */ -static void guest_code(uint64_t expected_pmcr_n) +static void guest_code(u64 expected_pmcr_n) { - uint64_t pmcr, pmcr_n, unimp_mask; + u64 pmcr, pmcr_n, unimp_mask; int i, pmc; __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS, @@ -403,11 +403,11 @@ static void create_vpmu_vm(void *guest_code) { struct kvm_vcpu_init init; uint8_t pmuver, ec; - uint64_t dfr0, irq = 23; + u64 dfr0, irq = 23; struct kvm_device_attr irq_attr = { .group = KVM_ARM_VCPU_PMU_V3_CTRL, .attr = KVM_ARM_VCPU_PMU_V3_IRQ, - .addr = (uint64_t)&irq, + .addr = (u64)&irq, }; /* The test creates the vpmu_vm multiple times. Ensure a clean state */ @@ -443,7 +443,7 @@ static void destroy_vpmu_vm(void) kvm_vm_free(vpmu_vm.vm); } -static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n) +static void run_vcpu(struct kvm_vcpu *vcpu, u64 pmcr_n) { struct ucall uc; @@ -489,9 +489,9 @@ static void test_create_vpmu_vm_with_nr_counters(unsigned int nr_counters, bool * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n, * and run the test. */ -static void run_access_test(uint64_t pmcr_n) +static void run_access_test(u64 pmcr_n) { - uint64_t sp; + u64 sp; struct kvm_vcpu *vcpu; struct kvm_vcpu_init init; @@ -514,7 +514,7 @@ static void run_access_test(uint64_t pmcr_n) aarch64_vcpu_setup(vcpu, &init); vcpu_init_descriptor_tables(vcpu); vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), sp); - vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (u64)guest_code); run_vcpu(vcpu, pmcr_n); @@ -531,12 +531,12 @@ static struct pmreg_sets validity_check_reg_sets[] = { * Create a VM, and check if KVM handles the userspace accesses of * the PMU register sets in @validity_check_reg_sets[] correctly. */ -static void run_pmregs_validity_test(uint64_t pmcr_n) +static void run_pmregs_validity_test(u64 pmcr_n) { int i; struct kvm_vcpu *vcpu; - uint64_t set_reg_id, clr_reg_id, reg_val; - uint64_t valid_counters_mask, max_counters_mask; + u64 set_reg_id, clr_reg_id, reg_val; + u64 valid_counters_mask, max_counters_mask; test_create_vpmu_vm_with_nr_counters(pmcr_n, false); vcpu = vpmu_vm.vcpu; @@ -588,7 +588,7 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) * the vCPU to @pmcr_n, which is larger than the host value. * The attempt should fail as @pmcr_n is too big to set for the vCPU. */ -static void run_error_test(uint64_t pmcr_n) +static void run_error_test(u64 pmcr_n) { pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n); @@ -600,9 +600,9 @@ static void run_error_test(uint64_t pmcr_n) * Return the default number of implemented PMU event counters excluding * the cycle counter (i.e. PMCR_EL0.N value) for the guest. */ -static uint64_t get_pmcr_n_limit(void) +static u64 get_pmcr_n_limit(void) { - uint64_t pmcr; + u64 pmcr; create_vpmu_vm(guest_code); pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); @@ -624,7 +624,7 @@ static bool kvm_supports_nr_counters_attr(void) int main(void) { - uint64_t i, pmcr_n; + u64 i, pmcr_n; TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3)); TEST_REQUIRE(kvm_supports_vgic_v3()); diff --git a/tools/testing/selftests/kvm/coalesced_io_test.c b/tools/testing/selftests/kvm/coalesced_io_test.c index 60cb25454899..ed6a66020b1e 100644 --- a/tools/testing/selftests/kvm/coalesced_io_test.c +++ b/tools/testing/selftests/kvm/coalesced_io_test.c @@ -15,8 +15,8 @@ struct kvm_coalesced_io { struct kvm_coalesced_mmio_ring *ring; uint32_t ring_size; - uint64_t mmio_gpa; - uint64_t *mmio; + u64 mmio_gpa; + u64 *mmio; /* * x86-only, but define pio_port for all architectures to minimize the @@ -94,7 +94,7 @@ static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu, TEST_ASSERT((!want_pio && (run->exit_reason == KVM_EXIT_MMIO && run->mmio.is_write && run->mmio.phys_addr == io->mmio_gpa && run->mmio.len == 8 && - *(uint64_t *)run->mmio.data == io->mmio_gpa + io->ring_size - 1)) || + *(u64 *)run->mmio.data == io->mmio_gpa + io->ring_size - 1)) || (want_pio && (run->exit_reason == KVM_EXIT_IO && run->io.port == io->pio_port && run->io.direction == KVM_EXIT_IO_OUT && run->io.count == 1 && pio_value == io->pio_port + io->ring_size - 1)), @@ -105,7 +105,7 @@ static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu, want_pio ? (unsigned long long)io->pio_port : io->mmio_gpa, (want_pio ? io->pio_port : io->mmio_gpa) + io->ring_size - 1, run->exit_reason, run->exit_reason == KVM_EXIT_MMIO ? "MMIO" : run->exit_reason == KVM_EXIT_IO ? "PIO" : "other", - run->mmio.phys_addr, run->mmio.is_write, run->mmio.len, *(uint64_t *)run->mmio.data, + run->mmio.phys_addr, run->mmio.is_write, run->mmio.len, *(u64 *)run->mmio.data, run->io.port, run->io.direction, run->io.size, run->io.count, pio_value); } @@ -143,7 +143,7 @@ static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu, "Wanted 8-byte MMIO to 0x%lx = %lx in entry %u, got %u-byte %s 0x%llx = 0x%lx", io->mmio_gpa, io->mmio_gpa + i, i, entry->len, entry->pio ? "PIO" : "MMIO", - entry->phys_addr, *(uint64_t *)entry->data); + entry->phys_addr, *(u64 *)entry->data); } } @@ -219,11 +219,11 @@ int main(int argc, char *argv[]) * the MMIO GPA identity mapped in the guest. */ .mmio_gpa = 4ull * SZ_1G, - .mmio = (uint64_t *)(4ull * SZ_1G), + .mmio = (u64 *)(4ull * SZ_1G), .pio_port = 0x80, }; - virt_map(vm, (uint64_t)kvm_builtin_io_ring.mmio, kvm_builtin_io_ring.mmio_gpa, 1); + virt_map(vm, (u64)kvm_builtin_io_ring.mmio, kvm_builtin_io_ring.mmio_gpa, 1); sync_global_to_guest(vm, kvm_builtin_io_ring); vcpu_args_set(vcpu, 1, &kvm_builtin_io_ring); diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 0202b78f8680..302c4923d093 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -24,7 +24,7 @@ #ifdef __NR_userfaultfd static int nr_vcpus = 1; -static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; +static u64 guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static size_t demand_paging_size; static char *guest_data_prototype; @@ -58,7 +58,7 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, struct uffd_msg *msg) { pid_t tid = syscall(__NR_gettid); - uint64_t addr = msg->arg.pagefault.address; + u64 addr = msg->arg.pagefault.address; struct timespec start; struct timespec ts_diff; int r; @@ -68,7 +68,7 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) { struct uffdio_copy copy; - copy.src = (uint64_t)guest_data_prototype; + copy.src = (u64)guest_data_prototype; copy.dst = addr; copy.len = demand_paging_size; copy.mode = 0; @@ -138,7 +138,7 @@ struct test_params { bool partition_vcpu_memory_access; }; -static void prefault_mem(void *alias, uint64_t len) +static void prefault_mem(void *alias, u64 len) { size_t p; @@ -154,7 +154,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct memstress_vcpu_args *vcpu_args; struct test_params *p = arg; struct uffd_desc **uffd_descs = NULL; - uint64_t uffd_region_size; + u64 uffd_region_size; struct timespec start; struct timespec ts_diff; double vcpu_paging_rate; diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 0a1ea1d1e2d8..7d170694e9c1 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -24,7 +24,7 @@ #define TEST_HOST_LOOP_N 2UL static int nr_vcpus = 1; -static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; +static u64 guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static bool run_vcpus_while_disabling_dirty_logging; /* Host variables */ @@ -37,7 +37,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; int vcpu_idx = vcpu_args->vcpu_idx; - uint64_t pages_count = 0; + u64 pages_count = 0; struct kvm_run *run; struct timespec start; struct timespec ts_diff; @@ -93,7 +93,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) struct test_params { unsigned long iterations; - uint64_t phys_offset; + u64 phys_offset; bool partition_vcpu_memory_access; enum vm_mem_backing_src_type backing_src; int slots; @@ -106,9 +106,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct test_params *p = arg; struct kvm_vm *vm; unsigned long **bitmaps; - uint64_t guest_num_pages; - uint64_t host_num_pages; - uint64_t pages_per_slot; + u64 guest_num_pages; + u64 host_num_pages; + u64 pages_per_slot; struct timespec start; struct timespec ts_diff; struct timespec get_dirty_log_total = (struct timespec){0}; diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 9b6b9a597175..fae924ddb64e 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -74,11 +74,11 @@ * the host. READ/WRITE_ONCE() should also be used with anything * that may change. */ -static uint64_t host_page_size; -static uint64_t guest_page_size; -static uint64_t guest_num_pages; -static uint64_t iteration; -static uint64_t nr_writes; +static u64 host_page_size; +static u64 guest_page_size; +static u64 guest_num_pages; +static u64 iteration; +static u64 nr_writes; static bool vcpu_stop; /* @@ -86,13 +86,13 @@ static bool vcpu_stop; * This will be set to the topmost valid physical address minus * the test memory size. */ -static uint64_t guest_test_phys_mem; +static u64 guest_test_phys_mem; /* * Guest virtual memory offset of the testing memory slot. * Must not conflict with identity mapped test code. */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +static u64 guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; /* * Continuously write to the first 8 bytes of a random pages within @@ -100,10 +100,10 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; */ static void guest_code(void) { - uint64_t addr; + u64 addr; #ifdef __s390x__ - uint64_t i; + u64 i; /* * On s390x, all pages of a 1M segment are initially marked as dirty @@ -113,7 +113,7 @@ static void guest_code(void) */ for (i = 0; i < guest_num_pages; i++) { addr = guest_test_virt_mem + i * guest_page_size; - vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); + vcpu_arch_put_guest(*(u64 *)addr, READ_ONCE(iteration)); nr_writes++; } #endif @@ -125,7 +125,7 @@ static void guest_code(void) * guest_page_size; addr = align_down(addr, host_page_size); - vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); + vcpu_arch_put_guest(*(u64 *)addr, READ_ONCE(iteration)); nr_writes++; } @@ -138,11 +138,11 @@ static bool host_quit; /* Points to the test VM memory region on which we track dirty logs */ static void *host_test_mem; -static uint64_t host_num_pages; +static u64 host_num_pages; /* For statistics only */ -static uint64_t host_dirty_count; -static uint64_t host_clear_count; +static u64 host_dirty_count; +static u64 host_clear_count; /* Whether dirty ring reset is requested, or finished */ static sem_t sem_vcpu_stop; @@ -169,7 +169,7 @@ static bool dirty_ring_vcpu_ring_full; * dirty gfn we've collected, so that if a mismatch of data found later in the * verifying process, we let it pass. */ -static uint64_t dirty_ring_last_page = -1ULL; +static u64 dirty_ring_last_page = -1ULL; /* * In addition to the above, it is possible (especially if this @@ -213,7 +213,7 @@ static uint64_t dirty_ring_last_page = -1ULL; * and also don't fail when it is reported in the next iteration, together with * an outdated iteration count. */ -static uint64_t dirty_ring_prev_iteration_last_page; +static u64 dirty_ring_prev_iteration_last_page; enum log_mode_t { /* Only use KVM_GET_DIRTY_LOG for logging */ @@ -297,7 +297,7 @@ static bool dirty_ring_supported(void) static void dirty_ring_create_vm_done(struct kvm_vm *vm) { - uint64_t pages; + u64 pages; uint32_t limit; /* @@ -494,11 +494,11 @@ static void *vcpu_worker(void *data) static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap) { - uint64_t page, nr_dirty_pages = 0, nr_clean_pages = 0; - uint64_t step = vm_num_host_pages(mode, 1); + u64 page, nr_dirty_pages = 0, nr_clean_pages = 0; + u64 step = vm_num_host_pages(mode, 1); for (page = 0; page < host_num_pages; page += step) { - uint64_t val = *(uint64_t *)(host_test_mem + page * host_page_size); + u64 val = *(u64 *)(host_test_mem + page * host_page_size); bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]); /* @@ -575,7 +575,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap) } static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, void *guest_code) + u64 extra_mem_pages, void *guest_code) { struct kvm_vm *vm; @@ -592,7 +592,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, struct test_params { unsigned long iterations; unsigned long interval; - uint64_t phys_offset; + u64 phys_offset; }; static void run_test(enum vm_guest_mode mode, void *arg) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index ec7644aae999..ad17ea62555f 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -171,7 +171,7 @@ static void test_numa_allocation(int fd, size_t total_size) kvm_munmap(mem, total_size); } -static void test_collapse(int fd, uint64_t flags) +static void test_collapse(int fd, u64 flags) { const size_t pmd_size = get_trans_hugepagesz(); void *reserved_addr; @@ -346,7 +346,7 @@ static void test_invalid_punch_hole(int fd, size_t total_size) } static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm, - uint64_t guest_memfd_flags) + u64 guest_memfd_flags) { size_t size; int fd; @@ -389,8 +389,8 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm) static void test_guest_memfd_flags(struct kvm_vm *vm) { - uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); - uint64_t flag; + u64 valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); + u64 flag; int fd; for (flag = BIT(0); flag; flag <<= 1) { @@ -419,7 +419,7 @@ do { \ #define gmem_test(__test, __vm, __flags) \ __gmem_test(__test, __vm, __flags, page_size * 4) -static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) +static void __test_guest_memfd(struct kvm_vm *vm, u64 flags) { test_create_guest_memfd_multiple(vm); test_create_guest_memfd_invalid_sizes(vm, flags); @@ -452,7 +452,7 @@ static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) static void test_guest_memfd(unsigned long vm_type) { struct kvm_vm *vm = vm_create_barebones_type(vm_type); - uint64_t flags; + u64 flags; test_guest_memfd_flags(vm); @@ -470,7 +470,7 @@ static void test_guest_memfd(unsigned long vm_type) kvm_vm_free(vm); } -static void guest_code(uint8_t *mem, uint64_t size) +static void guest_code(uint8_t *mem, u64 size) { size_t i; @@ -489,7 +489,7 @@ static void test_guest_memfd_guest(void) * the guest's code, stack, and page tables, and low memory contains * the PCI hole and other MMIO regions that need to be avoided. */ - const uint64_t gpa = SZ_4G; + const u64 gpa = SZ_4G; const int slot = 1; struct kvm_vcpu *vcpu; diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c index bcf582852db9..894ef7d2481e 100644 --- a/tools/testing/selftests/kvm/guest_print_test.c +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -16,9 +16,9 @@ #include "ucall_common.h" struct guest_vals { - uint64_t a; - uint64_t b; - uint64_t type; + u64 a; + u64 b; + u64 type; }; static struct guest_vals vals; @@ -26,9 +26,9 @@ static struct guest_vals vals; /* GUEST_PRINTF()/GUEST_ASSERT_FMT() does not support float or double. */ #define TYPE_LIST \ TYPE(test_type_i64, I64, "%ld", int64_t) \ -TYPE(test_type_u64, U64u, "%lu", uint64_t) \ -TYPE(test_type_x64, U64x, "0x%lx", uint64_t) \ -TYPE(test_type_X64, U64X, "0x%lX", uint64_t) \ +TYPE(test_type_u64, U64u, "%lu", u64) \ +TYPE(test_type_x64, U64x, "0x%lx", u64) \ +TYPE(test_type_X64, U64X, "0x%lX", u64) \ TYPE(test_type_u32, U32u, "%u", uint32_t) \ TYPE(test_type_x32, U32x, "0x%x", uint32_t) \ TYPE(test_type_X32, U32X, "0x%X", uint32_t) \ @@ -56,7 +56,7 @@ static void fn(struct kvm_vcpu *vcpu, T a, T b) \ \ snprintf(expected_printf, UCALL_BUFFER_LEN, PRINTF_FMT_##ext, a, b); \ snprintf(expected_assert, UCALL_BUFFER_LEN, ASSERT_FMT_##ext, a, b); \ - vals = (struct guest_vals){ (uint64_t)a, (uint64_t)b, TYPE_##ext }; \ + vals = (struct guest_vals){ (u64)a, (u64)b, TYPE_##ext }; \ sync_global_to_guest(vcpu->vm, vals); \ run_test(vcpu, expected_printf, expected_assert); \ } diff --git a/tools/testing/selftests/kvm/include/arm64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h index e2c4e9f0010f..290b65e58d3c 100644 --- a/tools/testing/selftests/kvm/include/arm64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h @@ -18,20 +18,20 @@ enum arch_timer { #define CTL_ISTATUS (1 << 2) #define msec_to_cycles(msec) \ - (timer_get_cntfrq() * (uint64_t)(msec) / 1000) + (timer_get_cntfrq() * (u64)(msec) / 1000) #define usec_to_cycles(usec) \ - (timer_get_cntfrq() * (uint64_t)(usec) / 1000000) + (timer_get_cntfrq() * (u64)(usec) / 1000000) #define cycles_to_usec(cycles) \ - ((uint64_t)(cycles) * 1000000 / timer_get_cntfrq()) + ((u64)(cycles) * 1000000 / timer_get_cntfrq()) static inline uint32_t timer_get_cntfrq(void) { return read_sysreg(cntfrq_el0); } -static inline uint64_t timer_get_cntct(enum arch_timer timer) +static inline u64 timer_get_cntct(enum arch_timer timer) { isb(); @@ -48,7 +48,7 @@ static inline uint64_t timer_get_cntct(enum arch_timer timer) return 0; } -static inline void timer_set_cval(enum arch_timer timer, uint64_t cval) +static inline void timer_set_cval(enum arch_timer timer, u64 cval) { switch (timer) { case VIRTUAL: @@ -64,7 +64,7 @@ static inline void timer_set_cval(enum arch_timer timer, uint64_t cval) isb(); } -static inline uint64_t timer_get_cval(enum arch_timer timer) +static inline u64 timer_get_cval(enum arch_timer timer) { switch (timer) { case VIRTUAL: @@ -144,8 +144,8 @@ static inline uint32_t timer_get_ctl(enum arch_timer timer) static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec) { - uint64_t now_ct = timer_get_cntct(timer); - uint64_t next_ct = now_ct + msec_to_cycles(msec); + u64 now_ct = timer_get_cntct(timer); + u64 next_ct = now_ct + msec_to_cycles(msec); timer_set_cval(timer, next_ct); } diff --git a/tools/testing/selftests/kvm/include/arm64/delay.h b/tools/testing/selftests/kvm/include/arm64/delay.h index 329e4f5079ea..6a5d4634af2c 100644 --- a/tools/testing/selftests/kvm/include/arm64/delay.h +++ b/tools/testing/selftests/kvm/include/arm64/delay.h @@ -8,10 +8,10 @@ #include "arch_timer.h" -static inline void __delay(uint64_t cycles) +static inline void __delay(u64 cycles) { enum arch_timer timer = VIRTUAL; - uint64_t start = timer_get_cntct(timer); + u64 start = timer_get_cntct(timer); while ((timer_get_cntct(timer) - start) < cycles) cpu_relax(); diff --git a/tools/testing/selftests/kvm/include/arm64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h index 6408f952cb64..c36a4eafeb5a 100644 --- a/tools/testing/selftests/kvm/include/arm64/gic.h +++ b/tools/testing/selftests/kvm/include/arm64/gic.h @@ -48,7 +48,7 @@ void gic_set_dir(unsigned int intid); * split is true, EOI drops the priority and deactivates the interrupt. */ void gic_set_eoi_split(bool split); -void gic_set_priority_mask(uint64_t mask); +void gic_set_priority_mask(u64 mask); void gic_set_priority(uint32_t intid, uint32_t prio); void gic_irq_set_active(unsigned int intid); void gic_irq_clear_active(unsigned int intid); diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 5b18ffe68789..618d112cb08a 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -179,8 +179,8 @@ void vm_install_exception_handler(struct kvm_vm *vm, void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, handler_fn handler); -uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, gva_t gva, int level); -uint64_t *virt_get_pte_hva(struct kvm_vm *vm, gva_t gva); +u64 *virt_get_pte_hva_at_level(struct kvm_vm *vm, gva_t gva, int level); +u64 *virt_get_pte_hva(struct kvm_vm *vm, gva_t gva); static inline void cpu_relax(void) { @@ -287,9 +287,9 @@ struct arm_smccc_res { * @res: pointer to write the return values from registers x0-x3 * */ -void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, - uint64_t arg6, struct arm_smccc_res *res); +void smccc_hvc(uint32_t function_id, u64 arg0, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, + u64 arg6, struct arm_smccc_res *res); /** * smccc_smc - Invoke a SMCCC function using the smc conduit @@ -298,9 +298,9 @@ void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, * @res: pointer to write the return values from registers x0-x3 * */ -void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, - uint64_t arg6, struct arm_smccc_res *res); +void smccc_smc(uint32_t function_id, u64 arg0, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, + u64 arg6, struct arm_smccc_res *res); /* Execute a Wait For Interrupt instruction. */ void wfi(void); diff --git a/tools/testing/selftests/kvm/include/arm64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h index 688beccc9436..2fde1fd8f96a 100644 --- a/tools/testing/selftests/kvm/include/arm64/vgic.h +++ b/tools/testing/selftests/kvm/include/arm64/vgic.h @@ -11,9 +11,9 @@ #include "kvm_util.h" #define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \ - (((uint64_t)(count) << 52) | \ - ((uint64_t)((base) >> 16) << 16) | \ - ((uint64_t)(flags) << 12) | \ + (((u64)(count) << 52) | \ + ((u64)((base) >> 16) << 16) | \ + ((u64)(flags) << 12) | \ index) bool kvm_supports_vgic_v3(void); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 9f602c73fbb4..40bac473b460 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -90,7 +90,7 @@ enum kvm_mem_region_type { struct kvm_mmu { bool pgd_created; - uint64_t pgd; + u64 pgd; int pgtable_levels; struct kvm_mmu_arch arch; @@ -105,7 +105,7 @@ struct kvm_vm { unsigned int page_shift; unsigned int pa_bits; unsigned int va_bits; - uint64_t max_gfn; + u64 max_gfn; struct list_head vcpus; struct userspace_mem_regions regions; struct sparsebit *vpages_valid; @@ -114,7 +114,7 @@ struct kvm_vm { gpa_t ucall_mmio_addr; gva_t handlers; uint32_t dirty_ring_size; - uint64_t gpa_tag_mask; + u64 gpa_tag_mask; /* * "mmu" is the guest's stage-1, with a short name because the vast @@ -219,7 +219,7 @@ struct vm_shape { uint16_t pad1; }; -kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t)); +kvm_static_assert(sizeof(struct vm_shape) == sizeof(u64)); #define VM_TYPE_DEFAULT 0 @@ -404,21 +404,22 @@ static inline int vm_check_cap(struct kvm_vm *vm, long cap) return ret; } -static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) +static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, u64 arg0) { struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); } -static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) + +static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, u64 arg0) { struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); } -static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa, - uint64_t size, uint64_t attributes) +static inline void vm_set_memory_attributes(struct kvm_vm *vm, u64 gpa, + u64 size, u64 attributes) { struct kvm_memory_attributes attr = { .attributes = attributes, @@ -438,29 +439,29 @@ static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa, } -static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) +static inline void vm_mem_set_private(struct kvm_vm *vm, u64 gpa, + u64 size) { vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); } -static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) +static inline void vm_mem_set_shared(struct kvm_vm *vm, u64 gpa, + u64 size) { vm_set_memory_attributes(vm, gpa, size, 0); } -void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size, +void vm_guest_mem_fallocate(struct kvm_vm *vm, u64 gpa, u64 size, bool punch_hole); -static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) +static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, u64 gpa, + u64 size) { vm_guest_mem_fallocate(vm, gpa, size, true); } -static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) +static inline void vm_guest_mem_allocate(struct kvm_vm *vm, u64 gpa, + u64 size) { vm_guest_mem_fallocate(vm, gpa, size, false); } @@ -484,7 +485,7 @@ static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) } static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, - uint64_t first_page, uint32_t num_pages) + u64 first_page, uint32_t num_pages) { struct kvm_clear_dirty_log args = { .dirty_bitmap = log, @@ -502,8 +503,8 @@ static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) } static inline void kvm_vm_register_coalesced_io(struct kvm_vm *vm, - uint64_t address, - uint64_t size, bool pio) + u64 address, + u64 size, bool pio) { struct kvm_coalesced_mmio_zone zone = { .addr = address, @@ -515,8 +516,8 @@ static inline void kvm_vm_register_coalesced_io(struct kvm_vm *vm, } static inline void kvm_vm_unregister_coalesced_io(struct kvm_vm *vm, - uint64_t address, - uint64_t size, bool pio) + u64 address, + u64 size, bool pio) { struct kvm_coalesced_mmio_zone zone = { .addr = address, @@ -610,15 +611,15 @@ static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc } void read_stat_data(int stats_fd, struct kvm_stats_header *header, - struct kvm_stats_desc *desc, uint64_t *data, + struct kvm_stats_desc *desc, u64 *data, size_t max_elements); void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, - uint64_t *data, size_t max_elements); + u64 *data, size_t max_elements); #define __get_stat(stats, stat) \ ({ \ - uint64_t data; \ + u64 data; \ \ kvm_get_stat(stats, #stat, &data, 1); \ data; \ @@ -664,8 +665,8 @@ static inline bool is_smt_on(void) void vm_create_irqchip(struct kvm_vm *vm); -static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, - uint64_t flags) +static inline int __vm_create_guest_memfd(struct kvm_vm *vm, u64 size, + u64 flags) { struct kvm_create_guest_memfd guest_memfd = { .size = size, @@ -675,8 +676,8 @@ static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd); } -static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, - uint64_t flags) +static inline int vm_create_guest_memfd(struct kvm_vm *vm, u64 size, + u64 flags) { int fd = __vm_create_guest_memfd(vm, size, flags); @@ -685,23 +686,23 @@ static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, } void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva); + u64 gpa, u64 size, void *hva); int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva); + u64 gpa, u64 size, void *hva); void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva, - uint32_t guest_memfd, uint64_t guest_memfd_offset); + u64 gpa, u64 size, void *hva, + uint32_t guest_memfd, u64 guest_memfd_offset); int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva, - uint32_t guest_memfd, uint64_t guest_memfd_offset); + u64 gpa, u64 size, void *hva, + uint32_t guest_memfd, u64 guest_memfd_offset); void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t gpa, uint32_t slot, uint64_t npages, + u64 gpa, uint32_t slot, u64 npages, uint32_t flags); void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags, - int guest_memfd_fd, uint64_t guest_memfd_offset); + u64 gpa, uint32_t slot, u64 npages, uint32_t flags, + int guest_memfd_fd, u64 guest_memfd_offset); #ifndef vm_arch_has_protected_memory static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) @@ -712,7 +713,7 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot); -void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); +void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, u64 new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); void vm_populate_vaddr_bitmap(struct kvm_vm *vm); @@ -726,7 +727,7 @@ gva_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); gva_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type); gva_t vm_vaddr_alloc_page(struct kvm_vm *vm); -void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, +void virt_map(struct kvm_vm *vm, u64 vaddr, u64 paddr, unsigned int npages); void *addr_gpa2hva(struct kvm_vm *vm, gpa_t gpa); void *addr_gva2hva(struct kvm_vm *vm, gva_t gva); @@ -754,7 +755,7 @@ void vcpu_run_complete_io(struct kvm_vcpu *vcpu); struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu); static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap, - uint64_t arg0) + u64 arg0) { struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; @@ -809,31 +810,34 @@ static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) vcpu_ioctl(vcpu, KVM_SET_FPU, fpu); } -static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, u64 id, void *addr) { - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; + struct kvm_one_reg reg = { .id = id, .addr = (u64)addr }; return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); } -static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) + +static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, u64 id, u64 val) { - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + struct kvm_one_reg reg = { .id = id, .addr = (u64)&val }; return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); } -static inline uint64_t vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id) + +static inline u64 vcpu_get_reg(struct kvm_vcpu *vcpu, u64 id) { - uint64_t val; - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + u64 val; + struct kvm_one_reg reg = { .id = id, .addr = (u64)&val }; TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id); vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); return val; } -static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) + +static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u64 id, u64 val) { - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + struct kvm_one_reg reg = { .id = id, .addr = (u64)&val }; TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id); @@ -878,29 +882,29 @@ static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu) return fd; } -int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr); +int __kvm_has_device_attr(int dev_fd, uint32_t group, u64 attr); -static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) +static inline void kvm_has_device_attr(int dev_fd, uint32_t group, u64 attr) { int ret = __kvm_has_device_attr(dev_fd, group, attr); TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); } -int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val); +int __kvm_device_attr_get(int dev_fd, uint32_t group, u64 attr, void *val); static inline void kvm_device_attr_get(int dev_fd, uint32_t group, - uint64_t attr, void *val) + u64 attr, void *val) { int ret = __kvm_device_attr_get(dev_fd, group, attr, val); TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret)); } -int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val); +int __kvm_device_attr_set(int dev_fd, uint32_t group, u64 attr, void *val); static inline void kvm_device_attr_set(int dev_fd, uint32_t group, - uint64_t attr, void *val) + u64 attr, void *val) { int ret = __kvm_device_attr_set(dev_fd, group, attr, val); @@ -908,45 +912,45 @@ static inline void kvm_device_attr_set(int dev_fd, uint32_t group, } static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr) + u64 attr) { return __kvm_has_device_attr(vcpu->fd, group, attr); } static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr) + u64 attr) { kvm_has_device_attr(vcpu->fd, group, attr); } static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) + u64 attr, void *val) { return __kvm_device_attr_get(vcpu->fd, group, attr, val); } static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) + u64 attr, void *val) { kvm_device_attr_get(vcpu->fd, group, attr, val); } static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) + u64 attr, void *val) { return __kvm_device_attr_set(vcpu->fd, group, attr, val); } static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) + u64 attr, void *val) { kvm_device_attr_set(vcpu->fd, group, attr, val); } -int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type); -int __kvm_create_device(struct kvm_vm *vm, uint64_t type); +int __kvm_test_create_device(struct kvm_vm *vm, u64 type); +int __kvm_create_device(struct kvm_vm *vm, u64 type); -static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type) +static inline int kvm_create_device(struct kvm_vm *vm, u64 type) { int fd = __kvm_create_device(vm, type); @@ -962,7 +966,7 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu); * Input Args: * vcpu - vCPU * num - number of arguments - * ... - arguments, each of type uint64_t + * ... - arguments, each of type u64 * * Output Args: None * @@ -970,7 +974,7 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu); * * Sets the first @num input parameters for the function at @vcpu's entry point, * per the C calling convention of the architecture, to the values given as - * variable args. Each of the variable args is expected to be of type uint64_t. + * variable args. Each of the variable args is expected to be of type u64. * The maximum @num can be is specific to the architecture. */ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...); @@ -1014,7 +1018,7 @@ static inline gpa_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, */ struct kvm_vm *____vm_create(struct vm_shape shape); struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, - uint64_t nr_extra_pages); + u64 nr_extra_pages); static inline struct kvm_vm *vm_create_barebones(void) { @@ -1037,7 +1041,7 @@ static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) } struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, - uint64_t extra_mem_pages, + u64 extra_mem_pages, void *guest_code, struct kvm_vcpu *vcpus[]); static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus, @@ -1051,7 +1055,7 @@ static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus, struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, + u64 extra_mem_pages, void *guest_code); /* @@ -1059,7 +1063,7 @@ struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, * additional pages of guest memory. Returns the VM and vCPU (via out param). */ static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, + u64 extra_mem_pages, void *guest_code) { return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu, @@ -1215,9 +1219,9 @@ static inline void virt_pgd_alloc(struct kvm_vm *vm) * Within @vm, creates a virtual translation for the page starting * at @vaddr to the page starting at @paddr. */ -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); +void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr); -static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +static inline void virt_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) { virt_arch_pg_map(vm, vaddr, paddr); sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); @@ -1274,7 +1278,7 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0); } -static inline uint64_t vm_page_align(struct kvm_vm *vm, uint64_t v) +static inline u64 vm_page_align(struct kvm_vm *vm, u64 v) { return (v + vm->page_size - 1) & ~(vm->page_size - 1); } diff --git a/tools/testing/selftests/kvm/include/kvm_util_types.h b/tools/testing/selftests/kvm/include/kvm_util_types.h index 1d9eedb4885e..ed0087e31674 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_types.h +++ b/tools/testing/selftests/kvm/include/kvm_util_types.h @@ -2,6 +2,8 @@ #ifndef SELFTEST_KVM_UTIL_TYPES_H #define SELFTEST_KVM_UTIL_TYPES_H +#include + /* * Provide a version of static_assert() that is guaranteed to have an optional * message param. _GNU_SOURCE is defined for all KVM selftests, _GNU_SOURCE @@ -14,9 +16,9 @@ #define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg) #define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr) -typedef uint64_t gpa_t; /* Virtual Machine (Guest) physical address */ -typedef uint64_t gva_t; /* Virtual Machine (Guest) virtual address */ +typedef u64 gpa_t; /* Virtual Machine (Guest) physical address */ +typedef u64 gva_t; /* Virtual Machine (Guest) virtual address */ -#define INVALID_GPA (~(uint64_t)0) +#define INVALID_GPA (~(u64)0) #endif /* SELFTEST_KVM_UTIL_TYPES_H */ diff --git a/tools/testing/selftests/kvm/include/loongarch/arch_timer.h b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h index 2ed106b32c81..3888aeeb3524 100644 --- a/tools/testing/selftests/kvm/include/loongarch/arch_timer.h +++ b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h @@ -70,9 +70,9 @@ static inline void timer_set_next_cmp_ms(unsigned int msec, bool period) csr_write(val, LOONGARCH_CSR_TCFG); } -static inline void __delay(uint64_t cycles) +static inline void __delay(u64 cycles) { - uint64_t start = timer_get_cycles(); + u64 start = timer_get_cycles(); while ((timer_get_cycles() - start) < cycles) cpu_relax(); diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h index 9071eb6dea60..71296909302c 100644 --- a/tools/testing/selftests/kvm/include/memstress.h +++ b/tools/testing/selftests/kvm/include/memstress.h @@ -20,9 +20,9 @@ #define MEMSTRESS_MEM_SLOT_INDEX 1 struct memstress_vcpu_args { - uint64_t gpa; - uint64_t gva; - uint64_t pages; + u64 gpa; + u64 gva; + u64 pages; /* Only used by the host userspace part of the vCPU thread */ struct kvm_vcpu *vcpu; @@ -32,9 +32,9 @@ struct memstress_vcpu_args { struct memstress_args { struct kvm_vm *vm; /* The starting address and size of the guest test region. */ - uint64_t gpa; - uint64_t size; - uint64_t guest_page_size; + u64 gpa; + u64 size; + u64 guest_page_size; uint32_t random_seed; uint32_t write_percent; @@ -56,7 +56,7 @@ struct memstress_args { extern struct memstress_args memstress_args; struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, - uint64_t vcpu_memory_bytes, int slots, + u64 vcpu_memory_bytes, int slots, enum vm_mem_backing_src_type backing_src, bool partition_vcpu_memory_access); void memstress_destroy_vm(struct kvm_vm *vm); @@ -68,15 +68,15 @@ void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vc void memstress_join_vcpu_threads(int vcpus); void memstress_guest_code(uint32_t vcpu_id); -uint64_t memstress_nested_pages(int nr_vcpus); +u64 memstress_nested_pages(int nr_vcpus); void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]); void memstress_enable_dirty_logging(struct kvm_vm *vm, int slots); void memstress_disable_dirty_logging(struct kvm_vm *vm, int slots); void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots); void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], - int slots, uint64_t pages_per_slot); -unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot); + int slots, u64 pages_per_slot); +unsigned long **memstress_alloc_bitmaps(int slots, u64 pages_per_slot); void memstress_free_bitmaps(unsigned long *bitmaps[], int slots); #endif /* SELFTEST_KVM_MEMSTRESS_H */ diff --git a/tools/testing/selftests/kvm/include/riscv/arch_timer.h b/tools/testing/selftests/kvm/include/riscv/arch_timer.h index 225d81dad064..66ed7e36a7cb 100644 --- a/tools/testing/selftests/kvm/include/riscv/arch_timer.h +++ b/tools/testing/selftests/kvm/include/riscv/arch_timer.h @@ -14,25 +14,25 @@ static unsigned long timer_freq; #define msec_to_cycles(msec) \ - ((timer_freq) * (uint64_t)(msec) / 1000) + ((timer_freq) * (u64)(msec) / 1000) #define usec_to_cycles(usec) \ - ((timer_freq) * (uint64_t)(usec) / 1000000) + ((timer_freq) * (u64)(usec) / 1000000) #define cycles_to_usec(cycles) \ - ((uint64_t)(cycles) * 1000000 / (timer_freq)) + ((u64)(cycles) * 1000000 / (timer_freq)) -static inline uint64_t timer_get_cycles(void) +static inline u64 timer_get_cycles(void) { return csr_read(CSR_TIME); } -static inline void timer_set_cmp(uint64_t cval) +static inline void timer_set_cmp(u64 cval) { csr_write(CSR_STIMECMP, cval); } -static inline uint64_t timer_get_cmp(void) +static inline u64 timer_get_cmp(void) { return csr_read(CSR_STIMECMP); } @@ -49,15 +49,15 @@ static inline void timer_irq_disable(void) static inline void timer_set_next_cmp_ms(uint32_t msec) { - uint64_t now_ct = timer_get_cycles(); - uint64_t next_ct = now_ct + msec_to_cycles(msec); + u64 now_ct = timer_get_cycles(); + u64 next_ct = now_ct + msec_to_cycles(msec); timer_set_cmp(next_ct); } -static inline void __delay(uint64_t cycles) +static inline void __delay(u64 cycles) { - uint64_t start = timer_get_cycles(); + u64 start = timer_get_cycles(); while ((timer_get_cycles() - start) < cycles) cpu_relax(); diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index 4dade8c4d18e..e3acf2ae9881 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -25,8 +25,7 @@ #define GET_RM(insn) (((insn) & INSN_MASK_FUNCT3) >> INSN_SHIFT_FUNCT3) #define GET_CSR_NUM(insn) (((insn) & INSN_CSR_MASK) >> INSN_CSR_SHIFT) -static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype, - uint64_t idx, uint64_t size) +static inline u64 __kvm_reg_id(u64 type, u64 subtype, u64 idx, u64 size) { return KVM_REG_RISCV | type | subtype | idx | size; } @@ -62,14 +61,14 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype, KVM_REG_RISCV_SBI_SINGLE, \ idx, KVM_REG_SIZE_ULONG) -bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext); +bool __vcpu_has_ext(struct kvm_vcpu *vcpu, u64 ext); -static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, uint64_t isa_ext) +static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, u64 isa_ext) { return __vcpu_has_ext(vcpu, RISCV_ISA_EXT_REG(isa_ext)); } -static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext) +static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, u64 sbi_ext) { return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext)); } diff --git a/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h index b0ed71302722..6deaf18fec22 100644 --- a/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h +++ b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h @@ -8,6 +8,6 @@ #ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER #define SELFTEST_KVM_DIAG318_TEST_HANDLER -uint64_t get_diag318_info(void); +u64 get_diag318_info(void); #endif diff --git a/tools/testing/selftests/kvm/include/s390/facility.h b/tools/testing/selftests/kvm/include/s390/facility.h index 00a1ced6538b..41a265742666 100644 --- a/tools/testing/selftests/kvm/include/s390/facility.h +++ b/tools/testing/selftests/kvm/include/s390/facility.h @@ -16,7 +16,7 @@ /* alt_stfle_fac_list[16] + stfle_fac_list[16] */ #define NB_STFL_DOUBLEWORDS 32 -extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +extern u64 stfl_doublewords[NB_STFL_DOUBLEWORDS]; extern bool stfle_flag; static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr) @@ -24,7 +24,7 @@ static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr) return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); } -static inline void stfle(uint64_t *fac, unsigned int nb_doublewords) +static inline void stfle(u64 *fac, unsigned int nb_doublewords) { register unsigned long r0 asm("0") = nb_doublewords - 1; diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h index bc760761e1a3..e027e5790946 100644 --- a/tools/testing/selftests/kvm/include/sparsebit.h +++ b/tools/testing/selftests/kvm/include/sparsebit.h @@ -6,7 +6,7 @@ * * Header file that describes API to the sparsebit library. * This library provides a memory efficient means of storing - * the settings of bits indexed via a uint64_t. Memory usage + * the settings of bits indexed via a u64. Memory usage * is reasonable, significantly less than (2^64 / 8) bytes, as * long as bits that are mostly set or mostly cleared are close * to each other. This library is efficient in memory usage @@ -25,8 +25,8 @@ extern "C" { #endif struct sparsebit; -typedef uint64_t sparsebit_idx_t; -typedef uint64_t sparsebit_num_t; +typedef u64 sparsebit_idx_t; +typedef u64 sparsebit_num_t; struct sparsebit *sparsebit_alloc(void); void sparsebit_free(struct sparsebit **sbitp); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index b4872ba8ed12..62fe83763021 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -22,6 +22,8 @@ #include #include "kselftest.h" +#include + #define msecs_to_usecs(msec) ((msec) * 1000ULL) static inline __printf(1, 2) int _no_printf(const char *format, ...) { return 0; } @@ -127,9 +129,9 @@ static inline bool guest_random_bool(struct guest_random_state *state) return __guest_random_bool(state, 50); } -static inline uint64_t guest_random_u64(struct guest_random_state *state) +static inline u64 guest_random_u64(struct guest_random_state *state) { - return ((uint64_t)guest_random_u32(state) << 32) | guest_random_u32(state); + return ((u64)guest_random_u32(state) << 32) | guest_random_u32(state); } enum vm_mem_backing_src_type { @@ -189,18 +191,18 @@ static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t) } /* Aligns x up to the next multiple of size. Size must be a power of 2. */ -static inline uint64_t align_up(uint64_t x, uint64_t size) +static inline u64 align_up(u64 x, u64 size) { - uint64_t mask = size - 1; + u64 mask = size - 1; TEST_ASSERT(size != 0 && !(size & (size - 1)), "size not a power of 2: %lu", size); return ((x + mask) & ~mask); } -static inline uint64_t align_down(uint64_t x, uint64_t size) +static inline u64 align_down(u64 x, u64 size) { - uint64_t x_aligned_up = align_up(x, size); + u64 x_aligned_up = align_up(x, size); if (x == x_aligned_up) return x; diff --git a/tools/testing/selftests/kvm/include/timer_test.h b/tools/testing/selftests/kvm/include/timer_test.h index 9b6edaafe6d4..9501c6c825e2 100644 --- a/tools/testing/selftests/kvm/include/timer_test.h +++ b/tools/testing/selftests/kvm/include/timer_test.h @@ -24,15 +24,15 @@ struct test_args { uint32_t migration_freq_ms; uint32_t timer_err_margin_us; /* Members of struct kvm_arm_counter_offset */ - uint64_t counter_offset; - uint64_t reserved; + u64 counter_offset; + u64 reserved; }; /* Shared variables between host and guest */ struct test_vcpu_shared_data { uint32_t nr_iter; int guest_stage; - uint64_t xcnt; + u64 xcnt; }; extern struct test_args test_args; diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index 1db399c00d02..cbdcb0a50c4f 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -21,8 +21,8 @@ enum { #define UCALL_BUFFER_LEN 1024 struct ucall { - uint64_t cmd; - uint64_t args[UCALL_MAX_ARGS]; + u64 cmd; + u64 args[UCALL_MAX_ARGS]; char buffer[UCALL_BUFFER_LEN]; /* Host virtual address of this struct. */ @@ -33,14 +33,14 @@ void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa); void ucall_arch_do_ucall(gva_t uc); void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); -void ucall(uint64_t cmd, int nargs, ...); -__printf(2, 3) void ucall_fmt(uint64_t cmd, const char *fmt, ...); -__printf(5, 6) void ucall_assert(uint64_t cmd, const char *exp, +void ucall(u64 cmd, int nargs, ...); +__printf(2, 3) void ucall_fmt(u64 cmd, const char *fmt, ...); +__printf(5, 6) void ucall_assert(u64 cmd, const char *exp, const char *file, unsigned int line, const char *fmt, ...); -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); +u64 get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); void ucall_init(struct kvm_vm *vm, gpa_t mmio_gpa); -int ucall_nr_pages_required(uint64_t page_size); +int ucall_nr_pages_required(u64 page_size); /* * Perform userspace call without any associated data. This bare call avoids diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h index 60f7f9d435dc..0bc1dc16600e 100644 --- a/tools/testing/selftests/kvm/include/userfaultfd_util.h +++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h @@ -25,7 +25,7 @@ struct uffd_reader_args { struct uffd_desc { int uffd; - uint64_t num_readers; + u64 num_readers; /* Holds the write ends of the pipes for killing the readers. */ int *pipefds; pthread_t *readers; @@ -33,8 +33,8 @@ struct uffd_desc { }; struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, - void *hva, uint64_t len, - uint64_t num_readers, + void *hva, u64 len, + u64 num_readers, uffd_handler_t handler); void uffd_stop_demand_paging(struct uffd_desc *uffd); diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h index 5ca6bacbd70e..05573dedabd3 100644 --- a/tools/testing/selftests/kvm/include/x86/apic.h +++ b/tools/testing/selftests/kvm/include/x86/apic.h @@ -94,17 +94,17 @@ static inline void xapic_write_reg(unsigned int reg, uint32_t val) ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val; } -static inline uint64_t x2apic_read_reg(unsigned int reg) +static inline u64 x2apic_read_reg(unsigned int reg) { return rdmsr(APIC_BASE_MSR + (reg >> 4)); } -static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value) +static inline uint8_t x2apic_write_reg_safe(unsigned int reg, u64 value) { return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value); } -static inline void x2apic_write_reg(unsigned int reg, uint64_t value) +static inline void x2apic_write_reg(unsigned int reg, u64 value) { uint8_t fault = x2apic_write_reg_safe(reg, value); @@ -112,7 +112,7 @@ static inline void x2apic_write_reg(unsigned int reg, uint64_t value) fault, APIC_BASE_MSR + (reg >> 4), value); } -static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value) +static inline void x2apic_write_reg_fault(unsigned int reg, u64 value) { uint8_t fault = x2apic_write_reg_safe(reg, value); diff --git a/tools/testing/selftests/kvm/include/x86/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h index 5a74bb30e2f8..5ec5cca6f9e4 100644 --- a/tools/testing/selftests/kvm/include/x86/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86/evmcs.h @@ -12,7 +12,7 @@ #define u16 uint16_t #define u32 uint32_t -#define u64 uint64_t +#define u64 u64 #define EVMCS_VERSION 1 @@ -245,7 +245,7 @@ static inline void evmcs_enable(void) enable_evmcs = true; } -static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs) +static inline int evmcs_vmptrld(u64 vmcs_pa, void *vmcs) { current_vp_assist->current_nested_vmcs = vmcs_pa; current_vp_assist->enlighten_vmentry = 1; @@ -265,7 +265,7 @@ static inline bool load_evmcs(struct hyperv_test_pages *hv) return true; } -static inline int evmcs_vmptrst(uint64_t *value) +static inline int evmcs_vmptrst(u64 *value) { *value = current_vp_assist->current_nested_vmcs & ~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; @@ -273,7 +273,7 @@ static inline int evmcs_vmptrst(uint64_t *value) return 0; } -static inline int evmcs_vmread(uint64_t encoding, uint64_t *value) +static inline int evmcs_vmread(u64 encoding, u64 *value) { switch (encoding) { case GUEST_RIP: @@ -672,7 +672,7 @@ static inline int evmcs_vmread(uint64_t encoding, uint64_t *value) return 0; } -static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value) +static inline int evmcs_vmwrite(u64 encoding, u64 value) { switch (encoding) { case GUEST_RIP: @@ -1226,9 +1226,9 @@ static inline int evmcs_vmlaunch(void) "pop %%rbp;" : [ret]"=&a"(ret) : [host_rsp]"r" - ((uint64_t)¤t_evmcs->host_rsp), + ((u64)¤t_evmcs->host_rsp), [host_rip]"r" - ((uint64_t)¤t_evmcs->host_rip) + ((u64)¤t_evmcs->host_rip) : "memory", "cc", "rbx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); return ret; @@ -1265,9 +1265,9 @@ static inline int evmcs_vmresume(void) "pop %%rbp;" : [ret]"=&a"(ret) : [host_rsp]"r" - ((uint64_t)¤t_evmcs->host_rsp), + ((u64)¤t_evmcs->host_rsp), [host_rip]"r" - ((uint64_t)¤t_evmcs->host_rip) + ((u64)¤t_evmcs->host_rip) : "memory", "cc", "rbx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); return ret; diff --git a/tools/testing/selftests/kvm/include/x86/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h index eedfff3cf102..2add2123e37b 100644 --- a/tools/testing/selftests/kvm/include/x86/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86/hyperv.h @@ -256,9 +256,9 @@ */ static inline uint8_t __hyperv_hypercall(u64 control, gva_t input_address, gva_t output_address, - uint64_t *hv_status) + u64 *hv_status) { - uint64_t error_code; + u64 error_code; uint8_t vector; /* Note both the hypercall and the "asm safe" clobber r9-r11. */ @@ -277,7 +277,7 @@ static inline uint8_t __hyperv_hypercall(u64 control, gva_t input_address, static inline void hyperv_hypercall(u64 control, gva_t input_address, gva_t output_address) { - uint64_t hv_status; + u64 hv_status; uint8_t vector; vector = __hyperv_hypercall(control, input_address, output_address, &hv_status); @@ -327,22 +327,22 @@ struct hv_vp_assist_page { extern struct hv_vp_assist_page *current_vp_assist; -int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist); +int enable_vp_assist(u64 vp_assist_pa, void *vp_assist); struct hyperv_test_pages { /* VP assist page */ void *vp_assist_hva; - uint64_t vp_assist_gpa; + u64 vp_assist_gpa; void *vp_assist; /* Partition assist page */ void *partition_assist_hva; - uint64_t partition_assist_gpa; + u64 partition_assist_gpa; void *partition_assist; /* Enlightened VMCS */ void *enlightened_vmcs_hva; - uint64_t enlightened_vmcs_gpa; + u64 enlightened_vmcs_gpa; void *enlightened_vmcs; }; diff --git a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h index 4c605f624956..c33ab6e04171 100644 --- a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h @@ -11,19 +11,19 @@ extern bool is_forced_emulation_enabled; struct pte_masks { - uint64_t present; - uint64_t writable; - uint64_t user; - uint64_t readable; - uint64_t executable; - uint64_t accessed; - uint64_t dirty; - uint64_t huge; - uint64_t nx; - uint64_t c; - uint64_t s; + u64 present; + u64 writable; + u64 user; + u64 readable; + u64 executable; + u64 accessed; + u64 dirty; + u64 huge; + u64 nx; + u64 c; + u64 s; - uint64_t always_set; + u64 always_set; }; struct kvm_mmu_arch { @@ -37,8 +37,8 @@ struct kvm_vm_arch { gva_t tss; gva_t idt; - uint64_t c_bit; - uint64_t s_bit; + u64 c_bit; + u64 s_bit; int sev_fd; bool is_pt_protected; }; @@ -62,7 +62,7 @@ do { \ : "+m" (mem) \ : "r" (val) : "memory"); \ } else { \ - uint64_t __old = READ_ONCE(mem); \ + u64 __old = READ_ONCE(mem); \ \ __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]" \ : [ptr] "+m" (mem), [old] "+a" (__old) \ diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h index 72575eadb63a..98537cc8840d 100644 --- a/tools/testing/selftests/kvm/include/x86/pmu.h +++ b/tools/testing/selftests/kvm/include/x86/pmu.h @@ -6,8 +6,8 @@ #define SELFTEST_KVM_PMU_H #include -#include +#include #include #define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300 @@ -104,14 +104,15 @@ enum amd_pmu_zen_events { NR_AMD_ZEN_EVENTS, }; -extern const uint64_t intel_pmu_arch_events[]; -extern const uint64_t amd_pmu_zen_events[]; +extern const u64 intel_pmu_arch_events[]; +extern const u64 amd_pmu_zen_events[]; enum pmu_errata { INSTRUCTIONS_RETIRED_OVERCOUNT, BRANCHES_RETIRED_OVERCOUNT, }; -extern uint64_t pmu_errata_mask; + +extern u64 pmu_errata_mask; void kvm_init_pmu_errata(void); diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index d8634a760a60..dd7a68729ad0 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -23,7 +23,7 @@ extern bool host_cpu_is_intel; extern bool host_cpu_is_amd; extern bool host_cpu_is_hygon; extern bool host_cpu_is_amd_compatible; -extern uint64_t guest_tsc_khz; +extern u64 guest_tsc_khz; #ifndef MAX_NR_CPUID_ENTRIES #define MAX_NR_CPUID_ENTRIES 100 @@ -409,7 +409,7 @@ struct desc64 { struct desc_ptr { uint16_t size; - uint64_t address; + u64 address; } __attribute__((packed)); struct kvm_x86_state { @@ -427,18 +427,18 @@ struct kvm_x86_state { struct kvm_msrs msrs; }; -static inline uint64_t get_desc64_base(const struct desc64 *desc) +static inline u64 get_desc64_base(const struct desc64 *desc) { - return (uint64_t)desc->base3 << 32 | - (uint64_t)desc->base2 << 24 | - (uint64_t)desc->base1 << 16 | - (uint64_t)desc->base0; + return (u64)desc->base3 << 32 | + (u64)desc->base2 << 24 | + (u64)desc->base1 << 16 | + (u64)desc->base0; } -static inline uint64_t rdtsc(void) +static inline u64 rdtsc(void) { uint32_t eax, edx; - uint64_t tsc_val; + u64 tsc_val; /* * The lfence is to wait (on Intel CPUs) until all previous * instructions have been executed. If software requires RDTSC to be @@ -446,28 +446,28 @@ static inline uint64_t rdtsc(void) * execute LFENCE immediately after RDTSC */ __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx)); - tsc_val = ((uint64_t)edx) << 32 | eax; + tsc_val = ((u64)edx) << 32 | eax; return tsc_val; } -static inline uint64_t rdtscp(uint32_t *aux) +static inline u64 rdtscp(uint32_t *aux) { uint32_t eax, edx; __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux)); - return ((uint64_t)edx) << 32 | eax; + return ((u64)edx) << 32 | eax; } -static inline uint64_t rdmsr(uint32_t msr) +static inline u64 rdmsr(uint32_t msr) { uint32_t a, d; __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory"); - return a | ((uint64_t) d << 32); + return a | ((u64)d << 32); } -static inline void wrmsr(uint32_t msr, uint64_t value) +static inline void wrmsr(uint32_t msr, u64 value) { uint32_t a = value; uint32_t d = value >> 32; @@ -550,57 +550,57 @@ static inline uint16_t get_tr(void) return tr; } -static inline uint64_t get_cr0(void) +static inline u64 get_cr0(void) { - uint64_t cr0; + u64 cr0; __asm__ __volatile__("mov %%cr0, %[cr0]" : /* output */ [cr0]"=r"(cr0)); return cr0; } -static inline void set_cr0(uint64_t val) +static inline void set_cr0(u64 val) { __asm__ __volatile__("mov %0, %%cr0" : : "r" (val) : "memory"); } -static inline uint64_t get_cr3(void) +static inline u64 get_cr3(void) { - uint64_t cr3; + u64 cr3; __asm__ __volatile__("mov %%cr3, %[cr3]" : /* output */ [cr3]"=r"(cr3)); return cr3; } -static inline void set_cr3(uint64_t val) +static inline void set_cr3(u64 val) { __asm__ __volatile__("mov %0, %%cr3" : : "r" (val) : "memory"); } -static inline uint64_t get_cr4(void) +static inline u64 get_cr4(void) { - uint64_t cr4; + u64 cr4; __asm__ __volatile__("mov %%cr4, %[cr4]" : /* output */ [cr4]"=r"(cr4)); return cr4; } -static inline void set_cr4(uint64_t val) +static inline void set_cr4(u64 val) { __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); } -static inline uint64_t get_cr8(void) +static inline u64 get_cr8(void) { - uint64_t cr8; + u64 cr8; __asm__ __volatile__("mov %%cr8, %[cr8]" : [cr8]"=r"(cr8)); return cr8; } -static inline void set_cr8(uint64_t val) +static inline void set_cr8(u64 val) { __asm__ __volatile__("mov %0, %%cr8" : : "r" (val) : "memory"); } @@ -782,13 +782,13 @@ static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature) return nr_bits > feature.f.bit || this_cpu_has(feature.f); } -static __always_inline uint64_t this_cpu_supported_xcr0(void) +static __always_inline u64 this_cpu_supported_xcr0(void) { if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO)) return 0; return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) | - ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); + ((u64)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); } typedef u32 __attribute__((vector_size(16))) sse128_t; @@ -867,7 +867,7 @@ static inline void cpu_relax(void) static inline void udelay(unsigned long usec) { - uint64_t start, now, cycles; + u64 start, now, cycles; GUEST_ASSERT(guest_tsc_khz); cycles = guest_tsc_khz / 1000 * usec; @@ -899,7 +899,7 @@ void kvm_x86_state_cleanup(struct kvm_x86_state *state); const struct kvm_msr_list *kvm_get_msr_index_list(void); const struct kvm_msr_list *kvm_get_feature_msr_index_list(void); bool kvm_msr_is_in_save_restore_list(uint32_t msr_index); -uint64_t kvm_get_feature_msr(uint64_t msr_index); +u64 kvm_get_feature_msr(u64 msr_index); static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs) @@ -1022,13 +1022,13 @@ static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) return nr_bits > feature.f.bit || kvm_cpu_has(feature.f); } -static __always_inline uint64_t kvm_cpu_supported_xcr0(void) +static __always_inline u64 kvm_cpu_supported_xcr0(void) { if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO)) return 0; return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) | - ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); + ((u64)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); } static inline size_t kvm_cpuid2_size(int nr_entries) @@ -1135,8 +1135,8 @@ static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu, vcpu_set_or_clear_cpuid_feature(vcpu, feature, false); } -uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index); -int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value); +u64 vcpu_get_msr(struct kvm_vcpu *vcpu, u64 msr_index); +int _vcpu_set_msr(struct kvm_vcpu *vcpu, u64 msr_index, u64 msr_value); /* * Assert on an MSR access(es) and pretty print the MSR name when possible. @@ -1168,7 +1168,7 @@ static inline bool is_durable_msr(uint32_t msr) #define vcpu_set_msr(vcpu, msr, val) \ do { \ - uint64_t r, v = val; \ + u64 r, v = val; \ \ TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \ "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \ @@ -1182,15 +1182,15 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); void kvm_init_vm_address_properties(struct kvm_vm *vm); struct ex_regs { - uint64_t rax, rcx, rdx, rbx; - uint64_t rbp, rsi, rdi; - uint64_t r8, r9, r10, r11; - uint64_t r12, r13, r14, r15; - uint64_t vector; - uint64_t error_code; - uint64_t rip; - uint64_t cs; - uint64_t rflags; + u64 rax, rcx, rdx, rbx; + u64 rbp, rsi, rdi; + u64 r8, r9, r10, r11; + u64 r12, r13, r14, r15; + u64 vector; + u64 error_code; + u64 rip; + u64 cs; + u64 rflags; }; struct idt_entry { @@ -1262,7 +1262,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, #define kvm_asm_safe(insn, inputs...) \ ({ \ - uint64_t ign_error_code; \ + u64 ign_error_code; \ uint8_t vector; \ \ asm volatile(KVM_ASM_SAFE(insn) \ @@ -1285,7 +1285,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, #define kvm_asm_safe_fep(insn, inputs...) \ ({ \ - uint64_t ign_error_code; \ + u64 ign_error_code; \ uint8_t vector; \ \ asm volatile(KVM_ASM_SAFE_FEP(insn) \ @@ -1307,9 +1307,9 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, }) #define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \ -static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val) \ +static inline uint8_t insn##_safe ##_fep(uint32_t idx, u64 *val) \ { \ - uint64_t error_code; \ + u64 error_code; \ uint8_t vector; \ uint32_t a, d; \ \ @@ -1319,7 +1319,7 @@ static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val) \ : "c"(idx) \ : KVM_ASM_SAFE_CLOBBERS); \ \ - *val = (uint64_t)a | ((uint64_t)d << 32); \ + *val = (u64)a | ((u64)d << 32); \ return vector; \ } @@ -1335,12 +1335,12 @@ BUILD_READ_U64_SAFE_HELPERS(rdmsr) BUILD_READ_U64_SAFE_HELPERS(rdpmc) BUILD_READ_U64_SAFE_HELPERS(xgetbv) -static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val) +static inline uint8_t wrmsr_safe(uint32_t msr, u64 val) { return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr)); } -static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value) +static inline uint8_t xsetbv_safe(uint32_t index, u64 value) { u32 eax = value; u32 edx = value >> 32; @@ -1395,23 +1395,20 @@ static inline bool kvm_is_lbrv_enabled(void) return !!get_kvm_amd_param_integer("lbrv"); } -uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr); +u64 *vm_get_pte(struct kvm_vm *vm, u64 vaddr); -uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, - uint64_t a3); -uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1); -void xen_hypercall(uint64_t nr, uint64_t a0, void *a1); +u64 kvm_hypercall(u64 nr, u64 a0, u64 a1, u64 a2, u64 a3); +u64 __xen_hypercall(u64 nr, u64 a0, void *a1); +void xen_hypercall(u64 nr, u64 a0, void *a1); -static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa, - uint64_t size, uint64_t flags) +static inline u64 __kvm_hypercall_map_gpa_range(u64 gpa, u64 size, u64 flags) { return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0); } -static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size, - uint64_t flags) +static inline void kvm_hypercall_map_gpa_range(u64 gpa, u64 size, u64 flags) { - uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags); + u64 ret = __kvm_hypercall_map_gpa_range(gpa, size, flags); GUEST_ASSERT(!ret); } @@ -1456,7 +1453,7 @@ static inline void cli(void) asm volatile ("cli"); } -void __vm_xsave_require_permission(uint64_t xfeature, const char *name); +void __vm_xsave_require_permission(u64 xfeature, const char *name); #define vm_xsave_require_permission(xfeature) \ __vm_xsave_require_permission(xfeature, #xfeature) @@ -1511,17 +1508,17 @@ enum pg_level { void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels, struct pte_masks *pte_masks); -void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr, - uint64_t paddr, int level); -void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint64_t nr_bytes, int level); +void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, u64 vaddr, + u64 paddr, int level); +void virt_map_level(struct kvm_vm *vm, u64 vaddr, u64 paddr, + u64 nr_bytes, int level); void vm_enable_tdp(struct kvm_vm *vm); bool kvm_cpu_has_tdp(void); -void tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, uint64_t size); +void tdp_map(struct kvm_vm *vm, u64 nested_paddr, u64 paddr, u64 size); void tdp_identity_map_default_memslots(struct kvm_vm *vm); -void tdp_identity_map_1g(struct kvm_vm *vm, uint64_t addr, uint64_t size); -uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa); +void tdp_identity_map_1g(struct kvm_vm *vm, u64 addr, u64 size); +u64 *tdp_get_pte(struct kvm_vm *vm, u64 l2_gpa); /* * Basic CPU control in CR0 diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h index 289ff5b3f10c..f65e3361c7c0 100644 --- a/tools/testing/selftests/kvm/include/x86/sev.h +++ b/tools/testing/selftests/kvm/include/x86/sev.h @@ -49,13 +49,13 @@ static inline bool is_sev_vm(struct kvm_vm *vm) void sev_vm_launch(struct kvm_vm *vm, uint32_t policy); void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement); void sev_vm_launch_finish(struct kvm_vm *vm); -void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy); +void snp_vm_launch_start(struct kvm_vm *vm, u64 policy); void snp_vm_launch_update(struct kvm_vm *vm); void snp_vm_launch_finish(struct kvm_vm *vm); struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, struct kvm_vcpu **cpu); -void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement); +void vm_sev_launch(struct kvm_vm *vm, u64 policy, uint8_t *measurement); kvm_static_assert(SEV_RET_SUCCESS == 0); @@ -85,7 +85,7 @@ static inline u64 snp_default_policy(void) unsigned long raw; \ } sev_cmd = { .c = { \ .id = (cmd), \ - .data = (uint64_t)(arg), \ + .data = (u64)(arg), \ .sev_fd = (vm)->arch.sev_fd, \ } }; \ \ @@ -121,7 +121,7 @@ static inline void sev_register_encrypted_memory(struct kvm_vm *vm, } static inline void sev_launch_update_data(struct kvm_vm *vm, gpa_t gpa, - uint64_t size) + u64 size) { struct kvm_sev_launch_update_data update_data = { .uaddr = (unsigned long)addr_gpa2hva(vm, gpa), @@ -132,7 +132,7 @@ static inline void sev_launch_update_data(struct kvm_vm *vm, gpa_t gpa, } static inline void snp_launch_update_data(struct kvm_vm *vm, gpa_t gpa, - uint64_t hva, uint64_t size, uint8_t type) + u64 hva, u64 size, uint8_t type) { struct kvm_sev_snp_launch_update update_data = { .uaddr = hva, diff --git a/tools/testing/selftests/kvm/include/x86/smm.h b/tools/testing/selftests/kvm/include/x86/smm.h index 19337c34f13e..2d1afa09819b 100644 --- a/tools/testing/selftests/kvm/include/x86/smm.h +++ b/tools/testing/selftests/kvm/include/x86/smm.h @@ -8,8 +8,7 @@ #define SMRAM_MEMSLOT ((1 << 16) | 1) #define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE) -void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t smram_gpa, +void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu, u64 smram_gpa, const void *smi_handler, size_t handler_size); void inject_smi(struct kvm_vcpu *vcpu); diff --git a/tools/testing/selftests/kvm/include/x86/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h index a25b83e2c233..6c013eb838be 100644 --- a/tools/testing/selftests/kvm/include/x86/svm_util.h +++ b/tools/testing/selftests/kvm/include/x86/svm_util.h @@ -16,20 +16,20 @@ struct svm_test_data { /* VMCB */ struct vmcb *vmcb; /* gva */ void *vmcb_hva; - uint64_t vmcb_gpa; + u64 vmcb_gpa; /* host state-save area */ struct vmcb_save_area *save_area; /* gva */ void *save_area_hva; - uint64_t save_area_gpa; + u64 save_area_gpa; /* MSR-Bitmap */ void *msr; /* gva */ void *msr_hva; - uint64_t msr_gpa; + u64 msr_gpa; /* NPT */ - uint64_t ncr3_gpa; + u64 ncr3_gpa; }; static inline void vmmcall(void) @@ -58,7 +58,7 @@ static inline void vmmcall(void) struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, gva_t *p_svm_gva); void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp); -void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa); +void run_guest(struct vmcb *vmcb, u64 vmcb_gpa); static inline bool kvm_cpu_has_npt(void) { diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index f194723da3d0..5e9810fb9d20 100644 --- a/tools/testing/selftests/kvm/include/x86/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -287,12 +287,12 @@ enum vmcs_field { struct vmx_msr_entry { uint32_t index; uint32_t reserved; - uint64_t value; + u64 value; } __attribute__ ((aligned(16))); #include "evmcs.h" -static inline int vmxon(uint64_t phys) +static inline int vmxon(u64 phys) { uint8_t ret; @@ -309,7 +309,7 @@ static inline void vmxoff(void) __asm__ __volatile__("vmxoff"); } -static inline int vmclear(uint64_t vmcs_pa) +static inline int vmclear(u64 vmcs_pa) { uint8_t ret; @@ -321,7 +321,7 @@ static inline int vmclear(uint64_t vmcs_pa) return ret; } -static inline int vmptrld(uint64_t vmcs_pa) +static inline int vmptrld(u64 vmcs_pa) { uint8_t ret; @@ -336,9 +336,9 @@ static inline int vmptrld(uint64_t vmcs_pa) return ret; } -static inline int vmptrst(uint64_t *value) +static inline int vmptrst(u64 *value) { - uint64_t tmp; + u64 tmp; uint8_t ret; if (enable_evmcs) @@ -356,9 +356,9 @@ static inline int vmptrst(uint64_t *value) * A wrapper around vmptrst that ignores errors and returns zero if the * vmptrst instruction fails. */ -static inline uint64_t vmptrstz(void) +static inline u64 vmptrstz(void) { - uint64_t value = 0; + u64 value = 0; vmptrst(&value); return value; } @@ -391,8 +391,8 @@ static inline int vmlaunch(void) "pop %%rcx;" "pop %%rbp;" : [ret]"=&a"(ret) - : [host_rsp]"r"((uint64_t)HOST_RSP), - [host_rip]"r"((uint64_t)HOST_RIP) + : [host_rsp]"r"((u64)HOST_RSP), + [host_rip]"r"((u64)HOST_RIP) : "memory", "cc", "rbx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); return ret; @@ -426,8 +426,8 @@ static inline int vmresume(void) "pop %%rcx;" "pop %%rbp;" : [ret]"=&a"(ret) - : [host_rsp]"r"((uint64_t)HOST_RSP), - [host_rip]"r"((uint64_t)HOST_RIP) + : [host_rsp]"r"((u64)HOST_RSP), + [host_rip]"r"((u64)HOST_RIP) : "memory", "cc", "rbx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); return ret; @@ -447,9 +447,9 @@ static inline void vmcall(void) "r10", "r11", "r12", "r13", "r14", "r15"); } -static inline int vmread(uint64_t encoding, uint64_t *value) +static inline int vmread(u64 encoding, u64 *value) { - uint64_t tmp; + u64 tmp; uint8_t ret; if (enable_evmcs) @@ -468,14 +468,14 @@ static inline int vmread(uint64_t encoding, uint64_t *value) * A wrapper around vmread that ignores errors and returns zero if the * vmread instruction fails. */ -static inline uint64_t vmreadz(uint64_t encoding) +static inline u64 vmreadz(u64 encoding) { - uint64_t value = 0; + u64 value = 0; vmread(encoding, &value); return value; } -static inline int vmwrite(uint64_t encoding, uint64_t value) +static inline int vmwrite(u64 encoding, u64 value) { uint8_t ret; @@ -497,34 +497,34 @@ static inline uint32_t vmcs_revision(void) struct vmx_pages { void *vmxon_hva; - uint64_t vmxon_gpa; + u64 vmxon_gpa; void *vmxon; void *vmcs_hva; - uint64_t vmcs_gpa; + u64 vmcs_gpa; void *vmcs; void *msr_hva; - uint64_t msr_gpa; + u64 msr_gpa; void *msr; void *shadow_vmcs_hva; - uint64_t shadow_vmcs_gpa; + u64 shadow_vmcs_gpa; void *shadow_vmcs; void *vmread_hva; - uint64_t vmread_gpa; + u64 vmread_gpa; void *vmread; void *vmwrite_hva; - uint64_t vmwrite_gpa; + u64 vmwrite_gpa; void *vmwrite; void *apic_access_hva; - uint64_t apic_access_gpa; + u64 apic_access_gpa; void *apic_access; - uint64_t eptp_gpa; + u64 eptp_gpa; }; union vmx_basic { diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index e8a60d5ccbe6..fd3e0d03e370 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -46,12 +46,12 @@ static const char * const test_stage_string[] = { struct test_args { struct kvm_vm *vm; - uint64_t guest_test_virt_mem; - uint64_t host_page_size; - uint64_t host_num_pages; - uint64_t large_page_size; - uint64_t large_num_pages; - uint64_t host_pages_per_lpage; + u64 guest_test_virt_mem; + u64 host_page_size; + u64 host_num_pages; + u64 large_page_size; + u64 large_num_pages; + u64 host_pages_per_lpage; enum vm_mem_backing_src_type src_type; struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; }; @@ -77,19 +77,19 @@ static sem_t test_stage_completed; * This will be set to the topmost valid physical address minus * the test memory size. */ -static uint64_t guest_test_phys_mem; +static u64 guest_test_phys_mem; /* * Guest virtual memory offset of the testing memory slot. * Must not conflict with identity mapped test code. */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +static u64 guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; static void guest_code(bool do_write) { struct test_args *p = &test_args; enum test_stage *current_stage = &guest_test_stage; - uint64_t addr; + u64 addr; int i, j; while (true) { @@ -113,9 +113,9 @@ static void guest_code(bool do_write) case KVM_CREATE_MAPPINGS: for (i = 0; i < p->large_num_pages; i++) { if (do_write) - *(uint64_t *)addr = 0x0123456789ABCDEF; + *(u64 *)addr = 0x0123456789ABCDEF; else - READ_ONCE(*(uint64_t *)addr); + READ_ONCE(*(u64 *)addr); addr += p->large_page_size; } @@ -131,7 +131,7 @@ static void guest_code(bool do_write) case KVM_UPDATE_MAPPINGS: if (p->src_type == VM_MEM_SRC_ANONYMOUS) { for (i = 0; i < p->host_num_pages; i++) { - *(uint64_t *)addr = 0x0123456789ABCDEF; + *(u64 *)addr = 0x0123456789ABCDEF; addr += p->host_page_size; } break; @@ -142,7 +142,7 @@ static void guest_code(bool do_write) * Write to the first host page in each large * page region, and triger break of large pages. */ - *(uint64_t *)addr = 0x0123456789ABCDEF; + *(u64 *)addr = 0x0123456789ABCDEF; /* * Access the middle host pages in each large @@ -152,7 +152,7 @@ static void guest_code(bool do_write) */ addr += p->large_page_size / 2; for (j = 0; j < p->host_pages_per_lpage / 2; j++) { - READ_ONCE(*(uint64_t *)addr); + READ_ONCE(*(u64 *)addr); addr += p->host_page_size; } } @@ -167,7 +167,7 @@ static void guest_code(bool do_write) */ case KVM_ADJUST_MAPPINGS: for (i = 0; i < p->host_num_pages; i++) { - READ_ONCE(*(uint64_t *)addr); + READ_ONCE(*(u64 *)addr); addr += p->host_page_size; } break; @@ -227,8 +227,8 @@ static void *vcpu_worker(void *data) } struct test_params { - uint64_t phys_offset; - uint64_t test_mem_size; + u64 phys_offset; + u64 test_mem_size; enum vm_mem_backing_src_type src_type; }; @@ -237,12 +237,12 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) int ret; struct test_params *p = arg; enum vm_mem_backing_src_type src_type = p->src_type; - uint64_t large_page_size = get_backing_src_pagesz(src_type); - uint64_t guest_page_size = vm_guest_mode_params[mode].page_size; - uint64_t host_page_size = getpagesize(); - uint64_t test_mem_size = p->test_mem_size; - uint64_t guest_num_pages; - uint64_t alignment; + u64 large_page_size = get_backing_src_pagesz(src_type); + u64 guest_page_size = vm_guest_mode_params[mode].page_size; + u64 host_page_size = getpagesize(); + u64 test_mem_size = p->test_mem_size; + u64 guest_num_pages; + u64 alignment; void *host_test_mem; struct kvm_vm *vm; @@ -304,7 +304,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) pr_info("Guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); pr_info("Host virtual test memory offset: 0x%lx\n", - (uint64_t)host_test_mem); + (u64)host_test_mem); pr_info("Number of testing vCPUs: %d\n", nr_vcpus); return vm; diff --git a/tools/testing/selftests/kvm/lib/arm64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c index b023868fe0b8..3b0aa4eff557 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic.c @@ -73,7 +73,7 @@ void gic_irq_disable(unsigned int intid) unsigned int gic_get_and_ack_irq(void) { - uint64_t irqstat; + u64 irqstat; unsigned int intid; GUEST_ASSERT(gic_common_ops); @@ -102,7 +102,7 @@ void gic_set_eoi_split(bool split) gic_common_ops->gic_set_eoi_split(split); } -void gic_set_priority_mask(uint64_t pmr) +void gic_set_priority_mask(u64 pmr) { GUEST_ASSERT(gic_common_ops); gic_common_ops->gic_set_priority_mask(pmr); diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h index b6a7e30c3eb1..4ccc72533b35 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic_private.h +++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h @@ -12,11 +12,11 @@ struct gic_common_ops { void (*gic_cpu_init)(unsigned int cpu); void (*gic_irq_enable)(unsigned int intid); void (*gic_irq_disable)(unsigned int intid); - uint64_t (*gic_read_iar)(void); + u64 (*gic_read_iar)(void); void (*gic_write_eoir)(uint32_t irq); void (*gic_write_dir)(uint32_t irq); void (*gic_set_eoi_split)(bool split); - void (*gic_set_priority_mask)(uint64_t mask); + void (*gic_set_priority_mask)(u64 mask); void (*gic_set_priority)(uint32_t intid, uint32_t prio); void (*gic_irq_set_active)(uint32_t intid); void (*gic_irq_clear_active)(uint32_t intid); diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c index ae3959f3bb11..cdd29245c84e 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c @@ -91,9 +91,9 @@ static enum gicv3_intid_range get_intid_range(unsigned int intid) return INVALID_RANGE; } -static uint64_t gicv3_read_iar(void) +static u64 gicv3_read_iar(void) { - uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); + u64 irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); dsb(sy); return irqstat; @@ -111,7 +111,7 @@ static void gicv3_write_dir(uint32_t irq) isb(); } -static void gicv3_set_priority_mask(uint64_t mask) +static void gicv3_set_priority_mask(u64 mask) { write_sysreg_s(mask, SYS_ICC_PMR_EL1); } @@ -129,27 +129,27 @@ static void gicv3_set_eoi_split(bool split) isb(); } -uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset) +uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, u64 offset) { volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); return readl(base + offset); } -void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val) +void gicv3_reg_writel(uint32_t cpu_or_dist, u64 offset, uint32_t reg_val) { volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); writel(reg_val, base + offset); } -uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask) +uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, u64 offset, uint32_t mask) { return gicv3_reg_readl(cpu_or_dist, offset) & mask; } -void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset, - uint32_t mask, uint32_t reg_val) +void gicv3_setl_fields(uint32_t cpu_or_dist, u64 offset, + uint32_t mask, uint32_t reg_val) { uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask; @@ -165,9 +165,9 @@ void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset, * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being * marked as "Reserved" in the Distributor map. */ -static void gicv3_access_reg(uint32_t intid, uint64_t offset, - uint32_t reg_bits, uint32_t bits_per_field, - bool write, uint32_t *val) +static void gicv3_access_reg(uint32_t intid, u64 offset, + uint32_t reg_bits, uint32_t bits_per_field, + bool write, uint32_t *val) { uint32_t cpu = guest_get_vcpuid(); enum gicv3_intid_range intid_range = get_intid_range(intid); @@ -197,15 +197,15 @@ static void gicv3_access_reg(uint32_t intid, uint64_t offset, *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift; } -static void gicv3_write_reg(uint32_t intid, uint64_t offset, - uint32_t reg_bits, uint32_t bits_per_field, uint32_t val) +static void gicv3_write_reg(uint32_t intid, u64 offset, + uint32_t reg_bits, uint32_t bits_per_field, uint32_t val) { gicv3_access_reg(intid, offset, reg_bits, bits_per_field, true, &val); } -static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset, - uint32_t reg_bits, uint32_t bits_per_field) +static uint32_t gicv3_read_reg(uint32_t intid, u64 offset, + uint32_t reg_bits, uint32_t bits_per_field) { uint32_t val; diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 0e8603788134..159368036325 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -21,18 +21,18 @@ static gva_t exception_handlers; -static uint64_t pgd_index(struct kvm_vm *vm, gva_t gva) +static u64 pgd_index(struct kvm_vm *vm, gva_t gva) { unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; - uint64_t mask = (1UL << (vm->va_bits - shift)) - 1; + u64 mask = (1UL << (vm->va_bits - shift)) - 1; return (gva >> shift) & mask; } -static uint64_t pud_index(struct kvm_vm *vm, gva_t gva) +static u64 pud_index(struct kvm_vm *vm, gva_t gva) { unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift; - uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + u64 mask = (1UL << (vm->page_shift - 3)) - 1; TEST_ASSERT(vm->mmu.pgtable_levels == 4, "Mode %d does not have 4 page table levels", vm->mode); @@ -40,10 +40,10 @@ static uint64_t pud_index(struct kvm_vm *vm, gva_t gva) return (gva >> shift) & mask; } -static uint64_t pmd_index(struct kvm_vm *vm, gva_t gva) +static u64 pmd_index(struct kvm_vm *vm, gva_t gva) { unsigned int shift = (vm->page_shift - 3) + vm->page_shift; - uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + u64 mask = (1UL << (vm->page_shift - 3)) - 1; TEST_ASSERT(vm->mmu.pgtable_levels >= 3, "Mode %d does not have >= 3 page table levels", vm->mode); @@ -51,9 +51,9 @@ static uint64_t pmd_index(struct kvm_vm *vm, gva_t gva) return (gva >> shift) & mask; } -static uint64_t pte_index(struct kvm_vm *vm, gva_t gva) +static u64 pte_index(struct kvm_vm *vm, gva_t gva) { - uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + u64 mask = (1UL << (vm->page_shift - 3)) - 1; return (gva >> vm->page_shift) & mask; } @@ -63,9 +63,9 @@ static inline bool use_lpa2_pte_format(struct kvm_vm *vm) (vm->pa_bits > 48 || vm->va_bits > 48); } -static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) +static u64 addr_pte(struct kvm_vm *vm, u64 pa, u64 attrs) { - uint64_t pte; + u64 pte; if (use_lpa2_pte_format(vm)) { pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift); @@ -81,9 +81,9 @@ static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) return pte; } -static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) +static u64 pte_addr(struct kvm_vm *vm, u64 pte) { - uint64_t pa; + u64 pa; if (use_lpa2_pte_format(vm)) { pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift); @@ -97,13 +97,13 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) return pa; } -static uint64_t ptrs_per_pgd(struct kvm_vm *vm) +static u64 ptrs_per_pgd(struct kvm_vm *vm) { unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; return 1 << (vm->va_bits - shift); } -static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm) +static u64 __maybe_unused ptrs_per_pte(struct kvm_vm *vm) { return 1 << (vm->page_shift - 3); } @@ -121,12 +121,12 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) vm->mmu.pgd_created = true; } -static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint64_t flags) +static void _virt_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr, + u64 flags) { uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); - uint64_t pg_attr; - uint64_t *ptep; + u64 pg_attr; + u64 *ptep; TEST_ASSERT((vaddr % vm->page_size) == 0, "Virtual address not on page boundary,\n" @@ -174,16 +174,16 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, *ptep = addr_pte(vm, paddr, pg_attr); } -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) { - uint64_t attr_idx = MT_NORMAL; + u64 attr_idx = MT_NORMAL; _virt_pg_map(vm, vaddr, paddr, attr_idx); } -uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, gva_t gva, int level) +u64 *virt_get_pte_hva_at_level(struct kvm_vm *vm, gva_t gva, int level) { - uint64_t *ptep; + u64 *ptep; if (!vm->mmu.pgd_created) goto unmapped_gva; @@ -225,23 +225,23 @@ unmapped_gva: exit(EXIT_FAILURE); } -uint64_t *virt_get_pte_hva(struct kvm_vm *vm, gva_t gva) +u64 *virt_get_pte_hva(struct kvm_vm *vm, gva_t gva) { return virt_get_pte_hva_at_level(vm, gva, 3); } gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { - uint64_t *ptep = virt_get_pte_hva(vm, gva); + u64 *ptep = virt_get_pte_hva(vm, gva); return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); } -static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) +static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, u64 page, int level) { #ifdef DEBUG static const char * const type[] = { "", "pud", "pmd", "pte" }; - uint64_t pte, *ptep; + u64 pte, *ptep; if (level == 4) return; @@ -259,7 +259,7 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { int level = 4 - (vm->mmu.pgtable_levels - 1); - uint64_t pgd, *ptep; + u64 pgd, *ptep; if (!vm->mmu.pgd_created) return; @@ -298,7 +298,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) { struct kvm_vcpu_init default_init = { .target = -1, }; struct kvm_vm *vm = vcpu->vm; - uint64_t sctlr_el1, tcr_el1, ttbr0_el1; + u64 sctlr_el1, tcr_el1, ttbr0_el1; if (!init) { kvm_get_default_vcpu_target(vm, &default_init); @@ -399,7 +399,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { - uint64_t pstate, pc; + u64 pstate, pc; pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate)); pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); @@ -410,14 +410,14 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) { - vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (u64)guest_code); } static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_vcpu_init *init) { size_t stack_size; - uint64_t stack_vaddr; + u64 stack_vaddr; struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : @@ -459,13 +459,13 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) for (i = 0; i < num; i++) { vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]), - va_arg(ap, uint64_t)); + va_arg(ap, u64)); } va_end(ap); } -void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec) +void kvm_exit_unexpected_exception(int vector, u64 ec, bool valid_ec) { ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec); while (1) @@ -498,7 +498,7 @@ void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) { extern char vectors; - vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_VBAR_EL1), (uint64_t)&vectors); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_VBAR_EL1), (u64)&vectors); } void route_exception(struct ex_regs *regs, int vector) @@ -584,11 +584,11 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, { struct kvm_vcpu_init preferred_init; int kvm_fd, vm_fd, vcpu_fd, err; - uint64_t val; + u64 val; uint32_t gran; struct kvm_one_reg reg = { .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1), - .addr = (uint64_t)&val, + .addr = (u64)&val, }; kvm_fd = open_kvm_dev_path_or_exit(); @@ -646,17 +646,17 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7") -void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, - uint64_t arg6, struct arm_smccc_res *res) +void smccc_hvc(uint32_t function_id, u64 arg0, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, + u64 arg6, struct arm_smccc_res *res) { __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5, arg6, res); } -void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, - uint64_t arg6, struct arm_smccc_res *res) +void smccc_smc(uint32_t function_id, u64 arg0, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, + u64 arg6, struct arm_smccc_res *res) { __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5, arg6, res); diff --git a/tools/testing/selftests/kvm/lib/arm64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c index 5f85fa7a9449..8257dc4ae106 100644 --- a/tools/testing/selftests/kvm/lib/arm64/ucall.c +++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c @@ -25,9 +25,9 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_MMIO && run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { - TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(u64), "Unexpected ucall exit mmio address access"); - return (void *)(*((uint64_t *)run->mmio.data)); + return (void *)(*((u64 *)run->mmio.data)); } return NULL; diff --git a/tools/testing/selftests/kvm/lib/arm64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c index d0f7bd0984b8..166637d6abf5 100644 --- a/tools/testing/selftests/kvm/lib/arm64/vgic.c +++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c @@ -44,7 +44,7 @@ bool kvm_supports_vgic_v3(void) int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) { int gic_fd; - uint64_t attr; + u64 attr; unsigned int nr_gic_pages; /* Distributor setup */ @@ -106,9 +106,9 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) /* should only work for level sensitive interrupts */ int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) { - uint64_t attr = 32 * (intid / 32); - uint64_t index = intid % 32; - uint64_t val; + u64 attr = 32 * (intid / 32); + u64 index = intid % 32; + u64 val; int ret; ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, @@ -152,12 +152,12 @@ void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) } static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu, - uint64_t reg_off) + u64 reg_off) { - uint64_t reg = intid / 32; - uint64_t index = intid % 32; - uint64_t attr = reg_off + reg * 4; - uint64_t val; + u64 reg = intid / 32; + u64 index = intid % 32; + u64 attr = reg_off + reg * 4; + u64 val; bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid); uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index ff90fba3a5c6..0f2710cda9d8 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -156,7 +156,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment " "memsize of 0,\n" " phdr index: %u p_memsz: 0x%" PRIx64, - n1, (uint64_t) phdr.p_memsz); + n1, (u64)phdr.p_memsz); gva_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size); gva_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1; seg_vend |= vm->page_size - 1; diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c index 74627514c4d4..a3a44657e72d 100644 --- a/tools/testing/selftests/kvm/lib/guest_sprintf.c +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -35,8 +35,8 @@ static int skip_atoi(const char **s) ({ \ int __res; \ \ - __res = ((uint64_t) n) % (uint32_t) base; \ - n = ((uint64_t) n) / (uint32_t) base; \ + __res = ((u64)n) % (uint32_t) base; \ + n = ((u64)n) / (uint32_t) base; \ __res; \ }) @@ -119,7 +119,7 @@ int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args) { char *str, *end; const char *s; - uint64_t num; + u64 num; int i, base; int len; @@ -240,7 +240,7 @@ repeat: flags |= SPECIAL | SMALL | ZEROPAD; } str = number(str, end, - (uint64_t)va_arg(args, void *), 16, + (u64)va_arg(args, void *), 16, field_width, precision, flags); continue; @@ -284,7 +284,7 @@ repeat: continue; } if (qualifier == 'l') - num = va_arg(args, uint64_t); + num = va_arg(args, u64); else if (qualifier == 'h') { num = (uint16_t)va_arg(args, int); if (flags & SIGN) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 89c4e6f01739..d97bf1141a55 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -396,12 +396,12 @@ struct kvm_vm *____vm_create(struct vm_shape shape) return vm; } -static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, - uint32_t nr_runnable_vcpus, - uint64_t extra_mem_pages) +static u64 vm_nr_pages_required(enum vm_guest_mode mode, + uint32_t nr_runnable_vcpus, + u64 extra_mem_pages) { - uint64_t page_size = vm_guest_mode_params[mode].page_size; - uint64_t nr_pages; + u64 page_size = vm_guest_mode_params[mode].page_size; + u64 nr_pages; TEST_ASSERT(nr_runnable_vcpus, "Use vm_create_barebones() for VMs that _never_ have vCPUs"); @@ -477,9 +477,9 @@ static bool is_guest_memfd_required(struct vm_shape shape) } struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, - uint64_t nr_extra_pages) + u64 nr_extra_pages) { - uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus, + u64 nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus, nr_extra_pages); struct userspace_mem_region *slot0; struct kvm_vm *vm; @@ -547,7 +547,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, * no real memory allocation for non-slot0 memory in this function. */ struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, - uint64_t extra_mem_pages, + u64 extra_mem_pages, void *guest_code, struct kvm_vcpu *vcpus[]) { struct kvm_vm *vm; @@ -566,7 +566,7 @@ struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, + u64 extra_mem_pages, void *guest_code) { struct kvm_vcpu *vcpus[1]; @@ -715,15 +715,15 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], * region exists. */ static struct userspace_mem_region * -userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) +userspace_mem_region_find(struct kvm_vm *vm, u64 start, u64 end) { struct rb_node *node; for (node = vm->regions.gpa_tree.rb_node; node; ) { struct userspace_mem_region *region = container_of(node, struct userspace_mem_region, gpa_node); - uint64_t existing_start = region->region.guest_phys_addr; - uint64_t existing_end = region->region.guest_phys_addr + u64 existing_start = region->region.guest_phys_addr; + u64 existing_end = region->region.guest_phys_addr + region->region.memory_size - 1; if (start <= existing_end && end >= existing_start) return region; @@ -919,7 +919,7 @@ static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree, int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva) + u64 gpa, u64 size, void *hva) { struct kvm_userspace_memory_region region = { .slot = slot, @@ -933,7 +933,7 @@ int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags } void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva) + u64 gpa, u64 size, void *hva) { int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva); @@ -946,8 +946,8 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)") int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva, - uint32_t guest_memfd, uint64_t guest_memfd_offset) + u64 gpa, u64 size, void *hva, + uint32_t guest_memfd, u64 guest_memfd_offset) { struct kvm_userspace_memory_region2 region = { .slot = slot, @@ -965,8 +965,8 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag } void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva, - uint32_t guest_memfd, uint64_t guest_memfd_offset) + u64 gpa, u64 size, void *hva, + uint32_t guest_memfd, u64 guest_memfd_offset) { int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva, guest_memfd, guest_memfd_offset); @@ -978,8 +978,8 @@ void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags /* FIXME: This thing needs to be ripped apart and rewritten. */ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags, - int guest_memfd, uint64_t guest_memfd_offset) + u64 gpa, uint32_t slot, u64 npages, uint32_t flags, + int guest_memfd, u64 guest_memfd_offset) { int ret; struct userspace_mem_region *region; @@ -1016,8 +1016,8 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, " requested gpa: 0x%lx npages: 0x%lx page_size: 0x%x\n" " existing gpa: 0x%lx size: 0x%lx", gpa, npages, vm->page_size, - (uint64_t) region->region.guest_phys_addr, - (uint64_t) region->region.memory_size); + (u64)region->region.guest_phys_addr, + (u64)region->region.memory_size); /* Confirm no region with the requested slot already exists. */ hash_for_each_possible(vm->regions.slot_hash, region, slot_node, @@ -1030,8 +1030,8 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" " existing slot: %u paddr: 0x%lx size: 0x%lx", slot, gpa, npages, region->region.slot, - (uint64_t) region->region.guest_phys_addr, - (uint64_t) region->region.memory_size); + (u64)region->region.guest_phys_addr, + (u64)region->region.memory_size); } /* Allocate and initialize new mem region structure. */ @@ -1141,7 +1141,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t gpa, uint32_t slot, uint64_t npages, + u64 gpa, uint32_t slot, u64 npages, uint32_t flags) { vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0); @@ -1234,7 +1234,7 @@ void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot) * * Change the gpa of a memory region. */ -void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) +void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, u64 new_gpa) { struct userspace_mem_region *region; int ret; @@ -1273,18 +1273,18 @@ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) __vm_mem_region_delete(vm, region); } -void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, +void vm_guest_mem_fallocate(struct kvm_vm *vm, u64 base, u64 size, bool punch_hole) { const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0); struct userspace_mem_region *region; - uint64_t end = base + size; - uint64_t gpa, len; + u64 end = base + size; + u64 gpa, len; off_t fd_offset; int ret; for (gpa = base; gpa < end; gpa += len) { - uint64_t offset; + u64 offset; region = userspace_mem_region_find(vm, gpa, gpa); TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD, @@ -1292,7 +1292,7 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, offset = gpa - region->region.guest_phys_addr; fd_offset = region->region.guest_memfd_offset + offset; - len = min_t(uint64_t, end - gpa, region->region.memory_size - offset); + len = min_t(u64, end - gpa, region->region.memory_size - offset); ret = fallocate(region->region.guest_memfd, mode, fd_offset, len); TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx", @@ -1388,10 +1388,10 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) */ gva_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, gva_t vaddr_min) { - uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; + u64 pages = (sz + vm->page_size - 1) >> vm->page_shift; /* Determine lowest permitted virtual page index. */ - uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; + u64 pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; if ((pgidx_start * vm->page_size) < vaddr_min) goto no_va_found; @@ -1454,7 +1454,7 @@ va_found: static gva_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, enum kvm_mem_region_type type, bool protected) { - uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); + u64 pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); virt_pgd_alloc(vm); gpa_t paddr = __vm_phy_pages_alloc(vm, pages, @@ -1573,7 +1573,7 @@ gva_t vm_vaddr_alloc_page(struct kvm_vm *vm) * Within the VM given by @vm, creates a virtual translation for * @npages starting at @vaddr to the page range starting at @paddr. */ -void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, +void virt_map(struct kvm_vm *vm, u64 vaddr, u64 paddr, unsigned int npages) { size_t page_size = vm->page_size; @@ -1807,7 +1807,7 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) * Device Ioctl */ -int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) +int __kvm_has_device_attr(int dev_fd, uint32_t group, u64 attr) { struct kvm_device_attr attribute = { .group = group, @@ -1818,7 +1818,7 @@ int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute); } -int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type) +int __kvm_test_create_device(struct kvm_vm *vm, u64 type) { struct kvm_create_device create_dev = { .type = type, @@ -1828,7 +1828,7 @@ int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type) return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev); } -int __kvm_create_device(struct kvm_vm *vm, uint64_t type) +int __kvm_create_device(struct kvm_vm *vm, u64 type) { struct kvm_create_device create_dev = { .type = type, @@ -1842,7 +1842,7 @@ int __kvm_create_device(struct kvm_vm *vm, uint64_t type) return err ? : create_dev.fd; } -int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val) +int __kvm_device_attr_get(int dev_fd, uint32_t group, u64 attr, void *val) { struct kvm_device_attr kvmattr = { .group = group, @@ -1854,7 +1854,7 @@ int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val) return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr); } -int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val) +int __kvm_device_attr_set(int dev_fd, uint32_t group, u64 attr, void *val) { struct kvm_device_attr kvmattr = { .group = group, @@ -1965,8 +1965,8 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) { fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " "host_virt: %p\n", indent + 2, "", - (uint64_t) region->region.guest_phys_addr, - (uint64_t) region->region.memory_size, + (u64)region->region.guest_phys_addr, + (u64)region->region.memory_size, region->host_mem); fprintf(stream, "%*sunused_phy_pages: ", indent + 2, ""); sparsebit_dump(stream, region->unused_phy_pages, 0); @@ -2254,7 +2254,7 @@ struct kvm_stats_desc *read_stats_descriptors(int stats_fd, * Read the data values of a specified stat from the binary stats interface. */ void read_stat_data(int stats_fd, struct kvm_stats_header *header, - struct kvm_stats_desc *desc, uint64_t *data, + struct kvm_stats_desc *desc, u64 *data, size_t max_elements) { size_t nr_elements = min_t(ssize_t, desc->size, max_elements); @@ -2275,7 +2275,7 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header, } void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, - uint64_t *data, size_t max_elements) + u64 *data, size_t max_elements) { struct kvm_stats_desc *desc; size_t size_desc; diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c index 28a384e9704f..989473b3da7b 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/processor.c +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -15,29 +15,29 @@ static gpa_t invalid_pgtable[4]; static gva_t exception_handlers; -static uint64_t virt_pte_index(struct kvm_vm *vm, gva_t gva, int level) +static u64 virt_pte_index(struct kvm_vm *vm, gva_t gva, int level) { unsigned int shift; - uint64_t mask; + u64 mask; shift = level * (vm->page_shift - 3) + vm->page_shift; mask = (1UL << (vm->page_shift - 3)) - 1; return (gva >> shift) & mask; } -static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) +static u64 pte_addr(struct kvm_vm *vm, u64 entry) { return entry & ~((0x1UL << vm->page_shift) - 1); } -static uint64_t ptrs_per_pte(struct kvm_vm *vm) +static u64 ptrs_per_pte(struct kvm_vm *vm) { return 1 << (vm->page_shift - 3); } static void virt_set_pgtable(struct kvm_vm *vm, gpa_t table, gpa_t child) { - uint64_t *ptep; + u64 *ptep; int i, ptrs_per_pte; ptep = addr_gpa2hva(vm, table); @@ -67,15 +67,15 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) vm->mmu.pgd_created = true; } -static int virt_pte_none(uint64_t *ptep, int level) +static int virt_pte_none(u64 *ptep, int level) { return *ptep == invalid_pgtable[level]; } -static uint64_t *virt_populate_pte(struct kvm_vm *vm, gva_t gva, int alloc) +static u64 *virt_populate_pte(struct kvm_vm *vm, gva_t gva, int alloc) { int level; - uint64_t *ptep; + u64 *ptep; gpa_t child; if (!vm->mmu.pgd_created) @@ -108,7 +108,7 @@ unmapped_gva: gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { - uint64_t *ptep; + u64 *ptep; ptep = virt_populate_pte(vm, gva, 0); TEST_ASSERT(*ptep != 0, "Virtual address vaddr: 0x%lx not mapped\n", gva); @@ -116,10 +116,10 @@ gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); } -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) { uint32_t prot_bits; - uint64_t *ptep; + u64 *ptep; TEST_ASSERT((vaddr % vm->page_size) == 0, "Virtual address not on page boundary,\n" @@ -140,9 +140,9 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) WRITE_ONCE(*ptep, paddr | prot_bits); } -static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) +static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, u64 page, int level) { - uint64_t pte, *ptep; + u64 pte, *ptep; static const char * const type[] = { "pte", "pmd", "pud", "pgd"}; if (level < 0) @@ -241,36 +241,36 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) va_start(ap, num); for (i = 0; i < num; i++) - regs.gpr[i + 4] = va_arg(ap, uint64_t); + regs.gpr[i + 4] = va_arg(ap, u64); va_end(ap); vcpu_regs_set(vcpu, ®s); } -static void loongarch_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +static void loongarch_set_reg(struct kvm_vcpu *vcpu, u64 id, u64 val) { __vcpu_set_reg(vcpu, id, val); } -static void loongarch_set_cpucfg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +static void loongarch_set_cpucfg(struct kvm_vcpu *vcpu, u64 id, u64 val) { - uint64_t cfgid; + u64 cfgid; cfgid = KVM_REG_LOONGARCH_CPUCFG | KVM_REG_SIZE_U64 | 8 * id; __vcpu_set_reg(vcpu, cfgid, val); } -static void loongarch_get_csr(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +static void loongarch_get_csr(struct kvm_vcpu *vcpu, u64 id, void *addr) { - uint64_t csrid; + u64 csrid; csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id; __vcpu_get_reg(vcpu, csrid, addr); } -static void loongarch_set_csr(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +static void loongarch_set_csr(struct kvm_vcpu *vcpu, u64 id, u64 val) { - uint64_t csrid; + u64 csrid; csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id; __vcpu_set_reg(vcpu, csrid, val); @@ -372,7 +372,7 @@ void loongarch_vcpu_setup(struct kvm_vcpu *vcpu) struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) { size_t stack_size; - uint64_t stack_vaddr; + u64 stack_vaddr; struct kvm_regs regs; struct kvm_vcpu *vcpu; @@ -397,6 +397,6 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) /* Setup guest PC register */ vcpu_regs_get(vcpu, ®s); - regs.pc = (uint64_t)guest_code; + regs.pc = (u64)guest_code; vcpu_regs_set(vcpu, ®s); } diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c index 2c8abe9f5382..eb9f714a535c 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/ucall.c +++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c @@ -28,10 +28,10 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_MMIO && run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { - TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(u64), "Unexpected ucall exit mmio address access"); - return (void *)(*((uint64_t *)run->mmio.data)); + return (void *)(*((u64 *)run->mmio.data)); } return NULL; diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index b7bfeade85f7..5fcae17f687e 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -16,7 +16,7 @@ struct memstress_args memstress_args; * Guest virtual memory offset of the testing memory slot. * Must not conflict with identity mapped test code. */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +static u64 guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; struct vcpu_thread { /* The index of the vCPU. */ @@ -49,10 +49,10 @@ void memstress_guest_code(uint32_t vcpu_idx) struct memstress_args *args = &memstress_args; struct memstress_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx]; struct guest_random_state rand_state; - uint64_t gva; - uint64_t pages; - uint64_t addr; - uint64_t page; + u64 gva; + u64 pages; + u64 addr; + u64 page; int i; rand_state = new_guest_random_state(guest_random_seed + vcpu_idx); @@ -76,9 +76,9 @@ void memstress_guest_code(uint32_t vcpu_idx) addr = gva + (page * args->guest_page_size); if (__guest_random_bool(&rand_state, args->write_percent)) - *(uint64_t *)addr = 0x0123456789ABCDEF; + *(u64 *)addr = 0x0123456789ABCDEF; else - READ_ONCE(*(uint64_t *)addr); + READ_ONCE(*(u64 *)addr); } GUEST_SYNC(1); @@ -87,7 +87,7 @@ void memstress_guest_code(uint32_t vcpu_idx) void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[], - uint64_t vcpu_memory_bytes, + u64 vcpu_memory_bytes, bool partition_vcpu_memory_access) { struct memstress_args *args = &memstress_args; @@ -122,15 +122,15 @@ void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, } struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, - uint64_t vcpu_memory_bytes, int slots, + u64 vcpu_memory_bytes, int slots, enum vm_mem_backing_src_type backing_src, bool partition_vcpu_memory_access) { struct memstress_args *args = &memstress_args; struct kvm_vm *vm; - uint64_t guest_num_pages, slot0_pages = 0; - uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); - uint64_t region_end_gfn; + u64 guest_num_pages, slot0_pages = 0; + u64 backing_src_pagesz = get_backing_src_pagesz(backing_src); + u64 region_end_gfn; int i; pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); @@ -202,7 +202,7 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, /* Add extra memory slots for testing */ for (i = 0; i < slots; i++) { - uint64_t region_pages = guest_num_pages / slots; + u64 region_pages = guest_num_pages / slots; gpa_t region_start = args->gpa + region_pages * args->guest_page_size * i; vm_userspace_mem_region_add(vm, backing_src, region_start, @@ -244,7 +244,7 @@ void memstress_set_random_access(struct kvm_vm *vm, bool random_access) sync_global_to_guest(vm, memstress_args.random_access); } -uint64_t __weak memstress_nested_pages(int nr_vcpus) +u64 __weak memstress_nested_pages(int nr_vcpus) { return 0; } @@ -349,7 +349,7 @@ void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int sl } void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], - int slots, uint64_t pages_per_slot) + int slots, u64 pages_per_slot) { int i; @@ -360,7 +360,7 @@ void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], } } -unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot) +unsigned long **memstress_alloc_bitmaps(int slots, u64 pages_per_slot) { unsigned long **bitmaps; int i; diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 25749439fdbf..a1f5c8660952 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -17,7 +17,7 @@ static gva_t exception_handlers; -bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext) +bool __vcpu_has_ext(struct kvm_vcpu *vcpu, u64 ext) { unsigned long value = 0; int ret; @@ -27,18 +27,18 @@ bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext) return !ret && !!value; } -static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) +static u64 pte_addr(struct kvm_vm *vm, u64 entry) { return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) << PGTBL_PAGE_SIZE_SHIFT; } -static uint64_t ptrs_per_pte(struct kvm_vm *vm) +static u64 ptrs_per_pte(struct kvm_vm *vm) { - return PGTBL_PAGE_SIZE / sizeof(uint64_t); + return PGTBL_PAGE_SIZE / sizeof(u64); } -static uint64_t pte_index_mask[] = { +static u64 pte_index_mask[] = { PGTBL_L0_INDEX_MASK, PGTBL_L1_INDEX_MASK, PGTBL_L2_INDEX_MASK, @@ -52,7 +52,7 @@ static uint32_t pte_index_shift[] = { PGTBL_L3_INDEX_SHIFT, }; -static uint64_t pte_index(struct kvm_vm *vm, gva_t gva, int level) +static u64 pte_index(struct kvm_vm *vm, gva_t gva, int level) { TEST_ASSERT(level > -1, "Negative page table level (%d) not possible", level); @@ -75,9 +75,9 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) vm->mmu.pgd_created = true; } -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) { - uint64_t *ptep, next_ppn; + u64 *ptep, next_ppn; int level = vm->mmu.pgtable_levels - 1; TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -121,7 +121,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { - uint64_t *ptep; + u64 *ptep; int level = vm->mmu.pgtable_levels - 1; if (!vm->mmu.pgd_created) @@ -149,11 +149,11 @@ unmapped_gva: } static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, - uint64_t page, int level) + u64 page, int level) { #ifdef DEBUG static const char *const type[] = { "pte", "pmd", "pud", "p4d"}; - uint64_t pte, *ptep; + u64 pte, *ptep; if (level < 0) return; @@ -174,7 +174,7 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { struct kvm_mmu *mmu = &vm->mmu; int level = mmu->pgtable_levels - 1; - uint64_t pgd, *ptep; + u64 pgd, *ptep; if (!mmu->pgd_created) return; @@ -358,7 +358,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) { va_list ap; - uint64_t id = RISCV_CORE_REG(regs.a0); + u64 id = RISCV_CORE_REG(regs.a0); int i; TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" @@ -393,7 +393,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) id = RISCV_CORE_REG(regs.a7); break; } - vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t)); + vcpu_set_reg(vcpu, id, va_arg(ap, u64)); } va_end(ap); @@ -544,10 +544,10 @@ void kvm_selftest_arch_init(void) unsigned long riscv64_get_satp_mode(void) { int kvm_fd, vm_fd, vcpu_fd, err; - uint64_t val; + u64 val; struct kvm_one_reg reg = { .id = RISCV_CONFIG_REG(satp_mode), - .addr = (uint64_t)&val, + .addr = (u64)&val, }; kvm_fd = open_kvm_dev_path_or_exit(); diff --git a/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c index 2c432fa164f1..f5480473f192 100644 --- a/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c +++ b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c @@ -13,7 +13,7 @@ static void guest_code(void) { - uint64_t diag318_info = 0x12345678; + u64 diag318_info = 0x12345678; asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info)); } @@ -23,13 +23,13 @@ static void guest_code(void) * we create an ad-hoc VM here to handle the instruction then extract the * necessary data. It is up to the caller to decide what to do with that data. */ -static uint64_t diag318_handler(void) +static u64 diag318_handler(void) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; - uint64_t reg; - uint64_t diag318_info; + u64 reg; + u64 diag318_info; vm = vm_create_with_one_vcpu(&vcpu, guest_code); vcpu_run(vcpu); @@ -51,9 +51,9 @@ static uint64_t diag318_handler(void) return diag318_info; } -uint64_t get_diag318_info(void) +u64 get_diag318_info(void) { - static uint64_t diag318_info; + static u64 diag318_info; static bool printed_skip; /* diff --git a/tools/testing/selftests/kvm/lib/s390/facility.c b/tools/testing/selftests/kvm/lib/s390/facility.c index d540812d911a..9a778054f07f 100644 --- a/tools/testing/selftests/kvm/lib/s390/facility.c +++ b/tools/testing/selftests/kvm/lib/s390/facility.c @@ -10,5 +10,5 @@ #include "facility.h" -uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +u64 stfl_doublewords[NB_STFL_DOUBLEWORDS]; bool stfle_flag; diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 153cef5c2328..a83a2b76524b 100644 --- a/tools/testing/selftests/kvm/lib/s390/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -34,9 +34,9 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) * a page table (ri == 4). Returns a suitable region/segment table entry * which points to the freshly allocated pages. */ -static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri) +static u64 virt_alloc_region(struct kvm_vm *vm, int ri) { - uint64_t taddr; + u64 taddr; taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); @@ -47,10 +47,10 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri) | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH); } -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) +void virt_arch_pg_map(struct kvm_vm *vm, u64 gva, u64 gpa) { int ri, idx; - uint64_t *entry; + u64 *entry; TEST_ASSERT((gva % vm->page_size) == 0, "Virtual address not on page boundary,\n" @@ -89,7 +89,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { int ri, idx; - uint64_t *entry; + u64 *entry; TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); @@ -112,9 +112,9 @@ gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) } static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent, - uint64_t ptea_start) + u64 ptea_start) { - uint64_t *pte, ptea; + u64 *pte, ptea; for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) { pte = addr_gpa2hva(vm, ptea); @@ -126,9 +126,9 @@ static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent, } static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent, - uint64_t reg_tab_addr) + u64 reg_tab_addr) { - uint64_t addr, *entry; + u64 addr, *entry; for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) { entry = addr_gpa2hva(vm, addr); @@ -163,7 +163,7 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) { size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); - uint64_t stack_vaddr; + u64 stack_vaddr; struct kvm_regs regs; struct kvm_sregs sregs; struct kvm_vcpu *vcpu; @@ -206,7 +206,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) vcpu_regs_get(vcpu, ®s); for (i = 0; i < num; i++) - regs.gprs[i + 2] = va_arg(ap, uint64_t); + regs.gprs[i + 2] = va_arg(ap, u64); vcpu_regs_set(vcpu, ®s); va_end(ap); diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index a99188f87a38..8a472dcb3d5e 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -76,8 +76,8 @@ * the use of a binary-search tree, where each node contains at least * the following members: * - * typedef uint64_t sparsebit_idx_t; - * typedef uint64_t sparsebit_num_t; + * typedef u64 sparsebit_idx_t; + * typedef u64 sparsebit_num_t; * * sparsebit_idx_t idx; * uint32_t mask; @@ -2056,9 +2056,9 @@ unsigned char get8(void) return ch; } -uint64_t get64(void) +u64 get64(void) { - uint64_t x; + u64 x; x = get8(); x = (x << 8) | get8(); @@ -2075,8 +2075,8 @@ int main(void) s = sparsebit_alloc(); for (;;) { uint8_t op = get8() & 0xf; - uint64_t first = get64(); - uint64_t last = get64(); + u64 first = get64(); + u64 last = get64(); operate(op, first, last); } diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 8a1848586a85..d863705f6795 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -38,7 +38,7 @@ struct guest_random_state new_guest_random_state(uint32_t seed) uint32_t guest_random_u32(struct guest_random_state *state) { - state->seed = (uint64_t)state->seed * 48271 % ((uint32_t)(1 << 31) - 1); + state->seed = (u64)state->seed * 48271 % ((uint32_t)(1 << 31) - 1); return state->seed; } diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index 9afcae844d72..b16b5c5b3a1e 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -14,7 +14,7 @@ struct ucall_header { struct ucall ucalls[KVM_MAX_VCPUS]; }; -int ucall_nr_pages_required(uint64_t page_size) +int ucall_nr_pages_required(u64 page_size) { return align_up(sizeof(struct ucall_header), page_size) / page_size; } @@ -79,7 +79,7 @@ static void ucall_free(struct ucall *uc) clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use); } -void ucall_assert(uint64_t cmd, const char *exp, const char *file, +void ucall_assert(u64 cmd, const char *exp, const char *file, unsigned int line, const char *fmt, ...) { struct ucall *uc; @@ -88,8 +88,8 @@ void ucall_assert(uint64_t cmd, const char *exp, const char *file, uc = ucall_alloc(); uc->cmd = cmd; - WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (uint64_t)(exp)); - WRITE_ONCE(uc->args[GUEST_FILE], (uint64_t)(file)); + WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (u64)(exp)); + WRITE_ONCE(uc->args[GUEST_FILE], (u64)(file)); WRITE_ONCE(uc->args[GUEST_LINE], line); va_start(va, fmt); @@ -101,7 +101,7 @@ void ucall_assert(uint64_t cmd, const char *exp, const char *file, ucall_free(uc); } -void ucall_fmt(uint64_t cmd, const char *fmt, ...) +void ucall_fmt(u64 cmd, const char *fmt, ...) { struct ucall *uc; va_list va; @@ -118,7 +118,7 @@ void ucall_fmt(uint64_t cmd, const char *fmt, ...) ucall_free(uc); } -void ucall(uint64_t cmd, int nargs, ...) +void ucall(u64 cmd, int nargs, ...) { struct ucall *uc; va_list va; @@ -132,7 +132,7 @@ void ucall(uint64_t cmd, int nargs, ...) va_start(va, nargs); for (i = 0; i < nargs; ++i) - WRITE_ONCE(uc->args[i], va_arg(va, uint64_t)); + WRITE_ONCE(uc->args[i], va_arg(va, u64)); va_end(va); ucall_arch_do_ucall((gva_t)uc->hva); @@ -140,7 +140,7 @@ void ucall(uint64_t cmd, int nargs, ...) ucall_free(uc); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +u64 get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) { struct ucall ucall; void *addr; diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index 5bde176cedd5..2f069ce6a446 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -100,8 +100,8 @@ static void *uffd_handler_thread_fn(void *arg) } struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, - void *hva, uint64_t len, - uint64_t num_readers, + void *hva, u64 len, + u64 num_readers, uffd_handler_t handler) { struct uffd_desc *uffd_desc; @@ -109,7 +109,7 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, int uffd; struct uffdio_api uffdio_api; struct uffdio_register uffdio_register; - uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; + u64 expected_ioctls = ((u64)1) << _UFFDIO_COPY; int ret, i; PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", @@ -132,7 +132,7 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, /* In order to get minor faults, prefault via the alias. */ if (is_minor) - expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; + expected_ioctls = ((u64)1) << _UFFDIO_CONTINUE; uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno); @@ -141,9 +141,9 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, uffdio_api.features = 0; TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1, "ioctl UFFDIO_API failed: %" PRIu64, - (uint64_t)uffdio_api.api); + (u64)uffdio_api.api); - uffdio_register.range.start = (uint64_t)hva; + uffdio_register.range.start = (u64)hva; uffdio_register.range.len = len; uffdio_register.mode = uffd_mode; TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1, diff --git a/tools/testing/selftests/kvm/lib/x86/apic.c b/tools/testing/selftests/kvm/lib/x86/apic.c index 89153a333e83..5182fd0d6a76 100644 --- a/tools/testing/selftests/kvm/lib/x86/apic.c +++ b/tools/testing/selftests/kvm/lib/x86/apic.c @@ -14,7 +14,7 @@ void apic_disable(void) void xapic_enable(void) { - uint64_t val = rdmsr(MSR_IA32_APICBASE); + u64 val = rdmsr(MSR_IA32_APICBASE); /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */ if (val & MSR_IA32_APICBASE_EXTD) { diff --git a/tools/testing/selftests/kvm/lib/x86/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c index be8b31572588..c2806bed43c9 100644 --- a/tools/testing/selftests/kvm/lib/x86/hyperv.c +++ b/tools/testing/selftests/kvm/lib/x86/hyperv.c @@ -100,9 +100,9 @@ struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, return hv; } -int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) +int enable_vp_assist(u64 vp_assist_pa, void *vp_assist) { - uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | + u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c index 73a82730927d..61cf952cd2dc 100644 --- a/tools/testing/selftests/kvm/lib/x86/memstress.c +++ b/tools/testing/selftests/kvm/lib/x86/memstress.c @@ -16,7 +16,7 @@ #include "svm_util.h" #include "vmx.h" -void memstress_l2_guest_code(uint64_t vcpu_id) +void memstress_l2_guest_code(u64 vcpu_id) { memstress_guest_code(vcpu_id); vmcall(); @@ -32,7 +32,7 @@ __asm__( #define L2_GUEST_STACK_SIZE 64 -static void l1_vmx_code(struct vmx_pages *vmx, uint64_t vcpu_id) +static void l1_vmx_code(struct vmx_pages *vmx, u64 vcpu_id) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; unsigned long *rsp; @@ -51,7 +51,7 @@ static void l1_vmx_code(struct vmx_pages *vmx, uint64_t vcpu_id) GUEST_DONE(); } -static void l1_svm_code(struct svm_test_data *svm, uint64_t vcpu_id) +static void l1_svm_code(struct svm_test_data *svm, u64 vcpu_id) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; unsigned long *rsp; @@ -67,7 +67,7 @@ static void l1_svm_code(struct svm_test_data *svm, uint64_t vcpu_id) } -static void memstress_l1_guest_code(void *data, uint64_t vcpu_id) +static void memstress_l1_guest_code(void *data, u64 vcpu_id) { if (this_cpu_has(X86_FEATURE_VMX)) l1_vmx_code(data, vcpu_id); @@ -75,7 +75,7 @@ static void memstress_l1_guest_code(void *data, uint64_t vcpu_id) l1_svm_code(data, vcpu_id); } -uint64_t memstress_nested_pages(int nr_vcpus) +u64 memstress_nested_pages(int nr_vcpus) { /* * 513 page tables is enough to identity-map 256 TiB of L2 with 1G @@ -87,7 +87,7 @@ uint64_t memstress_nested_pages(int nr_vcpus) static void memstress_setup_ept_mappings(struct kvm_vm *vm) { - uint64_t start, end; + u64 start, end; /* * Identity map the first 4G and the test region with 1G pages so that diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c index 34cb57d1d671..0851b74b4e46 100644 --- a/tools/testing/selftests/kvm/lib/x86/pmu.c +++ b/tools/testing/selftests/kvm/lib/x86/pmu.c @@ -11,7 +11,7 @@ #include "processor.h" #include "pmu.h" -const uint64_t intel_pmu_arch_events[] = { +const u64 intel_pmu_arch_events[] = { INTEL_ARCH_CPU_CYCLES, INTEL_ARCH_INSTRUCTIONS_RETIRED, INTEL_ARCH_REFERENCE_CYCLES, @@ -28,7 +28,7 @@ const uint64_t intel_pmu_arch_events[] = { }; kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS); -const uint64_t amd_pmu_zen_events[] = { +const u64 amd_pmu_zen_events[] = { AMD_ZEN_CORE_CYCLES, AMD_ZEN_INSTRUCTIONS_RETIRED, AMD_ZEN_BRANCHES_RETIRED, @@ -50,7 +50,7 @@ kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS); * be overcounted on these certain instructions, but for Clearwater Forest * only "Instruction Retired" event is overcounted on these instructions. */ -static uint64_t get_pmu_errata(void) +static u64 get_pmu_errata(void) { if (!this_cpu_is_intel()) return 0; @@ -72,7 +72,7 @@ static uint64_t get_pmu_errata(void) } } -uint64_t pmu_errata_mask; +u64 pmu_errata_mask; void kvm_init_pmu_errata(void) { diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index d1de157fedff..81f5dea51fc3 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -27,7 +27,7 @@ bool host_cpu_is_intel; bool host_cpu_is_hygon; bool host_cpu_is_amd_compatible; bool is_forced_emulation_enabled; -uint64_t guest_tsc_khz; +u64 guest_tsc_khz; const char *ex_str(int vector) { @@ -207,10 +207,10 @@ void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels, } static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu, - uint64_t *parent_pte, uint64_t vaddr, int level) + u64 *parent_pte, u64 vaddr, int level) { - uint64_t pt_gpa = PTE_GET_PA(*parent_pte); - uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); + u64 pt_gpa = PTE_GET_PA(*parent_pte); + u64 *page_table = addr_gpa2hva(vm, pt_gpa); int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; TEST_ASSERT((*parent_pte == mmu->pgd) || is_present_pte(mmu, parent_pte), @@ -220,15 +220,15 @@ static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu, return &page_table[index]; } -static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, - struct kvm_mmu *mmu, - uint64_t *parent_pte, - uint64_t vaddr, - uint64_t paddr, - int current_level, - int target_level) +static u64 *virt_create_upper_pte(struct kvm_vm *vm, + struct kvm_mmu *mmu, + u64 *parent_pte, + u64 vaddr, + u64 paddr, + int current_level, + int target_level) { - uint64_t *pte = virt_get_pte(vm, mmu, parent_pte, vaddr, current_level); + u64 *pte = virt_get_pte(vm, mmu, parent_pte, vaddr, current_level); paddr = vm_untag_gpa(vm, paddr); @@ -256,11 +256,11 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, return pte; } -void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr, - uint64_t paddr, int level) +void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, u64 vaddr, + u64 paddr, int level) { - const uint64_t pg_size = PG_LEVEL_SIZE(level); - uint64_t *pte = &mmu->pgd; + const u64 pg_size = PG_LEVEL_SIZE(level); + u64 *pte = &mmu->pgd; int current_level; TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, @@ -315,16 +315,16 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr, *pte |= PTE_S_BIT_MASK(mmu); } -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) { __virt_pg_map(vm, &vm->mmu, vaddr, paddr, PG_LEVEL_4K); } -void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint64_t nr_bytes, int level) +void virt_map_level(struct kvm_vm *vm, u64 vaddr, u64 paddr, + u64 nr_bytes, int level) { - uint64_t pg_size = PG_LEVEL_SIZE(level); - uint64_t nr_pages = nr_bytes / pg_size; + u64 pg_size = PG_LEVEL_SIZE(level); + u64 nr_pages = nr_bytes / pg_size; int i; TEST_ASSERT(nr_bytes % pg_size == 0, @@ -341,7 +341,7 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, } } -static bool vm_is_target_pte(struct kvm_mmu *mmu, uint64_t *pte, +static bool vm_is_target_pte(struct kvm_mmu *mmu, u64 *pte, int *level, int current_level) { if (is_huge_pte(mmu, pte)) { @@ -354,13 +354,13 @@ static bool vm_is_target_pte(struct kvm_mmu *mmu, uint64_t *pte, return *level == current_level; } -static uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, - struct kvm_mmu *mmu, - uint64_t vaddr, - int *level) +static u64 *__vm_get_page_table_entry(struct kvm_vm *vm, + struct kvm_mmu *mmu, + u64 vaddr, + int *level) { int va_width = 12 + (mmu->pgtable_levels) * 9; - uint64_t *pte = &mmu->pgd; + u64 *pte = &mmu->pgd; int current_level; TEST_ASSERT(!vm->arch.is_pt_protected, @@ -393,14 +393,14 @@ static uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, return virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K); } -uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa) +u64 *tdp_get_pte(struct kvm_vm *vm, u64 l2_gpa) { int level = PG_LEVEL_4K; return __vm_get_page_table_entry(vm, &vm->stage2_mmu, l2_gpa, &level); } -uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr) +u64 *vm_get_pte(struct kvm_vm *vm, u64 vaddr) { int level = PG_LEVEL_4K; @@ -410,10 +410,10 @@ uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr) void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { struct kvm_mmu *mmu = &vm->mmu; - uint64_t *pml4e, *pml4e_start; - uint64_t *pdpe, *pdpe_start; - uint64_t *pde, *pde_start; - uint64_t *pte, *pte_start; + u64 *pml4e, *pml4e_start; + u64 *pdpe, *pdpe_start; + u64 *pde, *pde_start; + u64 *pte, *pte_start; if (!mmu->pgd_created) return; @@ -423,7 +423,7 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) fprintf(stream, "%*s index hvaddr gpaddr " "addr w exec dirty\n", indent, ""); - pml4e_start = (uint64_t *) addr_gpa2hva(vm, mmu->pgd); + pml4e_start = (u64 *)addr_gpa2hva(vm, mmu->pgd); for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { pml4e = &pml4e_start[n1]; if (!is_present_pte(mmu, pml4e)) @@ -475,10 +475,10 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) is_writable_pte(mmu, pte), is_nx_pte(mmu, pte), is_dirty_pte(mmu, pte), - ((uint64_t) n1 << 27) - | ((uint64_t) n2 << 18) - | ((uint64_t) n3 << 9) - | ((uint64_t) n4)); + ((u64)n1 << 27) + | ((u64)n2 << 18) + | ((u64)n3 << 9) + | ((u64)n4)); } } } @@ -498,8 +498,8 @@ bool kvm_cpu_has_tdp(void) return kvm_cpu_has_ept() || kvm_cpu_has_npt(); } -void __tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, - uint64_t size, int level) +void __tdp_map(struct kvm_vm *vm, u64 nested_paddr, u64 paddr, + u64 size, int level) { size_t page_size = PG_LEVEL_SIZE(level); size_t npages = size / page_size; @@ -514,8 +514,8 @@ void __tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, } } -void tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, - uint64_t size) +void tdp_map(struct kvm_vm *vm, u64 nested_paddr, u64 paddr, + u64 size) { __tdp_map(vm, nested_paddr, paddr, size, PG_LEVEL_4K); } @@ -540,13 +540,13 @@ void tdp_identity_map_default_memslots(struct kvm_vm *vm) if (i > last) break; - tdp_map(vm, (uint64_t)i << vm->page_shift, - (uint64_t)i << vm->page_shift, 1 << vm->page_shift); + tdp_map(vm, (u64)i << vm->page_shift, + (u64)i << vm->page_shift, 1 << vm->page_shift); } } /* Identity map a region with 1GiB Pages. */ -void tdp_identity_map_1g(struct kvm_vm *vm, uint64_t addr, uint64_t size) +void tdp_identity_map_1g(struct kvm_vm *vm, u64 addr, u64 size) { __tdp_map(vm, addr, addr, size, PG_LEVEL_1G); } @@ -621,7 +621,7 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp) gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { int level = PG_LEVEL_NONE; - uint64_t *pte = __vm_get_page_table_entry(vm, &vm->mmu, gva, &level); + u64 *pte = __vm_get_page_table_entry(vm, &vm->mmu, gva, &level); TEST_ASSERT(is_present_pte(&vm->mmu, pte), "Leaf PTE not PRESENT for gva: 0x%08lx", gva); @@ -943,7 +943,7 @@ uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, property.reg, property.lo_bit, property.hi_bit); } -uint64_t kvm_get_feature_msr(uint64_t msr_index) +u64 kvm_get_feature_msr(u64 msr_index) { struct { struct kvm_msrs header; @@ -962,7 +962,7 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index) return buffer.entry.data; } -void __vm_xsave_require_permission(uint64_t xfeature, const char *name) +void __vm_xsave_require_permission(u64 xfeature, const char *name) { int kvm_fd; u64 bitmask; @@ -1063,7 +1063,7 @@ void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu, vcpu_set_cpuid(vcpu); } -uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index) +u64 vcpu_get_msr(struct kvm_vcpu *vcpu, u64 msr_index) { struct { struct kvm_msrs header; @@ -1078,7 +1078,7 @@ uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index) return buffer.entry.data; } -int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value) +int _vcpu_set_msr(struct kvm_vcpu *vcpu, u64 msr_index, u64 msr_value) { struct { struct kvm_msrs header; @@ -1106,22 +1106,22 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) vcpu_regs_get(vcpu, ®s); if (num >= 1) - regs.rdi = va_arg(ap, uint64_t); + regs.rdi = va_arg(ap, u64); if (num >= 2) - regs.rsi = va_arg(ap, uint64_t); + regs.rsi = va_arg(ap, u64); if (num >= 3) - regs.rdx = va_arg(ap, uint64_t); + regs.rdx = va_arg(ap, u64); if (num >= 4) - regs.rcx = va_arg(ap, uint64_t); + regs.rcx = va_arg(ap, u64); if (num >= 5) - regs.r8 = va_arg(ap, uint64_t); + regs.r8 = va_arg(ap, u64); if (num >= 6) - regs.r9 = va_arg(ap, uint64_t); + regs.r9 = va_arg(ap, u64); vcpu_regs_set(vcpu, ®s); va_end(ap); @@ -1344,7 +1344,7 @@ const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, #define X86_HYPERCALL(inputs...) \ ({ \ - uint64_t r; \ + u64 r; \ \ asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t" \ "jnz 1f\n\t" \ @@ -1359,18 +1359,17 @@ const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, r; \ }) -uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, - uint64_t a3) +u64 kvm_hypercall(u64 nr, u64 a0, u64 a1, u64 a2, u64 a3) { return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3)); } -uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1) +u64 __xen_hypercall(u64 nr, u64 a0, void *a1) { return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1)); } -void xen_hypercall(uint64_t nr, uint64_t a0, void *a1) +void xen_hypercall(u64 nr, u64 a0, void *a1) { GUEST_ASSERT(!__xen_hypercall(nr, a0, a1)); } @@ -1453,8 +1452,7 @@ bool kvm_arch_has_default_irqchip(void) return true; } -void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t smram_gpa, +void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu, u64 smram_gpa, const void *smi_handler, size_t handler_size) { vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, smram_gpa, diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c index aecef6048ff1..555198348159 100644 --- a/tools/testing/selftests/kvm/lib/x86/sev.c +++ b/tools/testing/selftests/kvm/lib/x86/sev.c @@ -29,15 +29,15 @@ static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *regio sev_register_encrypted_memory(vm, region); sparsebit_for_each_set_range(protected_phy_pages, i, j) { - const uint64_t size = (j - i + 1) * vm->page_size; - const uint64_t offset = (i - lowest_page_in_region) * vm->page_size; + const u64 size = (j - i + 1) * vm->page_size; + const u64 offset = (i - lowest_page_in_region) * vm->page_size; if (private) vm_mem_set_private(vm, gpa_base + offset, size); if (is_sev_snp_vm(vm)) snp_launch_update_data(vm, gpa_base + offset, - (uint64_t)addr_gpa2hva(vm, gpa_base + offset), + (u64)addr_gpa2hva(vm, gpa_base + offset), size, page_type); else sev_launch_update_data(vm, gpa_base + offset, size); @@ -131,7 +131,7 @@ void sev_vm_launch_finish(struct kvm_vm *vm) TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING); } -void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy) +void snp_vm_launch_start(struct kvm_vm *vm, u64 policy) { struct kvm_sev_snp_launch_start launch_start = { .policy = policy, @@ -174,7 +174,7 @@ struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, return vm; } -void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement) +void vm_sev_launch(struct kvm_vm *vm, u64 policy, uint8_t *measurement) { if (is_sev_snp_vm(vm)) { vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, BIT(KVM_HC_MAP_GPA_RANGE)); diff --git a/tools/testing/selftests/kvm/lib/x86/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c index 4a3b1a2738a2..620bdc5d3cc2 100644 --- a/tools/testing/selftests/kvm/lib/x86/svm.c +++ b/tools/testing/selftests/kvm/lib/x86/svm.c @@ -84,14 +84,14 @@ void vm_enable_npt(struct kvm_vm *vm) void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp) { struct vmcb *vmcb = svm->vmcb; - uint64_t vmcb_gpa = svm->vmcb_gpa; + u64 vmcb_gpa = svm->vmcb_gpa; struct vmcb_save_area *save = &vmcb->save; struct vmcb_control_area *ctrl = &vmcb->control; u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK; u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK; - uint64_t efer; + u64 efer; efer = rdmsr(MSR_EFER); wrmsr(MSR_EFER, efer | EFER_SVME); @@ -158,7 +158,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r * for now. registers involved in LOAD/SAVE_GPR_C are eventually * unmodified so they do not need to be in the clobber list. */ -void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa) +void run_guest(struct vmcb *vmcb, u64 vmcb_gpa) { asm volatile ( "vmload %[vmcb_gpa]\n\t" diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index a6bb649e62c3..1d3d9bcfcf8a 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -125,8 +125,8 @@ vcpu_alloc_vmx(struct kvm_vm *vm, gva_t *p_vmx_gva) bool prepare_for_vmx_operation(struct vmx_pages *vmx) { - uint64_t feature_control; - uint64_t required; + u64 feature_control; + u64 required; unsigned long cr0; unsigned long cr4; @@ -185,7 +185,7 @@ bool load_vmcs(struct vmx_pages *vmx) return true; } -static bool ept_vpid_cap_supported(uint64_t mask) +static bool ept_vpid_cap_supported(u64 mask) { return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask; } @@ -208,7 +208,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); if (vmx->eptp_gpa) { - uint64_t eptp = vmx->eptp_gpa | EPTP_WB | EPTP_PWL_4; + u64 eptp = vmx->eptp_gpa | EPTP_WB | EPTP_PWL_4; TEST_ASSERT((vmx->eptp_gpa & ~PHYSICAL_PAGE_MASK) == 0, "Illegal bits set in vmx->eptp_gpa"); @@ -358,8 +358,8 @@ static inline void init_vmcs_guest_state(void *rip, void *rsp) vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); vmwrite(GUEST_DR7, 0x400); - vmwrite(GUEST_RSP, (uint64_t)rsp); - vmwrite(GUEST_RIP, (uint64_t)rip); + vmwrite(GUEST_RSP, (u64)rsp); + vmwrite(GUEST_RIP, (u64)rip); vmwrite(GUEST_RFLAGS, 2); vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); @@ -375,7 +375,7 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) bool kvm_cpu_has_ept(void) { - uint64_t ctrl; + u64 ctrl; if (!kvm_cpu_has(X86_FEATURE_VMX)) return false; diff --git a/tools/testing/selftests/kvm/loongarch/arch_timer.c b/tools/testing/selftests/kvm/loongarch/arch_timer.c index 355ecac30954..f80e36962879 100644 --- a/tools/testing/selftests/kvm/loongarch/arch_timer.c +++ b/tools/testing/selftests/kvm/loongarch/arch_timer.c @@ -28,7 +28,7 @@ static void guest_irq_handler(struct ex_regs *regs) { unsigned int intid; uint32_t cpu = guest_get_vcpuid(); - uint64_t xcnt, val, cfg, xcnt_diff_us; + u64 xcnt, val, cfg, xcnt_diff_us; struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; intid = !!(regs->estat & BIT(INT_TI)); @@ -65,7 +65,7 @@ static void guest_irq_handler(struct ex_regs *regs) static void guest_test_period_timer(uint32_t cpu) { uint32_t irq_iter, config_iter; - uint64_t us; + u64 us; struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; shared_data->nr_iter = test_args.nr_iter; @@ -89,7 +89,7 @@ static void guest_test_period_timer(uint32_t cpu) static void guest_test_oneshot_timer(uint32_t cpu) { uint32_t irq_iter, config_iter; - uint64_t us; + u64 us; struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; shared_data->nr_iter = 0; @@ -115,7 +115,7 @@ static void guest_test_oneshot_timer(uint32_t cpu) static void guest_test_emulate_timer(uint32_t cpu) { uint32_t config_iter; - uint64_t xcnt_diff_us, us; + u64 xcnt_diff_us, us; struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; local_irq_disable(); diff --git a/tools/testing/selftests/kvm/loongarch/pmu_test.c b/tools/testing/selftests/kvm/loongarch/pmu_test.c index 88bb530e336e..bf5249c87f75 100644 --- a/tools/testing/selftests/kvm/loongarch/pmu_test.c +++ b/tools/testing/selftests/kvm/loongarch/pmu_test.c @@ -52,7 +52,7 @@ static void guest_pmu_base_test(void) { int i; uint32_t cfg6, pmnum; - uint64_t cnt[4]; + u64 cnt[4]; cfg6 = read_cpucfg(LOONGARCH_CPUCFG6); pmnum = (cfg6 >> 4) & 0xf; @@ -114,7 +114,7 @@ static void guest_irq_handler(struct ex_regs *regs) static void guest_pmu_interrupt_test(void) { - uint64_t cnt; + u64 cnt; csr_write(PMU_OVERFLOW - 1, LOONGARCH_CSR_PERFCNTR0); csr_write(PMU_ENVENT_ENABLED | CSR_PERFCTRL_PMIE | LOONGARCH_PMU_EVENT_CYCLES, LOONGARCH_CSR_PERFCTRL0); diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 3cdfa3b19b85..9d7c4afab961 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -30,7 +30,7 @@ static int nr_vcpus = 1; -static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; +static u64 guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { @@ -55,10 +55,10 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) } static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, - uint64_t nr_modifications) + u64 nr_modifications) { - uint64_t pages = max_t(int, vm->page_size, getpagesize()) / vm->page_size; - uint64_t gpa; + u64 pages = max_t(int, vm->page_size, getpagesize()) / vm->page_size; + u64 gpa; int i; /* @@ -78,7 +78,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, struct test_params { useconds_t delay; - uint64_t nr_iterations; + u64 nr_iterations; bool partition_vcpu_memory_access; bool disable_slot_zap_quirk; }; diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 5087d082c4b0..d5161e8aee14 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -86,12 +86,12 @@ struct vm_data { struct kvm_vcpu *vcpu; pthread_t vcpu_thread; uint32_t nslots; - uint64_t npages; - uint64_t pages_per_slot; + u64 npages; + u64 pages_per_slot; void **hva_slots; bool mmio_ok; - uint64_t mmio_gpa_min; - uint64_t mmio_gpa_max; + u64 mmio_gpa_min; + u64 mmio_gpa_max; }; struct sync_area { @@ -186,9 +186,9 @@ static void wait_for_vcpu(void) "sem_timedwait() failed: %d", errno); } -static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) +static void *vm_gpa2hva(struct vm_data *data, u64 gpa, u64 *rempages) { - uint64_t gpage, pgoffs; + u64 gpage, pgoffs; uint32_t slot, slotoffs; void *base; uint32_t guest_page_size = data->vm->page_size; @@ -200,11 +200,11 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) gpage = gpa / guest_page_size; pgoffs = gpa % guest_page_size; - slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1); + slot = min(gpage / data->pages_per_slot, (u64)data->nslots - 1); slotoffs = gpage - (slot * data->pages_per_slot); if (rempages) { - uint64_t slotpages; + u64 slotpages; if (slot == data->nslots - 1) slotpages = data->npages - slot * data->pages_per_slot; @@ -220,7 +220,7 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) return (uint8_t *)base + slotoffs * guest_page_size + pgoffs; } -static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot) +static u64 vm_slot2gpa(struct vm_data *data, uint32_t slot) { uint32_t guest_page_size = data->vm->page_size; @@ -244,7 +244,7 @@ static struct vm_data *alloc_vm(void) } static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size, - uint64_t pages_per_slot, uint64_t rempages) + u64 pages_per_slot, u64 rempages) { if (!pages_per_slot) return false; @@ -259,11 +259,11 @@ static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size, } -static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size) +static u64 get_max_slots(struct vm_data *data, uint32_t host_page_size) { uint32_t guest_page_size = data->vm->page_size; - uint64_t mempages, pages_per_slot, rempages; - uint64_t slots; + u64 mempages, pages_per_slot, rempages; + u64 slots; mempages = data->npages; slots = data->nslots; @@ -281,12 +281,12 @@ static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size) return 0; } -static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, - void *guest_code, uint64_t mem_size, +static bool prepare_vm(struct vm_data *data, int nslots, u64 *maxslots, + void *guest_code, u64 mem_size, struct timespec *slot_runtime) { - uint64_t mempages, rempages; - uint64_t guest_addr; + u64 mempages, rempages; + u64 guest_addr; uint32_t slot, host_page_size, guest_page_size; struct timespec tstart; struct sync_area *sync; @@ -317,7 +317,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, clock_gettime(CLOCK_MONOTONIC, &tstart); for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { - uint64_t npages; + u64 npages; npages = data->pages_per_slot; if (slot == data->nslots) @@ -331,8 +331,8 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, *slot_runtime = timespec_elapsed(tstart); for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { - uint64_t npages; - uint64_t gpa; + u64 npages; + u64 gpa; npages = data->pages_per_slot; if (slot == data->nslots) @@ -460,7 +460,7 @@ static void guest_code_test_memslot_move(void) for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE; ptr += page_size) - *(uint64_t *)ptr = MEM_TEST_VAL_1; + *(u64 *)ptr = MEM_TEST_VAL_1; /* * No host sync here since the MMIO exits are so expensive @@ -489,7 +489,7 @@ static void guest_code_test_memslot_map(void) for (ptr = MEM_TEST_GPA; ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += page_size) - *(uint64_t *)ptr = MEM_TEST_VAL_1; + *(u64 *)ptr = MEM_TEST_VAL_1; if (!guest_perform_sync()) break; @@ -497,7 +497,7 @@ static void guest_code_test_memslot_map(void) for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += page_size) - *(uint64_t *)ptr = MEM_TEST_VAL_2; + *(u64 *)ptr = MEM_TEST_VAL_2; if (!guest_perform_sync()) break; @@ -526,13 +526,13 @@ static void guest_code_test_memslot_unmap(void) * * Just access a single page to be on the safe side. */ - *(uint64_t *)ptr = MEM_TEST_VAL_1; + *(u64 *)ptr = MEM_TEST_VAL_1; if (!guest_perform_sync()) break; ptr += MEM_TEST_UNMAP_SIZE / 2; - *(uint64_t *)ptr = MEM_TEST_VAL_2; + *(u64 *)ptr = MEM_TEST_VAL_2; if (!guest_perform_sync()) break; @@ -555,17 +555,17 @@ static void guest_code_test_memslot_rw(void) for (ptr = MEM_TEST_GPA; ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) - *(uint64_t *)ptr = MEM_TEST_VAL_1; + *(u64 *)ptr = MEM_TEST_VAL_1; if (!guest_perform_sync()) break; for (ptr = MEM_TEST_GPA + page_size / 2; ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) { - uint64_t val = *(uint64_t *)ptr; + u64 val = *(u64 *)ptr; GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2); - *(uint64_t *)ptr = 0; + *(u64 *)ptr = 0; } if (!guest_perform_sync()) @@ -577,10 +577,10 @@ static void guest_code_test_memslot_rw(void) static bool test_memslot_move_prepare(struct vm_data *data, struct sync_area *sync, - uint64_t *maxslots, bool isactive) + u64 *maxslots, bool isactive) { uint32_t guest_page_size = data->vm->page_size; - uint64_t movesrcgpa, movetestgpa; + u64 movesrcgpa, movetestgpa; #ifdef __x86_64__ if (disable_slot_zap_quirk) @@ -590,7 +590,7 @@ static bool test_memslot_move_prepare(struct vm_data *data, movesrcgpa = vm_slot2gpa(data, data->nslots - 1); if (isactive) { - uint64_t lastpages; + u64 lastpages; vm_gpa2hva(data, movesrcgpa, &lastpages); if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) { @@ -613,21 +613,21 @@ static bool test_memslot_move_prepare(struct vm_data *data, static bool test_memslot_move_prepare_active(struct vm_data *data, struct sync_area *sync, - uint64_t *maxslots) + u64 *maxslots) { return test_memslot_move_prepare(data, sync, maxslots, true); } static bool test_memslot_move_prepare_inactive(struct vm_data *data, struct sync_area *sync, - uint64_t *maxslots) + u64 *maxslots) { return test_memslot_move_prepare(data, sync, maxslots, false); } static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync) { - uint64_t movesrcgpa; + u64 movesrcgpa; movesrcgpa = vm_slot2gpa(data, data->nslots - 1); vm_mem_region_move(data->vm, data->nslots - 1 + 1, @@ -636,13 +636,13 @@ static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync) } static void test_memslot_do_unmap(struct vm_data *data, - uint64_t offsp, uint64_t count) + u64 offsp, u64 count) { - uint64_t gpa, ctr; + u64 gpa, ctr; uint32_t guest_page_size = data->vm->page_size; for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) { - uint64_t npages; + u64 npages; void *hva; int ret; @@ -661,10 +661,10 @@ static void test_memslot_do_unmap(struct vm_data *data, } static void test_memslot_map_unmap_check(struct vm_data *data, - uint64_t offsp, uint64_t valexp) + u64 offsp, u64 valexp) { - uint64_t gpa; - uint64_t *val; + u64 gpa; + u64 *val; uint32_t guest_page_size = data->vm->page_size; if (!map_unmap_verify) @@ -681,7 +681,7 @@ static void test_memslot_map_unmap_check(struct vm_data *data, static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) { uint32_t guest_page_size = data->vm->page_size; - uint64_t guest_pages = MEM_TEST_MAP_SIZE / guest_page_size; + u64 guest_pages = MEM_TEST_MAP_SIZE / guest_page_size; /* * Unmap the second half of the test area while guest writes to (maps) @@ -718,11 +718,11 @@ static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) static void test_memslot_unmap_loop_common(struct vm_data *data, struct sync_area *sync, - uint64_t chunk) + u64 chunk) { uint32_t guest_page_size = data->vm->page_size; - uint64_t guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size; - uint64_t ctr; + u64 guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size; + u64 ctr; /* * Wait for the guest to finish mapping page(s) in the first half @@ -748,7 +748,7 @@ static void test_memslot_unmap_loop(struct vm_data *data, { uint32_t host_page_size = getpagesize(); uint32_t guest_page_size = data->vm->page_size; - uint64_t guest_chunk_pages = guest_page_size >= host_page_size ? + u64 guest_chunk_pages = guest_page_size >= host_page_size ? 1 : host_page_size / guest_page_size; test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); @@ -758,26 +758,26 @@ static void test_memslot_unmap_loop_chunked(struct vm_data *data, struct sync_area *sync) { uint32_t guest_page_size = data->vm->page_size; - uint64_t guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size; + u64 guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size; test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); } static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync) { - uint64_t gptr; + u64 gptr; uint32_t guest_page_size = data->vm->page_size; for (gptr = MEM_TEST_GPA + guest_page_size / 2; gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) - *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2; + *(u64 *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2; host_perform_sync(sync); for (gptr = MEM_TEST_GPA; gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) { - uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL); - uint64_t val = *vptr; + u64 *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL); + u64 val = *vptr; TEST_ASSERT(val == MEM_TEST_VAL_1, "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")", @@ -790,21 +790,21 @@ static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync) struct test_data { const char *name; - uint64_t mem_size; + u64 mem_size; void (*guest_code)(void); bool (*prepare)(struct vm_data *data, struct sync_area *sync, - uint64_t *maxslots); + u64 *maxslots); void (*loop)(struct vm_data *data, struct sync_area *sync); }; -static bool test_execute(int nslots, uint64_t *maxslots, +static bool test_execute(int nslots, u64 *maxslots, unsigned int maxtime, const struct test_data *tdata, - uint64_t *nloops, + u64 *nloops, struct timespec *slot_runtime, struct timespec *guest_runtime) { - uint64_t mem_size = tdata->mem_size ? : MEM_SIZE; + u64 mem_size = tdata->mem_size ? : MEM_SIZE; struct vm_data *data; struct sync_area *sync; struct timespec tstart; @@ -1041,7 +1041,7 @@ static bool parse_args(int argc, char *argv[], struct test_result { struct timespec slot_runtime, guest_runtime, iter_runtime; int64_t slottimens, runtimens; - uint64_t nloops; + u64 nloops; }; static bool test_loop(const struct test_data *data, @@ -1049,7 +1049,7 @@ static bool test_loop(const struct test_data *data, struct test_result *rbestslottime, struct test_result *rbestruntime) { - uint64_t maxslots; + u64 maxslots; struct test_result result = {}; if (!test_execute(targs->nslots, &maxslots, targs->seconds, data, diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 51c070556f3e..c1f1e318e059 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -20,19 +20,19 @@ static bool mprotect_ro_done; static bool all_vcpus_hit_ro_fault; -static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) +static void guest_code(u64 start_gpa, u64 end_gpa, u64 stride) { - uint64_t gpa; + u64 gpa; int i; for (i = 0; i < 2; i++) { for (gpa = start_gpa; gpa < end_gpa; gpa += stride) - vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + vcpu_arch_put_guest(*((volatile u64 *)gpa), gpa); GUEST_SYNC(i); } for (gpa = start_gpa; gpa < end_gpa; gpa += stride) - *((volatile uint64_t *)gpa); + *((volatile u64 *)gpa); GUEST_SYNC(2); /* @@ -55,7 +55,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) #elif defined(__aarch64__) asm volatile("str %0, [%0]" :: "r" (gpa) : "memory"); #else - vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + vcpu_arch_put_guest(*((volatile u64 *)gpa), gpa); #endif } while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault)); @@ -68,7 +68,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) #endif for (gpa = start_gpa; gpa < end_gpa; gpa += stride) - vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + vcpu_arch_put_guest(*((volatile u64 *)gpa), gpa); GUEST_SYNC(4); GUEST_ASSERT(0); @@ -76,8 +76,8 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) struct vcpu_info { struct kvm_vcpu *vcpu; - uint64_t start_gpa; - uint64_t end_gpa; + u64 start_gpa; + u64 end_gpa; }; static int nr_vcpus; @@ -203,10 +203,10 @@ static void *vcpu_worker(void *data) } static pthread_t *spawn_workers(struct kvm_vm *vm, struct kvm_vcpu **vcpus, - uint64_t start_gpa, uint64_t end_gpa) + u64 start_gpa, u64 end_gpa) { struct vcpu_info *info; - uint64_t gpa, nr_bytes; + u64 gpa, nr_bytes; pthread_t *threads; int i; @@ -217,7 +217,7 @@ static pthread_t *spawn_workers(struct kvm_vm *vm, struct kvm_vcpu **vcpus, TEST_ASSERT(info, "Failed to allocate vCPU gpa ranges"); nr_bytes = ((end_gpa - start_gpa) / nr_vcpus) & - ~((uint64_t)vm->page_size - 1); + ~((u64)vm->page_size - 1); TEST_ASSERT(nr_bytes, "C'mon, no way you have %d CPUs", nr_vcpus); for (i = 0, gpa = start_gpa; i < nr_vcpus; i++, gpa += nr_bytes) { @@ -278,11 +278,11 @@ int main(int argc, char *argv[]) * just below the 4gb boundary. This test could create memory at * 1gb-3gb,but it's simpler to skip straight to 4gb. */ - const uint64_t start_gpa = SZ_4G; + const u64 start_gpa = SZ_4G; const int first_slot = 1; struct timespec time_start, time_run1, time_reset, time_run2, time_ro, time_rw; - uint64_t max_gpa, gpa, slot_size, max_mem, i; + u64 max_gpa, gpa, slot_size, max_mem, i; int max_slots, slot, opt, fd; bool hugepages = false; struct kvm_vcpu **vcpus; diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c index f3de0386ba7b..1fa9800aa7df 100644 --- a/tools/testing/selftests/kvm/pre_fault_memory_test.c +++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c @@ -17,13 +17,13 @@ #define TEST_NPAGES (TEST_SIZE / PAGE_SIZE) #define TEST_SLOT 10 -static void guest_code(uint64_t base_gva) +static void guest_code(u64 base_gva) { - volatile uint64_t val __used; + volatile u64 val __used; int i; for (i = 0; i < TEST_NPAGES; i++) { - uint64_t *src = (uint64_t *)(base_gva + i * PAGE_SIZE); + u64 *src = (u64 *)(base_gva + i * PAGE_SIZE); val = *src; } @@ -161,7 +161,7 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset, static void __test_pre_fault_memory(unsigned long vm_type, bool private) { - uint64_t gpa, gva, alignment, guest_page_size; + u64 gpa, gva, alignment, guest_page_size; const struct vm_shape shape = { .mode = VM_MODE_DEFAULT, .type = vm_type, diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c index f962fefc48fa..99d87e6eb5aa 100644 --- a/tools/testing/selftests/kvm/riscv/arch_timer.c +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c @@ -17,7 +17,7 @@ static int timer_irq = IRQ_S_TIMER; static void guest_irq_handler(struct pt_regs *regs) { - uint64_t xcnt, xcnt_diff_us, cmp; + u64 xcnt, xcnt_diff_us, cmp; unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG; uint32_t cpu = guest_get_vcpuid(); struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c index 739d17befb5a..3f44b045a22e 100644 --- a/tools/testing/selftests/kvm/riscv/ebreak_test.c +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -8,10 +8,10 @@ #include "kvm_util.h" #include "ucall_common.h" -#define LABEL_ADDRESS(v) ((uint64_t)&(v)) +#define LABEL_ADDRESS(v) ((u64)&(v)) extern unsigned char sw_bp_1, sw_bp_2; -static uint64_t sw_bp_addr; +static u64 sw_bp_addr; static void guest_code(void) { @@ -37,7 +37,7 @@ int main(void) { struct kvm_vm *vm; struct kvm_vcpu *vcpu; - uint64_t pc; + u64 pc; struct kvm_guest_debug debug = { .control = KVM_GUESTDBG_ENABLE, }; diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 8d6b951434eb..8d6fdb5d38b8 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -162,7 +162,7 @@ bool check_reject_set(int err) } static int override_vector_reg_size(struct kvm_vcpu *vcpu, struct vcpu_reg_sublist *s, - uint64_t feature) + u64 feature) { unsigned long vlenb_reg = 0; int rc; @@ -197,7 +197,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) { unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 }; struct vcpu_reg_sublist *s; - uint64_t feature; + u64 feature; int rc; for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index 207dc5cd36f0..e56a3dd6a51e 100644 --- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -86,7 +86,7 @@ unsigned long pmu_csr_read_num(int csr_num) #undef switchcase_csr_read } -static inline void dummy_func_loop(uint64_t iter) +static inline void dummy_func_loop(u64 iter) { int i = 0; diff --git a/tools/testing/selftests/kvm/s390/debug_test.c b/tools/testing/selftests/kvm/s390/debug_test.c index ad8095968601..751c61c0f056 100644 --- a/tools/testing/selftests/kvm/s390/debug_test.c +++ b/tools/testing/selftests/kvm/s390/debug_test.c @@ -17,7 +17,7 @@ asm("int_handler:\n" "j .\n"); static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code, - size_t new_psw_off, uint64_t *new_psw) + size_t new_psw_off, u64 *new_psw) { struct kvm_guest_debug debug = {}; struct kvm_regs regs; @@ -27,7 +27,7 @@ static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code, vm = vm_create_with_one_vcpu(vcpu, guest_code); lowcore = addr_gpa2hva(vm, 0); new_psw[0] = (*vcpu)->run->psw_mask; - new_psw[1] = (uint64_t)int_handler; + new_psw[1] = (u64)int_handler; memcpy(lowcore + new_psw_off, new_psw, 16); vcpu_regs_get(*vcpu, ®s); regs.gprs[2] = -1; @@ -42,7 +42,7 @@ static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code, static void test_step_int(void *guest_code, size_t new_psw_off) { struct kvm_vcpu *vcpu; - uint64_t new_psw[2]; + u64 new_psw[2]; struct kvm_vm *vm; vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw); @@ -79,7 +79,7 @@ static void test_step_pgm_diag(void) .u.pgm.code = PGM_SPECIFICATION, }; struct kvm_vcpu *vcpu; - uint64_t new_psw[2]; + u64 new_psw[2]; struct kvm_vm *vm; vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code, diff --git a/tools/testing/selftests/kvm/s390/memop.c b/tools/testing/selftests/kvm/s390/memop.c index 0e8dc8e5d8bd..e8bda1ab7fb0 100644 --- a/tools/testing/selftests/kvm/s390/memop.c +++ b/tools/testing/selftests/kvm/s390/memop.c @@ -34,7 +34,7 @@ enum mop_access_mode { struct mop_desc { uintptr_t gaddr; uintptr_t gaddr_v; - uint64_t set_flags; + u64 set_flags; unsigned int f_check : 1; unsigned int f_inject : 1; unsigned int f_key : 1; @@ -85,7 +85,7 @@ static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc) ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE; if (desc->mode == CMPXCHG) { ksmo.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG; - ksmo.old_addr = (uint64_t)desc->old; + ksmo.old_addr = (u64)desc->old; memcpy(desc->old_value, desc->old, desc->size); } break; @@ -489,7 +489,7 @@ static __uint128_t cut_to_size(int size, __uint128_t val) case 4: return (uint32_t)val; case 8: - return (uint64_t)val; + return (u64)val; case 16: return val; } @@ -501,10 +501,10 @@ static bool popcount_eq(__uint128_t a, __uint128_t b) { unsigned int count_a, count_b; - count_a = __builtin_popcountl((uint64_t)(a >> 64)) + - __builtin_popcountl((uint64_t)a); - count_b = __builtin_popcountl((uint64_t)(b >> 64)) + - __builtin_popcountl((uint64_t)b); + count_a = __builtin_popcountl((u64)(a >> 64)) + + __builtin_popcountl((u64)a); + count_b = __builtin_popcountl((u64)(b >> 64)) + + __builtin_popcountl((u64)b); return count_a == count_b; } @@ -598,15 +598,15 @@ static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t return ret; } case 8: { - uint64_t old = *old_addr; + u64 old = *old_addr; asm volatile ("csg %[old],%[new],%[address]" : [old] "+d" (old), - [address] "+Q" (*(uint64_t *)(target)) - : [new] "d" ((uint64_t)new) + [address] "+Q" (*(u64 *)(target)) + : [new] "d" ((u64)new) : "cc" ); - ret = old == (uint64_t)*old_addr; + ret = old == (u64)*old_addr; *old_addr = old; return ret; } @@ -811,10 +811,10 @@ static void test_errors_cmpxchg_key(void) static void test_termination(void) { struct test_default t = test_default_init(guest_error_key); - uint64_t prefix; - uint64_t teid; - uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61); - uint64_t psw[2]; + u64 prefix; + u64 teid; + u64 teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61); + u64 psw[2]; HOST_SYNC(t.vcpu, STAGE_INITED); HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); @@ -855,7 +855,7 @@ static void test_errors_key_storage_prot_override(void) kvm_vm_free(t.kvm_vm); } -const uint64_t last_page_addr = -PAGE_SIZE; +const u64 last_page_addr = -PAGE_SIZE; static void guest_copy_key_fetch_prot_override(void) { diff --git a/tools/testing/selftests/kvm/s390/resets.c b/tools/testing/selftests/kvm/s390/resets.c index b58f75b381e5..7a81d07500bd 100644 --- a/tools/testing/selftests/kvm/s390/resets.c +++ b/tools/testing/selftests/kvm/s390/resets.c @@ -57,9 +57,9 @@ static void guest_code_initial(void) ); } -static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value) +static void test_one_reg(struct kvm_vcpu *vcpu, u64 id, u64 value) { - uint64_t eval_reg; + u64 eval_reg; eval_reg = vcpu_get_reg(vcpu, id); TEST_ASSERT(eval_reg == value, "value == 0x%lx", value); diff --git a/tools/testing/selftests/kvm/s390/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c index fd8e997de693..e94a640f6f32 100644 --- a/tools/testing/selftests/kvm/s390/tprot.c +++ b/tools/testing/selftests/kvm/s390/tprot.c @@ -46,7 +46,7 @@ enum permission { static enum permission test_protection(void *addr, uint8_t key) { - uint64_t mask; + u64 mask; asm volatile ( "tprot %[addr], 0(%[key])\n" diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index a398dc3a8c4b..413281e277ac 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -30,19 +30,19 @@ #define MEM_REGION_GPA 0xc0000000 #define MEM_REGION_SLOT 10 -static const uint64_t MMIO_VAL = 0xbeefull; +static const u64 MMIO_VAL = 0xbeefull; -extern const uint64_t final_rip_start; -extern const uint64_t final_rip_end; +extern const u64 final_rip_start; +extern const u64 final_rip_end; static sem_t vcpu_ready; -static inline uint64_t guest_spin_on_val(uint64_t spin_val) +static inline u64 guest_spin_on_val(u64 spin_val) { - uint64_t val; + u64 val; do { - val = READ_ONCE(*((uint64_t *)MEM_REGION_GPA)); + val = READ_ONCE(*((u64 *)MEM_REGION_GPA)); } while (val == spin_val); GUEST_SYNC(0); @@ -54,7 +54,7 @@ static void *vcpu_worker(void *data) struct kvm_vcpu *vcpu = data; struct kvm_run *run = vcpu->run; struct ucall uc; - uint64_t cmd; + u64 cmd; /* * Loop until the guest is done. Re-enter the guest on all MMIO exits, @@ -111,8 +111,8 @@ static struct kvm_vm *spawn_vm(struct kvm_vcpu **vcpu, pthread_t *vcpu_thread, void *guest_code) { struct kvm_vm *vm; - uint64_t *hva; - uint64_t gpa; + u64 *hva; + u64 gpa; vm = vm_create_with_one_vcpu(vcpu, guest_code); @@ -144,7 +144,7 @@ static struct kvm_vm *spawn_vm(struct kvm_vcpu **vcpu, pthread_t *vcpu_thread, static void guest_code_move_memory_region(void) { - uint64_t val; + u64 val; GUEST_SYNC(0); @@ -180,7 +180,7 @@ static void test_move_memory_region(bool disable_slot_zap_quirk) pthread_t vcpu_thread; struct kvm_vcpu *vcpu; struct kvm_vm *vm; - uint64_t *hva; + u64 *hva; vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region); @@ -224,7 +224,7 @@ static void test_move_memory_region(bool disable_slot_zap_quirk) static void guest_code_delete_memory_region(void) { struct desc_ptr idt; - uint64_t val; + u64 val; /* * Clobber the IDT so that a #PF due to the memory region being deleted @@ -434,16 +434,16 @@ static void test_add_max_memory_regions(void) for (slot = 0; slot < max_mem_slots; slot++) vm_set_user_memory_region(vm, slot, 0, - ((uint64_t)slot * MEM_REGION_SIZE), + ((u64)slot * MEM_REGION_SIZE), MEM_REGION_SIZE, - mem_aligned + (uint64_t)slot * MEM_REGION_SIZE); + mem_aligned + (u64)slot * MEM_REGION_SIZE); /* Check it cannot be added memory slots beyond the limit */ mem_extra = kvm_mmap(MEM_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1); ret = __vm_set_user_memory_region(vm, max_mem_slots, 0, - (uint64_t)max_mem_slots * MEM_REGION_SIZE, + (u64)max_mem_slots * MEM_REGION_SIZE, MEM_REGION_SIZE, mem_extra); TEST_ASSERT(ret == -1 && errno == EINVAL, "Adding one more memory slot should fail with EINVAL"); diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index f461eb7a0f6e..6379f47af422 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -25,7 +25,7 @@ #define ST_GPA_BASE (1 << 30) static void *st_gva[NR_VCPUS]; -static uint64_t guest_stolen_time[NR_VCPUS]; +static u64 guest_stolen_time[NR_VCPUS]; #if defined(__x86_64__) @@ -44,7 +44,7 @@ static void guest_code(int cpu) struct kvm_steal_time *st = st_gva[cpu]; uint32_t version; - GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED)); + GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((u64)st_gva[cpu] | KVM_MSR_ENABLED)); memset(st, 0, sizeof(*st)); GUEST_SYNC(0); @@ -120,10 +120,10 @@ static void check_steal_time_uapi(void) struct st_time { uint32_t rev; uint32_t attr; - uint64_t st_time; + u64 st_time; }; -static int64_t smccc(uint32_t func, uint64_t arg) +static int64_t smccc(uint32_t func, u64 arg) { struct arm_smccc_res res; @@ -178,12 +178,12 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu) static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) { struct kvm_vm *vm = vcpu->vm; - uint64_t st_ipa; + u64 st_ipa; struct kvm_device_attr dev = { .group = KVM_ARM_VCPU_PVTIME_CTRL, .attr = KVM_ARM_VCPU_PVTIME_IPA, - .addr = (uint64_t)&st_ipa, + .addr = (u64)&st_ipa, }; /* ST_GPA_BASE is identity mapped */ @@ -208,7 +208,7 @@ static void check_steal_time_uapi(void) { struct kvm_vm *vm; struct kvm_vcpu *vcpu; - uint64_t st_ipa; + u64 st_ipa; int ret; vm = vm_create_with_one_vcpu(&vcpu, NULL); @@ -216,7 +216,7 @@ static void check_steal_time_uapi(void) struct kvm_device_attr dev = { .group = KVM_ARM_VCPU_PVTIME_CTRL, .attr = KVM_ARM_VCPU_PVTIME_IPA, - .addr = (uint64_t)&st_ipa, + .addr = (u64)&st_ipa, }; vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev); @@ -244,7 +244,7 @@ static gpa_t st_gpa[NR_VCPUS]; struct sta_struct { uint32_t sequence; uint32_t flags; - uint64_t steal; + u64 steal; uint8_t preempted; uint8_t pad[47]; } __packed; @@ -297,7 +297,7 @@ static void guest_code(int cpu) static bool is_steal_time_supported(struct kvm_vcpu *vcpu) { - uint64_t id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA); + u64 id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA); unsigned long enabled = vcpu_get_reg(vcpu, id); TEST_ASSERT(enabled == 0 || enabled == 1, "Expected boolean result"); @@ -335,7 +335,7 @@ static void check_steal_time_uapi(void) struct kvm_vm *vm; struct kvm_vcpu *vcpu; struct kvm_one_reg reg; - uint64_t shmem; + u64 shmem; int ret; vm = vm_create_with_one_vcpu(&vcpu, NULL); @@ -345,7 +345,7 @@ static void check_steal_time_uapi(void) KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_lo); - reg.addr = (uint64_t)&shmem; + reg.addr = (u64)&shmem; shmem = ST_GPA_BASE + 1; ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); @@ -410,11 +410,11 @@ static void guest_code(int cpu) static bool is_steal_time_supported(struct kvm_vcpu *vcpu) { int err; - uint64_t val; + u64 val; struct kvm_device_attr attr = { .group = KVM_LOONGARCH_VCPU_CPUCFG, .attr = CPUCFG_KVM_FEATURE, - .addr = (uint64_t)&val, + .addr = (u64)&val, }; err = __vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &attr); @@ -431,12 +431,12 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu) static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) { int err; - uint64_t st_gpa; + u64 st_gpa; struct kvm_vm *vm = vcpu->vm; struct kvm_device_attr attr = { .group = KVM_LOONGARCH_VCPU_PVTIME_CTRL, .attr = KVM_LOONGARCH_VCPU_PVTIME_GPA, - .addr = (uint64_t)&st_gpa, + .addr = (u64)&st_gpa, }; /* ST_GPA_BASE is identity mapped */ diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c index 513d421a9bff..dc5e30b7b77f 100644 --- a/tools/testing/selftests/kvm/system_counter_offset_test.c +++ b/tools/testing/selftests/kvm/system_counter_offset_test.c @@ -17,7 +17,7 @@ #ifdef __x86_64__ struct test_case { - uint64_t tsc_offset; + u64 tsc_offset; }; static struct test_case test_cases[] = { @@ -39,12 +39,12 @@ static void setup_system_counter(struct kvm_vcpu *vcpu, struct test_case *test) &test->tsc_offset); } -static uint64_t guest_read_system_counter(struct test_case *test) +static u64 guest_read_system_counter(struct test_case *test) { return rdtsc(); } -static uint64_t host_read_guest_system_counter(struct test_case *test) +static u64 host_read_guest_system_counter(struct test_case *test) { return rdtsc() + test->tsc_offset; } @@ -69,9 +69,9 @@ static void guest_main(void) } } -static void handle_sync(struct ucall *uc, uint64_t start, uint64_t end) +static void handle_sync(struct ucall *uc, u64 start, u64 end) { - uint64_t obs = uc->args[2]; + u64 obs = uc->args[2]; TEST_ASSERT(start <= obs && obs <= end, "unexpected system counter value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]", @@ -88,7 +88,7 @@ static void handle_abort(struct ucall *uc) static void enter_guest(struct kvm_vcpu *vcpu) { - uint64_t start, end; + u64 start, end; struct ucall uc; int i; diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c index 6f934732c014..7e3aab912082 100644 --- a/tools/testing/selftests/kvm/x86/amx_test.c +++ b/tools/testing/selftests/kvm/x86/amx_test.c @@ -80,7 +80,7 @@ static inline void __tilerelease(void) asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::); } -static inline void __xsavec(struct xstate *xstate, uint64_t rfbm) +static inline void __xsavec(struct xstate *xstate, u64 rfbm) { uint32_t rfbm_lo = rfbm; uint32_t rfbm_hi = rfbm >> 32; diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c index 2b547fc93ba8..d3f21f2f5d28 100644 --- a/tools/testing/selftests/kvm/x86/aperfmperf_test.c +++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c @@ -35,9 +35,9 @@ static int open_dev_msr(int cpu) return open_path_or_exit(path, O_RDONLY); } -static uint64_t read_dev_msr(int msr_fd, uint32_t msr) +static u64 read_dev_msr(int msr_fd, uint32_t msr) { - uint64_t data; + u64 data; ssize_t rc; rc = pread(msr_fd, &data, sizeof(data), msr); @@ -107,7 +107,7 @@ static void guest_code(void *nested_test_data) static void guest_no_aperfmperf(void) { - uint64_t msr_val; + u64 msr_val; uint8_t vector; vector = rdmsr_safe(MSR_IA32_APERF, &msr_val); @@ -122,7 +122,7 @@ static void guest_no_aperfmperf(void) int main(int argc, char *argv[]) { const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX); - uint64_t host_aperf_before, host_mperf_before; + u64 host_aperf_before, host_mperf_before; gva_t nested_test_data_gva; struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -166,8 +166,8 @@ int main(int argc, char *argv[]) host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF); for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) { - uint64_t host_aperf_after, host_mperf_after; - uint64_t guest_aperf, guest_mperf; + u64 host_aperf_after, host_mperf_after; + u64 guest_aperf, guest_mperf; struct ucall uc; vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c index f8916bb34405..81f76c7d5621 100644 --- a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c +++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c @@ -55,11 +55,11 @@ static void apic_write_reg(unsigned int reg, uint32_t val) xapic_write_reg(reg, val); } -static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms) +static void apic_guest_code(u64 apic_hz, u64 delay_ms) { - uint64_t tsc_hz = guest_tsc_khz * 1000; + u64 tsc_hz = guest_tsc_khz * 1000; const uint32_t tmict = ~0u; - uint64_t tsc0, tsc1, freq; + u64 tsc0, tsc1, freq; uint32_t tmcct; int i; @@ -121,7 +121,7 @@ static void test_apic_bus_clock(struct kvm_vcpu *vcpu) } } -static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms, +static void run_apic_bus_clock_test(u64 apic_hz, u64 delay_ms, bool x2apic) { struct kvm_vcpu *vcpu; @@ -168,8 +168,8 @@ int main(int argc, char *argv[]) * Arbitrarilty default to 25MHz for the APIC bus frequency, which is * different enough from the default 1GHz to be interesting. */ - uint64_t apic_hz = 25 * 1000 * 1000; - uint64_t delay_ms = 100; + u64 apic_hz = 25 * 1000 * 1000; + u64 delay_ms = 100; int opt; TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS)); diff --git a/tools/testing/selftests/kvm/x86/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c index 2d814c1d1dc4..542a0eac0f32 100644 --- a/tools/testing/selftests/kvm/x86/debug_regs.c +++ b/tools/testing/selftests/kvm/x86/debug_regs.c @@ -86,7 +86,7 @@ int main(void) struct kvm_run *run; struct kvm_vm *vm; struct ucall uc; - uint64_t cmd; + u64 cmd; int i; /* Instruction lengths starting at ss_start */ int ss_size[6] = { diff --git a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c index b0d2b04a7ff2..388ba4101f97 100644 --- a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c @@ -23,7 +23,7 @@ #define SLOTS 2 #define ITERATIONS 2 -static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; +static u64 guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB; @@ -33,10 +33,10 @@ static int iteration; static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; struct kvm_page_stats { - uint64_t pages_4k; - uint64_t pages_2m; - uint64_t pages_1g; - uint64_t hugepages; + u64 pages_4k; + u64 pages_2m; + u64 pages_1g; + u64 hugepages; }; static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage) @@ -89,9 +89,9 @@ static void run_test(enum vm_guest_mode mode, void *unused) { struct kvm_vm *vm; unsigned long **bitmaps; - uint64_t guest_num_pages; - uint64_t host_num_pages; - uint64_t pages_per_slot; + u64 guest_num_pages; + u64 host_num_pages; + u64 pages_per_slot; int i; struct kvm_page_stats stats_populated; struct kvm_page_stats stats_dirty_logging_enabled; diff --git a/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c index 62cfde273f71..9ff24d4851f5 100644 --- a/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c +++ b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c @@ -35,7 +35,7 @@ static uint8_t smi_handler[] = { 0x0f, 0xaa, /* rsm */ }; -static inline void sync_with_host(uint64_t phase) +static inline void sync_with_host(u64 phase) { asm volatile("in $" XSTR(SYNC_PORT) ", %%al \n" : "+a" (phase)); diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c index 8926cfe0e209..51416328cc69 100644 --- a/tools/testing/selftests/kvm/x86/fastops_test.c +++ b/tools/testing/selftests/kvm/x86/fastops_test.c @@ -28,17 +28,17 @@ #define guest_test_fastop_1(insn, type_t, __val) \ ({ \ type_t val = __val, ex_val = __val, input = __val; \ - uint64_t flags, ex_flags; \ + u64 flags, ex_flags; \ \ guest_execute_fastop_1("", insn, ex_val, ex_flags); \ guest_execute_fastop_1(KVM_FEP, insn, val, flags); \ \ __GUEST_ASSERT(val == ex_val, \ "Wanted 0x%lx for '%s 0x%lx', got 0x%lx", \ - (uint64_t)ex_val, insn, (uint64_t)input, (uint64_t)val); \ + (u64)ex_val, insn, (u64)input, (u64)val); \ __GUEST_ASSERT(flags == ex_flags, \ "Wanted flags 0x%lx for '%s 0x%lx', got 0x%lx", \ - ex_flags, insn, (uint64_t)input, flags); \ + ex_flags, insn, (u64)input, flags); \ }) #define guest_execute_fastop_2(FEP, insn, __input, __output, __flags) \ @@ -52,18 +52,18 @@ #define guest_test_fastop_2(insn, type_t, __val1, __val2) \ ({ \ type_t input = __val1, input2 = __val2, output = __val2, ex_output = __val2; \ - uint64_t flags, ex_flags; \ + u64 flags, ex_flags; \ \ guest_execute_fastop_2("", insn, input, ex_output, ex_flags); \ guest_execute_fastop_2(KVM_FEP, insn, input, output, flags); \ \ __GUEST_ASSERT(output == ex_output, \ "Wanted 0x%lx for '%s 0x%lx 0x%lx', got 0x%lx", \ - (uint64_t)ex_output, insn, (uint64_t)input, \ - (uint64_t)input2, (uint64_t)output); \ + (u64)ex_output, insn, (u64)input, \ + (u64)input2, (u64)output); \ __GUEST_ASSERT(flags == ex_flags, \ "Wanted flags 0x%lx for '%s 0x%lx, 0x%lx', got 0x%lx", \ - ex_flags, insn, (uint64_t)input, (uint64_t)input2, flags); \ + ex_flags, insn, (u64)input, (u64)input2, flags); \ }) #define guest_execute_fastop_cl(FEP, insn, __shift, __output, __flags) \ @@ -78,23 +78,23 @@ ({ \ type_t output = __val2, ex_output = __val2, input = __val2; \ uint8_t shift = __val1; \ - uint64_t flags, ex_flags; \ + u64 flags, ex_flags; \ \ guest_execute_fastop_cl("", insn, shift, ex_output, ex_flags); \ guest_execute_fastop_cl(KVM_FEP, insn, shift, output, flags); \ \ __GUEST_ASSERT(output == ex_output, \ "Wanted 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \ - (uint64_t)ex_output, insn, shift, (uint64_t)input, \ - (uint64_t)output); \ + (u64)ex_output, insn, shift, (u64)input, \ + (u64)output); \ __GUEST_ASSERT(flags == ex_flags, \ "Wanted flags 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \ - ex_flags, insn, shift, (uint64_t)input, flags); \ + ex_flags, insn, shift, (u64)input, flags); \ }) #define guest_execute_fastop_div(__KVM_ASM_SAFE, insn, __a, __d, __rm, __flags) \ ({ \ - uint64_t ign_error_code; \ + u64 ign_error_code; \ uint8_t vector; \ \ __asm__ __volatile__(fastop(__KVM_ASM_SAFE(insn " %[denom]")) \ @@ -109,7 +109,7 @@ ({ \ type_t _a = __val1, _d = __val1, rm = __val2; \ type_t a = _a, d = _d, ex_a = _a, ex_d = _d; \ - uint64_t flags, ex_flags; \ + u64 flags, ex_flags; \ uint8_t v, ex_v; \ \ ex_v = guest_execute_fastop_div(KVM_ASM_SAFE, insn, ex_a, ex_d, rm, ex_flags); \ @@ -118,17 +118,17 @@ GUEST_ASSERT_EQ(v, ex_v); \ __GUEST_ASSERT(v == ex_v, \ "Wanted vector 0x%x for '%s 0x%lx:0x%lx/0x%lx', got 0x%x", \ - ex_v, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, v); \ + ex_v, insn, (u64)_a, (u64)_d, (u64)rm, v); \ __GUEST_ASSERT(a == ex_a && d == ex_d, \ "Wanted 0x%lx:0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx:0x%lx",\ - (uint64_t)ex_a, (uint64_t)ex_d, insn, (uint64_t)_a, \ - (uint64_t)_d, (uint64_t)rm, (uint64_t)a, (uint64_t)d); \ + (u64)ex_a, (u64)ex_d, insn, (u64)_a, \ + (u64)_d, (u64)rm, (u64)a, (u64)d); \ __GUEST_ASSERT(v || ex_v || (flags == ex_flags), \ "Wanted flags 0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx", \ - ex_flags, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, flags);\ + ex_flags, insn, (u64)_a, (u64)_d, (u64)rm, flags);\ }) -static const uint64_t vals[] = { +static const u64 vals[] = { 0, 1, 2, @@ -188,7 +188,7 @@ static void guest_code(void) guest_test_fastops(uint8_t, "b"); guest_test_fastops(uint16_t, "w"); guest_test_fastops(uint32_t, "l"); - guest_test_fastops(uint64_t, "q"); + guest_test_fastops(u64, "q"); GUEST_DONE(); } diff --git a/tools/testing/selftests/kvm/x86/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c index a72f13ae2edb..a0e54af60544 100644 --- a/tools/testing/selftests/kvm/x86/feature_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/feature_msrs_test.c @@ -41,8 +41,8 @@ static bool is_quirked_msr(uint32_t msr) static void test_feature_msr(uint32_t msr) { - const uint64_t supported_mask = kvm_get_feature_msr(msr); - uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0; + const u64 supported_mask = kvm_get_feature_msr(msr); + u64 reset_value = is_quirked_msr(msr) ? supported_mask : 0; struct kvm_vcpu *vcpu; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c index 00b6e85735dd..df7a94400d3d 100644 --- a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c +++ b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c @@ -30,14 +30,14 @@ static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 }; static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 }; extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE]; -static uint64_t do_sched_yield(uint8_t apic_id) +static u64 do_sched_yield(uint8_t apic_id) { - uint64_t ret; + u64 ret; asm volatile("hypercall_insn:\n\t" ".byte 0xcc,0xcc,0xcc\n\t" : "=a"(ret) - : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id) + : "a"((u64)KVM_HC_SCHED_YIELD), "b"((u64)apic_id) : "memory"); return ret; @@ -47,7 +47,7 @@ static void guest_main(void) { const uint8_t *native_hypercall_insn; const uint8_t *other_hypercall_insn; - uint64_t ret; + u64 ret; if (host_cpu_is_intel) { native_hypercall_insn = vmx_vmcall; @@ -72,7 +72,7 @@ static void guest_main(void) * the "right" hypercall. */ if (quirk_disabled) { - GUEST_ASSERT(ret == (uint64_t)-EFAULT); + GUEST_ASSERT(ret == (u64)-EFAULT); GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn, HYPERCALL_INSN_SIZE)); } else { diff --git a/tools/testing/selftests/kvm/x86/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h index 37b1a9f52864..c7e4f08765fb 100644 --- a/tools/testing/selftests/kvm/x86/flds_emulation.h +++ b/tools/testing/selftests/kvm/x86/flds_emulation.h @@ -12,7 +12,7 @@ * KVM to emulate the instruction (e.g. by providing an MMIO address) to * exercise emulation failures. */ -static inline void flds(uint64_t address) +static inline void flds(u64 address) { __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address)); } @@ -22,7 +22,7 @@ static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; struct kvm_regs regs; uint8_t *insn_bytes; - uint64_t flags; + u64 flags; TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); diff --git a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c index 10b1b0ba374e..8e20a03b3329 100644 --- a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c +++ b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c @@ -10,11 +10,11 @@ void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit) { - const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8); - const uint64_t valid = BIT_ULL(18) | BIT_ULL(24); - const uint64_t legal = ignored | valid; - uint64_t val = BIT_ULL(bit); - uint64_t actual; + const u64 ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8); + const u64 valid = BIT_ULL(18) | BIT_ULL(24); + const u64 legal = ignored | valid; + u64 val = BIT_ULL(bit); + u64 actual; int r; r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val); diff --git a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c index 5f561fcda55a..be7a2a631789 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c +++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c @@ -18,16 +18,16 @@ static void guest_code(gpa_t in_pg_gpa, gpa_t out_pg_gpa, gva_t out_pg_gva) { - uint64_t *output_gva; + u64 *output_gva; wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa); - output_gva = (uint64_t *)out_pg_gva; + output_gva = (u64 *)out_pg_gva; hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa); - /* TLFS states output will be a uint64_t value */ + /* TLFS states output will be a u64 value */ GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES); GUEST_DONE(); @@ -40,7 +40,7 @@ int main(void) struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vm *vm; - uint64_t *outval; + u64 *outval; struct ucall uc; TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 0360fa5915c0..7bce2bcc3a73 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c @@ -29,8 +29,8 @@ struct msr_data { }; struct hcall_data { - uint64_t control; - uint64_t expect; + u64 control; + u64 expect; bool ud_expected; }; @@ -42,7 +42,7 @@ static bool is_write_only_msr(uint32_t msr) static void guest_msr(struct msr_data *msr) { uint8_t vector = 0; - uint64_t msr_val = 0; + u64 msr_val = 0; GUEST_ASSERT(msr->idx); diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c index 5369867efac3..beafcfa4043a 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c @@ -18,7 +18,7 @@ #define IPI_VECTOR 0xfe -static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1]; +static volatile u64 ipis_rcvd[RECEIVER_VCPU_ID_2 + 1]; struct hv_vpset { u64 format; diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c index 2de01da9d11d..a4fb63112cac 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c @@ -135,10 +135,10 @@ static void set_expected_val(void *addr, u64 val, int vcpu_id) */ static void swap_two_test_pages(gpa_t pte_gva1, gpa_t pte_gva2) { - uint64_t tmp = *(uint64_t *)pte_gva1; + u64 tmp = *(u64 *)pte_gva1; - *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2; - *(uint64_t *)pte_gva2 = tmp; + *(u64 *)pte_gva1 = *(u64 *)pte_gva2; + *(u64 *)pte_gva2 = tmp; } /* @@ -583,7 +583,7 @@ int main(int argc, char *argv[]) pthread_t threads[2]; gva_t test_data_page, gva; gpa_t gpa; - uint64_t *pte; + u64 *pte; struct test_data *data; struct ucall uc; int stage = 1, r, i; diff --git a/tools/testing/selftests/kvm/x86/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c index 5721e035e38c..5df0cceec03b 100644 --- a/tools/testing/selftests/kvm/x86/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c @@ -17,7 +17,7 @@ #include "processor.h" struct test_case { - uint64_t kvmclock_base; + u64 kvmclock_base; int64_t realtime_offset; }; @@ -52,7 +52,7 @@ static inline void assert_flags(struct kvm_clock_data *data) static void handle_sync(struct ucall *uc, struct kvm_clock_data *start, struct kvm_clock_data *end) { - uint64_t obs, exp_lo, exp_hi; + u64 obs, exp_lo, exp_hi; obs = uc->args[2]; exp_lo = start->clock; diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c index 1b805cbdb47b..e49ae65f8171 100644 --- a/tools/testing/selftests/kvm/x86/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c @@ -40,7 +40,7 @@ static struct msr_data msrs_to_test[] = { static void test_msr(struct msr_data *msr) { - uint64_t ignored; + u64 ignored; uint8_t vector; PR_MSR(msr); @@ -53,7 +53,7 @@ static void test_msr(struct msr_data *msr) } struct hcall_data { - uint64_t nr; + u64 nr; const char *name; }; @@ -73,7 +73,7 @@ static struct hcall_data hcalls_to_test[] = { static void test_hcall(struct hcall_data *hc) { - uint64_t r; + u64 r; PR_HCALL(hc); r = kvm_hypercall(hc->nr, 0, 0, 0, 0); diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index e45c028d2a7e..9c156cf7db0e 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -67,7 +67,7 @@ static void guest_monitor_wait(void *arg) int main(int argc, char *argv[]) { - uint64_t disabled_quirks; + u64 disabled_quirks; struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; diff --git a/tools/testing/selftests/kvm/x86/nested_set_state_test.c b/tools/testing/selftests/kvm/x86/nested_set_state_test.c index 0f2102b43629..831380732671 100644 --- a/tools/testing/selftests/kvm/x86/nested_set_state_test.c +++ b/tools/testing/selftests/kvm/x86/nested_set_state_test.c @@ -250,14 +250,14 @@ void test_vmx_nested_state(struct kvm_vcpu *vcpu) static void vcpu_efer_enable_svm(struct kvm_vcpu *vcpu) { - uint64_t old_efer = vcpu_get_msr(vcpu, MSR_EFER); + u64 old_efer = vcpu_get_msr(vcpu, MSR_EFER); vcpu_set_msr(vcpu, MSR_EFER, old_efer | EFER_SVME); } static void vcpu_efer_disable_svm(struct kvm_vcpu *vcpu) { - uint64_t old_efer = vcpu_get_msr(vcpu, MSR_EFER); + u64 old_efer = vcpu_get_msr(vcpu, MSR_EFER); vcpu_set_msr(vcpu, MSR_EFER, old_efer & ~EFER_SVME); } diff --git a/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c index d9238116d30d..db0d44b8fbd6 100644 --- a/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c @@ -64,7 +64,7 @@ static void check_ia32_tsc_adjust(int64_t max) static void l2_guest_code(void) { - uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE; + u64 l1_tsc = rdtsc() - TSC_OFFSET_VALUE; wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE); check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); diff --git a/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c index b76f29e1e775..b37b0fef7fde 100644 --- a/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c @@ -19,7 +19,7 @@ /* L2 is scaled up (from L1's perspective) by this factor */ #define L2_SCALE_FACTOR 4ULL -#define TSC_OFFSET_L2 ((uint64_t) -33125236320908) +#define TSC_OFFSET_L2 ((u64)-33125236320908) #define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48) #define L2_GUEST_STACK_SIZE 64 @@ -35,9 +35,9 @@ enum { USLEEP, UCHECK_L1, UCHECK_L2 }; * measurements, a difference of 1% between the actual and the expected value * is tolerated. */ -static void compare_tsc_freq(uint64_t actual, uint64_t expected) +static void compare_tsc_freq(u64 actual, u64 expected) { - uint64_t tolerance, thresh_low, thresh_high; + u64 tolerance, thresh_low, thresh_high; tolerance = expected / 100; thresh_low = expected - tolerance; @@ -55,7 +55,7 @@ static void compare_tsc_freq(uint64_t actual, uint64_t expected) static void check_tsc_freq(int level) { - uint64_t tsc_start, tsc_end, tsc_freq; + u64 tsc_start, tsc_end, tsc_freq; /* * Reading the TSC twice with about a second's difference should give @@ -154,12 +154,12 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; gva_t guest_gva = 0; - uint64_t tsc_start, tsc_end; - uint64_t tsc_khz; - uint64_t l1_scale_factor; - uint64_t l0_tsc_freq = 0; - uint64_t l1_tsc_freq = 0; - uint64_t l2_tsc_freq = 0; + u64 tsc_start, tsc_end; + u64 tsc_khz; + u64 l1_scale_factor; + u64 l0_tsc_freq = 0; + u64 l1_tsc_freq = 0; + u64 l2_tsc_freq = 0; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || kvm_cpu_has(X86_FEATURE_SVM)); diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c index c0d84827f736..70950067b989 100644 --- a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c @@ -32,7 +32,7 @@ #define RETURN_OPCODE 0xC3 /* Call the specified memory address. */ -static void guest_do_CALL(uint64_t target) +static void guest_do_CALL(u64 target) { ((void (*)(void)) target)(); } @@ -46,14 +46,14 @@ static void guest_do_CALL(uint64_t target) */ void guest_code(void) { - uint64_t hpage_1 = HPAGE_GVA; - uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512); - uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512); + u64 hpage_1 = HPAGE_GVA; + u64 hpage_2 = hpage_1 + (PAGE_SIZE * 512); + u64 hpage_3 = hpage_2 + (PAGE_SIZE * 512); - READ_ONCE(*(uint64_t *)hpage_1); + READ_ONCE(*(u64 *)hpage_1); GUEST_SYNC(1); - READ_ONCE(*(uint64_t *)hpage_2); + READ_ONCE(*(u64 *)hpage_2); GUEST_SYNC(2); guest_do_CALL(hpage_1); @@ -62,10 +62,10 @@ void guest_code(void) guest_do_CALL(hpage_3); GUEST_SYNC(4); - READ_ONCE(*(uint64_t *)hpage_1); + READ_ONCE(*(u64 *)hpage_1); GUEST_SYNC(5); - READ_ONCE(*(uint64_t *)hpage_3); + READ_ONCE(*(u64 *)hpage_3); GUEST_SYNC(6); } @@ -107,7 +107,7 @@ void run_test(int reclaim_period_ms, bool disable_nx_huge_pages, { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - uint64_t nr_bytes; + u64 nr_bytes; void *hva; int r; diff --git a/tools/testing/selftests/kvm/x86/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c index 9cbf283ebc55..86d1ab0db1e8 100644 --- a/tools/testing/selftests/kvm/x86/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86/platform_info_test.c @@ -23,7 +23,7 @@ static void guest_code(void) { - uint64_t msr_platform_info; + u64 msr_platform_info; uint8_t vector; GUEST_SYNC(true); @@ -42,7 +42,7 @@ int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - uint64_t msr_platform_info; + u64 msr_platform_info; struct ucall uc; TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO)); diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c index 3eaa216b96c0..16fabcf1eabd 100644 --- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c @@ -90,7 +90,7 @@ static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx) static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, void *guest_code, uint8_t pmu_version, - uint64_t perf_capabilities) + u64 perf_capabilities) { struct kvm_vm *vm; @@ -155,7 +155,7 @@ static uint8_t guest_get_pmu_version(void) */ static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr) { - uint64_t count; + u64 count; count = _rdpmc(pmc); if (!(hardware_pmu_arch_events & BIT(idx))) @@ -256,7 +256,7 @@ do { \ } while (0) static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr, - uint32_t ctrl_msr, uint64_t ctrl_msr_value) + uint32_t ctrl_msr, u64 ctrl_msr_value) { GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); @@ -289,7 +289,7 @@ static void guest_test_arch_event(uint8_t idx) GUEST_ASSERT(nr_gp_counters); for (i = 0; i < nr_gp_counters; i++) { - uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS | + u64 eventsel = ARCH_PERFMON_EVENTSEL_OS | ARCH_PERFMON_EVENTSEL_ENABLE | intel_pmu_arch_events[idx]; @@ -328,7 +328,7 @@ static void guest_test_arch_events(void) GUEST_DONE(); } -static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, +static void test_arch_events(uint8_t pmu_version, u64 perf_capabilities, uint8_t length, uint32_t unavailable_mask) { struct kvm_vcpu *vcpu; @@ -374,10 +374,10 @@ __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \ msr, expected, val); static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success, - uint64_t expected_val) + u64 expected_val) { uint8_t vector; - uint64_t val; + u64 val; vector = rdpmc_safe(rdpmc_idx, &val); GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector); @@ -404,7 +404,7 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters * TODO: Test a value that validates full-width writes and the * width of the counters. */ - const uint64_t test_val = 0xffff; + const u64 test_val = 0xffff; const uint32_t msr = base_msr + i; /* @@ -418,12 +418,12 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are * unsupported, i.e. doesn't #GP and reads back '0'. */ - const uint64_t expected_val = expect_success ? test_val : 0; + const u64 expected_val = expect_success ? test_val : 0; const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 && msr != MSR_P6_PERFCTR1; uint32_t rdpmc_idx; uint8_t vector; - uint64_t val; + u64 val; vector = wrmsr_safe(msr, test_val); GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector); @@ -477,7 +477,7 @@ static void guest_test_gp_counters(void) * counters, of which there are none. */ if (pmu_version > 1) { - uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL); + u64 global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL); if (nr_gp_counters) GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0)); @@ -495,7 +495,7 @@ static void guest_test_gp_counters(void) GUEST_DONE(); } -static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities, +static void test_gp_counters(uint8_t pmu_version, u64 perf_capabilities, uint8_t nr_gp_counters) { struct kvm_vcpu *vcpu; @@ -514,7 +514,7 @@ static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities, static void guest_test_fixed_counters(void) { - uint64_t supported_bitmask = 0; + u64 supported_bitmask = 0; uint8_t nr_fixed_counters = 0; uint8_t i; @@ -534,7 +534,7 @@ static void guest_test_fixed_counters(void) for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) { uint8_t vector; - uint64_t val; + u64 val; if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) { vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL, @@ -561,7 +561,7 @@ static void guest_test_fixed_counters(void) GUEST_DONE(); } -static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities, +static void test_fixed_counters(uint8_t pmu_version, u64 perf_capabilities, uint8_t nr_fixed_counters, uint32_t supported_bitmask) { @@ -590,7 +590,7 @@ static void test_intel_counters(void) uint8_t v, j; uint32_t k; - const uint64_t perf_caps[] = { + const u64 perf_caps[] = { 0, PMU_CAP_FW_WRITES, }; diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c index 93b61c077991..88857670ab93 100644 --- a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c @@ -53,11 +53,11 @@ static const struct __kvm_pmu_event_filter base_event_filter = { }; struct { - uint64_t loads; - uint64_t stores; - uint64_t loads_stores; - uint64_t branches_retired; - uint64_t instructions_retired; + u64 loads; + u64 stores; + u64 loads_stores; + u64 branches_retired; + u64 instructions_retired; } pmc_results; /* @@ -75,9 +75,9 @@ static void guest_gp_handler(struct ex_regs *regs) * * Return on success. GUEST_SYNC(0) on error. */ -static void check_msr(uint32_t msr, uint64_t bits_to_flip) +static void check_msr(uint32_t msr, u64 bits_to_flip) { - uint64_t v = rdmsr(msr) ^ bits_to_flip; + u64 v = rdmsr(msr) ^ bits_to_flip; wrmsr(msr, v); if (rdmsr(msr) != v) @@ -91,8 +91,8 @@ static void check_msr(uint32_t msr, uint64_t bits_to_flip) static void run_and_measure_loop(uint32_t msr_base) { - const uint64_t branches_retired = rdmsr(msr_base + 0); - const uint64_t insn_retired = rdmsr(msr_base + 1); + const u64 branches_retired = rdmsr(msr_base + 0); + const u64 insn_retired = rdmsr(msr_base + 1); __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); @@ -147,7 +147,7 @@ static void amd_guest_code(void) * Run the VM to the next GUEST_SYNC(value), and return the value passed * to the sync. Any other exit from the guest is fatal. */ -static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu) +static u64 run_vcpu_to_sync(struct kvm_vcpu *vcpu) { struct ucall uc; @@ -161,7 +161,7 @@ static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu) static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu) { - uint64_t r; + u64 r; memset(&pmc_results, 0, sizeof(pmc_results)); sync_global_to_guest(vcpu->vm, pmc_results); @@ -182,7 +182,7 @@ static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu) */ static bool sanity_check_pmu(struct kvm_vcpu *vcpu) { - uint64_t r; + u64 r; vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler); r = run_vcpu_to_sync(vcpu); @@ -195,7 +195,7 @@ static bool sanity_check_pmu(struct kvm_vcpu *vcpu) * Remove the first occurrence of 'event' (if any) from the filter's * event list. */ -static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event) +static void remove_event(struct __kvm_pmu_event_filter *f, u64 event) { bool found = false; int i; @@ -212,8 +212,8 @@ static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event) #define ASSERT_PMC_COUNTING_INSTRUCTIONS() \ do { \ - uint64_t br = pmc_results.branches_retired; \ - uint64_t ir = pmc_results.instructions_retired; \ + u64 br = pmc_results.branches_retired; \ + u64 ir = pmc_results.instructions_retired; \ bool br_matched = this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT) ? \ br >= NUM_BRANCHES : br == NUM_BRANCHES; \ \ @@ -228,8 +228,8 @@ do { \ #define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS() \ do { \ - uint64_t br = pmc_results.branches_retired; \ - uint64_t ir = pmc_results.instructions_retired; \ + u64 br = pmc_results.branches_retired; \ + u64 ir = pmc_results.instructions_retired; \ \ TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)", \ __func__, br); \ @@ -421,9 +421,9 @@ static void masked_events_guest_test(uint32_t msr_base) * The actual value of the counters don't determine the outcome of * the test. Only that they are zero or non-zero. */ - const uint64_t loads = rdmsr(msr_base + 0); - const uint64_t stores = rdmsr(msr_base + 1); - const uint64_t loads_stores = rdmsr(msr_base + 2); + const u64 loads = rdmsr(msr_base + 0); + const u64 stores = rdmsr(msr_base + 1); + const u64 loads_stores = rdmsr(msr_base + 2); int val; @@ -476,7 +476,7 @@ static void amd_masked_events_guest_code(void) } static void run_masked_events_test(struct kvm_vcpu *vcpu, - const uint64_t masked_events[], + const u64 masked_events[], const int nmasked_events) { struct __kvm_pmu_event_filter f = { @@ -485,7 +485,7 @@ static void run_masked_events_test(struct kvm_vcpu *vcpu, .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS, }; - memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events); + memcpy(f.events, masked_events, sizeof(u64) * nmasked_events); test_with_filter(vcpu, &f); } @@ -494,10 +494,10 @@ static void run_masked_events_test(struct kvm_vcpu *vcpu, #define ALLOW_LOADS_STORES BIT(2) struct masked_events_test { - uint64_t intel_events[MAX_TEST_EVENTS]; - uint64_t intel_event_end; - uint64_t amd_events[MAX_TEST_EVENTS]; - uint64_t amd_event_end; + u64 intel_events[MAX_TEST_EVENTS]; + u64 intel_event_end; + u64 amd_events[MAX_TEST_EVENTS]; + u64 amd_event_end; const char *msg; uint32_t flags; }; @@ -582,9 +582,9 @@ const struct masked_events_test test_cases[] = { }; static int append_test_events(const struct masked_events_test *test, - uint64_t *events, int nevents) + u64 *events, int nevents) { - const uint64_t *evts; + const u64 *evts; int i; evts = use_intel_pmu() ? test->intel_events : test->amd_events; @@ -603,7 +603,7 @@ static bool bool_eq(bool a, bool b) return a == b; } -static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events, +static void run_masked_events_tests(struct kvm_vcpu *vcpu, u64 *events, int nevents) { int ntests = ARRAY_SIZE(test_cases); @@ -630,7 +630,7 @@ static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events, } } -static void add_dummy_events(uint64_t *events, int nevents) +static void add_dummy_events(u64 *events, int nevents) { int i; @@ -650,7 +650,7 @@ static void add_dummy_events(uint64_t *events, int nevents) static void test_masked_events(struct kvm_vcpu *vcpu) { int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS; - uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS]; + u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS]; /* Run the test cases against a sparse PMU event filter. */ run_masked_events_tests(vcpu, events, 0); @@ -668,7 +668,7 @@ static int set_pmu_event_filter(struct kvm_vcpu *vcpu, return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); } -static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event, +static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, u64 event, uint32_t flags, uint32_t action) { struct __kvm_pmu_event_filter f = { @@ -687,7 +687,7 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu) { uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); struct __kvm_pmu_event_filter f; - uint64_t e = ~0ul; + u64 e = ~0ul; int r; /* @@ -745,8 +745,8 @@ static void intel_run_fixed_counter_guest_code(uint8_t idx) } } -static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu, - uint32_t action, uint32_t bitmap) +static u64 test_with_fixed_counter_filter(struct kvm_vcpu *vcpu, + uint32_t action, uint32_t bitmap) { struct __kvm_pmu_event_filter f = { .action = action, @@ -757,9 +757,9 @@ static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu, return run_vcpu_to_sync(vcpu); } -static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu, - uint32_t action, - uint32_t bitmap) +static u64 test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu, + uint32_t action, + uint32_t bitmap) { struct __kvm_pmu_event_filter f = base_event_filter; @@ -775,7 +775,7 @@ static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx, { unsigned int i; uint32_t bitmap; - uint64_t count; + u64 count; TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8, "Invalid nr_fixed_counters"); diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c index 1969f4ab9b28..2e3a68837d0e 100644 --- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c @@ -23,8 +23,8 @@ #include #define BASE_DATA_SLOT 10 -#define BASE_DATA_GPA ((uint64_t)(1ull << 32)) -#define PER_CPU_DATA_SIZE ((uint64_t)(SZ_2M + PAGE_SIZE)) +#define BASE_DATA_GPA ((u64)(1ull << 32)) +#define PER_CPU_DATA_SIZE ((u64)(SZ_2M + PAGE_SIZE)) /* Horrific macro so that the line info is captured accurately :-( */ #define memcmp_g(gpa, pattern, size) \ @@ -38,7 +38,7 @@ do { \ pattern, i, gpa + i, mem[i]); \ } while (0) -static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size) +static void memcmp_h(uint8_t *mem, u64 gpa, uint8_t pattern, size_t size) { size_t i; @@ -70,13 +70,13 @@ enum ucall_syncs { SYNC_PRIVATE, }; -static void guest_sync_shared(uint64_t gpa, uint64_t size, +static void guest_sync_shared(u64 gpa, u64 size, uint8_t current_pattern, uint8_t new_pattern) { GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern); } -static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern) +static void guest_sync_private(u64 gpa, u64 size, uint8_t pattern) { GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern); } @@ -86,10 +86,10 @@ static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern) #define MAP_GPA_SHARED BIT(1) #define MAP_GPA_DO_FALLOCATE BIT(2) -static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared, +static void guest_map_mem(u64 gpa, u64 size, bool map_shared, bool do_fallocate) { - uint64_t flags = MAP_GPA_SET_ATTRIBUTES; + u64 flags = MAP_GPA_SET_ATTRIBUTES; if (map_shared) flags |= MAP_GPA_SHARED; @@ -98,19 +98,19 @@ static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared, kvm_hypercall_map_gpa_range(gpa, size, flags); } -static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate) +static void guest_map_shared(u64 gpa, u64 size, bool do_fallocate) { guest_map_mem(gpa, size, true, do_fallocate); } -static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate) +static void guest_map_private(u64 gpa, u64 size, bool do_fallocate) { guest_map_mem(gpa, size, false, do_fallocate); } struct { - uint64_t offset; - uint64_t size; + u64 offset; + u64 size; } static const test_ranges[] = { GUEST_STAGE(0, PAGE_SIZE), GUEST_STAGE(0, SZ_2M), @@ -119,11 +119,11 @@ struct { GUEST_STAGE(SZ_2M, PAGE_SIZE), }; -static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate) +static void guest_test_explicit_conversion(u64 base_gpa, bool do_fallocate) { const uint8_t def_p = 0xaa; const uint8_t init_p = 0xcc; - uint64_t j; + u64 j; int i; /* Memory should be shared by default. */ @@ -134,8 +134,8 @@ static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate) memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE); for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { - uint64_t gpa = base_gpa + test_ranges[i].offset; - uint64_t size = test_ranges[i].size; + u64 gpa = base_gpa + test_ranges[i].offset; + u64 size = test_ranges[i].size; uint8_t p1 = 0x11; uint8_t p2 = 0x22; uint8_t p3 = 0x33; @@ -214,10 +214,10 @@ skip: } } -static void guest_punch_hole(uint64_t gpa, uint64_t size) +static void guest_punch_hole(u64 gpa, u64 size) { /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */ - uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE; + u64 flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE; kvm_hypercall_map_gpa_range(gpa, size, flags); } @@ -227,7 +227,7 @@ static void guest_punch_hole(uint64_t gpa, uint64_t size) * proper conversion. Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating * (subsequent fault) should zero memory. */ -static void guest_test_punch_hole(uint64_t base_gpa, bool precise) +static void guest_test_punch_hole(u64 base_gpa, bool precise) { const uint8_t init_p = 0xcc; int i; @@ -239,8 +239,8 @@ static void guest_test_punch_hole(uint64_t base_gpa, bool precise) guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false); for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { - uint64_t gpa = base_gpa + test_ranges[i].offset; - uint64_t size = test_ranges[i].size; + u64 gpa = base_gpa + test_ranges[i].offset; + u64 size = test_ranges[i].size; /* * Free all memory before each iteration, even for the !precise @@ -268,7 +268,7 @@ static void guest_test_punch_hole(uint64_t base_gpa, bool precise) } } -static void guest_code(uint64_t base_gpa) +static void guest_code(u64 base_gpa) { /* * Run the conversion test twice, with and without doing fallocate() on @@ -289,8 +289,8 @@ static void guest_code(uint64_t base_gpa) static void handle_exit_hypercall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - uint64_t gpa = run->hypercall.args[0]; - uint64_t size = run->hypercall.args[1] * PAGE_SIZE; + u64 gpa = run->hypercall.args[0]; + u64 size = run->hypercall.args[1] * PAGE_SIZE; bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES; bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED; bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE; @@ -337,7 +337,7 @@ static void *__test_mem_conversions(void *__vcpu) case UCALL_ABORT: REPORT_GUEST_ASSERT(uc); case UCALL_SYNC: { - uint64_t gpa = uc.args[1]; + u64 gpa = uc.args[1]; size_t size = uc.args[2]; size_t i; @@ -402,7 +402,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t KVM_MEM_GUEST_MEMFD, memfd, slot_size * i); for (i = 0; i < nr_vcpus; i++) { - uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size; + u64 gpa = BASE_DATA_GPA + i * per_cpu_size; vcpu_args_set(vcpus[i], 1, gpa); diff --git a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c index 13e72fcec8dd..925040f394de 100644 --- a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c @@ -17,12 +17,12 @@ #define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE) #define EXITS_TEST_SLOT 10 -static uint64_t guest_repeatedly_read(void) +static u64 guest_repeatedly_read(void) { - volatile uint64_t value; + volatile u64 value; while (true) - value = *((uint64_t *) EXITS_TEST_GVA); + value = *((u64 *)EXITS_TEST_GVA); return value; } @@ -72,7 +72,7 @@ static void test_private_access_memslot_deleted(void) vm_mem_region_delete(vm, EXITS_TEST_SLOT); pthread_join(vm_thread, &thread_return); - exit_reason = (uint32_t)(uint64_t)thread_return; + exit_reason = (uint32_t)(u64)thread_return; TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT); TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE); diff --git a/tools/testing/selftests/kvm/x86/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c index f4095a3d1278..8e654cc9ab16 100644 --- a/tools/testing/selftests/kvm/x86/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c @@ -46,9 +46,9 @@ do { \ X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \ X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT) -static uint64_t calc_supported_cr4_feature_bits(void) +static u64 calc_supported_cr4_feature_bits(void) { - uint64_t cr4 = KVM_ALWAYS_ALLOWED_CR4; + u64 cr4 = KVM_ALWAYS_ALLOWED_CR4; if (kvm_cpu_has(X86_FEATURE_UMIP)) cr4 |= X86_CR4_UMIP; @@ -74,7 +74,7 @@ static uint64_t calc_supported_cr4_feature_bits(void) return cr4; } -static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4) +static void test_cr_bits(struct kvm_vcpu *vcpu, u64 cr4) { struct kvm_sregs sregs; int rc, i; diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c index b238615196ad..eec093819ff3 100644 --- a/tools/testing/selftests/kvm/x86/sev_init2_tests.c +++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c @@ -34,7 +34,7 @@ static int __sev_ioctl(int vm_fd, int cmd_id, void *data) { struct kvm_sev_cmd cmd = { .id = cmd_id, - .data = (uint64_t)data, + .data = (u64)data, .sev_fd = open_sev_dev_path_or_exit(), }; int ret; @@ -104,7 +104,7 @@ void test_flags(uint32_t vm_type) "invalid flag"); } -void test_features(uint32_t vm_type, uint64_t supported_features) +void test_features(uint32_t vm_type, u64 supported_features) { int i; diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index dcb3aee699b9..f9be10d9b92d 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -15,7 +15,7 @@ static void guest_sev_test_msr(uint32_t msr) { - uint64_t val = rdmsr(msr); + u64 val = rdmsr(msr); wrmsr(msr, val); GUEST_ASSERT(val == rdmsr(msr)); @@ -23,7 +23,7 @@ static void guest_sev_test_msr(uint32_t msr) #define guest_sev_test_reg(reg) \ do { \ - uint64_t val = get_##reg(); \ + u64 val = get_##reg(); \ \ set_##reg(val); \ GUEST_ASSERT(val == get_##reg()); \ @@ -42,7 +42,7 @@ static void guest_sev_test_regs(void) static void guest_snp_code(void) { - uint64_t sev_msr = rdmsr(MSR_AMD64_SEV); + u64 sev_msr = rdmsr(MSR_AMD64_SEV); GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ENABLED); GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED); @@ -104,7 +104,7 @@ static void compare_xsave(u8 *from_host, u8 *from_guest) abort(); } -static void test_sync_vmsa(uint32_t type, uint64_t policy) +static void test_sync_vmsa(uint32_t type, u64 policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -150,7 +150,7 @@ static void test_sync_vmsa(uint32_t type, uint64_t policy) kvm_vm_free(vm); } -static void test_sev(void *guest_code, uint32_t type, uint64_t policy) +static void test_sev(void *guest_code, uint32_t type, u64 policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -201,7 +201,7 @@ static void guest_shutdown_code(void) __asm__ __volatile__("ud2"); } -static void test_sev_shutdown(uint32_t type, uint64_t policy) +static void test_sev_shutdown(uint32_t type, u64 policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -218,7 +218,7 @@ static void test_sev_shutdown(uint32_t type, uint64_t policy) kvm_vm_free(vm); } -static void test_sev_smoke(void *guest, uint32_t type, uint64_t policy) +static void test_sev_smoke(void *guest, uint32_t type, u64 policy) { const u64 xf_mask = XFEATURE_MASK_X87_AVX; diff --git a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c index 0e8aec568010..27cded643699 100644 --- a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c @@ -20,8 +20,8 @@ static void guest_code(bool tdp_enabled) { - uint64_t error_code; - uint64_t vector; + u64 error_code; + u64 vector; vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA)); @@ -47,8 +47,8 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - uint64_t *hva; - uint64_t gpa; + u64 *hva; + u64 gpa; int rc; TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR)); diff --git a/tools/testing/selftests/kvm/x86/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c index 3ede3ed8ae5c..39e89350c2e7 100644 --- a/tools/testing/selftests/kvm/x86/smm_test.c +++ b/tools/testing/selftests/kvm/x86/smm_test.c @@ -40,7 +40,7 @@ uint8_t smi_handler[] = { 0x0f, 0xaa, /* rsm */ }; -static inline void sync_with_host(uint64_t phase) +static inline void sync_with_host(u64 phase) { asm volatile("in $" XSTR(SYNC_PORT)", %%al \n" : "+a" (phase)); @@ -65,7 +65,7 @@ static void guest_code(void *arg) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint64_t apicbase = rdmsr(MSR_IA32_APICBASE); + u64 apicbase = rdmsr(MSR_IA32_APICBASE); struct svm_test_data *svm = arg; struct vmx_pages *vmx_pages = arg; diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c index 6797da4bd9d9..62e14843e5af 100644 --- a/tools/testing/selftests/kvm/x86/state_test.c +++ b/tools/testing/selftests/kvm/x86/state_test.c @@ -144,7 +144,7 @@ static void __attribute__((__flatten__)) guest_code(void *arg) GUEST_SYNC(1); if (this_cpu_has(X86_FEATURE_XSAVE)) { - uint64_t supported_xcr0 = this_cpu_supported_xcr0(); + u64 supported_xcr0 = this_cpu_supported_xcr0(); uint8_t buffer[PAGE_SIZE]; memset(buffer, 0xcc, sizeof(buffer)); @@ -172,8 +172,8 @@ static void __attribute__((__flatten__)) guest_code(void *arg) } if (this_cpu_has(X86_FEATURE_MPX)) { - uint64_t bounds[2] = { 10, 0xffffffffull }; - uint64_t output[2] = { }; + u64 bounds[2] = { 10, 0xffffffffull }; + u64 output[2] = { }; GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS); GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR); @@ -257,7 +257,7 @@ void check_nested_state(int stage, struct kvm_x86_state *state) int main(int argc, char *argv[]) { - uint64_t *xstate_bv, saved_xstate_bv; + u64 *xstate_bv, saved_xstate_bv; gva_t nested_gva = 0; struct kvm_cpuid2 empty_cpuid = {}; struct kvm_regs regs1, regs2; diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c index c739d071d3b3..5fefb319d9be 100644 --- a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c @@ -76,7 +76,7 @@ static void l2_guest_code_nmi(void) ud2(); } -static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt) +static void l1_guest_code(struct svm_test_data *svm, u64 is_nmi, u64 idt_alt) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; @@ -168,7 +168,7 @@ static void run_test(bool is_nmi) } else { idt_alt_vm = 0; } - vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm); + vcpu_args_set(vcpu, 3, svm_gva, (u64)is_nmi, (u64)idt_alt_vm); memset(&debug, 0, sizeof(debug)); vcpu_guest_debug_set(vcpu, &debug); diff --git a/tools/testing/selftests/kvm/x86/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c index 12b0964f4f13..91583969a14f 100644 --- a/tools/testing/selftests/kvm/x86/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c @@ -95,7 +95,7 @@ int main(void) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - uint64_t val; + u64 val; ksft_print_header(); ksft_set_plan(5); diff --git a/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c index 59c7304f805e..59da8d4da607 100644 --- a/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c +++ b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c @@ -21,10 +21,10 @@ pthread_spinlock_t create_lock; #define TEST_TSC_KHZ 2345678UL #define TEST_TSC_OFFSET 200000000 -uint64_t tsc_sync; +u64 tsc_sync; static void guest_code(void) { - uint64_t start_tsc, local_tsc, tmp; + u64 start_tsc, local_tsc, tmp; start_tsc = rdtsc(); do { diff --git a/tools/testing/selftests/kvm/x86/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c index 1e5e564523b3..27aae6c92a38 100644 --- a/tools/testing/selftests/kvm/x86/ucna_injection_test.c +++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c @@ -45,7 +45,7 @@ #define MCI_CTL2_RESERVED_BIT BIT_ULL(29) -static uint64_t supported_mcg_caps; +static u64 supported_mcg_caps; /* * Record states about the injected UCNA. @@ -53,30 +53,30 @@ static uint64_t supported_mcg_caps; * handler. Variables without the 'i_' prefixes are recorded in guest main * execution thread. */ -static volatile uint64_t i_ucna_rcvd; -static volatile uint64_t i_ucna_addr; -static volatile uint64_t ucna_addr; -static volatile uint64_t ucna_addr2; +static volatile u64 i_ucna_rcvd; +static volatile u64 i_ucna_addr; +static volatile u64 ucna_addr; +static volatile u64 ucna_addr2; struct thread_params { struct kvm_vcpu *vcpu; - uint64_t *p_i_ucna_rcvd; - uint64_t *p_i_ucna_addr; - uint64_t *p_ucna_addr; - uint64_t *p_ucna_addr2; + u64 *p_i_ucna_rcvd; + u64 *p_i_ucna_addr; + u64 *p_ucna_addr; + u64 *p_ucna_addr2; }; static void verify_apic_base_addr(void) { - uint64_t msr = rdmsr(MSR_IA32_APICBASE); - uint64_t base = GET_APIC_BASE(msr); + u64 msr = rdmsr(MSR_IA32_APICBASE); + u64 base = GET_APIC_BASE(msr); GUEST_ASSERT(base == APIC_DEFAULT_GPA); } static void ucna_injection_guest_code(void) { - uint64_t ctl2; + u64 ctl2; verify_apic_base_addr(); xapic_enable(); @@ -106,7 +106,7 @@ static void ucna_injection_guest_code(void) static void cmci_disabled_guest_code(void) { - uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); + u64 ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN); GUEST_DONE(); @@ -114,7 +114,7 @@ static void cmci_disabled_guest_code(void) static void cmci_enabled_guest_code(void) { - uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); + u64 ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT); GUEST_DONE(); @@ -145,14 +145,15 @@ static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu) printf("vCPU received GP in guest.\n"); } -static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr) { +static void inject_ucna(struct kvm_vcpu *vcpu, u64 addr) +{ /* * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in * the IA32_MCi_STATUS register. * MSCOD=1 (BIT[16] - MscodDataRdErr). * MCACOD=0x0090 (Memory controller error format, channel 0) */ - uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | + u64 status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090; struct kvm_x86_mce mce = {}; mce.status = status; @@ -216,10 +217,10 @@ static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *par { struct kvm_vm *vm = vcpu->vm; params->vcpu = vcpu; - params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd); - params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr); - params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr); - params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2); + params->p_i_ucna_rcvd = (u64 *)addr_gva2hva(vm, (u64)&i_ucna_rcvd); + params->p_i_ucna_addr = (u64 *)addr_gva2hva(vm, (u64)&i_ucna_addr); + params->p_ucna_addr = (u64 *)addr_gva2hva(vm, (u64)&ucna_addr); + params->p_ucna_addr2 = (u64 *)addr_gva2hva(vm, (u64)&ucna_addr2); run_ucna_injection(params); @@ -242,7 +243,7 @@ static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *par static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p) { - uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS; + u64 mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS; if (enable_cmci_p) mcg_caps |= MCG_CMCI_P; diff --git a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c index 8463a9956410..b673c3450886 100644 --- a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c @@ -66,7 +66,7 @@ struct kvm_msr_filter filter_gs = { }, }; -static uint64_t msr_non_existent_data; +static u64 msr_non_existent_data; static int guest_exception_count; static u32 msr_reads, msr_writes; @@ -142,7 +142,7 @@ struct kvm_msr_filter no_filter_deny = { * Note: Force test_rdmsr() to not be inlined to prevent the labels, * rdmsr_start and rdmsr_end, from being defined multiple times. */ -static noinline uint64_t test_rdmsr(uint32_t msr) +static noinline u64 test_rdmsr(uint32_t msr) { uint32_t a, d; @@ -151,14 +151,14 @@ static noinline uint64_t test_rdmsr(uint32_t msr) __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" : "=a"(a), "=d"(d) : "c"(msr) : "memory"); - return a | ((uint64_t) d << 32); + return a | ((u64)d << 32); } /* * Note: Force test_wrmsr() to not be inlined to prevent the labels, * wrmsr_start and wrmsr_end, from being defined multiple times. */ -static noinline void test_wrmsr(uint32_t msr, uint64_t value) +static noinline void test_wrmsr(uint32_t msr, u64 value) { uint32_t a = value; uint32_t d = value >> 32; @@ -176,7 +176,7 @@ extern char wrmsr_start, wrmsr_end; * Note: Force test_em_rdmsr() to not be inlined to prevent the labels, * rdmsr_start and rdmsr_end, from being defined multiple times. */ -static noinline uint64_t test_em_rdmsr(uint32_t msr) +static noinline u64 test_em_rdmsr(uint32_t msr) { uint32_t a, d; @@ -185,14 +185,14 @@ static noinline uint64_t test_em_rdmsr(uint32_t msr) __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" : "=a"(a), "=d"(d) : "c"(msr) : "memory"); - return a | ((uint64_t) d << 32); + return a | ((u64)d << 32); } /* * Note: Force test_em_wrmsr() to not be inlined to prevent the labels, * wrmsr_start and wrmsr_end, from being defined multiple times. */ -static noinline void test_em_wrmsr(uint32_t msr, uint64_t value) +static noinline void test_em_wrmsr(uint32_t msr, u64 value) { uint32_t a = value; uint32_t d = value >> 32; @@ -208,7 +208,7 @@ extern char em_wrmsr_start, em_wrmsr_end; static void guest_code_filter_allow(void) { - uint64_t data; + u64 data; /* * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS. @@ -328,7 +328,7 @@ static void guest_code_filter_deny(void) static void guest_code_permission_bitmap(void) { - uint64_t data; + u64 data; data = test_rdmsr(MSR_FS_BASE); GUEST_ASSERT(data == MSR_FS_BASE); @@ -464,7 +464,7 @@ static void process_ucall_done(struct kvm_vcpu *vcpu) uc.cmd, UCALL_DONE); } -static uint64_t process_ucall(struct kvm_vcpu *vcpu) +static u64 process_ucall(struct kvm_vcpu *vcpu) { struct ucall uc = {}; @@ -502,7 +502,7 @@ static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu, process_wrmsr(vcpu, msr_index); } -static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu) +static u64 run_guest_then_process_ucall(struct kvm_vcpu *vcpu) { vcpu_run(vcpu); return process_ucall(vcpu); @@ -519,7 +519,7 @@ KVM_ONE_VCPU_TEST_SUITE(user_msr); KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow) { struct kvm_vm *vm = vcpu->vm; - uint64_t cmd; + u64 cmd; int rc; rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); diff --git a/tools/testing/selftests/kvm/x86/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c index 90720b6205f4..d61c8c69ade3 100644 --- a/tools/testing/selftests/kvm/x86/vmx_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c @@ -13,10 +13,10 @@ #include "vmx.h" static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, - uint64_t mask) + u64 mask) { - uint64_t val = vcpu_get_msr(vcpu, msr_index); - uint64_t bit; + u64 val = vcpu_get_msr(vcpu, msr_index); + u64 bit; mask &= val; @@ -27,10 +27,10 @@ static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, } static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, - uint64_t mask) + u64 mask) { - uint64_t val = vcpu_get_msr(vcpu, msr_index); - uint64_t bit; + u64 val = vcpu_get_msr(vcpu, msr_index); + u64 bit; mask = ~mask | val; @@ -68,10 +68,10 @@ static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu) } static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu, - uint64_t msr_bit, + u64 msr_bit, struct kvm_x86_cpu_feature feature) { - uint64_t val; + u64 val; vcpu_clear_cpuid_feature(vcpu, feature); @@ -90,7 +90,7 @@ static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu, static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu) { - uint64_t supported_bits = FEAT_CTL_LOCKED | + u64 supported_bits = FEAT_CTL_LOCKED | FEAT_CTL_VMX_ENABLED_INSIDE_SMX | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX | FEAT_CTL_SGX_LC_ENABLED | diff --git a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c index 7ff6f62e20a3..1f3638c6ee14 100644 --- a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c @@ -52,7 +52,7 @@ static const union perf_capabilities format_caps = { .pebs_format = -1, }; -static void guest_test_perf_capabilities_gp(uint64_t val) +static void guest_test_perf_capabilities_gp(u64 val) { uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); @@ -61,7 +61,7 @@ static void guest_test_perf_capabilities_gp(uint64_t val) val, ex_str(vector)); } -static void guest_code(uint64_t current_val) +static void guest_code(u64 current_val) { int i; @@ -129,7 +129,7 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code) KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code) { - const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities; + const u64 fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities; int bit; for_each_set_bit(bit, &fungible_caps, 64) { @@ -148,7 +148,7 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code) */ KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code) { - const uint64_t reserved_caps = (~host_cap.capabilities | + const u64 reserved_caps = (~host_cap.capabilities | immutable_caps.capabilities) & ~format_caps.capabilities; union perf_capabilities val = host_cap; @@ -210,7 +210,7 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code) KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code) { - uint64_t val; + u64 val; int i, r; vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c index 0b10dfbfa3ea..97e07b7bc3dd 100644 --- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c @@ -48,16 +48,16 @@ * Incremented in the IPI handler. Provides evidence to the sender that the IPI * arrived at the destination */ -static volatile uint64_t ipis_rcvd; +static volatile u64 ipis_rcvd; /* Data struct shared between host main thread and vCPUs */ struct test_data_page { uint32_t halter_apic_id; - volatile uint64_t hlt_count; - volatile uint64_t wake_count; - uint64_t ipis_sent; - uint64_t migrations_attempted; - uint64_t migrations_completed; + volatile u64 hlt_count; + volatile u64 wake_count; + u64 ipis_sent; + u64 migrations_attempted; + u64 migrations_completed; uint32_t icr; uint32_t icr2; uint32_t halter_tpr; @@ -75,13 +75,13 @@ struct test_data_page { struct thread_params { struct test_data_page *data; struct kvm_vcpu *vcpu; - uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */ + u64 *pipis_rcvd; /* host address of ipis_rcvd global */ }; void verify_apic_base_addr(void) { - uint64_t msr = rdmsr(MSR_IA32_APICBASE); - uint64_t base = GET_APIC_BASE(msr); + u64 msr = rdmsr(MSR_IA32_APICBASE); + u64 base = GET_APIC_BASE(msr); GUEST_ASSERT(base == APIC_DEFAULT_GPA); } @@ -125,12 +125,12 @@ static void guest_ipi_handler(struct ex_regs *regs) static void sender_guest_code(struct test_data_page *data) { - uint64_t last_wake_count; - uint64_t last_hlt_count; - uint64_t last_ipis_rcvd_count; + u64 last_wake_count; + u64 last_hlt_count; + u64 last_ipis_rcvd_count; uint32_t icr_val; uint32_t icr2_val; - uint64_t tsc_start; + u64 tsc_start; verify_apic_base_addr(); xapic_enable(); @@ -248,7 +248,7 @@ static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) } void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, - uint64_t *pipis_rcvd) + u64 *pipis_rcvd) { long pages_not_moved; unsigned long nodemask = 0; @@ -259,9 +259,9 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, int i; int from, to; unsigned long bit; - uint64_t hlt_count; - uint64_t wake_count; - uint64_t ipis_sent; + u64 hlt_count; + u64 wake_count; + u64 ipis_sent; fprintf(stderr, "Calling migrate_pages every %d microseconds\n", delay_usecs); @@ -398,7 +398,7 @@ int main(int argc, char *argv[]) pthread_t threads[2]; struct thread_params params[2]; struct kvm_vm *vm; - uint64_t *pipis_rcvd; + u64 *pipis_rcvd; get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs); if (run_secs <= 0) @@ -423,7 +423,7 @@ int main(int argc, char *argv[]) vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr); vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr); - pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd); + pipis_rcvd = (u64 *)addr_gva2hva(vm, (u64)&ipis_rcvd); params[0].pipis_rcvd = pipis_rcvd; params[1].pipis_rcvd = pipis_rcvd; diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c index 0c5e12f5f14e..e71471ac5bd5 100644 --- a/tools/testing/selftests/kvm/x86/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c @@ -23,7 +23,7 @@ static void xapic_guest_code(void) xapic_enable(); while (1) { - uint64_t val = (u64)xapic_read_reg(APIC_IRR) | + u64 val = (u64)xapic_read_reg(APIC_IRR) | (u64)xapic_read_reg(APIC_IRR + 0x10) << 32; xapic_write_reg(APIC_ICR2, val >> 32); @@ -43,7 +43,7 @@ static void x2apic_guest_code(void) x2apic_enable(); do { - uint64_t val = x2apic_read_reg(APIC_IRR) | + u64 val = x2apic_read_reg(APIC_IRR) | x2apic_read_reg(APIC_IRR + 0x10) << 32; if (val & X2APIC_RSVD_BITS_MASK) { @@ -56,12 +56,12 @@ static void x2apic_guest_code(void) } while (1); } -static void ____test_icr(struct xapic_vcpu *x, uint64_t val) +static void ____test_icr(struct xapic_vcpu *x, u64 val) { struct kvm_vcpu *vcpu = x->vcpu; struct kvm_lapic_state xapic; struct ucall uc; - uint64_t icr; + u64 icr; /* * Tell the guest what ICR value to write. Use the IRR to pass info, @@ -93,7 +93,7 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val) TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); } -static void __test_icr(struct xapic_vcpu *x, uint64_t val) +static void __test_icr(struct xapic_vcpu *x, u64 val) { /* * The BUSY bit is reserved on both AMD and Intel, but only AMD treats @@ -109,7 +109,7 @@ static void __test_icr(struct xapic_vcpu *x, uint64_t val) static void test_icr(struct xapic_vcpu *x) { struct kvm_vcpu *vcpu = x->vcpu; - uint64_t icr, i, j; + u64 icr, i, j; icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED; for (i = 0; i <= 0xff; i++) @@ -142,7 +142,7 @@ static void test_icr(struct xapic_vcpu *x) __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK); } -static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base) +static void __test_apic_id(struct kvm_vcpu *vcpu, u64 apic_base) { uint32_t apic_id, expected; struct kvm_lapic_state xapic; @@ -172,7 +172,7 @@ static void test_apic_id(void) { const uint32_t NR_VCPUS = 3; struct kvm_vcpu *vcpus[NR_VCPUS]; - uint64_t apic_base; + u64 apic_base; struct kvm_vm *vm; int i; diff --git a/tools/testing/selftests/kvm/x86/xapic_tpr_test.c b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c index 3862134d9d40..14052e94d553 100644 --- a/tools/testing/selftests/kvm/x86/xapic_tpr_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c @@ -95,7 +95,7 @@ static uint8_t tpr_guest_ppr_get(void) static uint8_t tpr_guest_cr8_get(void) { - uint64_t cr8; + u64 cr8; asm volatile ("mov %%cr8, %[cr8]\n\t" : [cr8] "=r"(cr8)); diff --git a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c index d038c1571729..40dc9e6b3fad 100644 --- a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c @@ -21,7 +21,7 @@ */ #define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \ do { \ - uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \ + u64 __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \ \ __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \ __supported == ((xfeatures) | (dependencies)), \ @@ -39,7 +39,7 @@ do { \ */ #define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures) \ do { \ - uint64_t __supported = (supported_xcr0) & (xfeatures); \ + u64 __supported = (supported_xcr0) & (xfeatures); \ \ __GUEST_ASSERT(!__supported || __supported == (xfeatures), \ "supported = 0x%lx, xfeatures = 0x%llx", \ @@ -48,8 +48,8 @@ do { \ static void guest_code(void) { - uint64_t initial_xcr0; - uint64_t supported_xcr0; + u64 initial_xcr0; + u64 supported_xcr0; int i, vector; set_cr4(get_cr4() | X86_CR4_OSXSAVE); diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index 23909b501ac2..83aab75ac792 100644 --- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c @@ -117,14 +117,14 @@ struct pvclock_wall_clock { struct vcpu_runstate_info { uint32_t state; - uint64_t state_entry_time; - uint64_t time[5]; /* Extra field for overrun check */ + u64 state_entry_time; + u64 time[5]; /* Extra field for overrun check */ }; struct compat_vcpu_runstate_info { uint32_t state; - uint64_t state_entry_time; - uint64_t time[5]; + u64 state_entry_time; + u64 time[5]; } __attribute__((__packed__)); struct arch_vcpu_info { @@ -658,7 +658,7 @@ int main(int argc, char *argv[]) printf("Testing RUNSTATE_ADJUST\n"); rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; memset(&rst.u, 0, sizeof(rst.u)); - rst.u.runstate.state = (uint64_t)-1; + rst.u.runstate.state = (u64)-1; rst.u.runstate.time_blocked = 0x5a - rs->time[RUNSTATE_blocked]; rst.u.runstate.time_offline = @@ -1113,7 +1113,7 @@ int main(int argc, char *argv[]) /* Don't change the address, just trigger a write */ struct kvm_xen_vcpu_attr adj = { .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST, - .u.runstate.state = (uint64_t)-1 + .u.runstate.state = (u64)-1 }; vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj); diff --git a/tools/testing/selftests/kvm/x86/xss_msr_test.c b/tools/testing/selftests/kvm/x86/xss_msr_test.c index f331a4e9bae3..12c63df6bbce 100644 --- a/tools/testing/selftests/kvm/x86/xss_msr_test.c +++ b/tools/testing/selftests/kvm/x86/xss_msr_test.c @@ -17,7 +17,7 @@ int main(int argc, char *argv[]) bool xss_in_msr_list; struct kvm_vm *vm; struct kvm_vcpu *vcpu; - uint64_t xss_val; + u64 xss_val; int i, r; /* Create VM */ -- cgit v1.2.3 From 286e8903aed14cc4f64be8e72d5b28ab2b8982aa Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 20 Apr 2026 14:19:50 -0700 Subject: KVM: selftests: Use s64 instead of int64_t Use s64 instead of int64_t to make the KVM selftests code more concise and more similar to the kernel (since selftests are primarily developed by kernel developers). This commit was generated with the following command: git ls-files tools/testing/selftests/kvm | xargs sed -i 's/int64_t/s64/g' Then by manually adjusting whitespace to make checkpatch.pl happy. No functional change intended. Signed-off-by: David Matlack Link: https://patch.msgid.link/20260420212004.3938325-6-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/arm64/sea_to_user.c | 2 +- tools/testing/selftests/kvm/arm64/set_id_regs.c | 2 +- tools/testing/selftests/kvm/guest_print_test.c | 2 +- tools/testing/selftests/kvm/include/test_util.h | 4 ++-- tools/testing/selftests/kvm/lib/test_util.c | 16 ++++++++-------- tools/testing/selftests/kvm/lib/userfaultfd_util.c | 2 +- tools/testing/selftests/kvm/lib/x86/processor.c | 2 +- tools/testing/selftests/kvm/memslot_perf_test.c | 2 +- tools/testing/selftests/kvm/steal_time.c | 4 ++-- tools/testing/selftests/kvm/x86/kvm_clock_test.c | 2 +- tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c | 6 +++--- 11 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c index 61954f2221e4..7285eade4acf 100644 --- a/tools/testing/selftests/kvm/arm64/sea_to_user.c +++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c @@ -59,7 +59,7 @@ static bool far_invalid; static u64 translate_to_host_paddr(unsigned long vaddr) { u64 pinfo; - int64_t offset = vaddr / getpagesize() * sizeof(pinfo); + s64 offset = vaddr / getpagesize() * sizeof(pinfo); int fd; u64 page_addr; u64 paddr; diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 9b9c04c963a1..4402f317f7d9 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -36,7 +36,7 @@ struct reg_ftr_bits { * For FTR_EXACT, safe_val is used as the exact safe value. * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value. */ - int64_t safe_val; + s64 safe_val; /* Allowed to be changed by the host after run */ bool mutable; diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c index 894ef7d2481e..b059abcf1a5b 100644 --- a/tools/testing/selftests/kvm/guest_print_test.c +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -25,7 +25,7 @@ static struct guest_vals vals; /* GUEST_PRINTF()/GUEST_ASSERT_FMT() does not support float or double. */ #define TYPE_LIST \ -TYPE(test_type_i64, I64, "%ld", int64_t) \ +TYPE(test_type_i64, I64, "%ld", s64) \ TYPE(test_type_u64, U64u, "%lu", u64) \ TYPE(test_type_x64, U64x, "0x%lx", u64) \ TYPE(test_type_X64, U64X, "0x%lX", u64) \ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 62fe83763021..d7489db738bf 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -101,8 +101,8 @@ do { \ size_t parse_size(const char *size); -int64_t timespec_to_ns(struct timespec ts); -struct timespec timespec_add_ns(struct timespec ts, int64_t ns); +s64 timespec_to_ns(struct timespec ts); +struct timespec timespec_add_ns(struct timespec ts, s64 ns); struct timespec timespec_add(struct timespec ts1, struct timespec ts2); struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); struct timespec timespec_elapsed(struct timespec start); diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index d863705f6795..f5b460c445be 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -83,12 +83,12 @@ size_t parse_size(const char *size) return base << shift; } -int64_t timespec_to_ns(struct timespec ts) +s64 timespec_to_ns(struct timespec ts) { - return (int64_t)ts.tv_nsec + 1000000000LL * (int64_t)ts.tv_sec; + return (s64)ts.tv_nsec + 1000000000LL * (s64)ts.tv_sec; } -struct timespec timespec_add_ns(struct timespec ts, int64_t ns) +struct timespec timespec_add_ns(struct timespec ts, s64 ns) { struct timespec res; @@ -101,15 +101,15 @@ struct timespec timespec_add_ns(struct timespec ts, int64_t ns) struct timespec timespec_add(struct timespec ts1, struct timespec ts2) { - int64_t ns1 = timespec_to_ns(ts1); - int64_t ns2 = timespec_to_ns(ts2); + s64 ns1 = timespec_to_ns(ts1); + s64 ns2 = timespec_to_ns(ts2); return timespec_add_ns((struct timespec){0}, ns1 + ns2); } struct timespec timespec_sub(struct timespec ts1, struct timespec ts2) { - int64_t ns1 = timespec_to_ns(ts1); - int64_t ns2 = timespec_to_ns(ts2); + s64 ns1 = timespec_to_ns(ts1); + s64 ns2 = timespec_to_ns(ts2); return timespec_add_ns((struct timespec){0}, ns1 - ns2); } @@ -123,7 +123,7 @@ struct timespec timespec_elapsed(struct timespec start) struct timespec timespec_div(struct timespec ts, int divisor) { - int64_t ns = timespec_to_ns(ts) / divisor; + s64 ns = timespec_to_ns(ts) / divisor; return timespec_add_ns((struct timespec){0}, ns); } diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index 2f069ce6a446..ef8d76f71f83 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -27,7 +27,7 @@ static void *uffd_handler_thread_fn(void *arg) { struct uffd_reader_args *reader_args = (struct uffd_reader_args *)arg; int uffd = reader_args->uffd; - int64_t pages = 0; + s64 pages = 0; struct timespec start; struct timespec ts_diff; struct epoll_event evt; diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 81f5dea51fc3..802543aa588c 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -379,7 +379,7 @@ static u64 *__vm_get_page_table_entry(struct kvm_vm *vm, * Check that the vaddr is a sign-extended va_width value. */ TEST_ASSERT(vaddr == - (((int64_t)vaddr << (64 - va_width) >> (64 - va_width))), + (((s64)vaddr << (64 - va_width) >> (64 - va_width))), "Canonical check failed. The virtual address is invalid."); for (current_level = mmu->pgtable_levels; diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index d5161e8aee14..bf62b522d32e 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -1040,7 +1040,7 @@ static bool parse_args(int argc, char *argv[], struct test_result { struct timespec slot_runtime, guest_runtime, iter_runtime; - int64_t slottimens, runtimens; + s64 slottimens, runtimens; u64 nloops; }; diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index 6379f47af422..d0a41a2bcccb 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -123,7 +123,7 @@ struct st_time { u64 st_time; }; -static int64_t smccc(uint32_t func, u64 arg) +static s64 smccc(uint32_t func, u64 arg) { struct arm_smccc_res res; @@ -140,7 +140,7 @@ static void check_status(struct st_time *st) static void guest_code(int cpu) { struct st_time *st; - int64_t status; + s64 status; status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES); GUEST_ASSERT_EQ(status, 0); diff --git a/tools/testing/selftests/kvm/x86/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c index 5df0cceec03b..2b8a3feee1f8 100644 --- a/tools/testing/selftests/kvm/x86/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c @@ -18,7 +18,7 @@ struct test_case { u64 kvmclock_base; - int64_t realtime_offset; + s64 realtime_offset; }; static struct test_case test_cases[] = { diff --git a/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c index db0d44b8fbd6..a18b0cfd42e2 100644 --- a/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c @@ -53,9 +53,9 @@ enum { /* The virtual machine object. */ static struct kvm_vm *vm; -static void check_ia32_tsc_adjust(int64_t max) +static void check_ia32_tsc_adjust(s64 max) { - int64_t adjust; + s64 adjust; adjust = rdmsr(MSR_IA32_TSC_ADJUST); GUEST_SYNC(adjust); @@ -117,7 +117,7 @@ static void l1_guest_code(void *data) GUEST_DONE(); } -static void report(int64_t val) +static void report(s64 val) { pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); -- cgit v1.2.3 From 0c3a8774692aaf211b6916aaa9ecc5ca1a72c451 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 20 Apr 2026 14:19:51 -0700 Subject: KVM: selftests: Use u32 instead of uint32_t Use u32 instead of uint32_t to make the KVM selftests code more concise and more similar to the kernel (since selftests are primarily developed by kernel developers). This commit was generated with the following command: git ls-files tools/testing/selftests/kvm | xargs sed -i 's/uint32_t/u32/g' Then by manually adjusting whitespace to make checkpatch.pl happy. No functional change intended. Signed-off-by: David Matlack Link: https://patch.msgid.link/20260420212004.3938325-7-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/arch_timer.c | 6 +- tools/testing/selftests/kvm/arm64/arch_timer.c | 6 +- .../selftests/kvm/arm64/arch_timer_edge_cases.c | 26 ++--- .../testing/selftests/kvm/arm64/debug-exceptions.c | 12 +- tools/testing/selftests/kvm/arm64/hypercalls.c | 6 +- .../testing/selftests/kvm/arm64/page_fault_test.c | 6 +- tools/testing/selftests/kvm/arm64/psci_test.c | 2 +- tools/testing/selftests/kvm/arm64/set_id_regs.c | 8 +- tools/testing/selftests/kvm/arm64/smccc_filter.c | 10 +- tools/testing/selftests/kvm/arm64/vgic_init.c | 30 ++--- tools/testing/selftests/kvm/arm64/vgic_irq.c | 91 +++++++-------- tools/testing/selftests/kvm/arm64/vgic_v5.c | 8 +- tools/testing/selftests/kvm/coalesced_io_test.c | 22 ++-- tools/testing/selftests/kvm/dirty_log_perf_test.c | 2 +- tools/testing/selftests/kvm/dirty_log_test.c | 36 +++--- tools/testing/selftests/kvm/guest_print_test.c | 6 +- .../testing/selftests/kvm/hardware_disable_test.c | 6 +- .../selftests/kvm/include/arm64/arch_timer.h | 10 +- tools/testing/selftests/kvm/include/arm64/gic.h | 2 +- .../selftests/kvm/include/arm64/processor.h | 10 +- tools/testing/selftests/kvm/include/arm64/vgic.h | 16 +-- tools/testing/selftests/kvm/include/kvm_util.h | 124 ++++++++++----------- tools/testing/selftests/kvm/include/memstress.h | 10 +- .../selftests/kvm/include/riscv/arch_timer.h | 2 +- tools/testing/selftests/kvm/include/test_util.h | 20 ++-- tools/testing/selftests/kvm/include/timer_test.h | 12 +- tools/testing/selftests/kvm/include/x86/apic.h | 10 +- tools/testing/selftests/kvm/include/x86/evmcs.h | 2 +- .../testing/selftests/kvm/include/x86/processor.h | 100 ++++++++--------- tools/testing/selftests/kvm/include/x86/sev.h | 4 +- tools/testing/selftests/kvm/include/x86/vmx.h | 6 +- tools/testing/selftests/kvm/kvm_page_table_test.c | 2 +- tools/testing/selftests/kvm/lib/arm64/gic.c | 2 +- .../testing/selftests/kvm/lib/arm64/gic_private.h | 22 ++-- tools/testing/selftests/kvm/lib/arm64/gic_v3.c | 80 ++++++------- tools/testing/selftests/kvm/lib/arm64/processor.c | 22 ++-- tools/testing/selftests/kvm/lib/arm64/vgic.c | 22 ++-- tools/testing/selftests/kvm/lib/guest_modes.c | 2 +- tools/testing/selftests/kvm/lib/guest_sprintf.c | 6 +- tools/testing/selftests/kvm/lib/kvm_util.c | 77 +++++++------ .../selftests/kvm/lib/loongarch/processor.c | 6 +- tools/testing/selftests/kvm/lib/memstress.c | 4 +- tools/testing/selftests/kvm/lib/riscv/processor.c | 6 +- tools/testing/selftests/kvm/lib/s390/processor.c | 2 +- tools/testing/selftests/kvm/lib/sparsebit.c | 4 +- tools/testing/selftests/kvm/lib/test_util.c | 14 +-- tools/testing/selftests/kvm/lib/x86/processor.c | 24 ++-- tools/testing/selftests/kvm/lib/x86/sev.c | 4 +- tools/testing/selftests/kvm/lib/x86/vmx.c | 10 +- tools/testing/selftests/kvm/loongarch/arch_timer.c | 20 ++-- tools/testing/selftests/kvm/loongarch/pmu_test.c | 6 +- tools/testing/selftests/kvm/memslot_perf_test.c | 50 ++++----- .../testing/selftests/kvm/pre_fault_memory_test.c | 2 +- tools/testing/selftests/kvm/riscv/arch_timer.c | 6 +- tools/testing/selftests/kvm/s390/memop.c | 18 +-- .../testing/selftests/kvm/set_memory_region_test.c | 8 +- tools/testing/selftests/kvm/steal_time.c | 32 +++--- tools/testing/selftests/kvm/x86/amx_test.c | 4 +- tools/testing/selftests/kvm/x86/aperfmperf_test.c | 2 +- .../selftests/kvm/x86/apic_bus_clock_test.c | 12 +- tools/testing/selftests/kvm/x86/debug_regs.c | 2 +- tools/testing/selftests/kvm/x86/fastops_test.c | 4 +- .../testing/selftests/kvm/x86/feature_msrs_test.c | 8 +- tools/testing/selftests/kvm/x86/hyperv_evmcs.c | 2 +- tools/testing/selftests/kvm/x86/hyperv_features.c | 4 +- tools/testing/selftests/kvm/x86/hyperv_svm_test.c | 2 +- tools/testing/selftests/kvm/x86/kvm_pv_test.c | 2 +- .../selftests/kvm/x86/nested_emulation_test.c | 10 +- .../selftests/kvm/x86/nested_exceptions_test.c | 4 +- .../selftests/kvm/x86/nested_tsc_adjust_test.c | 2 +- .../selftests/kvm/x86/nested_tsc_scaling_test.c | 2 +- .../testing/selftests/kvm/x86/pmu_counters_test.c | 36 +++--- .../selftests/kvm/x86/pmu_event_filter_test.c | 20 ++-- .../kvm/x86/private_mem_conversions_test.c | 8 +- .../selftests/kvm/x86/private_mem_kvm_exits_test.c | 8 +- tools/testing/selftests/kvm/x86/set_boot_cpu_id.c | 6 +- tools/testing/selftests/kvm/x86/sev_init2_tests.c | 4 +- tools/testing/selftests/kvm/x86/sev_smoke_test.c | 10 +- .../selftests/kvm/x86/ucna_injection_test.c | 2 +- .../selftests/kvm/x86/userspace_msr_exit_test.c | 28 ++--- .../selftests/kvm/x86/vmx_apic_access_test.c | 2 +- .../selftests/kvm/x86/vmx_apicv_updates_test.c | 2 +- tools/testing/selftests/kvm/x86/vmx_msrs_test.c | 8 +- tools/testing/selftests/kvm/x86/xapic_ipi_test.c | 16 +-- tools/testing/selftests/kvm/x86/xapic_state_test.c | 4 +- tools/testing/selftests/kvm/x86/xapic_tpr_test.c | 6 +- tools/testing/selftests/kvm/x86/xen_shinfo_test.c | 6 +- 87 files changed, 642 insertions(+), 646 deletions(-) diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c index cf8fb67104f1..90c475a61b22 100644 --- a/tools/testing/selftests/kvm/arch_timer.c +++ b/tools/testing/selftests/kvm/arch_timer.c @@ -78,9 +78,9 @@ static void *test_vcpu_run(void *arg) return NULL; } -static uint32_t test_get_pcpu(void) +static u32 test_get_pcpu(void) { - uint32_t pcpu; + u32 pcpu; unsigned int nproc_conf; cpu_set_t online_cpuset; @@ -98,7 +98,7 @@ static uint32_t test_get_pcpu(void) static int test_migrate_vcpu(unsigned int vcpu_idx) { int ret; - uint32_t new_pcpu = test_get_pcpu(); + u32 new_pcpu = test_get_pcpu(); pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu); diff --git a/tools/testing/selftests/kvm/arm64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c index 3e5f32bd2352..5fa5c0ec2b3e 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer.c @@ -105,7 +105,7 @@ static void guest_validate_irq(unsigned int intid, static void guest_irq_handler(struct ex_regs *regs) { unsigned int intid = gic_get_and_ack_irq(); - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; guest_validate_irq(intid, shared_data); @@ -116,7 +116,7 @@ static void guest_irq_handler(struct ex_regs *regs) static void guest_run_stage(struct test_vcpu_shared_data *shared_data, enum guest_stage stage) { - uint32_t irq_iter, config_iter; + u32 irq_iter, config_iter; shared_data->guest_stage = stage; shared_data->nr_iter = 0; @@ -140,7 +140,7 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data, static void guest_code(void) { - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; local_irq_disable(); diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index 7aed5dd2a347..f8b183f13864 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -29,19 +29,19 @@ static const int32_t TVAL_MAX = INT32_MAX; static const int32_t TVAL_MIN = INT32_MIN; /* After how much time we say there is no IRQ. */ -static const uint32_t TIMEOUT_NO_IRQ_US = 50000; +static const u32 TIMEOUT_NO_IRQ_US = 50000; /* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2 */ static u64 DEF_CNT; /* Number of runs. */ -static const uint32_t NR_TEST_ITERS_DEF = 5; +static const u32 NR_TEST_ITERS_DEF = 5; /* Default wait test time in ms. */ -static const uint32_t WAIT_TEST_MS = 10; +static const u32 WAIT_TEST_MS = 10; /* Default "long" wait test time in ms. */ -static const uint32_t LONG_WAIT_TEST_MS = 100; +static const u32 LONG_WAIT_TEST_MS = 100; /* Shared with IRQ handler. */ struct test_vcpu_shared_data { @@ -115,7 +115,7 @@ enum timer_view { TIMER_TVAL, }; -static void assert_irqs_handled(uint32_t n) +static void assert_irqs_handled(u32 n) { int h = atomic_read(&shared_data.handled); @@ -147,7 +147,7 @@ static void guest_irq_handler(struct ex_regs *regs) unsigned int intid = gic_get_and_ack_irq(); enum arch_timer timer; u64 cnt, cval; - uint32_t ctl; + u32 ctl; bool timer_condition, istatus; if (intid == IAR_SPURIOUS) { @@ -179,7 +179,7 @@ out: } static void set_cval_irq(enum arch_timer timer, u64 cval_cycles, - uint32_t ctl) + u32 ctl) { atomic_set(&shared_data.handled, 0); atomic_set(&shared_data.spurious, 0); @@ -188,7 +188,7 @@ static void set_cval_irq(enum arch_timer timer, u64 cval_cycles, } static void set_tval_irq(enum arch_timer timer, u64 tval_cycles, - uint32_t ctl) + u32 ctl) { atomic_set(&shared_data.handled, 0); atomic_set(&shared_data.spurious, 0); @@ -196,7 +196,7 @@ static void set_tval_irq(enum arch_timer timer, u64 tval_cycles, timer_set_ctl(timer, ctl); } -static void set_xval_irq(enum arch_timer timer, u64 xval, uint32_t ctl, +static void set_xval_irq(enum arch_timer timer, u64 xval, u32 ctl, enum timer_view tv) { switch (tv) { @@ -845,11 +845,11 @@ static void guest_code(enum arch_timer timer) static cpu_set_t default_cpuset; -static uint32_t next_pcpu(void) +static u32 next_pcpu(void) { - uint32_t max = get_nprocs(); - uint32_t cur = sched_getcpu(); - uint32_t next = cur; + u32 max = get_nprocs(); + u32 cur = sched_getcpu(); + u32 next = cur; cpu_set_t cpuset = default_cpuset; TEST_ASSERT(max > 1, "Need at least two physical cpus"); diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index c19f143bed25..5931915ea00a 100644 --- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -151,7 +151,7 @@ static void enable_monitor_debug_exceptions(void) static void install_wp(uint8_t wpn, u64 addr) { - uint32_t wcr; + u32 wcr; wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; write_dbgwcr(wpn, wcr); @@ -164,7 +164,7 @@ static void install_wp(uint8_t wpn, u64 addr) static void install_hw_bp(uint8_t bpn, u64 addr) { - uint32_t bcr; + u32 bcr; bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E; write_dbgbcr(bpn, bcr); @@ -177,7 +177,7 @@ static void install_hw_bp(uint8_t bpn, u64 addr) static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, u64 addr, u64 ctx) { - uint32_t wcr; + u32 wcr; u64 ctx_bcr; /* Setup a context-aware breakpoint for Linked Context ID Match */ @@ -188,7 +188,7 @@ static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, u64 addr, /* Setup a linked watchpoint (linked to the context-aware breakpoint) */ wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E | - DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT); + DBGWCR_WT_LINK | ((u32)ctx_bp << DBGWCR_LBN_SHIFT); write_dbgwcr(addr_wp, wcr); write_dbgwvr(addr_wp, addr); isb(); @@ -199,7 +199,7 @@ static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, u64 addr, void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, u64 addr, u64 ctx) { - uint32_t addr_bcr, ctx_bcr; + u32 addr_bcr, ctx_bcr; /* Setup a context-aware breakpoint for Linked Context ID Match */ ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | @@ -213,7 +213,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, u64 addr, */ addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | DBGBCR_BT_ADDR_LINK_CTX | - ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT); + ((u32)ctx_bp << DBGBCR_LBN_SHIFT); write_dbgbcr(addr_bp, addr_bcr); write_dbgbvr(addr_bp, addr); isb(); diff --git a/tools/testing/selftests/kvm/arm64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c index 4f24105baaf3..5d96cdf382c4 100644 --- a/tools/testing/selftests/kvm/arm64/hypercalls.c +++ b/tools/testing/selftests/kvm/arm64/hypercalls.c @@ -59,7 +59,7 @@ enum test_stage { static int stage = TEST_STAGE_REG_IFACE; struct test_hvc_info { - uint32_t func_id; + u32 func_id; u64 arg1; }; @@ -152,8 +152,8 @@ static void guest_code(void) } struct st_time { - uint32_t rev; - uint32_t attr; + u32 rev; + u32 attr; u64 st_time; }; diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index 5d629ea95c4d..cb52ac8aa0a5 100644 --- a/tools/testing/selftests/kvm/arm64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -59,8 +59,8 @@ struct test_desc { void (*iabt_handler)(struct ex_regs *regs); void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run); void (*fail_vcpu_run_handler)(int ret); - uint32_t pt_memslot_flags; - uint32_t data_memslot_flags; + u32 pt_memslot_flags; + u32 data_memslot_flags; bool skip; struct event_cnt expected_events; }; @@ -510,7 +510,7 @@ void fail_vcpu_run_mmio_no_syndrome_handler(int ret) events.fail_vcpu_runs += 1; } -typedef uint32_t aarch64_insn_t; +typedef u32 aarch64_insn_t; extern aarch64_insn_t __exec_test[2]; noinline void __return_0x77(void) diff --git a/tools/testing/selftests/kvm/arm64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c index 0a67dc3136d4..e775faf20868 100644 --- a/tools/testing/selftests/kvm/arm64/psci_test.c +++ b/tools/testing/selftests/kvm/arm64/psci_test.c @@ -61,7 +61,7 @@ static u64 psci_system_off2(u64 type, u64 cookie) return res.a0; } -static u64 psci_features(uint32_t func_id) +static u64 psci_features(u32 func_id) { struct arm_smccc_res res; diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 4402f317f7d9..8bf9c717b698 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -43,7 +43,7 @@ struct reg_ftr_bits { }; struct test_feature_reg { - uint32_t reg; + u32 reg; const struct reg_ftr_bits *ftr_bits; }; @@ -457,7 +457,7 @@ static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) for (int i = 0; i < ARRAY_SIZE(test_regs); i++) { const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits; - uint32_t reg_id = test_regs[i].reg; + u32 reg_id = test_regs[i].reg; u64 reg = KVM_ARM64_SYS_REG(reg_id); int idx; @@ -643,7 +643,7 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac no longer 0xF\n"); } -static u64 reset_mutable_bits(uint32_t id, u64 val) +static u64 reset_mutable_bits(u32 id, u64 val) { struct test_feature_reg *reg = NULL; @@ -771,7 +771,7 @@ static void test_vcpu_non_ftr_id_regs(struct kvm_vcpu *vcpu) ksft_test_result_pass("%s\n", __func__); } -static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding) +static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, u32 encoding) { size_t idx = encoding_to_range_idx(encoding); u64 observed; diff --git a/tools/testing/selftests/kvm/arm64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c index 1763b9d45400..21e41880261b 100644 --- a/tools/testing/selftests/kvm/arm64/smccc_filter.c +++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c @@ -37,7 +37,7 @@ static bool test_runs_at_el2(void) for (conduit = test_runs_at_el2() ? SMC_INSN : HVC_INSN; \ conduit <= SMC_INSN; conduit++) -static void guest_main(uint32_t func_id, enum smccc_conduit conduit) +static void guest_main(u32 func_id, enum smccc_conduit conduit) { struct arm_smccc_res res; @@ -49,7 +49,7 @@ static void guest_main(uint32_t func_id, enum smccc_conduit conduit) GUEST_SYNC(res.a0); } -static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions, +static int __set_smccc_filter(struct kvm_vm *vm, u32 start, u32 nr_functions, enum kvm_smccc_filter_action action) { struct kvm_smccc_filter filter = { @@ -62,7 +62,7 @@ static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_fun KVM_ARM_VM_SMCCC_FILTER, &filter); } -static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions, +static void set_smccc_filter(struct kvm_vm *vm, u32 start, u32 nr_functions, enum kvm_smccc_filter_action action) { int ret = __set_smccc_filter(vm, start, nr_functions, action); @@ -112,7 +112,7 @@ static void test_filter_reserved_range(void) { struct kvm_vcpu *vcpu; struct kvm_vm *vm = setup_vm(&vcpu); - uint32_t smc64_fn; + u32 smc64_fn; int r; r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1, @@ -217,7 +217,7 @@ static void test_filter_denied(void) } } -static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id, +static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, u32 func_id, enum smccc_conduit conduit) { struct kvm_run *run = vcpu->run; diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c index 0d8ddb371b89..47e34b43afb2 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_init.c +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c @@ -27,7 +27,7 @@ struct vm_gic { struct kvm_vm *vm; int gic_fd; - uint32_t gic_dev_type; + u32 gic_dev_type; }; static u64 max_phys_size; @@ -39,17 +39,17 @@ static u64 max_phys_size; static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset, int want, const char *msg) { - uint32_t ignored_val; + u32 ignored_val; int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, REG_OFFSET(vcpu, offset), &ignored_val); TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want); } -static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want, +static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, u32 want, const char *msg) { - uint32_t val; + u32 val; kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, REG_OFFSET(vcpu, offset), &val); @@ -71,8 +71,8 @@ static int run_vcpu(struct kvm_vcpu *vcpu) return __vcpu_run(vcpu) ? -errno : 0; } -static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type, - uint32_t nr_vcpus, +static struct vm_gic vm_gic_create_with_vcpus(u32 gic_dev_type, + u32 nr_vcpus, struct kvm_vcpu *vcpus[]) { struct vm_gic v; @@ -84,7 +84,7 @@ static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type, return v; } -static struct vm_gic vm_gic_create_barebones(uint32_t gic_dev_type) +static struct vm_gic vm_gic_create_barebones(u32 gic_dev_type) { struct vm_gic v; @@ -332,7 +332,7 @@ static void subtest_v3_redist_regions(struct vm_gic *v) * VGIC KVM device is created and initialized before the secondary CPUs * get created */ -static void test_vgic_then_vcpus(uint32_t gic_dev_type) +static void test_vgic_then_vcpus(u32 gic_dev_type) { struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; @@ -353,7 +353,7 @@ static void test_vgic_then_vcpus(uint32_t gic_dev_type) } /* All the VCPUs are created before the VGIC KVM device gets initialized */ -static void test_vcpus_then_vgic(uint32_t gic_dev_type) +static void test_vcpus_then_vgic(u32 gic_dev_type) { struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; @@ -518,7 +518,7 @@ static void test_v3_typer_accesses(void) } static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus, - uint32_t vcpuids[]) + u32 vcpuids[]) { struct vm_gic v; int i; @@ -544,7 +544,7 @@ static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus, */ static void test_v3_last_bit_redist_regions(void) { - uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; + u32 vcpuids[] = { 0, 3, 5, 4, 1, 2 }; struct vm_gic v; u64 addr; @@ -578,7 +578,7 @@ static void test_v3_last_bit_redist_regions(void) /* Test last bit with legacy region */ static void test_v3_last_bit_single_rdist(void) { - uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; + u32 vcpuids[] = { 0, 3, 5, 4, 1, 2 }; struct vm_gic v; u64 addr; @@ -717,11 +717,11 @@ static void test_v3_nassgicap(void) /* * Returns 0 if it's possible to create GIC device of a given type (V2 or V3). */ -int test_kvm_device(uint32_t gic_dev_type) +int test_kvm_device(u32 gic_dev_type) { struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; - uint32_t other; + u32 other; int ret; v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus); @@ -968,7 +968,7 @@ static void test_v3_sysregs(void) kvm_vm_free(vm); } -void run_tests(uint32_t gic_dev_type) +void run_tests(u32 gic_dev_type) { test_vcpus_then_vgic(gic_dev_type); test_vgic_then_vcpus(gic_dev_type); diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index eae4aeb44926..8a9dd79123d4 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -24,12 +24,12 @@ * function. */ struct test_args { - uint32_t nr_irqs; /* number of KVM supported IRQs. */ + u32 nr_irqs; /* number of KVM supported IRQs. */ bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */ bool level_sensitive; /* 1 is level, 0 is edge */ int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */ bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */ - uint32_t shared_data; + u32 shared_data; }; /* @@ -64,15 +64,15 @@ typedef enum { struct kvm_inject_args { kvm_inject_cmd cmd; - uint32_t first_intid; - uint32_t num; + u32 first_intid; + u32 num; int level; bool expect_failure; }; /* Used on the guest side to perform the hypercall. */ -static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid, - uint32_t num, int level, bool expect_failure); +static void kvm_inject_call(kvm_inject_cmd cmd, u32 first_intid, + u32 num, int level, bool expect_failure); /* Used on the host side to get the hypercall info. */ static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc, @@ -134,7 +134,7 @@ static struct kvm_inject_desc set_active_fns[] = { /* Shared between the guest main thread and the IRQ handlers. */ volatile u64 irq_handled; -volatile uint32_t irqnr_received[MAX_SPI + 1]; +volatile u32 irqnr_received[MAX_SPI + 1]; static void reset_stats(void) { @@ -159,11 +159,11 @@ static void gic_write_ap1r0(u64 val) isb(); } -static void guest_set_irq_line(uint32_t intid, uint32_t level); +static void guest_set_irq_line(u32 intid, u32 level); static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive) { - uint32_t intid = gic_get_and_ack_irq(); + u32 intid = gic_get_and_ack_irq(); if (intid == IAR_SPURIOUS) return; @@ -189,8 +189,8 @@ static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive) GUEST_ASSERT(!gic_irq_get_pending(intid)); } -static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid, - uint32_t num, int level, bool expect_failure) +static void kvm_inject_call(kvm_inject_cmd cmd, u32 first_intid, + u32 num, int level, bool expect_failure) { struct kvm_inject_args args = { .cmd = cmd, @@ -204,7 +204,7 @@ static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid, #define GUEST_ASSERT_IAR_EMPTY() \ do { \ - uint32_t _intid; \ + u32 _intid; \ _intid = gic_get_and_ack_irq(); \ GUEST_ASSERT(_intid == IAR_SPURIOUS); \ } while (0) @@ -237,13 +237,13 @@ static void reset_priorities(struct test_args *args) gic_set_priority(i, IRQ_DEFAULT_PRIO_REG); } -static void guest_set_irq_line(uint32_t intid, uint32_t level) +static void guest_set_irq_line(u32 intid, u32 level) { kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false); } static void test_inject_fail(struct test_args *args, - uint32_t intid, kvm_inject_cmd cmd) + u32 intid, kvm_inject_cmd cmd) { reset_stats(); @@ -255,10 +255,10 @@ static void test_inject_fail(struct test_args *args, } static void guest_inject(struct test_args *args, - uint32_t first_intid, uint32_t num, - kvm_inject_cmd cmd) + u32 first_intid, u32 num, + kvm_inject_cmd cmd) { - uint32_t i; + u32 i; reset_stats(); @@ -292,10 +292,10 @@ static void guest_inject(struct test_args *args, * deactivated yet. */ static void guest_restore_active(struct test_args *args, - uint32_t first_intid, uint32_t num, - kvm_inject_cmd cmd) + u32 first_intid, u32 num, + kvm_inject_cmd cmd) { - uint32_t prio, intid, ap1r; + u32 prio, intid, ap1r; int i; /* @@ -342,9 +342,9 @@ static void guest_restore_active(struct test_args *args, * This function should only be used in test_inject_preemption (with IRQs * masked). */ -static uint32_t wait_for_and_activate_irq(void) +static u32 wait_for_and_activate_irq(void) { - uint32_t intid; + u32 intid; do { asm volatile("wfi" : : : "memory"); @@ -360,11 +360,11 @@ static uint32_t wait_for_and_activate_irq(void) * interrupts for the whole test. */ static void test_inject_preemption(struct test_args *args, - uint32_t first_intid, int num, + u32 first_intid, int num, const unsigned long *exclude, kvm_inject_cmd cmd) { - uint32_t intid, prio, step = KVM_PRIO_STEPS; + u32 intid, prio, step = KVM_PRIO_STEPS; int i; /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs @@ -379,7 +379,7 @@ static void test_inject_preemption(struct test_args *args, local_irq_disable(); for (i = 0; i < num; i++) { - uint32_t tmp; + u32 tmp; intid = i + first_intid; if (exclude && test_bit(i, exclude)) @@ -431,7 +431,7 @@ static void test_inject_preemption(struct test_args *args, static void test_injection(struct test_args *args, struct kvm_inject_desc *f) { - uint32_t nr_irqs = args->nr_irqs; + u32 nr_irqs = args->nr_irqs; if (f->sgi) { guest_inject(args, MIN_SGI, 1, f->cmd); @@ -451,7 +451,7 @@ static void test_injection(struct test_args *args, struct kvm_inject_desc *f) static void test_injection_failure(struct test_args *args, struct kvm_inject_desc *f) { - uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, }; + u32 bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, }; int i; for (i = 0; i < ARRAY_SIZE(bad_intid); i++) @@ -490,7 +490,7 @@ static void test_restore_active(struct test_args *args, struct kvm_inject_desc * static void guest_code(struct test_args *args) { - uint32_t i, nr_irqs = args->nr_irqs; + u32 i, nr_irqs = args->nr_irqs; bool level_sensitive = args->level_sensitive; struct kvm_inject_desc *f, *inject_fns; @@ -529,8 +529,8 @@ static void guest_code(struct test_args *args) GUEST_DONE(); } -static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level, - struct test_args *test_args, bool expect_failure) +static void kvm_irq_line_check(struct kvm_vm *vm, u32 intid, int level, + struct test_args *test_args, bool expect_failure) { int ret; @@ -548,8 +548,8 @@ static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level, } } -void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level, - bool expect_failure) +void kvm_irq_set_level_info_check(int gic_fd, u32 intid, int level, + bool expect_failure) { if (!expect_failure) { kvm_irq_set_level_info(gic_fd, intid, level); @@ -573,8 +573,9 @@ void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level, } static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm, - uint32_t intid, uint32_t num, uint32_t kvm_max_routes, - bool expect_failure) + u32 intid, u32 num, + u32 kvm_max_routes, + bool expect_failure) { struct kvm_irq_routing *routing; int ret; @@ -602,7 +603,7 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm, } } -static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid, +static void kvm_irq_write_ispendr_check(int gic_fd, u32 intid, struct kvm_vcpu *vcpu, bool expect_failure) { @@ -618,8 +619,8 @@ static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid, } static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, - uint32_t intid, uint32_t num, uint32_t kvm_max_routes, - bool expect_failure) + u32 intid, u32 num, u32 kvm_max_routes, + bool expect_failure) { int fd[MAX_SPI]; u64 val; @@ -673,13 +674,13 @@ static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd, struct test_args *test_args) { kvm_inject_cmd cmd = inject_args->cmd; - uint32_t intid = inject_args->first_intid; - uint32_t num = inject_args->num; + u32 intid = inject_args->first_intid; + u32 num = inject_args->num; int level = inject_args->level; bool expect_failure = inject_args->expect_failure; struct kvm_vm *vm = vcpu->vm; u64 tmp; - uint32_t i; + u32 i; /* handles the valid case: intid=0xffffffff num=1 */ assert(intid < UINT_MAX - num || num == 1); @@ -745,7 +746,7 @@ static void print_args(struct test_args *args) args->eoi_split); } -static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) +static void test_vgic(u32 nr_irqs, bool level_sensitive, bool eoi_split) { struct ucall uc; int gic_fd; @@ -810,7 +811,7 @@ static void guest_code_asym_dir(struct test_args *args, int cpuid) gic_set_priority_mask(CPU_PRIO_MASK); if (cpuid == 0) { - uint32_t intid; + u32 intid; local_irq_disable(); @@ -848,7 +849,7 @@ static void guest_code_asym_dir(struct test_args *args, int cpuid) static void guest_code_group_en(struct test_args *args, int cpuid) { - uint32_t intid; + u32 intid; gic_init(GIC_V3, 2); @@ -896,7 +897,7 @@ static void guest_code_group_en(struct test_args *args, int cpuid) static void guest_code_timer_spi(struct test_args *args, int cpuid) { - uint32_t intid; + u32 intid; u64 val; gic_init(GIC_V3, 2); @@ -1033,7 +1034,7 @@ static void help(const char *name) int main(int argc, char **argv) { - uint32_t nr_irqs = 64; + u32 nr_irqs = 64; bool default_args = true; bool level_sensitive = false; int opt; diff --git a/tools/testing/selftests/kvm/arm64/vgic_v5.c b/tools/testing/selftests/kvm/arm64/vgic_v5.c index 80be71704450..d785b660d847 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_v5.c +++ b/tools/testing/selftests/kvm/arm64/vgic_v5.c @@ -17,7 +17,7 @@ struct vm_gic { struct kvm_vm *vm; int gic_fd; - uint32_t gic_dev_type; + u32 gic_dev_type; }; static u64 max_phys_size; @@ -96,7 +96,7 @@ static void vm_gic_destroy(struct vm_gic *v) kvm_vm_free(v->vm); } -static void test_vgic_v5_ppis(uint32_t gic_dev_type) +static void test_vgic_v5_ppis(u32 gic_dev_type) { struct kvm_vcpu *vcpus[NR_VCPUS]; struct ucall uc; @@ -173,7 +173,7 @@ done: /* * Returns 0 if it's possible to create GIC device of a given type (V5). */ -int test_kvm_device(uint32_t gic_dev_type) +int test_kvm_device(u32 gic_dev_type) { struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; @@ -199,7 +199,7 @@ int test_kvm_device(uint32_t gic_dev_type) return 0; } -void run_tests(uint32_t gic_dev_type) +void run_tests(u32 gic_dev_type) { pr_info("Test VGICv5 PPIs\n"); test_vgic_v5_ppis(gic_dev_type); diff --git a/tools/testing/selftests/kvm/coalesced_io_test.c b/tools/testing/selftests/kvm/coalesced_io_test.c index ed6a66020b1e..f5ab412d2042 100644 --- a/tools/testing/selftests/kvm/coalesced_io_test.c +++ b/tools/testing/selftests/kvm/coalesced_io_test.c @@ -14,7 +14,7 @@ struct kvm_coalesced_io { struct kvm_coalesced_mmio_ring *ring; - uint32_t ring_size; + u32 ring_size; u64 mmio_gpa; u64 *mmio; @@ -70,13 +70,13 @@ static void guest_code(struct kvm_coalesced_io *io) static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu, struct kvm_coalesced_io *io, - uint32_t ring_start, - uint32_t expected_exit) + u32 ring_start, + u32 expected_exit) { const bool want_pio = expected_exit == KVM_EXIT_IO; struct kvm_coalesced_mmio_ring *ring = io->ring; struct kvm_run *run = vcpu->run; - uint32_t pio_value; + u32 pio_value; WRITE_ONCE(ring->first, ring_start); WRITE_ONCE(ring->last, ring_start); @@ -88,7 +88,7 @@ static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu, * data_offset is garbage, e.g. an MMIO gpa. */ if (run->exit_reason == KVM_EXIT_IO) - pio_value = *(uint32_t *)((void *)run + run->io.data_offset); + pio_value = *(u32 *)((void *)run + run->io.data_offset); else pio_value = 0; @@ -111,8 +111,8 @@ static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu, static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu, struct kvm_coalesced_io *io, - uint32_t ring_start, - uint32_t expected_exit) + u32 ring_start, + u32 expected_exit) { struct kvm_coalesced_mmio_ring *ring = io->ring; int i; @@ -124,18 +124,18 @@ static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu, ring->first, ring->last, io->ring_size, ring_start); for (i = 0; i < io->ring_size - 1; i++) { - uint32_t idx = (ring->first + i) % io->ring_size; + u32 idx = (ring->first + i) % io->ring_size; struct kvm_coalesced_mmio *entry = &ring->coalesced_mmio[idx]; #ifdef __x86_64__ if (i & 1) TEST_ASSERT(entry->phys_addr == io->pio_port && entry->len == 4 && entry->pio && - *(uint32_t *)entry->data == io->pio_port + i, + *(u32 *)entry->data == io->pio_port + i, "Wanted 4-byte port I/O 0x%x = 0x%x in entry %u, got %u-byte %s 0x%llx = 0x%x", io->pio_port, io->pio_port + i, i, entry->len, entry->pio ? "PIO" : "MMIO", - entry->phys_addr, *(uint32_t *)entry->data); + entry->phys_addr, *(u32 *)entry->data); else #endif TEST_ASSERT(entry->phys_addr == io->mmio_gpa && @@ -148,7 +148,7 @@ static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu, } static void test_coalesced_io(struct kvm_vcpu *vcpu, - struct kvm_coalesced_io *io, uint32_t ring_start) + struct kvm_coalesced_io *io, u32 ring_start) { struct kvm_coalesced_mmio_ring *ring = io->ring; diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 7d170694e9c1..ef779fa91827 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -97,7 +97,7 @@ struct test_params { bool partition_vcpu_memory_access; enum vm_mem_backing_src_type backing_src; int slots; - uint32_t write_percent; + u32 write_percent; bool random_access; }; diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index fae924ddb64e..12446a4b6e8d 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -236,7 +236,7 @@ static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; /* Logging mode for current run */ static enum log_mode_t host_log_mode; static pthread_t vcpu_thread; -static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT; +static u32 test_dirty_ring_count = TEST_DIRTY_RING_COUNT; static bool clear_log_supported(void) { @@ -255,15 +255,15 @@ static void clear_log_create_vm_done(struct kvm_vm *vm) } static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages, - uint32_t *unused) + void *bitmap, u32 num_pages, + u32 *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); } static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages, - uint32_t *unused) + void *bitmap, u32 num_pages, + u32 *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages); @@ -298,7 +298,7 @@ static bool dirty_ring_supported(void) static void dirty_ring_create_vm_done(struct kvm_vm *vm) { u64 pages; - uint32_t limit; + u32 limit; /* * We rely on vcpu exit due to full dirty ring state. Adjust @@ -333,12 +333,12 @@ static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn) smp_store_release(&gfn->flags, KVM_DIRTY_GFN_F_RESET); } -static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, - int slot, void *bitmap, - uint32_t num_pages, uint32_t *fetch_index) +static u32 dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, + int slot, void *bitmap, + u32 num_pages, u32 *fetch_index) { struct kvm_dirty_gfn *cur; - uint32_t count = 0; + u32 count = 0; while (true) { cur = &dirty_gfns[*fetch_index % test_dirty_ring_count]; @@ -359,10 +359,10 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, } static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages, - uint32_t *ring_buf_idx) + void *bitmap, u32 num_pages, + u32 *ring_buf_idx) { - uint32_t count, cleared; + u32 count, cleared; /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu), @@ -404,8 +404,8 @@ struct log_mode { void (*create_vm_done)(struct kvm_vm *vm); /* Hook to collect the dirty pages into the bitmap provided */ void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages, - uint32_t *ring_buf_idx); + void *bitmap, u32 num_pages, + u32 *ring_buf_idx); /* Hook to call when after each vcpu run */ void (*after_vcpu_run)(struct kvm_vcpu *vcpu); } log_modes[LOG_MODE_NUM] = { @@ -459,8 +459,8 @@ static void log_mode_create_vm_done(struct kvm_vm *vm) } static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages, - uint32_t *ring_buf_idx) + void *bitmap, u32 num_pages, + u32 *ring_buf_idx) { struct log_mode *mode = &log_modes[host_log_mode]; @@ -601,7 +601,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vcpu *vcpu; struct kvm_vm *vm; unsigned long *bmap[2]; - uint32_t ring_buf_idx = 0; + u32 ring_buf_idx = 0; int sem_val; if (!log_mode_supported()) { diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c index b059abcf1a5b..79d3fc326e91 100644 --- a/tools/testing/selftests/kvm/guest_print_test.c +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -29,9 +29,9 @@ TYPE(test_type_i64, I64, "%ld", s64) \ TYPE(test_type_u64, U64u, "%lu", u64) \ TYPE(test_type_x64, U64x, "0x%lx", u64) \ TYPE(test_type_X64, U64X, "0x%lX", u64) \ -TYPE(test_type_u32, U32u, "%u", uint32_t) \ -TYPE(test_type_x32, U32x, "0x%x", uint32_t) \ -TYPE(test_type_X32, U32X, "0x%X", uint32_t) \ +TYPE(test_type_u32, U32u, "%u", u32) \ +TYPE(test_type_x32, U32x, "0x%x", u32) \ +TYPE(test_type_X32, U32X, "0x%X", u32) \ TYPE(test_type_int, INT, "%d", int) \ TYPE(test_type_char, CHAR, "%c", char) \ TYPE(test_type_str, STR, "'%s'", const char *) \ diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index 94bd6ed24cf3..3147f5c97e94 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -80,7 +80,7 @@ static inline void check_join(pthread_t thread, void **retval) TEST_ASSERT(r == 0, "%s: failed to join thread", __func__); } -static void run_test(uint32_t run) +static void run_test(u32 run) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -88,7 +88,7 @@ static void run_test(uint32_t run) pthread_t threads[VCPU_NUM]; pthread_t throw_away; void *b; - uint32_t i, j; + u32 i, j; CPU_ZERO(&cpu_set); for (i = 0; i < VCPU_NUM; i++) @@ -149,7 +149,7 @@ void wait_for_child_setup(pid_t pid) int main(int argc, char **argv) { - uint32_t i; + u32 i; int s, r; pid_t pid; diff --git a/tools/testing/selftests/kvm/include/arm64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h index 290b65e58d3c..4fe0e0d07584 100644 --- a/tools/testing/selftests/kvm/include/arm64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h @@ -26,7 +26,7 @@ enum arch_timer { #define cycles_to_usec(cycles) \ ((u64)(cycles) * 1000000 / timer_get_cntfrq()) -static inline uint32_t timer_get_cntfrq(void) +static inline u32 timer_get_cntfrq(void) { return read_sysreg(cntfrq_el0); } @@ -111,7 +111,7 @@ static inline int32_t timer_get_tval(enum arch_timer timer) return 0; } -static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl) +static inline void timer_set_ctl(enum arch_timer timer, u32 ctl) { switch (timer) { case VIRTUAL: @@ -127,7 +127,7 @@ static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl) isb(); } -static inline uint32_t timer_get_ctl(enum arch_timer timer) +static inline u32 timer_get_ctl(enum arch_timer timer) { switch (timer) { case VIRTUAL: @@ -142,7 +142,7 @@ static inline uint32_t timer_get_ctl(enum arch_timer timer) return 0; } -static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec) +static inline void timer_set_next_cval_ms(enum arch_timer timer, u32 msec) { u64 now_ct = timer_get_cntct(timer); u64 next_ct = now_ct + msec_to_cycles(msec); @@ -150,7 +150,7 @@ static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec) timer_set_cval(timer, next_ct); } -static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec) +static inline void timer_set_next_tval_ms(enum arch_timer timer, u32 msec) { timer_set_tval(timer, msec_to_cycles(msec)); } diff --git a/tools/testing/selftests/kvm/include/arm64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h index c36a4eafeb5a..615745093c98 100644 --- a/tools/testing/selftests/kvm/include/arm64/gic.h +++ b/tools/testing/selftests/kvm/include/arm64/gic.h @@ -49,7 +49,7 @@ void gic_set_dir(unsigned int intid); */ void gic_set_eoi_split(bool split); void gic_set_priority_mask(u64 mask); -void gic_set_priority(uint32_t intid, uint32_t prio); +void gic_set_priority(u32 intid, u32 prio); void gic_irq_set_active(unsigned int intid); void gic_irq_clear_active(unsigned int intid); bool gic_irq_get_active(unsigned int intid); diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 618d112cb08a..b8a902ba8573 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -128,7 +128,7 @@ #define PTE_ADDR_51_50_LPA2_SHIFT 8 void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); -struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, +struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, u32 vcpu_id, struct kvm_vcpu_init *init, void *guest_code); struct ex_regs { @@ -167,8 +167,8 @@ enum { (v) == VECTOR_SYNC_LOWER_64 || \ (v) == VECTOR_SYNC_LOWER_32) -void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, - uint32_t *ipa16k, uint32_t *ipa64k); +void aarch64_get_supported_page_sizes(u32 ipa, u32 *ipa4k, + u32 *ipa16k, u32 *ipa64k); void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); @@ -287,7 +287,7 @@ struct arm_smccc_res { * @res: pointer to write the return values from registers x0-x3 * */ -void smccc_hvc(uint32_t function_id, u64 arg0, u64 arg1, +void smccc_hvc(u32 function_id, u64 arg0, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, struct arm_smccc_res *res); @@ -298,7 +298,7 @@ void smccc_hvc(uint32_t function_id, u64 arg0, u64 arg1, * @res: pointer to write the return values from registers x0-x3 * */ -void smccc_smc(uint32_t function_id, u64 arg0, u64 arg1, +void smccc_smc(u32 function_id, u64 arg0, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, struct arm_smccc_res *res); diff --git a/tools/testing/selftests/kvm/include/arm64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h index 2fde1fd8f96a..1f8b04373987 100644 --- a/tools/testing/selftests/kvm/include/arm64/vgic.h +++ b/tools/testing/selftests/kvm/include/arm64/vgic.h @@ -17,21 +17,21 @@ index) bool kvm_supports_vgic_v3(void); -int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); +int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, u32 nr_irqs); void __vgic_v3_init(int fd); -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, u32 nr_irqs); #define VGIC_MAX_RESERVED 1023 -void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level); -int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level); +void kvm_irq_set_level_info(int gic_fd, u32 intid, int level); +int _kvm_irq_set_level_info(int gic_fd, u32 intid, int level); -void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level); -int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level); +void kvm_arm_irq_line(struct kvm_vm *vm, u32 intid, int level); +int _kvm_arm_irq_line(struct kvm_vm *vm, u32 intid, int level); /* The vcpu arg only applies to private interrupts. */ -void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); -void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); +void kvm_irq_write_ispendr(int gic_fd, u32 intid, struct kvm_vcpu *vcpu); +void kvm_irq_write_isactiver(int gic_fd, u32 intid, struct kvm_vcpu *vcpu); #define KVM_IRQCHIP_NUM_PINS (1020 - 32) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 40bac473b460..bdb91f627433 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -58,7 +58,7 @@ struct kvm_binary_stats { struct kvm_vcpu { struct list_head list; - uint32_t id; + u32 id; int fd; struct kvm_vm *vm; struct kvm_run *run; @@ -70,8 +70,8 @@ struct kvm_vcpu { #endif struct kvm_binary_stats stats; struct kvm_dirty_gfn *dirty_gfns; - uint32_t fetch_index; - uint32_t dirty_gfns_count; + u32 fetch_index; + u32 dirty_gfns_count; }; struct userspace_mem_regions { @@ -113,7 +113,7 @@ struct kvm_vm { bool has_irqchip; gpa_t ucall_mmio_addr; gva_t handlers; - uint32_t dirty_ring_size; + u32 dirty_ring_size; u64 gpa_tag_mask; /* @@ -132,7 +132,7 @@ struct kvm_vm { * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE] * memslot. */ - uint32_t memslots[NR_MEM_REGIONS]; + u32 memslots[NR_MEM_REGIONS]; }; struct vcpu_reg_sublist { @@ -164,7 +164,7 @@ struct vcpu_reg_list { else struct userspace_mem_region * -memslot2region(struct kvm_vm *vm, uint32_t memslot); +memslot2region(struct kvm_vm *vm, u32 memslot); static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm, enum kvm_mem_region_type type) @@ -213,7 +213,7 @@ enum vm_guest_mode { }; struct vm_shape { - uint32_t type; + u32 type; uint8_t mode; uint8_t pad0; uint16_t pad1; @@ -404,14 +404,14 @@ static inline int vm_check_cap(struct kvm_vm *vm, long cap) return ret; } -static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, u64 arg0) +static inline int __vm_enable_cap(struct kvm_vm *vm, u32 cap, u64 arg0) { struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); } -static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, u64 arg0) +static inline void vm_enable_cap(struct kvm_vm *vm, u32 cap, u64 arg0) { struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; @@ -466,8 +466,8 @@ static inline void vm_guest_mem_allocate(struct kvm_vm *vm, u64 gpa, vm_guest_mem_fallocate(vm, gpa, size, false); } -void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); -const char *vm_guest_mode_string(uint32_t i); +void vm_enable_dirty_ring(struct kvm_vm *vm, u32 ring_size); +const char *vm_guest_mode_string(u32 i); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp); @@ -485,7 +485,7 @@ static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) } static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, - u64 first_page, uint32_t num_pages) + u64 first_page, u32 num_pages) { struct kvm_clear_dirty_log args = { .dirty_bitmap = log, @@ -497,7 +497,7 @@ static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args); } -static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) +static inline u32 kvm_vm_reset_dirty_ring(struct kvm_vm *vm) { return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL); } @@ -536,8 +536,8 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm) return fd; } -static inline int __kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, - uint32_t flags) +static inline int __kvm_irqfd(struct kvm_vm *vm, u32 gsi, int eventfd, + u32 flags) { struct kvm_irqfd irqfd = { .fd = eventfd, @@ -549,20 +549,19 @@ static inline int __kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, return __vm_ioctl(vm, KVM_IRQFD, &irqfd); } -static inline void kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, - uint32_t flags) +static inline void kvm_irqfd(struct kvm_vm *vm, u32 gsi, int eventfd, u32 flags) { int ret = __kvm_irqfd(vm, gsi, eventfd, flags); TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_IRQFD, ret, vm); } -static inline void kvm_assign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd) +static inline void kvm_assign_irqfd(struct kvm_vm *vm, u32 gsi, int eventfd) { kvm_irqfd(vm, gsi, eventfd, 0); } -static inline void kvm_deassign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd) +static inline void kvm_deassign_irqfd(struct kvm_vm *vm, u32 gsi, int eventfd) { kvm_irqfd(vm, gsi, eventfd, KVM_IRQFD_FLAG_DEASSIGN); } @@ -685,23 +684,22 @@ static inline int vm_create_guest_memfd(struct kvm_vm *vm, u64 size, return fd; } -void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, +void vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, u64 gpa, u64 size, void *hva); -int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, +int __vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, u64 gpa, u64 size, void *hva); -void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, +void vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, u64 gpa, u64 size, void *hva, - uint32_t guest_memfd, u64 guest_memfd_offset); -int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + u32 guest_memfd, u64 guest_memfd_offset); +int __vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, u64 gpa, u64 size, void *hva, - uint32_t guest_memfd, u64 guest_memfd_offset); + u32 guest_memfd, u64 guest_memfd_offset); void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - u64 gpa, uint32_t slot, u64 npages, - uint32_t flags); + u64 gpa, u32 slot, u64 npages, u32 flags); void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - u64 gpa, uint32_t slot, u64 npages, uint32_t flags, + u64 gpa, u32 slot, u64 npages, u32 flags, int guest_memfd_fd, u64 guest_memfd_offset); #ifndef vm_arch_has_protected_memory @@ -711,11 +709,11 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) } #endif -void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); -void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot); -void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, u64 new_gpa); -void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); -struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); +void vm_mem_region_set_flags(struct kvm_vm *vm, u32 slot, u32 flags); +void vm_mem_region_reload(struct kvm_vm *vm, u32 slot); +void vm_mem_region_move(struct kvm_vm *vm, u32 slot, u64 new_gpa); +void vm_mem_region_delete(struct kvm_vm *vm, u32 slot); +struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id); void vm_populate_vaddr_bitmap(struct kvm_vm *vm); gva_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, gva_t vaddr_min); gva_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min); @@ -754,7 +752,7 @@ static inline int __vcpu_run(struct kvm_vcpu *vcpu) void vcpu_run_complete_io(struct kvm_vcpu *vcpu); struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu); -static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap, +static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, u32 cap, u64 arg0) { struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; @@ -882,18 +880,18 @@ static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu) return fd; } -int __kvm_has_device_attr(int dev_fd, uint32_t group, u64 attr); +int __kvm_has_device_attr(int dev_fd, u32 group, u64 attr); -static inline void kvm_has_device_attr(int dev_fd, uint32_t group, u64 attr) +static inline void kvm_has_device_attr(int dev_fd, u32 group, u64 attr) { int ret = __kvm_has_device_attr(dev_fd, group, attr); TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); } -int __kvm_device_attr_get(int dev_fd, uint32_t group, u64 attr, void *val); +int __kvm_device_attr_get(int dev_fd, u32 group, u64 attr, void *val); -static inline void kvm_device_attr_get(int dev_fd, uint32_t group, +static inline void kvm_device_attr_get(int dev_fd, u32 group, u64 attr, void *val) { int ret = __kvm_device_attr_get(dev_fd, group, attr, val); @@ -901,9 +899,9 @@ static inline void kvm_device_attr_get(int dev_fd, uint32_t group, TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret)); } -int __kvm_device_attr_set(int dev_fd, uint32_t group, u64 attr, void *val); +int __kvm_device_attr_set(int dev_fd, u32 group, u64 attr, void *val); -static inline void kvm_device_attr_set(int dev_fd, uint32_t group, +static inline void kvm_device_attr_set(int dev_fd, u32 group, u64 attr, void *val) { int ret = __kvm_device_attr_set(dev_fd, group, attr, val); @@ -911,37 +909,37 @@ static inline void kvm_device_attr_set(int dev_fd, uint32_t group, TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); } -static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, +static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, u32 group, u64 attr) { return __kvm_has_device_attr(vcpu->fd, group, attr); } -static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, +static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, u32 group, u64 attr) { kvm_has_device_attr(vcpu->fd, group, attr); } -static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, +static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, u32 group, u64 attr, void *val) { return __kvm_device_attr_get(vcpu->fd, group, attr, val); } -static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, +static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, u32 group, u64 attr, void *val) { kvm_device_attr_get(vcpu->fd, group, attr, val); } -static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, +static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, u32 group, u64 attr, void *val) { return __kvm_device_attr_set(vcpu->fd, group, attr, val); } -static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, +static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, u32 group, u64 attr, void *val) { kvm_device_attr_set(vcpu->fd, group, attr, val); @@ -979,27 +977,27 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu); */ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...); -void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); -int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); +void kvm_irq_line(struct kvm_vm *vm, u32 irq, int level); +int _kvm_irq_line(struct kvm_vm *vm, u32 irq, int level); #define KVM_MAX_IRQ_ROUTES 4096 struct kvm_irq_routing *kvm_gsi_routing_create(void); void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, - uint32_t gsi, uint32_t pin); + u32 gsi, u32 pin); int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); const char *exit_reason_str(unsigned int exit_reason); -gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t paddr_min, uint32_t memslot); +gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t paddr_min, u32 memslot); gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - gpa_t paddr_min, uint32_t memslot, + gpa_t paddr_min, u32 memslot, bool protected); gpa_t vm_alloc_page_table(struct kvm_vm *vm); static inline gpa_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - gpa_t paddr_min, uint32_t memslot) + gpa_t paddr_min, u32 memslot) { /* * By default, allocate memory as protected for VMs that support @@ -1017,7 +1015,7 @@ static inline gpa_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, * calculate the amount of memory needed for per-vCPU data, e.g. stacks. */ struct kvm_vm *____vm_create(struct vm_shape shape); -struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, +struct kvm_vm *__vm_create(struct vm_shape shape, u32 nr_runnable_vcpus, u64 nr_extra_pages); static inline struct kvm_vm *vm_create_barebones(void) @@ -1035,16 +1033,16 @@ static inline struct kvm_vm *vm_create_barebones_type(unsigned long type) return ____vm_create(shape); } -static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) +static inline struct kvm_vm *vm_create(u32 nr_runnable_vcpus) { return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0); } -struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, +struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, u32 nr_vcpus, u64 extra_mem_pages, void *guest_code, struct kvm_vcpu *vcpus[]); -static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus, +static inline struct kvm_vm *vm_create_with_vcpus(u32 nr_vcpus, void *guest_code, struct kvm_vcpu *vcpus[]) { @@ -1085,7 +1083,7 @@ static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); -void kvm_set_files_rlimit(uint32_t nr_vcpus); +void kvm_set_files_rlimit(u32 nr_vcpus); int __pin_task_to_cpu(pthread_t task, int cpu); @@ -1116,7 +1114,7 @@ static inline int pin_self_to_any_cpu(void) } void kvm_print_vcpu_pinning_help(void); -void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], +void kvm_parse_vcpu_pinning(const char *pcpus_string, u32 vcpu_to_pcpu[], int nr_vcpus); unsigned long vm_compute_max_gfn(struct kvm_vm *vm); @@ -1172,10 +1170,10 @@ static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu, * vm - Virtual Machine * vcpu_id - The id of the VCPU to add to the VM. */ -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id); void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code); -static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, +static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id, void *guest_code) { struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id); @@ -1186,10 +1184,10 @@ static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, } /* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */ -struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id); +struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, u32 vcpu_id); static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm, - uint32_t vcpu_id) + u32 vcpu_id) { return vm_arch_vcpu_recreate(vm, vcpu_id); } @@ -1296,7 +1294,7 @@ void kvm_arch_vm_release(struct kvm_vm *vm); bool vm_is_gpa_protected(struct kvm_vm *vm, gpa_t paddr); -uint32_t guest_get_vcpuid(void); +u32 guest_get_vcpuid(void); bool kvm_arch_has_default_irqchip(void); diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h index 71296909302c..e3e4b4d6a27a 100644 --- a/tools/testing/selftests/kvm/include/memstress.h +++ b/tools/testing/selftests/kvm/include/memstress.h @@ -35,8 +35,8 @@ struct memstress_args { u64 gpa; u64 size; u64 guest_page_size; - uint32_t random_seed; - uint32_t write_percent; + u32 random_seed; + u32 write_percent; /* Run vCPUs in L2 instead of L1, if the architecture supports it. */ bool nested; @@ -45,7 +45,7 @@ struct memstress_args { /* True if all vCPUs are pinned to pCPUs */ bool pin_vcpus; /* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. */ - uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS]; + u32 vcpu_to_pcpu[KVM_MAX_VCPUS]; /* Test is done, stop running vCPUs. */ bool stop_vcpus; @@ -61,12 +61,12 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, bool partition_vcpu_memory_access); void memstress_destroy_vm(struct kvm_vm *vm); -void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent); +void memstress_set_write_percent(struct kvm_vm *vm, u32 write_percent); void memstress_set_random_access(struct kvm_vm *vm, bool random_access); void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *)); void memstress_join_vcpu_threads(int vcpus); -void memstress_guest_code(uint32_t vcpu_id); +void memstress_guest_code(u32 vcpu_id); u64 memstress_nested_pages(int nr_vcpus); void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]); diff --git a/tools/testing/selftests/kvm/include/riscv/arch_timer.h b/tools/testing/selftests/kvm/include/riscv/arch_timer.h index 66ed7e36a7cb..28ffc014da2a 100644 --- a/tools/testing/selftests/kvm/include/riscv/arch_timer.h +++ b/tools/testing/selftests/kvm/include/riscv/arch_timer.h @@ -47,7 +47,7 @@ static inline void timer_irq_disable(void) csr_clear(CSR_SIE, IE_TIE); } -static inline void timer_set_next_cmp_ms(uint32_t msec) +static inline void timer_set_next_cmp_ms(u32 msec) { u64 now_ct = timer_get_cycles(); u64 next_ct = now_ct + msec_to_cycles(msec); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index d7489db738bf..fb24347c6e6c 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -109,14 +109,14 @@ struct timespec timespec_elapsed(struct timespec start); struct timespec timespec_div(struct timespec ts, int divisor); struct guest_random_state { - uint32_t seed; + u32 seed; }; -extern uint32_t guest_random_seed; +extern u32 guest_random_seed; extern struct guest_random_state guest_rng; -struct guest_random_state new_guest_random_state(uint32_t seed); -uint32_t guest_random_u32(struct guest_random_state *state); +struct guest_random_state new_guest_random_state(u32 seed); +u32 guest_random_u32(struct guest_random_state *state); static inline bool __guest_random_bool(struct guest_random_state *state, uint8_t percent) @@ -160,7 +160,7 @@ enum vm_mem_backing_src_type { struct vm_mem_backing_src_alias { const char *name; - uint32_t flag; + u32 flag; }; #define MIN_RUN_DELAY_NS 200000UL @@ -168,9 +168,9 @@ struct vm_mem_backing_src_alias { bool thp_configured(void); size_t get_trans_hugepagesz(void); size_t get_def_hugetlb_pagesz(void); -const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i); -size_t get_backing_src_pagesz(uint32_t i); -bool is_backing_src_hugetlb(uint32_t i); +const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(u32 i); +size_t get_backing_src_pagesz(u32 i); +bool is_backing_src_hugetlb(u32 i); void backing_src_help(const char *flag); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); long get_run_delay(void); @@ -217,7 +217,7 @@ static inline void *align_ptr_up(void *x, size_t size) int atoi_paranoid(const char *num_str); -static inline uint32_t atoi_positive(const char *name, const char *num_str) +static inline u32 atoi_positive(const char *name, const char *num_str) { int num = atoi_paranoid(num_str); @@ -225,7 +225,7 @@ static inline uint32_t atoi_positive(const char *name, const char *num_str) return num; } -static inline uint32_t atoi_non_negative(const char *name, const char *num_str) +static inline u32 atoi_non_negative(const char *name, const char *num_str) { int num = atoi_paranoid(num_str); diff --git a/tools/testing/selftests/kvm/include/timer_test.h b/tools/testing/selftests/kvm/include/timer_test.h index 9501c6c825e2..b7d5d2c84701 100644 --- a/tools/testing/selftests/kvm/include/timer_test.h +++ b/tools/testing/selftests/kvm/include/timer_test.h @@ -18,11 +18,11 @@ /* Timer test cmdline parameters */ struct test_args { - uint32_t nr_vcpus; - uint32_t nr_iter; - uint32_t timer_period_ms; - uint32_t migration_freq_ms; - uint32_t timer_err_margin_us; + u32 nr_vcpus; + u32 nr_iter; + u32 timer_period_ms; + u32 migration_freq_ms; + u32 timer_err_margin_us; /* Members of struct kvm_arm_counter_offset */ u64 counter_offset; u64 reserved; @@ -30,7 +30,7 @@ struct test_args { /* Shared variables between host and guest */ struct test_vcpu_shared_data { - uint32_t nr_iter; + u32 nr_iter; int guest_stage; u64 xcnt; }; diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h index 05573dedabd3..74eaa3bd335d 100644 --- a/tools/testing/selftests/kvm/include/x86/apic.h +++ b/tools/testing/selftests/kvm/include/x86/apic.h @@ -79,19 +79,19 @@ void apic_disable(void); void xapic_enable(void); void x2apic_enable(void); -static inline uint32_t get_bsp_flag(void) +static inline u32 get_bsp_flag(void) { return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP; } -static inline uint32_t xapic_read_reg(unsigned int reg) +static inline u32 xapic_read_reg(unsigned int reg) { - return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2]; + return ((volatile u32 *)APIC_DEFAULT_GPA)[reg >> 2]; } -static inline void xapic_write_reg(unsigned int reg, uint32_t val) +static inline void xapic_write_reg(unsigned int reg, u32 val) { - ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val; + ((volatile u32 *)APIC_DEFAULT_GPA)[reg >> 2] = val; } static inline u64 x2apic_read_reg(unsigned int reg) diff --git a/tools/testing/selftests/kvm/include/x86/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h index 5ec5cca6f9e4..3b0f96b881f9 100644 --- a/tools/testing/selftests/kvm/include/x86/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86/evmcs.h @@ -11,7 +11,7 @@ #include "vmx.h" #define u16 uint16_t -#define u32 uint32_t +#define u32 u32 #define u64 u64 #define EVMCS_VERSION 1 diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index dd7a68729ad0..3898665ad2e9 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -403,8 +403,8 @@ struct desc64 { uint16_t base0; unsigned base1:8, type:4, s:1, dpl:2, p:1; unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; - uint32_t base3; - uint32_t zero1; + u32 base3; + u32 zero1; } __attribute__((packed)); struct desc_ptr { @@ -437,7 +437,7 @@ static inline u64 get_desc64_base(const struct desc64 *desc) static inline u64 rdtsc(void) { - uint32_t eax, edx; + u32 eax, edx; u64 tsc_val; /* * The lfence is to wait (on Intel CPUs) until all previous @@ -450,27 +450,27 @@ static inline u64 rdtsc(void) return tsc_val; } -static inline u64 rdtscp(uint32_t *aux) +static inline u64 rdtscp(u32 *aux) { - uint32_t eax, edx; + u32 eax, edx; __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux)); return ((u64)edx) << 32 | eax; } -static inline u64 rdmsr(uint32_t msr) +static inline u64 rdmsr(u32 msr) { - uint32_t a, d; + u32 a, d; __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory"); return a | ((u64)d << 32); } -static inline void wrmsr(uint32_t msr, u64 value) +static inline void wrmsr(u32 msr, u64 value) { - uint32_t a = value; - uint32_t d = value >> 32; + u32 a = value; + u32 d = value >> 32; __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory"); } @@ -651,14 +651,14 @@ static inline struct desc_ptr get_idt(void) return idt; } -static inline void outl(uint16_t port, uint32_t value) +static inline void outl(uint16_t port, u32 value) { __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value)); } -static inline void __cpuid(uint32_t function, uint32_t index, - uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) +static inline void __cpuid(u32 function, u32 index, + u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx) { *eax = function; *ecx = index; @@ -672,35 +672,35 @@ static inline void __cpuid(uint32_t function, uint32_t index, : "memory"); } -static inline void cpuid(uint32_t function, - uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) +static inline void cpuid(u32 function, + u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx) { return __cpuid(function, 0, eax, ebx, ecx, edx); } -static inline uint32_t this_cpu_fms(void) +static inline u32 this_cpu_fms(void) { - uint32_t eax, ebx, ecx, edx; + u32 eax, ebx, ecx, edx; cpuid(1, &eax, &ebx, &ecx, &edx); return eax; } -static inline uint32_t this_cpu_family(void) +static inline u32 this_cpu_family(void) { return x86_family(this_cpu_fms()); } -static inline uint32_t this_cpu_model(void) +static inline u32 this_cpu_model(void) { return x86_model(this_cpu_fms()); } static inline bool this_cpu_vendor_string_is(const char *vendor) { - const uint32_t *chunk = (const uint32_t *)vendor; - uint32_t eax, ebx, ecx, edx; + const u32 *chunk = (const u32 *)vendor; + u32 eax, ebx, ecx, edx; cpuid(0, &eax, &ebx, &ecx, &edx); return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); @@ -724,10 +724,10 @@ static inline bool this_cpu_is_hygon(void) return this_cpu_vendor_string_is("HygonGenuine"); } -static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index, - uint8_t reg, uint8_t lo, uint8_t hi) +static inline u32 __this_cpu_has(u32 function, u32 index, + uint8_t reg, uint8_t lo, uint8_t hi) { - uint32_t gprs[4]; + u32 gprs[4]; __cpuid(function, index, &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX], @@ -742,7 +742,7 @@ static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature) feature.reg, feature.bit, feature.bit); } -static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property) +static inline u32 this_cpu_property(struct kvm_x86_cpu_property property) { return __this_cpu_has(property.function, property.index, property.reg, property.lo_bit, property.hi_bit); @@ -750,7 +750,7 @@ static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property) static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property) { - uint32_t max_leaf; + u32 max_leaf; switch (property.function & 0xc0000000) { case 0: @@ -770,7 +770,7 @@ static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property) static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature) { - uint32_t nr_bits; + u32 nr_bits; if (feature.f.reg == KVM_CPUID_EBX) { nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); @@ -898,7 +898,7 @@ void kvm_x86_state_cleanup(struct kvm_x86_state *state); const struct kvm_msr_list *kvm_get_msr_index_list(void); const struct kvm_msr_list *kvm_get_feature_msr_index_list(void); -bool kvm_msr_is_in_save_restore_list(uint32_t msr_index); +bool kvm_msr_is_in_save_restore_list(u32 msr_index); u64 kvm_get_feature_msr(u64 msr_index); static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu, @@ -954,20 +954,20 @@ static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs) } const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index); + u32 function, u32 index); const struct kvm_cpuid2 *kvm_get_supported_cpuid(void); -static inline uint32_t kvm_cpu_fms(void) +static inline u32 kvm_cpu_fms(void) { return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax; } -static inline uint32_t kvm_cpu_family(void) +static inline u32 kvm_cpu_family(void) { return x86_family(kvm_cpu_fms()); } -static inline uint32_t kvm_cpu_model(void) +static inline u32 kvm_cpu_model(void) { return x86_model(kvm_cpu_fms()); } @@ -980,17 +980,17 @@ static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature) return kvm_cpuid_has(kvm_get_supported_cpuid(), feature); } -uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, - struct kvm_x86_cpu_property property); +u32 kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property); -static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property) +static inline u32 kvm_cpu_property(struct kvm_x86_cpu_property property) { return kvm_cpuid_property(kvm_get_supported_cpuid(), property); } static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property) { - uint32_t max_leaf; + u32 max_leaf; switch (property.function & 0xc0000000) { case 0: @@ -1010,7 +1010,7 @@ static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property) static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) { - uint32_t nr_bits; + u32 nr_bits; if (feature.f.reg == KVM_CPUID_EBX) { nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); @@ -1062,8 +1062,8 @@ static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) } static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, - uint32_t function, - uint32_t index) + u32 function, + u32 index) { TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)"); @@ -1074,7 +1074,7 @@ static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *v } static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, - uint32_t function) + u32 function) { return __vcpu_get_cpuid_entry(vcpu, function, 0); } @@ -1104,10 +1104,10 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_property property, - uint32_t value); + u32 value); void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr); -void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function); +void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, u32 function); static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_feature feature) @@ -1161,7 +1161,7 @@ do { \ * is changing, etc. This is NOT an exhaustive list! The intent is to filter * out MSRs that are not durable _and_ that a selftest wants to write. */ -static inline bool is_durable_msr(uint32_t msr) +static inline bool is_durable_msr(u32 msr) { return msr != MSR_IA32_TSC; } @@ -1203,7 +1203,7 @@ struct idt_entry { uint16_t dpl : 2; uint16_t p : 1; uint16_t offset1; - uint32_t offset2; uint32_t reserved; + u32 offset2; u32 reserved; }; void vm_install_exception_handler(struct kvm_vm *vm, int vector, @@ -1307,11 +1307,11 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, }) #define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \ -static inline uint8_t insn##_safe ##_fep(uint32_t idx, u64 *val) \ +static inline uint8_t insn##_safe ##_fep(u32 idx, u64 *val) \ { \ u64 error_code; \ uint8_t vector; \ - uint32_t a, d; \ + u32 a, d; \ \ asm volatile(KVM_ASM_SAFE##_FEP(#insn) \ : "=a"(a), "=d"(d), \ @@ -1335,12 +1335,12 @@ BUILD_READ_U64_SAFE_HELPERS(rdmsr) BUILD_READ_U64_SAFE_HELPERS(rdpmc) BUILD_READ_U64_SAFE_HELPERS(xgetbv) -static inline uint8_t wrmsr_safe(uint32_t msr, u64 val) +static inline uint8_t wrmsr_safe(u32 msr, u64 val) { return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr)); } -static inline uint8_t xsetbv_safe(uint32_t index, u64 value) +static inline uint8_t xsetbv_safe(u32 index, u64 value) { u32 eax = value; u32 edx = value >> 32; diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h index f65e3361c7c0..4f91c1179416 100644 --- a/tools/testing/selftests/kvm/include/x86/sev.h +++ b/tools/testing/selftests/kvm/include/x86/sev.h @@ -46,14 +46,14 @@ static inline bool is_sev_vm(struct kvm_vm *vm) return is_sev_es_vm(vm) || vm->type == KVM_X86_SEV_VM; } -void sev_vm_launch(struct kvm_vm *vm, uint32_t policy); +void sev_vm_launch(struct kvm_vm *vm, u32 policy); void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement); void sev_vm_launch_finish(struct kvm_vm *vm); void snp_vm_launch_start(struct kvm_vm *vm, u64 policy); void snp_vm_launch_update(struct kvm_vm *vm); void snp_vm_launch_finish(struct kvm_vm *vm); -struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, +struct kvm_vm *vm_sev_create_with_one_vcpu(u32 type, void *guest_code, struct kvm_vcpu **cpu); void vm_sev_launch(struct kvm_vm *vm, u64 policy, uint8_t *measurement); diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index 5e9810fb9d20..6cd6bb7efbc2 100644 --- a/tools/testing/selftests/kvm/include/x86/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -285,8 +285,8 @@ enum vmcs_field { }; struct vmx_msr_entry { - uint32_t index; - uint32_t reserved; + u32 index; + u32 reserved; u64 value; } __attribute__ ((aligned(16))); @@ -490,7 +490,7 @@ static inline int vmwrite(u64 encoding, u64 value) return ret; } -static inline uint32_t vmcs_revision(void) +static inline u32 vmcs_revision(void) { return rdmsr(MSR_IA32_VMX_BASIC); } diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index fd3e0d03e370..fc5242fb956f 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -63,7 +63,7 @@ struct test_args { static enum test_stage guest_test_stage; /* Host variables */ -static uint32_t nr_vcpus = 1; +static u32 nr_vcpus = 1; static struct test_args test_args; static enum test_stage *current_stage; static bool host_quit; diff --git a/tools/testing/selftests/kvm/lib/arm64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c index 3b0aa4eff557..011dfe1dfcb3 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic.c @@ -50,7 +50,7 @@ static void gic_dist_init(enum gic_type type, unsigned int nr_cpus) void gic_init(enum gic_type type, unsigned int nr_cpus) { - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); GUEST_ASSERT(type < GIC_TYPE_MAX); GUEST_ASSERT(nr_cpus); diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h index 4ccc72533b35..6d393f5c5685 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic_private.h +++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h @@ -13,19 +13,19 @@ struct gic_common_ops { void (*gic_irq_enable)(unsigned int intid); void (*gic_irq_disable)(unsigned int intid); u64 (*gic_read_iar)(void); - void (*gic_write_eoir)(uint32_t irq); - void (*gic_write_dir)(uint32_t irq); + void (*gic_write_eoir)(u32 irq); + void (*gic_write_dir)(u32 irq); void (*gic_set_eoi_split)(bool split); void (*gic_set_priority_mask)(u64 mask); - void (*gic_set_priority)(uint32_t intid, uint32_t prio); - void (*gic_irq_set_active)(uint32_t intid); - void (*gic_irq_clear_active)(uint32_t intid); - bool (*gic_irq_get_active)(uint32_t intid); - void (*gic_irq_set_pending)(uint32_t intid); - void (*gic_irq_clear_pending)(uint32_t intid); - bool (*gic_irq_get_pending)(uint32_t intid); - void (*gic_irq_set_config)(uint32_t intid, bool is_edge); - void (*gic_irq_set_group)(uint32_t intid, bool group); + void (*gic_set_priority)(u32 intid, u32 prio); + void (*gic_irq_set_active)(u32 intid); + void (*gic_irq_clear_active)(u32 intid); + bool (*gic_irq_get_active)(u32 intid); + void (*gic_irq_set_pending)(u32 intid); + void (*gic_irq_clear_pending)(u32 intid); + bool (*gic_irq_get_pending)(u32 intid); + void (*gic_irq_set_config)(u32 intid, bool is_edge); + void (*gic_irq_set_group)(u32 intid, bool group); }; extern const struct gic_common_ops gicv3_ops; diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c index cdd29245c84e..a99a53accfe9 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c @@ -50,13 +50,13 @@ static void gicv3_gicd_wait_for_rwp(void) } } -static inline volatile void *gicr_base_cpu(uint32_t cpu) +static inline volatile void *gicr_base_cpu(u32 cpu) { /* Align all the redistributors sequentially */ return GICR_BASE_GVA + cpu * SZ_64K * 2; } -static void gicv3_gicr_wait_for_rwp(uint32_t cpu) +static void gicv3_gicr_wait_for_rwp(u32 cpu) { unsigned int count = 100000; /* 1s */ @@ -66,7 +66,7 @@ static void gicv3_gicr_wait_for_rwp(uint32_t cpu) } } -static void gicv3_wait_for_rwp(uint32_t cpu_or_dist) +static void gicv3_wait_for_rwp(u32 cpu_or_dist) { if (cpu_or_dist & DIST_BIT) gicv3_gicd_wait_for_rwp(); @@ -99,13 +99,13 @@ static u64 gicv3_read_iar(void) return irqstat; } -static void gicv3_write_eoir(uint32_t irq) +static void gicv3_write_eoir(u32 irq) { write_sysreg_s(irq, SYS_ICC_EOIR1_EL1); isb(); } -static void gicv3_write_dir(uint32_t irq) +static void gicv3_write_dir(u32 irq) { write_sysreg_s(irq, SYS_ICC_DIR_EL1); isb(); @@ -118,7 +118,7 @@ static void gicv3_set_priority_mask(u64 mask) static void gicv3_set_eoi_split(bool split) { - uint32_t val; + u32 val; /* * All other fields are read-only, so no need to read CTLR first. In @@ -129,29 +129,29 @@ static void gicv3_set_eoi_split(bool split) isb(); } -uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, u64 offset) +u32 gicv3_reg_readl(u32 cpu_or_dist, u64 offset) { volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); return readl(base + offset); } -void gicv3_reg_writel(uint32_t cpu_or_dist, u64 offset, uint32_t reg_val) +void gicv3_reg_writel(u32 cpu_or_dist, u64 offset, u32 reg_val) { volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); writel(reg_val, base + offset); } -uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, u64 offset, uint32_t mask) +u32 gicv3_getl_fields(u32 cpu_or_dist, u64 offset, u32 mask) { return gicv3_reg_readl(cpu_or_dist, offset) & mask; } -void gicv3_setl_fields(uint32_t cpu_or_dist, u64 offset, - uint32_t mask, uint32_t reg_val) +void gicv3_setl_fields(u32 cpu_or_dist, u64 offset, + u32 mask, u32 reg_val) { - uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask; + u32 tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask; tmp |= (reg_val & mask); gicv3_reg_writel(cpu_or_dist, offset, tmp); @@ -165,14 +165,14 @@ void gicv3_setl_fields(uint32_t cpu_or_dist, u64 offset, * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being * marked as "Reserved" in the Distributor map. */ -static void gicv3_access_reg(uint32_t intid, u64 offset, - uint32_t reg_bits, uint32_t bits_per_field, - bool write, uint32_t *val) +static void gicv3_access_reg(u32 intid, u64 offset, + u32 reg_bits, u32 bits_per_field, + bool write, u32 *val) { - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); enum gicv3_intid_range intid_range = get_intid_range(intid); - uint32_t fields_per_reg, index, mask, shift; - uint32_t cpu_or_dist; + u32 fields_per_reg, index, mask, shift; + u32 cpu_or_dist; GUEST_ASSERT(bits_per_field <= reg_bits); GUEST_ASSERT(!write || *val < (1U << bits_per_field)); @@ -197,32 +197,32 @@ static void gicv3_access_reg(uint32_t intid, u64 offset, *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift; } -static void gicv3_write_reg(uint32_t intid, u64 offset, - uint32_t reg_bits, uint32_t bits_per_field, uint32_t val) +static void gicv3_write_reg(u32 intid, u64 offset, + u32 reg_bits, u32 bits_per_field, u32 val) { gicv3_access_reg(intid, offset, reg_bits, bits_per_field, true, &val); } -static uint32_t gicv3_read_reg(uint32_t intid, u64 offset, - uint32_t reg_bits, uint32_t bits_per_field) +static u32 gicv3_read_reg(u32 intid, u64 offset, + u32 reg_bits, u32 bits_per_field) { - uint32_t val; + u32 val; gicv3_access_reg(intid, offset, reg_bits, bits_per_field, false, &val); return val; } -static void gicv3_set_priority(uint32_t intid, uint32_t prio) +static void gicv3_set_priority(u32 intid, u32 prio) { gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio); } /* Sets the intid to be level-sensitive or edge-triggered. */ -static void gicv3_irq_set_config(uint32_t intid, bool is_edge) +static void gicv3_irq_set_config(u32 intid, bool is_edge) { - uint32_t val; + u32 val; /* N/A for private interrupts. */ GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE); @@ -230,57 +230,57 @@ static void gicv3_irq_set_config(uint32_t intid, bool is_edge) gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val); } -static void gicv3_irq_enable(uint32_t intid) +static void gicv3_irq_enable(u32 intid) { bool is_spi = get_intid_range(intid) == SPI_RANGE; - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1); gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu); } -static void gicv3_irq_disable(uint32_t intid) +static void gicv3_irq_disable(u32 intid) { bool is_spi = get_intid_range(intid) == SPI_RANGE; - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1); gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu); } -static void gicv3_irq_set_active(uint32_t intid) +static void gicv3_irq_set_active(u32 intid) { gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1); } -static void gicv3_irq_clear_active(uint32_t intid) +static void gicv3_irq_clear_active(u32 intid) { gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1); } -static bool gicv3_irq_get_active(uint32_t intid) +static bool gicv3_irq_get_active(u32 intid) { return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1); } -static void gicv3_irq_set_pending(uint32_t intid) +static void gicv3_irq_set_pending(u32 intid) { gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1); } -static void gicv3_irq_clear_pending(uint32_t intid) +static void gicv3_irq_clear_pending(u32 intid) { gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1); } -static bool gicv3_irq_get_pending(uint32_t intid) +static bool gicv3_irq_get_pending(u32 intid) { return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1); } static void gicv3_enable_redist(volatile void *redist_base) { - uint32_t val = readl(redist_base + GICR_WAKER); + u32 val = readl(redist_base + GICR_WAKER); unsigned int count = 100000; /* 1s */ val &= ~GICR_WAKER_ProcessorSleep; @@ -293,10 +293,10 @@ static void gicv3_enable_redist(volatile void *redist_base) } } -static void gicv3_set_group(uint32_t intid, bool grp) +static void gicv3_set_group(u32 intid, bool grp) { - uint32_t cpu_or_dist; - uint32_t val; + u32 cpu_or_dist; + u32 val; cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid(); val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 159368036325..f96513262c5b 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -413,7 +413,7 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (u64)guest_code); } -static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, +static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, u32 vcpu_id, struct kvm_vcpu_init *init) { size_t stack_size; @@ -432,7 +432,7 @@ static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, return vcpu; } -struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, +struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, u32 vcpu_id, struct kvm_vcpu_init *init, void *guest_code) { struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init); @@ -442,7 +442,7 @@ struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, return vcpu; } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { return __aarch64_vcpu_add(vm, vcpu_id, NULL); } @@ -563,13 +563,13 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, handlers->exception_handlers[vector][0] = handler; } -uint32_t guest_get_vcpuid(void) +u32 guest_get_vcpuid(void) { return read_sysreg(tpidr_el1); } -static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran, - uint32_t not_sup_val, uint32_t ipa52_min_val) +static u32 max_ipa_for_page_size(u32 vm_ipa, u32 gran, + u32 not_sup_val, u32 ipa52_min_val) { if (gran == not_sup_val) return 0; @@ -579,13 +579,13 @@ static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran, return min(vm_ipa, 48U); } -void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, - uint32_t *ipa16k, uint32_t *ipa64k) +void aarch64_get_supported_page_sizes(u32 ipa, u32 *ipa4k, + u32 *ipa16k, u32 *ipa64k) { struct kvm_vcpu_init preferred_init; int kvm_fd, vm_fd, vcpu_fd, err; u64 val; - uint32_t gran; + u32 gran; struct kvm_one_reg reg = { .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1), .addr = (u64)&val, @@ -646,7 +646,7 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7") -void smccc_hvc(uint32_t function_id, u64 arg0, u64 arg1, +void smccc_hvc(u32 function_id, u64 arg0, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, struct arm_smccc_res *res) { @@ -654,7 +654,7 @@ void smccc_hvc(uint32_t function_id, u64 arg0, u64 arg1, arg6, res); } -void smccc_smc(uint32_t function_id, u64 arg0, u64 arg1, +void smccc_smc(u32 function_id, u64 arg0, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, struct arm_smccc_res *res) { diff --git a/tools/testing/selftests/kvm/lib/arm64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c index 166637d6abf5..4ecebf3146a2 100644 --- a/tools/testing/selftests/kvm/lib/arm64/vgic.c +++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c @@ -41,7 +41,7 @@ bool kvm_supports_vgic_v3(void) * redistributor regions of the guest. Since it depends on the number of * vCPUs for the VM, it must be called after all the vCPUs have been created. */ -int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) +int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, u32 nr_irqs) { int gic_fd; u64 attr; @@ -77,7 +77,7 @@ void __vgic_v3_init(int fd) KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); } -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, u32 nr_irqs) { unsigned int nr_vcpus_created = 0; struct list_head *iter; @@ -104,7 +104,7 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) } /* should only work for level sensitive interrupts */ -int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) +int _kvm_irq_set_level_info(int gic_fd, u32 intid, int level) { u64 attr = 32 * (intid / 32); u64 index = intid % 32; @@ -122,16 +122,16 @@ int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) return ret; } -void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) +void kvm_irq_set_level_info(int gic_fd, u32 intid, int level) { int ret = _kvm_irq_set_level_info(gic_fd, intid, level); TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret)); } -int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) +int _kvm_arm_irq_line(struct kvm_vm *vm, u32 intid, int level) { - uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK; + u32 irq = intid & KVM_ARM_IRQ_NUM_MASK; TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself " "doesn't allow injecting SGIs. There's no mask for it."); @@ -144,14 +144,14 @@ int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) return _kvm_irq_line(vm, irq, level); } -void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) +void kvm_arm_irq_line(struct kvm_vm *vm, u32 intid, int level) { int ret = _kvm_arm_irq_line(vm, intid, level); TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret)); } -static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu, +static void vgic_poke_irq(int gic_fd, u32 intid, struct kvm_vcpu *vcpu, u64 reg_off) { u64 reg = intid / 32; @@ -160,7 +160,7 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu, u64 val; bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid); - uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS + u32 group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS : KVM_DEV_ARM_VGIC_GRP_DIST_REGS; if (intid_is_private) { @@ -183,12 +183,12 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu, kvm_device_attr_set(gic_fd, group, attr, &val); } -void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) +void kvm_irq_write_ispendr(int gic_fd, u32 intid, struct kvm_vcpu *vcpu) { vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR); } -void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) +void kvm_irq_write_isactiver(int gic_fd, u32 intid, struct kvm_vcpu *vcpu) { vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER); } diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index ce3099630397..7a96c43b5704 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -20,7 +20,7 @@ void guest_modes_append_default(void) #ifdef __aarch64__ { unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - uint32_t ipa4k, ipa16k, ipa64k; + u32 ipa4k, ipa16k, ipa64k; int i; aarch64_get_supported_page_sizes(limit, &ipa4k, &ipa16k, &ipa64k); diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c index a3a44657e72d..551ad6c658aa 100644 --- a/tools/testing/selftests/kvm/lib/guest_sprintf.c +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -35,8 +35,8 @@ static int skip_atoi(const char **s) ({ \ int __res; \ \ - __res = ((u64)n) % (uint32_t) base; \ - n = ((u64)n) / (uint32_t) base; \ + __res = ((u64)n) % (u32)base; \ + n = ((u64)n) / (u32)base; \ __res; \ }) @@ -292,7 +292,7 @@ repeat: } else if (flags & SIGN) num = va_arg(args, int); else - num = va_arg(args, uint32_t); + num = va_arg(args, u32); str = number(str, end, num, base, field_width, precision, flags); } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index d97bf1141a55..9f80e2e03001 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -20,9 +20,9 @@ #define KVM_UTIL_MIN_PFN 2 -uint32_t guest_random_seed; +u32 guest_random_seed; struct guest_random_state guest_rng; -static uint32_t last_guest_seed; +static u32 last_guest_seed; static size_t vcpu_mmap_sz(void); @@ -165,7 +165,7 @@ unsigned int kvm_check_cap(long cap) return (unsigned int)ret; } -void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) +void vm_enable_dirty_ring(struct kvm_vm *vm, u32 ring_size) { if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL)) vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size); @@ -189,7 +189,7 @@ static void vm_open(struct kvm_vm *vm) vm->stats.fd = -1; } -const char *vm_guest_mode_string(uint32_t i) +const char *vm_guest_mode_string(u32 i) { static const char * const strings[] = { [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", @@ -397,7 +397,7 @@ struct kvm_vm *____vm_create(struct vm_shape shape) } static u64 vm_nr_pages_required(enum vm_guest_mode mode, - uint32_t nr_runnable_vcpus, + u32 nr_runnable_vcpus, u64 extra_mem_pages) { u64 page_size = vm_guest_mode_params[mode].page_size; @@ -435,7 +435,7 @@ static u64 vm_nr_pages_required(enum vm_guest_mode mode, return vm_adjust_num_guest_pages(mode, nr_pages); } -void kvm_set_files_rlimit(uint32_t nr_vcpus) +void kvm_set_files_rlimit(u32 nr_vcpus) { /* * Each vCPU will open two file descriptors: the vCPU itself and the @@ -476,7 +476,7 @@ static bool is_guest_memfd_required(struct vm_shape shape) #endif } -struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, +struct kvm_vm *__vm_create(struct vm_shape shape, u32 nr_runnable_vcpus, u64 nr_extra_pages) { u64 nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus, @@ -546,7 +546,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, * extra_mem_pages is only used to calculate the maximum page table size, * no real memory allocation for non-slot0 memory in this function. */ -struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, +struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, u32 nr_vcpus, u64 extra_mem_pages, void *guest_code, struct kvm_vcpu *vcpus[]) { @@ -614,7 +614,7 @@ void kvm_vm_restart(struct kvm_vm *vmp) } __weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, - uint32_t vcpu_id) + u32 vcpu_id) { return __vm_vcpu_add(vm, vcpu_id); } @@ -636,9 +636,9 @@ int __pin_task_to_cpu(pthread_t task, int cpu) return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset); } -static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) +static u32 parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) { - uint32_t pcpu = atoi_non_negative("CPU number", cpu_str); + u32 pcpu = atoi_non_negative("CPU number", cpu_str); TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask), "Not allowed to run on pCPU '%d', check cgroups?", pcpu); @@ -662,7 +662,7 @@ void kvm_print_vcpu_pinning_help(void) " (default: no pinning)\n", name, name); } -void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], +void kvm_parse_vcpu_pinning(const char *pcpus_string, u32 vcpu_to_pcpu[], int nr_vcpus) { cpu_set_t allowed_mask; @@ -918,7 +918,7 @@ static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree, } -int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, +int __vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, u64 gpa, u64 size, void *hva) { struct kvm_userspace_memory_region region = { @@ -932,7 +932,7 @@ int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion); } -void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, +void vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, u64 gpa, u64 size, void *hva) { int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva); @@ -945,9 +945,9 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, __TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2), \ "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)") -int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, +int __vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, u64 gpa, u64 size, void *hva, - uint32_t guest_memfd, u64 guest_memfd_offset) + u32 guest_memfd, u64 guest_memfd_offset) { struct kvm_userspace_memory_region2 region = { .slot = slot, @@ -964,9 +964,9 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, ®ion); } -void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, +void vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, u64 gpa, u64 size, void *hva, - uint32_t guest_memfd, u64 guest_memfd_offset) + u32 guest_memfd, u64 guest_memfd_offset) { int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva, guest_memfd, guest_memfd_offset); @@ -978,7 +978,7 @@ void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags /* FIXME: This thing needs to be ripped apart and rewritten. */ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - u64 gpa, uint32_t slot, u64 npages, uint32_t flags, + u64 gpa, u32 slot, u64 npages, u32 flags, int guest_memfd, u64 guest_memfd_offset) { int ret; @@ -1085,7 +1085,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, if (flags & KVM_MEM_GUEST_MEMFD) { if (guest_memfd < 0) { - uint32_t guest_memfd_flags = 0; + u32 guest_memfd_flags = 0; TEST_ASSERT(!guest_memfd_offset, "Offset must be zero when creating new guest_memfd"); guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags); @@ -1141,8 +1141,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - u64 gpa, uint32_t slot, u64 npages, - uint32_t flags) + u64 gpa, u32 slot, u64 npages, u32 flags) { vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0); } @@ -1163,7 +1162,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, * memory slot ID). */ struct userspace_mem_region * -memslot2region(struct kvm_vm *vm, uint32_t memslot) +memslot2region(struct kvm_vm *vm, u32 memslot) { struct userspace_mem_region *region; @@ -1194,7 +1193,7 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot) * Sets the flags of the memory region specified by the value of slot, * to the values given by flags. */ -void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) +void vm_mem_region_set_flags(struct kvm_vm *vm, u32 slot, u32 flags) { int ret; struct userspace_mem_region *region; @@ -1210,7 +1209,7 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) ret, errno, slot, flags); } -void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot) +void vm_mem_region_reload(struct kvm_vm *vm, u32 slot) { struct userspace_mem_region *region = memslot2region(vm, slot); struct kvm_userspace_memory_region2 tmp = region->region; @@ -1234,7 +1233,7 @@ void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot) * * Change the gpa of a memory region. */ -void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, u64 new_gpa) +void vm_mem_region_move(struct kvm_vm *vm, u32 slot, u64 new_gpa) { struct userspace_mem_region *region; int ret; @@ -1263,7 +1262,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, u64 new_gpa) * * Delete a memory region. */ -void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) +void vm_mem_region_delete(struct kvm_vm *vm, u32 slot) { struct userspace_mem_region *region = memslot2region(vm, slot); @@ -1317,7 +1316,7 @@ static size_t vcpu_mmap_sz(void) return ret; } -static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id) +static bool vcpu_exists(struct kvm_vm *vm, u32 vcpu_id) { struct kvm_vcpu *vcpu; @@ -1333,7 +1332,7 @@ static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id) * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id. * No additional vCPU setup is done. Returns the vCPU. */ -struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { struct kvm_vcpu *vcpu; @@ -1777,8 +1776,8 @@ struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu) void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) { - uint32_t page_size = getpagesize(); - uint32_t size = vcpu->vm->dirty_ring_size; + u32 page_size = getpagesize(); + u32 size = vcpu->vm->dirty_ring_size; TEST_ASSERT(size > 0, "Should enable dirty ring first"); @@ -1807,7 +1806,7 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) * Device Ioctl */ -int __kvm_has_device_attr(int dev_fd, uint32_t group, u64 attr) +int __kvm_has_device_attr(int dev_fd, u32 group, u64 attr) { struct kvm_device_attr attribute = { .group = group, @@ -1842,7 +1841,7 @@ int __kvm_create_device(struct kvm_vm *vm, u64 type) return err ? : create_dev.fd; } -int __kvm_device_attr_get(int dev_fd, uint32_t group, u64 attr, void *val) +int __kvm_device_attr_get(int dev_fd, u32 group, u64 attr, void *val) { struct kvm_device_attr kvmattr = { .group = group, @@ -1854,7 +1853,7 @@ int __kvm_device_attr_get(int dev_fd, uint32_t group, u64 attr, void *val) return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr); } -int __kvm_device_attr_set(int dev_fd, uint32_t group, u64 attr, void *val) +int __kvm_device_attr_set(int dev_fd, u32 group, u64 attr, void *val) { struct kvm_device_attr kvmattr = { .group = group, @@ -1870,7 +1869,7 @@ int __kvm_device_attr_set(int dev_fd, uint32_t group, u64 attr, void *val) * IRQ related functions. */ -int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) +int _kvm_irq_line(struct kvm_vm *vm, u32 irq, int level) { struct kvm_irq_level irq_level = { .irq = irq, @@ -1880,7 +1879,7 @@ int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level); } -void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) +void kvm_irq_line(struct kvm_vm *vm, u32 irq, int level) { int ret = _kvm_irq_line(vm, irq, level); @@ -1902,7 +1901,7 @@ struct kvm_irq_routing *kvm_gsi_routing_create(void) } void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, - uint32_t gsi, uint32_t pin) + u32 gsi, u32 pin) { int i; @@ -2088,7 +2087,7 @@ const char *exit_reason_str(unsigned int exit_reason) * not enough pages are available at or above paddr_min. */ gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - gpa_t paddr_min, uint32_t memslot, + gpa_t paddr_min, u32 memslot, bool protected) { struct userspace_mem_region *region; @@ -2133,7 +2132,7 @@ gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, return base * vm->page_size; } -gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t paddr_min, uint32_t memslot) +gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t paddr_min, u32 memslot) { return vm_phy_pages_alloc(vm, 1, paddr_min, memslot); } diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c index 989473b3da7b..38ee62c6cbfb 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/processor.c +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -118,7 +118,7 @@ gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) { - uint32_t prot_bits; + u32 prot_bits; u64 *ptep; TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -223,7 +223,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn hand handlers->exception_handlers[vector] = handler; } -uint32_t guest_get_vcpuid(void) +u32 guest_get_vcpuid(void) { return csr_read(LOONGARCH_CSR_CPUID); } @@ -369,7 +369,7 @@ void loongarch_vcpu_setup(struct kvm_vcpu *vcpu) loongarch_set_csr(vcpu, LOONGARCH_CSR_TMID, vcpu->id); } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { size_t stack_size; u64 stack_vaddr; diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index 5fcae17f687e..b59dc3344ff3 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -44,7 +44,7 @@ static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; * Continuously write to the first 8 bytes of each page in the * specified region. */ -void memstress_guest_code(uint32_t vcpu_idx) +void memstress_guest_code(u32 vcpu_idx) { struct memstress_args *args = &memstress_args; struct memstress_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx]; @@ -232,7 +232,7 @@ void memstress_destroy_vm(struct kvm_vm *vm) kvm_vm_free(vm); } -void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent) +void memstress_set_write_percent(struct kvm_vm *vm, u32 write_percent) { memstress_args.write_percent = write_percent; sync_global_to_guest(vm, memstress_args.write_percent); diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index a1f5c8660952..d7646eebcfab 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -45,7 +45,7 @@ static u64 pte_index_mask[] = { PGTBL_L3_INDEX_MASK, }; -static uint32_t pte_index_shift[] = { +static u32 pte_index_shift[] = { PGTBL_L0_INDEX_SHIFT, PGTBL_L1_INDEX_SHIFT, PGTBL_L2_INDEX_SHIFT, @@ -311,7 +311,7 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code); } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { int r; size_t stack_size; @@ -470,7 +470,7 @@ void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handle handlers->exception_handlers[1][0] = handler; } -uint32_t guest_get_vcpuid(void) +u32 guest_get_vcpuid(void) { return csr_read(CSR_SSCRATCH); } diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index a83a2b76524b..7591c5167927 100644 --- a/tools/testing/selftests/kvm/lib/s390/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -160,7 +160,7 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) vcpu->run->psw_addr = (uintptr_t)guest_code; } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); u64 stack_vaddr; diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index 8a472dcb3d5e..7e7734088f2f 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -80,7 +80,7 @@ * typedef u64 sparsebit_num_t; * * sparsebit_idx_t idx; - * uint32_t mask; + * u32 mask; * sparsebit_num_t num_after; * * The idx member contains the bit index of the first bit described by this @@ -162,7 +162,7 @@ #define DUMP_LINE_MAX 100 /* Does not include indent amount */ -typedef uint32_t mask_t; +typedef u32 mask_t; #define MASK_BITS (sizeof(mask_t) * CHAR_BIT) struct node { diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index f5b460c445be..bab1bd2b775b 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -30,15 +30,15 @@ void __attribute__((used)) expect_sigbus_handler(int signum) * Park-Miller LCG using standard constants. */ -struct guest_random_state new_guest_random_state(uint32_t seed) +struct guest_random_state new_guest_random_state(u32 seed) { struct guest_random_state s = {.seed = seed}; return s; } -uint32_t guest_random_u32(struct guest_random_state *state) +u32 guest_random_u32(struct guest_random_state *state) { - state->seed = (u64)state->seed * 48271 % ((uint32_t)(1 << 31) - 1); + state->seed = (u64)state->seed * 48271 % ((u32)(1 << 31) - 1); return state->seed; } @@ -225,7 +225,7 @@ size_t get_def_hugetlb_pagesz(void) #define ANON_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS) #define ANON_HUGE_FLAGS (ANON_FLAGS | MAP_HUGETLB) -const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i) +const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(u32 i) { static const struct vm_mem_backing_src_alias aliases[] = { [VM_MEM_SRC_ANONYMOUS] = { @@ -317,9 +317,9 @@ const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i) #define MAP_HUGE_PAGE_SIZE(x) (1ULL << ((x >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK)) -size_t get_backing_src_pagesz(uint32_t i) +size_t get_backing_src_pagesz(u32 i) { - uint32_t flag = vm_mem_backing_src_alias(i)->flag; + u32 flag = vm_mem_backing_src_alias(i)->flag; switch (i) { case VM_MEM_SRC_ANONYMOUS: @@ -335,7 +335,7 @@ size_t get_backing_src_pagesz(uint32_t i) } } -bool is_backing_src_hugetlb(uint32_t i) +bool is_backing_src_hugetlb(u32 i) { return !!(vm_mem_backing_src_alias(i)->flag & MAP_HUGETLB); } diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 802543aa588c..dc31236b004b 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -525,7 +525,7 @@ void tdp_map(struct kvm_vm *vm, u64 nested_paddr, u64 paddr, */ void tdp_identity_map_default_memslots(struct kvm_vm *vm) { - uint32_t s, memslot = 0; + u32 s, memslot = 0; sparsebit_idx_t i, last; struct userspace_mem_region *region = memslot2region(vm, memslot); @@ -821,7 +821,7 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) vcpu_regs_set(vcpu, ®s); } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { struct kvm_mp_state mp_state; struct kvm_regs regs; @@ -872,7 +872,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) return vcpu; } -struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, u32 vcpu_id) { struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); @@ -907,9 +907,9 @@ const struct kvm_cpuid2 *kvm_get_supported_cpuid(void) return kvm_supported_cpuid; } -static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index, - uint8_t reg, uint8_t lo, uint8_t hi) +static u32 __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, + u32 function, u32 index, + uint8_t reg, uint8_t lo, uint8_t hi) { const struct kvm_cpuid_entry2 *entry; int i; @@ -936,8 +936,8 @@ bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, feature.reg, feature.bit, feature.bit); } -uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, - struct kvm_x86_cpu_property property) +u32 kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property) { return __kvm_cpu_has(cpuid, property.function, property.index, property.reg, property.lo_bit, property.hi_bit); @@ -1019,7 +1019,7 @@ void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid) void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_property property, - uint32_t value) + u32 value) { struct kvm_cpuid_entry2 *entry; @@ -1034,7 +1034,7 @@ void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value); } -void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function) +void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, u32 function) { struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function); @@ -1196,7 +1196,7 @@ const struct kvm_msr_list *kvm_get_feature_msr_index_list(void) return list; } -bool kvm_msr_is_in_save_restore_list(uint32_t msr_index) +bool kvm_msr_is_in_save_restore_list(u32 msr_index) { const struct kvm_msr_list *list = kvm_get_msr_index_list(); int i; @@ -1327,7 +1327,7 @@ void kvm_init_vm_address_properties(struct kvm_vm *vm) } const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index) + u32 function, u32 index) { int i; diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c index 555198348159..d82f677b7c5e 100644 --- a/tools/testing/selftests/kvm/lib/x86/sev.c +++ b/tools/testing/selftests/kvm/lib/x86/sev.c @@ -79,7 +79,7 @@ void snp_vm_init(struct kvm_vm *vm) vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); } -void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) +void sev_vm_launch(struct kvm_vm *vm, u32 policy) { struct kvm_sev_launch_start launch_start = { .policy = policy, @@ -158,7 +158,7 @@ void snp_vm_launch_finish(struct kvm_vm *vm) vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_FINISH, &launch_finish); } -struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, +struct kvm_vm *vm_sev_create_with_one_vcpu(u32 type, void *guest_code, struct kvm_vcpu **cpu) { struct vm_shape shape = { diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index 1d3d9bcfcf8a..73b7faa7f357 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -160,7 +160,7 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx) wrmsr(MSR_IA32_FEAT_CTL, feature_control | required); /* Enter VMX root operation. */ - *(uint32_t *)(vmx->vmxon) = vmcs_revision(); + *(u32 *)(vmx->vmxon) = vmcs_revision(); if (vmxon(vmx->vmxon_gpa)) return false; @@ -170,7 +170,7 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx) bool load_vmcs(struct vmx_pages *vmx) { /* Load a VMCS. */ - *(uint32_t *)(vmx->vmcs) = vmcs_revision(); + *(u32 *)(vmx->vmcs) = vmcs_revision(); if (vmclear(vmx->vmcs_gpa)) return false; @@ -178,7 +178,7 @@ bool load_vmcs(struct vmx_pages *vmx) return false; /* Setup shadow VMCS, do not load it yet. */ - *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; + *(u32 *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; if (vmclear(vmx->shadow_vmcs_gpa)) return false; @@ -200,7 +200,7 @@ bool ept_1g_pages_supported(void) */ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) { - uint32_t sec_exec_ctl = 0; + u32 sec_exec_ctl = 0; vmwrite(VIRTUAL_PROCESSOR_ID, 0); vmwrite(POSTED_INTR_NV, 0); @@ -259,7 +259,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) */ static inline void init_vmcs_host_state(void) { - uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); + u32 exit_controls = vmreadz(VM_EXIT_CONTROLS); vmwrite(HOST_ES_SELECTOR, get_es()); vmwrite(HOST_CS_SELECTOR, get_cs()); diff --git a/tools/testing/selftests/kvm/loongarch/arch_timer.c b/tools/testing/selftests/kvm/loongarch/arch_timer.c index f80e36962879..a7279ded8518 100644 --- a/tools/testing/selftests/kvm/loongarch/arch_timer.c +++ b/tools/testing/selftests/kvm/loongarch/arch_timer.c @@ -27,7 +27,7 @@ static void do_idle(void) static void guest_irq_handler(struct ex_regs *regs) { unsigned int intid; - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); u64 xcnt, val, cfg, xcnt_diff_us; struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; @@ -62,9 +62,9 @@ static void guest_irq_handler(struct ex_regs *regs) WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); } -static void guest_test_period_timer(uint32_t cpu) +static void guest_test_period_timer(u32 cpu) { - uint32_t irq_iter, config_iter; + u32 irq_iter, config_iter; u64 us; struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; @@ -86,9 +86,9 @@ static void guest_test_period_timer(uint32_t cpu) irq_iter); } -static void guest_test_oneshot_timer(uint32_t cpu) +static void guest_test_oneshot_timer(u32 cpu) { - uint32_t irq_iter, config_iter; + u32 irq_iter, config_iter; u64 us; struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; @@ -112,9 +112,9 @@ static void guest_test_oneshot_timer(uint32_t cpu) } } -static void guest_test_emulate_timer(uint32_t cpu) +static void guest_test_emulate_timer(u32 cpu) { - uint32_t config_iter; + u32 config_iter; u64 xcnt_diff_us, us; struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; @@ -136,9 +136,9 @@ static void guest_test_emulate_timer(uint32_t cpu) local_irq_enable(); } -static void guest_time_count_test(uint32_t cpu) +static void guest_time_count_test(u32 cpu) { - uint32_t config_iter; + u32 config_iter; unsigned long start, end, prev, us; /* Assuming that test case starts to run in 1 second */ @@ -165,7 +165,7 @@ static void guest_time_count_test(uint32_t cpu) static void guest_code(void) { - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); /* must run at first */ guest_time_count_test(cpu); diff --git a/tools/testing/selftests/kvm/loongarch/pmu_test.c b/tools/testing/selftests/kvm/loongarch/pmu_test.c index bf5249c87f75..ec3fefb9ea97 100644 --- a/tools/testing/selftests/kvm/loongarch/pmu_test.c +++ b/tools/testing/selftests/kvm/loongarch/pmu_test.c @@ -15,7 +15,7 @@ static int pmu_irq_count; /* Check PMU support */ static bool has_pmu_support(void) { - uint32_t cfg6; + u32 cfg6; /* Read CPUCFG6 to check PMU */ cfg6 = read_cpucfg(LOONGARCH_CPUCFG6); @@ -34,7 +34,7 @@ static bool has_pmu_support(void) /* Dump PMU capabilities */ static void dump_pmu_caps(void) { - uint32_t cfg6; + u32 cfg6; int nr_counters, counter_bits; cfg6 = read_cpucfg(LOONGARCH_CPUCFG6); @@ -51,7 +51,7 @@ static void dump_pmu_caps(void) static void guest_pmu_base_test(void) { int i; - uint32_t cfg6, pmnum; + u32 cfg6, pmnum; u64 cnt[4]; cfg6 = read_cpucfg(LOONGARCH_CPUCFG6); diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index bf62b522d32e..c2b36a4ac638 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -85,7 +85,7 @@ struct vm_data { struct kvm_vm *vm; struct kvm_vcpu *vcpu; pthread_t vcpu_thread; - uint32_t nslots; + u32 nslots; u64 npages; u64 pages_per_slot; void **hva_slots; @@ -95,7 +95,7 @@ struct vm_data { }; struct sync_area { - uint32_t guest_page_size; + u32 guest_page_size; atomic_bool start_flag; atomic_bool exit_flag; atomic_bool sync_flag; @@ -189,9 +189,9 @@ static void wait_for_vcpu(void) static void *vm_gpa2hva(struct vm_data *data, u64 gpa, u64 *rempages) { u64 gpage, pgoffs; - uint32_t slot, slotoffs; + u32 slot, slotoffs; void *base; - uint32_t guest_page_size = data->vm->page_size; + u32 guest_page_size = data->vm->page_size; TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate"); TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size, @@ -220,9 +220,9 @@ static void *vm_gpa2hva(struct vm_data *data, u64 gpa, u64 *rempages) return (uint8_t *)base + slotoffs * guest_page_size + pgoffs; } -static u64 vm_slot2gpa(struct vm_data *data, uint32_t slot) +static u64 vm_slot2gpa(struct vm_data *data, u32 slot) { - uint32_t guest_page_size = data->vm->page_size; + u32 guest_page_size = data->vm->page_size; TEST_ASSERT(slot < data->nslots, "Too high slot number"); @@ -243,7 +243,7 @@ static struct vm_data *alloc_vm(void) return data; } -static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size, +static bool check_slot_pages(u32 host_page_size, u32 guest_page_size, u64 pages_per_slot, u64 rempages) { if (!pages_per_slot) @@ -259,9 +259,9 @@ static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size, } -static u64 get_max_slots(struct vm_data *data, uint32_t host_page_size) +static u64 get_max_slots(struct vm_data *data, u32 host_page_size) { - uint32_t guest_page_size = data->vm->page_size; + u32 guest_page_size = data->vm->page_size; u64 mempages, pages_per_slot, rempages; u64 slots; @@ -287,7 +287,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, u64 *maxslots, { u64 mempages, rempages; u64 guest_addr; - uint32_t slot, host_page_size, guest_page_size; + u32 slot, host_page_size, guest_page_size; struct timespec tstart; struct sync_area *sync; @@ -448,7 +448,7 @@ static bool guest_perform_sync(void) static void guest_code_test_memslot_move(void) { struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; - uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); + u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr); GUEST_SYNC(0); @@ -477,7 +477,7 @@ static void guest_code_test_memslot_move(void) static void guest_code_test_memslot_map(void) { struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; - uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); + u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); GUEST_SYNC(0); @@ -544,7 +544,7 @@ static void guest_code_test_memslot_unmap(void) static void guest_code_test_memslot_rw(void) { struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; - uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); + u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); GUEST_SYNC(0); @@ -579,7 +579,7 @@ static bool test_memslot_move_prepare(struct vm_data *data, struct sync_area *sync, u64 *maxslots, bool isactive) { - uint32_t guest_page_size = data->vm->page_size; + u32 guest_page_size = data->vm->page_size; u64 movesrcgpa, movetestgpa; #ifdef __x86_64__ @@ -639,7 +639,7 @@ static void test_memslot_do_unmap(struct vm_data *data, u64 offsp, u64 count) { u64 gpa, ctr; - uint32_t guest_page_size = data->vm->page_size; + u32 guest_page_size = data->vm->page_size; for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) { u64 npages; @@ -665,7 +665,7 @@ static void test_memslot_map_unmap_check(struct vm_data *data, { u64 gpa; u64 *val; - uint32_t guest_page_size = data->vm->page_size; + u32 guest_page_size = data->vm->page_size; if (!map_unmap_verify) return; @@ -680,7 +680,7 @@ static void test_memslot_map_unmap_check(struct vm_data *data, static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) { - uint32_t guest_page_size = data->vm->page_size; + u32 guest_page_size = data->vm->page_size; u64 guest_pages = MEM_TEST_MAP_SIZE / guest_page_size; /* @@ -720,7 +720,7 @@ static void test_memslot_unmap_loop_common(struct vm_data *data, struct sync_area *sync, u64 chunk) { - uint32_t guest_page_size = data->vm->page_size; + u32 guest_page_size = data->vm->page_size; u64 guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size; u64 ctr; @@ -746,8 +746,8 @@ static void test_memslot_unmap_loop_common(struct vm_data *data, static void test_memslot_unmap_loop(struct vm_data *data, struct sync_area *sync) { - uint32_t host_page_size = getpagesize(); - uint32_t guest_page_size = data->vm->page_size; + u32 host_page_size = getpagesize(); + u32 guest_page_size = data->vm->page_size; u64 guest_chunk_pages = guest_page_size >= host_page_size ? 1 : host_page_size / guest_page_size; @@ -757,7 +757,7 @@ static void test_memslot_unmap_loop(struct vm_data *data, static void test_memslot_unmap_loop_chunked(struct vm_data *data, struct sync_area *sync) { - uint32_t guest_page_size = data->vm->page_size; + u32 guest_page_size = data->vm->page_size; u64 guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size; test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); @@ -766,7 +766,7 @@ static void test_memslot_unmap_loop_chunked(struct vm_data *data, static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync) { u64 gptr; - uint32_t guest_page_size = data->vm->page_size; + u32 guest_page_size = data->vm->page_size; for (gptr = MEM_TEST_GPA + guest_page_size / 2; gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) @@ -924,8 +924,8 @@ static void help(char *name, struct test_args *targs) static bool check_memory_sizes(void) { - uint32_t host_page_size = getpagesize(); - uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + u32 host_page_size = getpagesize(); + u32 guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; if (host_page_size > SZ_64K || guest_page_size > SZ_64K) { pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n", @@ -961,7 +961,7 @@ static bool check_memory_sizes(void) static bool parse_args(int argc, char *argv[], struct test_args *targs) { - uint32_t max_mem_slots; + u32 max_mem_slots; int opt; while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) { diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c index 1fa9800aa7df..bfdaaeed3a8c 100644 --- a/tools/testing/selftests/kvm/pre_fault_memory_test.c +++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c @@ -34,7 +34,7 @@ static void guest_code(u64 base_gva) struct slot_worker_data { struct kvm_vm *vm; u64 gpa; - uint32_t flags; + u32 flags; bool worker_ready; bool prefault_ready; bool recreate_slot; diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c index 99d87e6eb5aa..d67c918ee310 100644 --- a/tools/testing/selftests/kvm/riscv/arch_timer.c +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c @@ -19,7 +19,7 @@ static void guest_irq_handler(struct pt_regs *regs) { u64 xcnt, xcnt_diff_us, cmp; unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG; - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; timer_irq_disable(); @@ -40,7 +40,7 @@ static void guest_irq_handler(struct pt_regs *regs) static void guest_run(struct test_vcpu_shared_data *shared_data) { - uint32_t irq_iter, config_iter; + u32 irq_iter, config_iter; shared_data->nr_iter = 0; shared_data->guest_stage = 0; @@ -66,7 +66,7 @@ static void guest_run(struct test_vcpu_shared_data *shared_data) static void guest_code(void) { - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; timer_irq_disable(); diff --git a/tools/testing/selftests/kvm/s390/memop.c b/tools/testing/selftests/kvm/s390/memop.c index e8bda1ab7fb0..1cd7b8f81fff 100644 --- a/tools/testing/selftests/kvm/s390/memop.c +++ b/tools/testing/selftests/kvm/s390/memop.c @@ -42,11 +42,11 @@ struct mop_desc { unsigned int _set_flags : 1; unsigned int _sida_offset : 1; unsigned int _ar : 1; - uint32_t size; + u32 size; enum mop_target target; enum mop_access_mode mode; void *buf; - uint32_t sida_offset; + u32 sida_offset; void *old; uint8_t old_value[16]; bool *cmpxchg_success; @@ -296,7 +296,7 @@ static void prepare_mem12(void) TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!") static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu, - enum mop_target mop_target, uint32_t size, uint8_t key) + enum mop_target mop_target, u32 size, uint8_t key) { prepare_mem12(); CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, @@ -308,7 +308,7 @@ static void default_write_read(struct test_info copy_cpu, struct test_info mop_c } static void default_read(struct test_info copy_cpu, struct test_info mop_cpu, - enum mop_target mop_target, uint32_t size, uint8_t key) + enum mop_target mop_target, u32 size, uint8_t key) { prepare_mem12(); CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1)); @@ -487,7 +487,7 @@ static __uint128_t cut_to_size(int size, __uint128_t val) case 2: return (uint16_t)val; case 4: - return (uint32_t)val; + return (u32)val; case 8: return (u64)val; case 16: @@ -585,15 +585,15 @@ static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t switch (size) { case 4: { - uint32_t old = *old_addr; + u32 old = *old_addr; asm volatile ("cs %[old],%[new],%[address]" : [old] "+d" (old), - [address] "+Q" (*(uint32_t *)(target)) - : [new] "d" ((uint32_t)new) + [address] "+Q" (*(u32 *)(target)) + : [new] "d" ((u32)new) : "cc" ); - ret = old == (uint32_t)*old_addr; + ret = old == (u32)*old_addr; *old_addr = old; return ret; } diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 413281e277ac..59a6eae30946 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -345,8 +345,8 @@ static void test_zero_memory_regions(void) static void test_invalid_memory_region_flags(void) { - uint32_t supported_flags = KVM_MEM_LOG_DIRTY_PAGES; - const uint32_t v2_only_flags = KVM_MEM_GUEST_MEMFD; + u32 supported_flags = KVM_MEM_LOG_DIRTY_PAGES; + const u32 v2_only_flags = KVM_MEM_GUEST_MEMFD; struct kvm_vm *vm; int r, i; @@ -410,8 +410,8 @@ static void test_add_max_memory_regions(void) { int ret; struct kvm_vm *vm; - uint32_t max_mem_slots; - uint32_t slot; + u32 max_mem_slots; + u32 slot; void *mem, *mem_aligned, *mem_extra; size_t alignment = 1; diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index d0a41a2bcccb..85fabe262864 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -42,7 +42,7 @@ static void check_status(struct kvm_steal_time *st) static void guest_code(int cpu) { struct kvm_steal_time *st = st_gva[cpu]; - uint32_t version; + u32 version; GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((u64)st_gva[cpu] | KVM_MSR_ENABLED)); @@ -67,7 +67,7 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu) return kvm_cpu_has(X86_FEATURE_KVM_STEAL_TIME); } -static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) +static void steal_time_init(struct kvm_vcpu *vcpu, u32 i) { /* ST_GPA_BASE is identity mapped */ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); @@ -76,7 +76,7 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED); } -static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) +static void steal_time_dump(struct kvm_vm *vm, u32 vcpu_idx) { struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); @@ -118,12 +118,12 @@ static void check_steal_time_uapi(void) #define PV_TIME_ST 0xc5000021 struct st_time { - uint32_t rev; - uint32_t attr; + u32 rev; + u32 attr; u64 st_time; }; -static s64 smccc(uint32_t func, u64 arg) +static s64 smccc(u32 func, u64 arg) { struct arm_smccc_res res; @@ -175,7 +175,7 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu) return !__vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev); } -static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) +static void steal_time_init(struct kvm_vcpu *vcpu, u32 i) { struct kvm_vm *vm = vcpu->vm; u64 st_ipa; @@ -194,7 +194,7 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); } -static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) +static void steal_time_dump(struct kvm_vm *vm, u32 vcpu_idx) { struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); @@ -242,8 +242,8 @@ static void check_steal_time_uapi(void) static gpa_t st_gpa[NR_VCPUS]; struct sta_struct { - uint32_t sequence; - uint32_t flags; + u32 sequence; + u32 flags; u64 steal; uint8_t preempted; uint8_t pad[47]; @@ -272,7 +272,7 @@ static void check_status(struct sta_struct *st) static void guest_code(int cpu) { struct sta_struct *st = st_gva[cpu]; - uint32_t sequence; + u32 sequence; long out_val = 0; bool probe; @@ -305,7 +305,7 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu) return enabled; } -static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) +static void steal_time_init(struct kvm_vcpu *vcpu, u32 i) { /* ST_GPA_BASE is identity mapped */ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); @@ -314,7 +314,7 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) sync_global_to_guest(vcpu->vm, st_gpa[i]); } -static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) +static void steal_time_dump(struct kvm_vm *vm, u32 vcpu_idx) { struct sta_struct *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); int i; @@ -388,7 +388,7 @@ static void check_status(struct kvm_steal_time *st) static void guest_code(int cpu) { - uint32_t version; + u32 version; struct kvm_steal_time *st = st_gva[cpu]; memset(st, 0, sizeof(*st)); @@ -428,7 +428,7 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu) return val & BIT(KVM_FEATURE_STEAL_TIME); } -static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) +static void steal_time_init(struct kvm_vcpu *vcpu, u32 i) { int err; u64 st_gpa; @@ -451,7 +451,7 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) TEST_ASSERT(err == 0, "Fail to set PV stealtime GPA"); } -static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) +static void steal_time_dump(struct kvm_vm *vm, u32 vcpu_idx) { struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c index 7e3aab912082..9ecf7515442b 100644 --- a/tools/testing/selftests/kvm/x86/amx_test.c +++ b/tools/testing/selftests/kvm/x86/amx_test.c @@ -82,8 +82,8 @@ static inline void __tilerelease(void) static inline void __xsavec(struct xstate *xstate, u64 rfbm) { - uint32_t rfbm_lo = rfbm; - uint32_t rfbm_hi = rfbm >> 32; + u32 rfbm_lo = rfbm; + u32 rfbm_hi = rfbm >> 32; asm volatile("xsavec (%%rdi)" : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi) diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c index d3f21f2f5d28..620809cf35da 100644 --- a/tools/testing/selftests/kvm/x86/aperfmperf_test.c +++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c @@ -35,7 +35,7 @@ static int open_dev_msr(int cpu) return open_path_or_exit(path, O_RDONLY); } -static u64 read_dev_msr(int msr_fd, uint32_t msr) +static u64 read_dev_msr(int msr_fd, u32 msr) { u64 data; ssize_t rc; diff --git a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c index 81f76c7d5621..404f0028e110 100644 --- a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c +++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c @@ -19,8 +19,8 @@ * timer frequency. */ static const struct { - const uint32_t tdcr; - const uint32_t divide_count; + const u32 tdcr; + const u32 divide_count; } tdcrs[] = { {0x0, 2}, {0x1, 4}, @@ -42,12 +42,12 @@ static void apic_enable(void) xapic_enable(); } -static uint32_t apic_read_reg(unsigned int reg) +static u32 apic_read_reg(unsigned int reg) { return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg); } -static void apic_write_reg(unsigned int reg, uint32_t val) +static void apic_write_reg(unsigned int reg, u32 val) { if (is_x2apic) x2apic_write_reg(reg, val); @@ -58,9 +58,9 @@ static void apic_write_reg(unsigned int reg, uint32_t val) static void apic_guest_code(u64 apic_hz, u64 delay_ms) { u64 tsc_hz = guest_tsc_khz * 1000; - const uint32_t tmict = ~0u; + const u32 tmict = ~0u; u64 tsc0, tsc1, freq; - uint32_t tmcct; + u32 tmcct; int i; apic_enable(); diff --git a/tools/testing/selftests/kvm/x86/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c index 542a0eac0f32..0dfaf03cd0a0 100644 --- a/tools/testing/selftests/kvm/x86/debug_regs.c +++ b/tools/testing/selftests/kvm/x86/debug_regs.c @@ -16,7 +16,7 @@ #define IRQ_VECTOR 0xAA /* For testing data access debug BP */ -uint32_t guest_value; +u32 guest_value; extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start; diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c index 51416328cc69..a634bc281546 100644 --- a/tools/testing/selftests/kvm/x86/fastops_test.c +++ b/tools/testing/selftests/kvm/x86/fastops_test.c @@ -15,7 +15,7 @@ "pop %[flags]\n\t" #define flags_constraint(flags_val) [flags]"=r"(flags_val) -#define bt_constraint(__bt_val) [bt_val]"rm"((uint32_t)__bt_val) +#define bt_constraint(__bt_val) [bt_val]"rm"((u32)__bt_val) #define guest_execute_fastop_1(FEP, insn, __val, __flags) \ ({ \ @@ -187,7 +187,7 @@ static void guest_code(void) { guest_test_fastops(uint8_t, "b"); guest_test_fastops(uint16_t, "w"); - guest_test_fastops(uint32_t, "l"); + guest_test_fastops(u32, "l"); guest_test_fastops(u64, "q"); GUEST_DONE(); diff --git a/tools/testing/selftests/kvm/x86/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c index a0e54af60544..158550701771 100644 --- a/tools/testing/selftests/kvm/x86/feature_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/feature_msrs_test.c @@ -12,7 +12,7 @@ #include "kvm_util.h" #include "processor.h" -static bool is_kvm_controlled_msr(uint32_t msr) +static bool is_kvm_controlled_msr(u32 msr) { return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1; } @@ -21,7 +21,7 @@ static bool is_kvm_controlled_msr(uint32_t msr) * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true" * MSR, and doesn't allow setting the hidden version. */ -static bool is_hidden_vmx_msr(uint32_t msr) +static bool is_hidden_vmx_msr(u32 msr) { switch (msr) { case MSR_IA32_VMX_PINBASED_CTLS: @@ -34,12 +34,12 @@ static bool is_hidden_vmx_msr(uint32_t msr) } } -static bool is_quirked_msr(uint32_t msr) +static bool is_quirked_msr(u32 msr) { return msr != MSR_AMD64_DE_CFG; } -static void test_feature_msr(uint32_t msr) +static void test_feature_msr(u32 msr) { const u64 supported_mask = kvm_get_feature_msr(msr); u64 reset_value = is_quirked_msr(msr) ? supported_mask : 0; diff --git a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c index 2d1733f9303a..061d9e1f02c0 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c @@ -30,7 +30,7 @@ static void guest_nmi_handler(struct ex_regs *regs) { } -static inline void rdmsr_from_l2(uint32_t msr) +static inline void rdmsr_from_l2(u32 msr) { /* Currently, L1 doesn't preserve GPRs during vmexits. */ __asm__ __volatile__ ("rdmsr" : : "c"(msr) : diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 7bce2bcc3a73..80588b7ea259 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c @@ -22,7 +22,7 @@ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0) struct msr_data { - uint32_t idx; + u32 idx; bool fault_expected; bool write; u64 write_val; @@ -34,7 +34,7 @@ struct hcall_data { bool ud_expected; }; -static bool is_write_only_msr(uint32_t msr) +static bool is_write_only_msr(u32 msr) { return msr == HV_X64_MSR_EOI; } diff --git a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c index 54a1a6dad4d5..77b774b5041c 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c @@ -21,7 +21,7 @@ #define L2_GUEST_STACK_SIZE 256 /* Exit to L1 from L2 with RDMSR instruction */ -static inline void rdmsr_from_l2(uint32_t msr) +static inline void rdmsr_from_l2(u32 msr) { /* Currently, L1 doesn't preserve GPRs during vmexits. */ __asm__ __volatile__ ("rdmsr" : : "c"(msr) : diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c index e49ae65f8171..babf0f95165a 100644 --- a/tools/testing/selftests/kvm/x86/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c @@ -13,7 +13,7 @@ #include "processor.h" struct msr_data { - uint32_t idx; + u32 idx; const char *name; }; diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c index d398add21e4c..42fd24567e26 100644 --- a/tools/testing/selftests/kvm/x86/nested_emulation_test.c +++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c @@ -14,7 +14,7 @@ enum { struct emulated_instruction { const char name[32]; uint8_t opcode[15]; - uint32_t exit_reason[NR_VIRTUALIZATION_FLAVORS]; + u32 exit_reason[NR_VIRTUALIZATION_FLAVORS]; }; static struct emulated_instruction instructions[] = { @@ -36,9 +36,9 @@ static uint8_t kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kv static uint8_t l2_guest_code[sizeof(kvm_fep) + 15]; static uint8_t *l2_instruction = &l2_guest_code[sizeof(kvm_fep)]; -static uint32_t get_instruction_length(struct emulated_instruction *insn) +static u32 get_instruction_length(struct emulated_instruction *insn) { - uint32_t i; + u32 i; for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++) ; @@ -81,8 +81,8 @@ static void guest_code(void *test_data) for (i = 0; i < ARRAY_SIZE(instructions); i++) { struct emulated_instruction *insn = &instructions[i]; - uint32_t insn_len = get_instruction_length(insn); - uint32_t exit_insn_len; + u32 insn_len = get_instruction_length(insn); + u32 exit_insn_len; u32 exit_reason; /* diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c index 646cfb0022b3..186e980aa8ee 100644 --- a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c @@ -72,7 +72,7 @@ static void l2_ss_injected_tf_test(void) } static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector, - uint32_t error_code) + u32 error_code) { struct vmcb *vmcb = svm->vmcb; struct vmcb_control_area *ctrl = &vmcb->control; @@ -111,7 +111,7 @@ static void l1_svm_code(struct svm_test_data *svm) GUEST_DONE(); } -static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code) +static void vmx_run_l2(void *l2_code, int vector, u32 error_code) { GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code)); diff --git a/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c index a18b0cfd42e2..f0e4adac4751 100644 --- a/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c @@ -88,7 +88,7 @@ static void l1_guest_code(void *data) */ if (this_cpu_has(X86_FEATURE_VMX)) { struct vmx_pages *vmx_pages = data; - uint32_t control; + u32 control; GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); GUEST_ASSERT(load_vmcs(vmx_pages)); diff --git a/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c index b37b0fef7fde..190e93af20a1 100644 --- a/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c @@ -106,7 +106,7 @@ static void l1_svm_code(struct svm_test_data *svm) static void l1_vmx_code(struct vmx_pages *vmx_pages) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; + u32 control; /* check that L1's frequency looks alright before launching L2 */ check_tsc_freq(UCHECK_L1); diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c index 16fabcf1eabd..2a12c2d42697 100644 --- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c @@ -30,7 +30,7 @@ #define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS) /* Track which architectural events are supported by hardware. */ -static uint32_t hardware_pmu_arch_events; +static u32 hardware_pmu_arch_events; static uint8_t kvm_pmu_version; static bool kvm_has_perf_caps; @@ -153,7 +153,7 @@ static uint8_t guest_get_pmu_version(void) * Sanity check that in all cases, the event doesn't count when it's disabled, * and that KVM correctly emulates the write of an arbitrary value. */ -static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr) +static void guest_assert_event_count(uint8_t idx, u32 pmc, u32 pmc_msr) { u64 count; @@ -236,7 +236,7 @@ do { \ FEP "xor %%eax, %%eax\n\t" \ FEP "xor %%edx, %%edx\n\t" \ "wrmsr\n\t" \ - :: "a"((uint32_t)_value), "d"(_value >> 32), \ + :: "a"((u32)_value), "d"(_value >> 32), \ "c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \ ); \ } while (0) @@ -255,8 +255,8 @@ do { \ guest_assert_event_count(_idx, _pmc, _pmc_msr); \ } while (0) -static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr, - uint32_t ctrl_msr, u64 ctrl_msr_value) +static void __guest_test_arch_event(uint8_t idx, u32 pmc, u32 pmc_msr, + u32 ctrl_msr, u64 ctrl_msr_value) { GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); @@ -266,12 +266,12 @@ static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr, static void guest_test_arch_event(uint8_t idx) { - uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); - uint32_t pmu_version = guest_get_pmu_version(); + u32 nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); + u32 pmu_version = guest_get_pmu_version(); /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */ bool guest_has_perf_global_ctrl = pmu_version >= 2; struct kvm_x86_pmu_feature gp_event, fixed_event; - uint32_t base_pmc_msr; + u32 base_pmc_msr; unsigned int i; /* The host side shouldn't invoke this without a guest PMU. */ @@ -329,7 +329,7 @@ static void guest_test_arch_events(void) } static void test_arch_events(uint8_t pmu_version, u64 perf_capabilities, - uint8_t length, uint32_t unavailable_mask) + uint8_t length, u32 unavailable_mask) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -373,7 +373,7 @@ __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \ "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \ msr, expected, val); -static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success, +static void guest_test_rdpmc(u32 rdpmc_idx, bool expect_success, u64 expected_val) { uint8_t vector; @@ -393,8 +393,8 @@ static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success, GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val); } -static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters, - uint8_t nr_counters, uint32_t or_mask) +static void guest_rd_wr_counters(u32 base_msr, uint8_t nr_possible_counters, + uint8_t nr_counters, u32 or_mask) { const bool pmu_has_fast_mode = !guest_get_pmu_version(); uint8_t i; @@ -405,7 +405,7 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters * width of the counters. */ const u64 test_val = 0xffff; - const uint32_t msr = base_msr + i; + const u32 msr = base_msr + i; /* * Fixed counters are supported if the counter is less than the @@ -421,7 +421,7 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters const u64 expected_val = expect_success ? test_val : 0; const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 && msr != MSR_P6_PERFCTR1; - uint32_t rdpmc_idx; + u32 rdpmc_idx; uint8_t vector; u64 val; @@ -463,7 +463,7 @@ static void guest_test_gp_counters(void) { uint8_t pmu_version = guest_get_pmu_version(); uint8_t nr_gp_counters = 0; - uint32_t base_msr; + u32 base_msr; if (pmu_version) nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); @@ -563,7 +563,7 @@ static void guest_test_fixed_counters(void) static void test_fixed_counters(uint8_t pmu_version, u64 perf_capabilities, uint8_t nr_fixed_counters, - uint32_t supported_bitmask) + u32 supported_bitmask) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -588,7 +588,7 @@ static void test_intel_counters(void) uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); unsigned int i; uint8_t v, j; - uint32_t k; + u32 k; const u64 perf_caps[] = { 0, @@ -602,7 +602,7 @@ static void test_intel_counters(void) * as alternating bit sequencues, e.g. to detect if KVM is checking the * wrong bit(s). */ - const uint32_t unavailable_masks[] = { + const u32 unavailable_masks[] = { 0x0, 0xffffffffu, 0xaaaaaaaau, diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c index 88857670ab93..5d607b114aeb 100644 --- a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c @@ -75,7 +75,7 @@ static void guest_gp_handler(struct ex_regs *regs) * * Return on success. GUEST_SYNC(0) on error. */ -static void check_msr(uint32_t msr, u64 bits_to_flip) +static void check_msr(u32 msr, u64 bits_to_flip) { u64 v = rdmsr(msr) ^ bits_to_flip; @@ -89,7 +89,7 @@ static void check_msr(uint32_t msr, u64 bits_to_flip) GUEST_SYNC(-EIO); } -static void run_and_measure_loop(uint32_t msr_base) +static void run_and_measure_loop(u32 msr_base) { const u64 branches_retired = rdmsr(msr_base + 0); const u64 insn_retired = rdmsr(msr_base + 1); @@ -378,7 +378,7 @@ static bool use_amd_pmu(void) static bool supports_event_mem_inst_retired(void) { - uint32_t eax, ebx, ecx, edx; + u32 eax, ebx, ecx, edx; cpuid(1, &eax, &ebx, &ecx, &edx); if (x86_family(eax) == 0x6) { @@ -415,7 +415,7 @@ static bool supports_event_mem_inst_retired(void) #define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \ KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true) -static void masked_events_guest_test(uint32_t msr_base) +static void masked_events_guest_test(u32 msr_base) { /* * The actual value of the counters don't determine the outcome of @@ -499,7 +499,7 @@ struct masked_events_test { u64 amd_events[MAX_TEST_EVENTS]; u64 amd_event_end; const char *msg; - uint32_t flags; + u32 flags; }; /* @@ -669,7 +669,7 @@ static int set_pmu_event_filter(struct kvm_vcpu *vcpu, } static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, u64 event, - uint32_t flags, uint32_t action) + u32 flags, u32 action) { struct __kvm_pmu_event_filter f = { .nevents = 1, @@ -746,7 +746,7 @@ static void intel_run_fixed_counter_guest_code(uint8_t idx) } static u64 test_with_fixed_counter_filter(struct kvm_vcpu *vcpu, - uint32_t action, uint32_t bitmap) + u32 action, u32 bitmap) { struct __kvm_pmu_event_filter f = { .action = action, @@ -758,8 +758,8 @@ static u64 test_with_fixed_counter_filter(struct kvm_vcpu *vcpu, } static u64 test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu, - uint32_t action, - uint32_t bitmap) + u32 action, + u32 bitmap) { struct __kvm_pmu_event_filter f = base_event_filter; @@ -774,7 +774,7 @@ static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx, uint8_t nr_fixed_counters) { unsigned int i; - uint32_t bitmap; + u32 bitmap; u64 count; TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8, diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c index 2e3a68837d0e..0bf86d822ee0 100644 --- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c @@ -366,8 +366,8 @@ static void *__test_mem_conversions(void *__vcpu) } } -static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus, - uint32_t nr_memslots) +static void test_mem_conversions(enum vm_mem_backing_src_type src_type, u32 nr_vcpus, + u32 nr_memslots) { /* * Allocate enough memory so that each vCPU's chunk of memory can be @@ -450,8 +450,8 @@ static void usage(const char *cmd) int main(int argc, char *argv[]) { enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC; - uint32_t nr_memslots = 1; - uint32_t nr_vcpus = 1; + u32 nr_memslots = 1; + u32 nr_vcpus = 1; int opt; TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)); diff --git a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c index 925040f394de..10db9fe6d906 100644 --- a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c @@ -27,7 +27,7 @@ static u64 guest_repeatedly_read(void) return value; } -static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu) +static u32 run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu) { int r; @@ -50,7 +50,7 @@ static void test_private_access_memslot_deleted(void) struct kvm_vcpu *vcpu; pthread_t vm_thread; void *thread_return; - uint32_t exit_reason; + u32 exit_reason; vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu, guest_repeatedly_read); @@ -72,7 +72,7 @@ static void test_private_access_memslot_deleted(void) vm_mem_region_delete(vm, EXITS_TEST_SLOT); pthread_join(vm_thread, &thread_return); - exit_reason = (uint32_t)(u64)thread_return; + exit_reason = (u32)(u64)thread_return; TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT); TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE); @@ -86,7 +86,7 @@ static void test_private_access_memslot_not_private(void) { struct kvm_vm *vm; struct kvm_vcpu *vcpu; - uint32_t exit_reason; + u32 exit_reason; vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu, guest_repeatedly_read); diff --git a/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c index 49913784bc82..8e3898646c69 100644 --- a/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c +++ b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c @@ -86,11 +86,11 @@ static void run_vcpu(struct kvm_vcpu *vcpu) } } -static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id, +static struct kvm_vm *create_vm(u32 nr_vcpus, u32 bsp_vcpu_id, struct kvm_vcpu *vcpus[]) { struct kvm_vm *vm; - uint32_t i; + u32 i; vm = vm_create(nr_vcpus); @@ -104,7 +104,7 @@ static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id, return vm; } -static void run_vm_bsp(uint32_t bsp_vcpu_id) +static void run_vm_bsp(u32 bsp_vcpu_id) { struct kvm_vcpu *vcpus[2]; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c index eec093819ff3..8eeba2327c7c 100644 --- a/tools/testing/selftests/kvm/x86/sev_init2_tests.c +++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c @@ -94,7 +94,7 @@ void test_vm_types(void) "VM type is KVM_X86_SW_PROTECTED_VM"); } -void test_flags(uint32_t vm_type) +void test_flags(u32 vm_type) { int i; @@ -104,7 +104,7 @@ void test_flags(uint32_t vm_type) "invalid flag"); } -void test_features(uint32_t vm_type, u64 supported_features) +void test_features(u32 vm_type, u64 supported_features) { int i; diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index f9be10d9b92d..4e037795dc33 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -13,7 +13,7 @@ #include "linux/psp-sev.h" #include "sev.h" -static void guest_sev_test_msr(uint32_t msr) +static void guest_sev_test_msr(u32 msr) { u64 val = rdmsr(msr); @@ -104,7 +104,7 @@ static void compare_xsave(u8 *from_host, u8 *from_guest) abort(); } -static void test_sync_vmsa(uint32_t type, u64 policy) +static void test_sync_vmsa(u32 type, u64 policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -150,7 +150,7 @@ static void test_sync_vmsa(uint32_t type, u64 policy) kvm_vm_free(vm); } -static void test_sev(void *guest_code, uint32_t type, u64 policy) +static void test_sev(void *guest_code, u32 type, u64 policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -201,7 +201,7 @@ static void guest_shutdown_code(void) __asm__ __volatile__("ud2"); } -static void test_sev_shutdown(uint32_t type, u64 policy) +static void test_sev_shutdown(u32 type, u64 policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -218,7 +218,7 @@ static void test_sev_shutdown(uint32_t type, u64 policy) kvm_vm_free(vm); } -static void test_sev_smoke(void *guest, uint32_t type, u64 policy) +static void test_sev_smoke(void *guest, u32 type, u64 policy) { const u64 xf_mask = XFEATURE_MASK_X87_AVX; diff --git a/tools/testing/selftests/kvm/x86/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c index 27aae6c92a38..df1ec8209c76 100644 --- a/tools/testing/selftests/kvm/x86/ucna_injection_test.c +++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c @@ -251,7 +251,7 @@ static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p) vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps); } -static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid, +static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, u32 vcpuid, bool enable_cmci_p, void *guest_code) { struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code); diff --git a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c index b673c3450886..98b8d285dbb7 100644 --- a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c @@ -142,9 +142,9 @@ struct kvm_msr_filter no_filter_deny = { * Note: Force test_rdmsr() to not be inlined to prevent the labels, * rdmsr_start and rdmsr_end, from being defined multiple times. */ -static noinline u64 test_rdmsr(uint32_t msr) +static noinline u64 test_rdmsr(u32 msr) { - uint32_t a, d; + u32 a, d; guest_exception_count = 0; @@ -158,10 +158,10 @@ static noinline u64 test_rdmsr(uint32_t msr) * Note: Force test_wrmsr() to not be inlined to prevent the labels, * wrmsr_start and wrmsr_end, from being defined multiple times. */ -static noinline void test_wrmsr(uint32_t msr, u64 value) +static noinline void test_wrmsr(u32 msr, u64 value) { - uint32_t a = value; - uint32_t d = value >> 32; + u32 a = value; + u32 d = value >> 32; guest_exception_count = 0; @@ -176,9 +176,9 @@ extern char wrmsr_start, wrmsr_end; * Note: Force test_em_rdmsr() to not be inlined to prevent the labels, * rdmsr_start and rdmsr_end, from being defined multiple times. */ -static noinline u64 test_em_rdmsr(uint32_t msr) +static noinline u64 test_em_rdmsr(u32 msr) { - uint32_t a, d; + u32 a, d; guest_exception_count = 0; @@ -192,10 +192,10 @@ static noinline u64 test_em_rdmsr(uint32_t msr) * Note: Force test_em_wrmsr() to not be inlined to prevent the labels, * wrmsr_start and wrmsr_end, from being defined multiple times. */ -static noinline void test_em_wrmsr(uint32_t msr, u64 value) +static noinline void test_em_wrmsr(u32 msr, u64 value) { - uint32_t a = value; - uint32_t d = value >> 32; + u32 a = value; + u32 d = value >> 32; guest_exception_count = 0; @@ -391,7 +391,7 @@ static void check_for_guest_assert(struct kvm_vcpu *vcpu) } } -static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index) +static void process_rdmsr(struct kvm_vcpu *vcpu, u32 msr_index) { struct kvm_run *run = vcpu->run; @@ -423,7 +423,7 @@ static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index) } } -static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index) +static void process_wrmsr(struct kvm_vcpu *vcpu, u32 msr_index) { struct kvm_run *run = vcpu->run; @@ -489,14 +489,14 @@ static u64 process_ucall(struct kvm_vcpu *vcpu) } static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu, - uint32_t msr_index) + u32 msr_index) { vcpu_run(vcpu); process_rdmsr(vcpu, msr_index); } static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu, - uint32_t msr_index) + u32 msr_index) { vcpu_run(vcpu); process_wrmsr(vcpu, msr_index); diff --git a/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c index dc5c3d1db346..1720113eae79 100644 --- a/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c @@ -38,7 +38,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; + u32 control; GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); GUEST_ASSERT(load_vmcs(vmx_pages)); diff --git a/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c index 7f84cc92feaf..80a4fd1e5bbb 100644 --- a/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c @@ -33,7 +33,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; + u32 control; GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); GUEST_ASSERT(load_vmcs(vmx_pages)); diff --git a/tools/testing/selftests/kvm/x86/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c index d61c8c69ade3..c1e8632a1bb6 100644 --- a/tools/testing/selftests/kvm/x86/vmx_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c @@ -12,8 +12,7 @@ #include "kvm_util.h" #include "vmx.h" -static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, - u64 mask) +static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, u32 msr_index, u64 mask) { u64 val = vcpu_get_msr(vcpu, msr_index); u64 bit; @@ -26,8 +25,7 @@ static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, } } -static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, - u64 mask) +static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, u32 msr_index, u64 mask) { u64 val = vcpu_get_msr(vcpu, msr_index); u64 bit; @@ -40,7 +38,7 @@ static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, } } -static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index) +static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, u32 msr_index) { vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0)); vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32)); diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c index 97e07b7bc3dd..3df6df2a1b55 100644 --- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c @@ -52,16 +52,16 @@ static volatile u64 ipis_rcvd; /* Data struct shared between host main thread and vCPUs */ struct test_data_page { - uint32_t halter_apic_id; + u32 halter_apic_id; volatile u64 hlt_count; volatile u64 wake_count; u64 ipis_sent; u64 migrations_attempted; u64 migrations_completed; - uint32_t icr; - uint32_t icr2; - uint32_t halter_tpr; - uint32_t halter_ppr; + u32 icr; + u32 icr2; + u32 halter_tpr; + u32 halter_ppr; /* * Record local version register as a cross-check that APIC access @@ -69,7 +69,7 @@ struct test_data_page { * arch/x86/kvm/lapic.c). If test is failing, check that values match * to determine whether APIC access exits are working. */ - uint32_t halter_lvr; + u32 halter_lvr; }; struct thread_params { @@ -128,8 +128,8 @@ static void sender_guest_code(struct test_data_page *data) u64 last_wake_count; u64 last_hlt_count; u64 last_ipis_rcvd_count; - uint32_t icr_val; - uint32_t icr2_val; + u32 icr_val; + u32 icr2_val; u64 tsc_start; verify_apic_base_addr(); diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c index e71471ac5bd5..637bb90c1d93 100644 --- a/tools/testing/selftests/kvm/x86/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c @@ -144,7 +144,7 @@ static void test_icr(struct xapic_vcpu *x) static void __test_apic_id(struct kvm_vcpu *vcpu, u64 apic_base) { - uint32_t apic_id, expected; + u32 apic_id, expected; struct kvm_lapic_state xapic; vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base); @@ -170,7 +170,7 @@ static void __test_apic_id(struct kvm_vcpu *vcpu, u64 apic_base) */ static void test_apic_id(void) { - const uint32_t NR_VCPUS = 3; + const u32 NR_VCPUS = 3; struct kvm_vcpu *vcpus[NR_VCPUS]; u64 apic_base; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/xapic_tpr_test.c b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c index 14052e94d553..af1fe833ad4f 100644 --- a/tools/testing/selftests/kvm/x86/xapic_tpr_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c @@ -58,7 +58,7 @@ static void tpr_guest_irq_queue(void) if (is_x2apic) { x2apic_write_reg(APIC_SELF_IPI, IRQ_VECTOR); } else { - uint32_t icr, icr2; + u32 icr, icr2; icr = APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED | IRQ_VECTOR; @@ -71,7 +71,7 @@ static void tpr_guest_irq_queue(void) static uint8_t tpr_guest_tpr_get(void) { - uint32_t taskpri; + u32 taskpri; if (is_x2apic) taskpri = x2apic_read_reg(APIC_TASKPRI); @@ -83,7 +83,7 @@ static uint8_t tpr_guest_tpr_get(void) static uint8_t tpr_guest_ppr_get(void) { - uint32_t procpri; + u32 procpri; if (is_x2apic) procpri = x2apic_read_reg(APIC_PROCPRI); diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index 83aab75ac792..c6d00205b59d 100644 --- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c @@ -116,13 +116,13 @@ struct pvclock_wall_clock { } __attribute__((__packed__)); struct vcpu_runstate_info { - uint32_t state; + u32 state; u64 state_entry_time; u64 time[5]; /* Extra field for overrun check */ }; struct compat_vcpu_runstate_info { - uint32_t state; + u32 state; u64 state_entry_time; u64 time[5]; } __attribute__((__packed__)); @@ -145,7 +145,7 @@ struct shared_info { unsigned long evtchn_pending[64]; unsigned long evtchn_mask[64]; struct pvclock_wall_clock wc; - uint32_t wc_sec_hi; + u32 wc_sec_hi; /* arch_shared_info here */ }; -- cgit v1.2.3 From 7b609187684db646d4854ada6f7e19a6420b4621 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 20 Apr 2026 14:19:52 -0700 Subject: KVM: selftests: Use s32 instead of int32_t Use s32 instead of int32_t to make the KVM selftests code more concise and more similar to the kernel (since selftests are primarily developed by kernel developers). This commit was generated with the following command: git ls-files tools/testing/selftests/kvm | xargs sed -i 's/int32_t/s32/g' Then by manually adjusting whitespace to make checkpatch.pl happy. No functional change intended. Signed-off-by: David Matlack Link: https://patch.msgid.link/20260420212004.3938325-8-seanjc@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/arm64/arch_timer_edge_cases.c | 24 +++++++++++----------- .../selftests/kvm/include/arm64/arch_timer.h | 4 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index f8b183f13864..f7625eb711d6 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -25,8 +25,8 @@ /* Depends on counter width. */ static u64 CVAL_MAX; /* tval is a signed 32-bit int. */ -static const int32_t TVAL_MAX = INT32_MAX; -static const int32_t TVAL_MIN = INT32_MIN; +static const s32 TVAL_MAX = INT32_MAX; +static const s32 TVAL_MIN = INT32_MIN; /* After how much time we say there is no IRQ. */ static const u32 TIMEOUT_NO_IRQ_US = 50000; @@ -355,7 +355,7 @@ static void test_timer_cval(enum arch_timer timer, u64 cval, test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt); } -static void test_timer_tval(enum arch_timer timer, int32_t tval, +static void test_timer_tval(enum arch_timer timer, s32 tval, irq_wait_method_t wm, bool reset_state, u64 reset_cnt) { @@ -385,10 +385,10 @@ static void test_cval_no_irq(enum arch_timer timer, u64 cval, test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm); } -static void test_tval_no_irq(enum arch_timer timer, int32_t tval, u64 usec, +static void test_tval_no_irq(enum arch_timer timer, s32 tval, u64 usec, sleep_method_t wm) { - /* tval will be cast to an int32_t in test_xval_check_no_irq */ + /* tval will be cast to an s32 in test_xval_check_no_irq */ test_xval_check_no_irq(timer, (u64)tval, usec, TIMER_TVAL, wm); } @@ -463,7 +463,7 @@ static void test_timers_fired_multiple_times(enum arch_timer timer) * timeout for the wait: we use the wfi instruction. */ static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm, - int32_t delta_1_ms, int32_t delta_2_ms) + s32 delta_1_ms, s32 delta_2_ms) { local_irq_disable(); reset_timer_state(timer, DEF_CNT); @@ -504,7 +504,7 @@ static void test_reprogram_timers(enum arch_timer timer) static void test_basic_functionality(enum arch_timer timer) { - int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms); + s32 tval = (s32)msec_to_cycles(test_args.wait_ms); u64 cval = DEF_CNT + msec_to_cycles(test_args.wait_ms); int i; @@ -685,7 +685,7 @@ static void test_set_cnt_after_xval_no_irq(enum arch_timer timer, } static void test_set_cnt_after_tval(enum arch_timer timer, u64 cnt_1, - int32_t tval, u64 cnt_2, + s32 tval, u64 cnt_2, irq_wait_method_t wm) { test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL); @@ -699,7 +699,7 @@ static void test_set_cnt_after_cval(enum arch_timer timer, u64 cnt_1, } static void test_set_cnt_after_tval_no_irq(enum arch_timer timer, - u64 cnt_1, int32_t tval, + u64 cnt_1, s32 tval, u64 cnt_2, sleep_method_t wm) { test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm, @@ -718,7 +718,7 @@ static void test_set_cnt_after_cval_no_irq(enum arch_timer timer, static void test_move_counters_ahead_of_timers(enum arch_timer timer) { int i; - int32_t tval; + s32 tval; for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { irq_wait_method_t wm = irq_wait_method[i]; @@ -753,7 +753,7 @@ static void test_move_counters_behind_timers(enum arch_timer timer) static void test_timers_in_the_past(enum arch_timer timer) { - int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms); + s32 tval = -1 * (s32)msec_to_cycles(test_args.wait_ms); u64 cval; int i; @@ -789,7 +789,7 @@ static void test_timers_in_the_past(enum arch_timer timer) static void test_long_timer_delays(enum arch_timer timer) { - int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms); + s32 tval = (s32)msec_to_cycles(test_args.long_wait_ms); u64 cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms); int i; diff --git a/tools/testing/selftests/kvm/include/arm64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h index 4fe0e0d07584..a5836d4ab7ee 100644 --- a/tools/testing/selftests/kvm/include/arm64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h @@ -79,7 +79,7 @@ static inline u64 timer_get_cval(enum arch_timer timer) return 0; } -static inline void timer_set_tval(enum arch_timer timer, int32_t tval) +static inline void timer_set_tval(enum arch_timer timer, s32 tval) { switch (timer) { case VIRTUAL: @@ -95,7 +95,7 @@ static inline void timer_set_tval(enum arch_timer timer, int32_t tval) isb(); } -static inline int32_t timer_get_tval(enum arch_timer timer) +static inline s32 timer_get_tval(enum arch_timer timer) { isb(); switch (timer) { -- cgit v1.2.3 From 19d0914920042139097f74159d812a1584bdc5a4 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 20 Apr 2026 14:19:53 -0700 Subject: KVM: selftests: Use u16 instead of uint16_t Use u16 instead of uint16_t to make the KVM selftests code more concise and more similar to the kernel (since selftests are primarily developed by kernel developers). This commit was generated with the following command: git ls-files tools/testing/selftests/kvm | xargs sed -i 's/uint16_t/u16/g' Then by manually adjusting whitespace to make checkpatch.pl happy. No functional change intended. Signed-off-by: David Matlack Link: https://patch.msgid.link/20260420212004.3938325-9-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/arm64/page_fault_test.c | 2 +- tools/testing/selftests/kvm/include/kvm_util.h | 2 +- tools/testing/selftests/kvm/include/x86/evmcs.h | 2 +- .../testing/selftests/kvm/include/x86/processor.h | 58 +++++++++++----------- tools/testing/selftests/kvm/lib/guest_sprintf.c | 2 +- tools/testing/selftests/kvm/lib/x86/processor.c | 8 +-- tools/testing/selftests/kvm/lib/x86/ucall.c | 2 +- tools/testing/selftests/kvm/lib/x86/vmx.c | 2 +- tools/testing/selftests/kvm/s390/memop.c | 2 +- tools/testing/selftests/kvm/x86/fastops_test.c | 2 +- tools/testing/selftests/kvm/x86/sync_regs_test.c | 2 +- 11 files changed, 42 insertions(+), 42 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index cb52ac8aa0a5..b92a9614d7d2 100644 --- a/tools/testing/selftests/kvm/arm64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -148,7 +148,7 @@ static void guest_at(void) */ static void guest_dc_zva(void) { - uint16_t val; + u16 val; asm volatile("dc zva, %0" :: "r" (guest_test_memory)); dsb(ish); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index bdb91f627433..34c8a7d94997 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -216,7 +216,7 @@ struct vm_shape { u32 type; uint8_t mode; uint8_t pad0; - uint16_t pad1; + u16 pad1; }; kvm_static_assert(sizeof(struct vm_shape) == sizeof(u64)); diff --git a/tools/testing/selftests/kvm/include/x86/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h index 3b0f96b881f9..be79bda024bf 100644 --- a/tools/testing/selftests/kvm/include/x86/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86/evmcs.h @@ -10,7 +10,7 @@ #include "hyperv.h" #include "vmx.h" -#define u16 uint16_t +#define u16 u16 #define u32 u32 #define u64 u64 diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 3898665ad2e9..8700d37a5727 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -399,8 +399,8 @@ struct gpr64_regs { }; struct desc64 { - uint16_t limit0; - uint16_t base0; + u16 limit0; + u16 base0; unsigned base1:8, type:4, s:1, dpl:2, p:1; unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; u32 base3; @@ -408,7 +408,7 @@ struct desc64 { } __attribute__((packed)); struct desc_ptr { - uint16_t size; + u16 size; u64 address; } __attribute__((packed)); @@ -476,9 +476,9 @@ static inline void wrmsr(u32 msr, u64 value) } -static inline uint16_t inw(uint16_t port) +static inline u16 inw(u16 port) { - uint16_t tmp; + u16 tmp; __asm__ __volatile__("in %%dx, %%ax" : /* output */ "=a" (tmp) @@ -487,63 +487,63 @@ static inline uint16_t inw(uint16_t port) return tmp; } -static inline uint16_t get_es(void) +static inline u16 get_es(void) { - uint16_t es; + u16 es; __asm__ __volatile__("mov %%es, %[es]" : /* output */ [es]"=rm"(es)); return es; } -static inline uint16_t get_cs(void) +static inline u16 get_cs(void) { - uint16_t cs; + u16 cs; __asm__ __volatile__("mov %%cs, %[cs]" : /* output */ [cs]"=rm"(cs)); return cs; } -static inline uint16_t get_ss(void) +static inline u16 get_ss(void) { - uint16_t ss; + u16 ss; __asm__ __volatile__("mov %%ss, %[ss]" : /* output */ [ss]"=rm"(ss)); return ss; } -static inline uint16_t get_ds(void) +static inline u16 get_ds(void) { - uint16_t ds; + u16 ds; __asm__ __volatile__("mov %%ds, %[ds]" : /* output */ [ds]"=rm"(ds)); return ds; } -static inline uint16_t get_fs(void) +static inline u16 get_fs(void) { - uint16_t fs; + u16 fs; __asm__ __volatile__("mov %%fs, %[fs]" : /* output */ [fs]"=rm"(fs)); return fs; } -static inline uint16_t get_gs(void) +static inline u16 get_gs(void) { - uint16_t gs; + u16 gs; __asm__ __volatile__("mov %%gs, %[gs]" : /* output */ [gs]"=rm"(gs)); return gs; } -static inline uint16_t get_tr(void) +static inline u16 get_tr(void) { - uint16_t tr; + u16 tr; __asm__ __volatile__("str %[tr]" : /* output */ [tr]"=rm"(tr)); @@ -651,7 +651,7 @@ static inline struct desc_ptr get_idt(void) return idt; } -static inline void outl(uint16_t port, u32 value) +static inline void outl(u16 port, u32 value) { __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value)); } @@ -1194,15 +1194,15 @@ struct ex_regs { }; struct idt_entry { - uint16_t offset0; - uint16_t selector; - uint16_t ist : 3; - uint16_t : 5; - uint16_t type : 4; - uint16_t : 1; - uint16_t dpl : 2; - uint16_t p : 1; - uint16_t offset1; + u16 offset0; + u16 selector; + u16 ist : 3; + u16 : 5; + u16 type : 4; + u16 : 1; + u16 dpl : 2; + u16 p : 1; + u16 offset1; u32 offset2; u32 reserved; }; diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c index 551ad6c658aa..8d60aa81e27e 100644 --- a/tools/testing/selftests/kvm/lib/guest_sprintf.c +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -286,7 +286,7 @@ repeat: if (qualifier == 'l') num = va_arg(args, u64); else if (qualifier == 'h') { - num = (uint16_t)va_arg(args, int); + num = (u16)va_arg(args, int); if (flags & SIGN) num = (int16_t)num; } else if (flags & SIGN) diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index dc31236b004b..8e6393384fa4 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -424,7 +424,7 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) "addr w exec dirty\n", indent, ""); pml4e_start = (u64 *)addr_gpa2hva(vm, mmu->pgd); - for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { + for (u16 n1 = 0; n1 <= 0x1ffu; n1++) { pml4e = &pml4e_start[n1]; if (!is_present_pte(mmu, pml4e)) continue; @@ -436,7 +436,7 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) is_writable_pte(mmu, pml4e), is_nx_pte(mmu, pml4e)); pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK); - for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { + for (u16 n2 = 0; n2 <= 0x1ffu; n2++) { pdpe = &pdpe_start[n2]; if (!is_present_pte(mmu, pdpe)) continue; @@ -449,7 +449,7 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) is_nx_pte(mmu, pdpe)); pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK); - for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { + for (u16 n3 = 0; n3 <= 0x1ffu; n3++) { pde = &pde_start[n3]; if (!is_present_pte(mmu, pde)) continue; @@ -461,7 +461,7 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) is_nx_pte(mmu, pde)); pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK); - for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { + for (u16 n4 = 0; n4 <= 0x1ffu; n4++) { pte = &pte_start[n4]; if (!is_present_pte(mmu, pte)) continue; diff --git a/tools/testing/selftests/kvm/lib/x86/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c index 1af2a6880cdf..e7dd5791959b 100644 --- a/tools/testing/selftests/kvm/lib/x86/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86/ucall.c @@ -6,7 +6,7 @@ */ #include "kvm_util.h" -#define UCALL_PIO_PORT ((uint16_t)0x1000) +#define UCALL_PIO_PORT ((u16)0x1000) void ucall_arch_do_ucall(gva_t uc) { diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index 73b7faa7f357..b2f83c3f7f16 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -27,7 +27,7 @@ struct hv_vp_assist_page *current_vp_assist; int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) { - uint16_t evmcs_ver; + u16 evmcs_ver; vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, (unsigned long)&evmcs_ver); diff --git a/tools/testing/selftests/kvm/s390/memop.c b/tools/testing/selftests/kvm/s390/memop.c index 1cd7b8f81fff..aa92fdf0664d 100644 --- a/tools/testing/selftests/kvm/s390/memop.c +++ b/tools/testing/selftests/kvm/s390/memop.c @@ -485,7 +485,7 @@ static __uint128_t cut_to_size(int size, __uint128_t val) case 1: return (uint8_t)val; case 2: - return (uint16_t)val; + return (u16)val; case 4: return (u32)val; case 8: diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c index a634bc281546..721f56d38f49 100644 --- a/tools/testing/selftests/kvm/x86/fastops_test.c +++ b/tools/testing/selftests/kvm/x86/fastops_test.c @@ -186,7 +186,7 @@ if (sizeof(type_t) != 1) { \ static void guest_code(void) { guest_test_fastops(uint8_t, "b"); - guest_test_fastops(uint16_t, "w"); + guest_test_fastops(u16, "w"); guest_test_fastops(u32, "l"); guest_test_fastops(u64, "q"); diff --git a/tools/testing/selftests/kvm/x86/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c index 8fa3948b0170..e0c52321f87c 100644 --- a/tools/testing/selftests/kvm/x86/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86/sync_regs_test.c @@ -20,7 +20,7 @@ #include "kvm_util.h" #include "processor.h" -#define UCALL_PIO_PORT ((uint16_t)0x1000) +#define UCALL_PIO_PORT ((u16)0x1000) struct ucall uc_none = { .cmd = UCALL_NONE, -- cgit v1.2.3 From 2540ebd60349b7c0194abdd6f13c1ab6db3b9909 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 20 Apr 2026 14:19:54 -0700 Subject: KVM: selftests: Use s16 instead of int16_t Use s16 instead of int16_t to make the KVM selftests code more concise and more similar to the kernel (since selftests are primarily developed by kernel developers). This commit was generated with the following command: git ls-files tools/testing/selftests/kvm | xargs sed -i 's/int16_t/s16/g' Then by manually adjusting whitespace to make checkpatch.pl happy. No functional change intended. Signed-off-by: David Matlack Link: https://patch.msgid.link/20260420212004.3938325-10-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/guest_sprintf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c index 8d60aa81e27e..2a3ab9c168f0 100644 --- a/tools/testing/selftests/kvm/lib/guest_sprintf.c +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -288,7 +288,7 @@ repeat: else if (qualifier == 'h') { num = (u16)va_arg(args, int); if (flags & SIGN) - num = (int16_t)num; + num = (s16)num; } else if (flags & SIGN) num = va_arg(args, int); else -- cgit v1.2.3 From 6ec982b5a2c7c9f0f956fd955416ac11f52bf50a Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 20 Apr 2026 14:19:55 -0700 Subject: KVM: selftests: Use u8 instead of uint8_t Use u8 instead of uint8_t to make the KVM selftests code more concise and more similar to the kernel (since selftests are primarily developed by kernel developers). This commit was generated with the following command: git ls-files tools/testing/selftests/kvm | xargs sed -i 's/uint8_t/u8/g' Then by manually adjusting whitespace to make checkpatch.pl happy. No functional change intended. Signed-off-by: David Matlack Link: https://patch.msgid.link/20260420212004.3938325-11-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/arm64/debug-exceptions.c | 18 +++---- tools/testing/selftests/kvm/arm64/set_id_regs.c | 6 +-- .../selftests/kvm/arm64/vpmu_counter_access.c | 2 +- tools/testing/selftests/kvm/coalesced_io_test.c | 2 +- tools/testing/selftests/kvm/get-reg-list.c | 2 +- tools/testing/selftests/kvm/guest_memfd_test.c | 4 +- tools/testing/selftests/kvm/include/kvm_util.h | 14 ++--- tools/testing/selftests/kvm/include/test_util.h | 2 +- tools/testing/selftests/kvm/include/x86/apic.h | 6 +-- tools/testing/selftests/kvm/include/x86/hyperv.h | 10 ++-- .../testing/selftests/kvm/include/x86/processor.h | 27 +++++----- tools/testing/selftests/kvm/include/x86/sev.h | 6 +-- tools/testing/selftests/kvm/include/x86/vmx.h | 12 ++--- tools/testing/selftests/kvm/lib/arm64/processor.c | 8 +-- tools/testing/selftests/kvm/lib/guest_sprintf.c | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 2 +- .../selftests/kvm/lib/loongarch/processor.c | 6 +-- tools/testing/selftests/kvm/lib/riscv/processor.c | 6 +-- tools/testing/selftests/kvm/lib/s390/processor.c | 8 +-- tools/testing/selftests/kvm/lib/sparsebit.c | 2 +- tools/testing/selftests/kvm/lib/x86/processor.c | 16 +++--- tools/testing/selftests/kvm/lib/x86/sev.c | 6 +-- tools/testing/selftests/kvm/memslot_perf_test.c | 2 +- tools/testing/selftests/kvm/mmu_stress_test.c | 2 +- tools/testing/selftests/kvm/s390/memop.c | 28 +++++----- tools/testing/selftests/kvm/s390/resets.c | 2 +- .../selftests/kvm/s390/shared_zeropage_test.c | 2 +- tools/testing/selftests/kvm/s390/tprot.c | 12 ++--- .../testing/selftests/kvm/set_memory_region_test.c | 2 +- tools/testing/selftests/kvm/steal_time.c | 4 +- tools/testing/selftests/kvm/x86/aperfmperf_test.c | 2 +- .../selftests/kvm/x86/evmcs_smm_controls_test.c | 2 +- tools/testing/selftests/kvm/x86/fastops_test.c | 8 +-- .../testing/selftests/kvm/x86/fix_hypercall_test.c | 12 ++--- tools/testing/selftests/kvm/x86/flds_emulation.h | 2 +- tools/testing/selftests/kvm/x86/hyperv_features.c | 4 +- tools/testing/selftests/kvm/x86/kvm_pv_test.c | 2 +- .../selftests/kvm/x86/nested_emulation_test.c | 8 +-- .../testing/selftests/kvm/x86/platform_info_test.c | 2 +- .../testing/selftests/kvm/x86/pmu_counters_test.c | 61 +++++++++++----------- .../selftests/kvm/x86/pmu_event_filter_test.c | 12 ++--- .../kvm/x86/private_mem_conversions_test.c | 24 ++++----- tools/testing/selftests/kvm/x86/smm_test.c | 2 +- tools/testing/selftests/kvm/x86/state_test.c | 4 +- .../testing/selftests/kvm/x86/userspace_io_test.c | 4 +- .../selftests/kvm/x86/userspace_msr_exit_test.c | 14 ++--- .../testing/selftests/kvm/x86/vmx_pmu_caps_test.c | 2 +- tools/testing/selftests/kvm/x86/xapic_tpr_test.c | 16 +++--- tools/testing/selftests/kvm/x86/xen_shinfo_test.c | 4 +- 49 files changed, 201 insertions(+), 205 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index 5931915ea00a..3eb4b1b6682d 100644 --- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -102,7 +102,7 @@ GEN_DEBUG_WRITE_REG(dbgwvr) static void reset_debug_state(void) { - uint8_t brps, wrps, i; + u8 brps, wrps, i; u64 dfr0; asm volatile("msr daifset, #8"); @@ -149,7 +149,7 @@ static void enable_monitor_debug_exceptions(void) isb(); } -static void install_wp(uint8_t wpn, u64 addr) +static void install_wp(u8 wpn, u64 addr) { u32 wcr; @@ -162,7 +162,7 @@ static void install_wp(uint8_t wpn, u64 addr) enable_monitor_debug_exceptions(); } -static void install_hw_bp(uint8_t bpn, u64 addr) +static void install_hw_bp(u8 bpn, u64 addr) { u32 bcr; @@ -174,8 +174,7 @@ static void install_hw_bp(uint8_t bpn, u64 addr) enable_monitor_debug_exceptions(); } -static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, u64 addr, - u64 ctx) +static void install_wp_ctx(u8 addr_wp, u8 ctx_bp, u64 addr, u64 ctx) { u32 wcr; u64 ctx_bcr; @@ -196,8 +195,7 @@ static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, u64 addr, enable_monitor_debug_exceptions(); } -void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, u64 addr, - u64 ctx) +void install_hw_bp_ctx(u8 addr_bp, u8 ctx_bp, u64 addr, u64 ctx) { u32 addr_bcr, ctx_bcr; @@ -234,7 +232,7 @@ static void install_ss(void) static volatile char write_data; -static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) +static void guest_code(u8 bpn, u8 wpn, u8 ctx_bpn) { u64 ctx = 0xabcdef; /* a random context number */ @@ -421,7 +419,7 @@ static int debug_version(u64 id_aa64dfr0) return FIELD_GET(ID_AA64DFR0_EL1_DebugVer, id_aa64dfr0); } -static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) +static void test_guest_debug_exceptions(u8 bpn, u8 wpn, u8 ctx_bpn) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -535,7 +533,7 @@ void test_single_step_from_userspace(int test_cnt) */ void test_guest_debug_exceptions_all(u64 aa64dfr0) { - uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base; + u8 brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base; int b, w, c; /* Number of breakpoints */ diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 8bf9c717b698..8bb53dd4f321 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -30,7 +30,7 @@ struct reg_ftr_bits { char *name; bool sign; enum ftr_type type; - uint8_t shift; + u8 shift; u64 mask; /* * For FTR_EXACT, safe_val is used as the exact safe value. @@ -384,7 +384,7 @@ u64 get_invalid_value(const struct reg_ftr_bits *ftr_bits, u64 ftr) static u64 test_reg_set_success(struct kvm_vcpu *vcpu, u64 reg, const struct reg_ftr_bits *ftr_bits) { - uint8_t shift = ftr_bits->shift; + u8 shift = ftr_bits->shift; u64 mask = ftr_bits->mask; u64 val, new_val, ftr; @@ -407,7 +407,7 @@ static u64 test_reg_set_success(struct kvm_vcpu *vcpu, u64 reg, static void test_reg_set_fail(struct kvm_vcpu *vcpu, u64 reg, const struct reg_ftr_bits *ftr_bits) { - uint8_t shift = ftr_bits->shift; + u8 shift = ftr_bits->shift; u64 mask = ftr_bits->mask; u64 val, old_val, ftr; int r; diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c index 4ceab0760447..22223395969e 100644 --- a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c @@ -402,7 +402,7 @@ static void guest_code(u64 expected_pmcr_n) static void create_vpmu_vm(void *guest_code) { struct kvm_vcpu_init init; - uint8_t pmuver, ec; + u8 pmuver, ec; u64 dfr0, irq = 23; struct kvm_device_attr irq_attr = { .group = KVM_ARM_VCPU_PMU_V3_CTRL, diff --git a/tools/testing/selftests/kvm/coalesced_io_test.c b/tools/testing/selftests/kvm/coalesced_io_test.c index f5ab412d2042..df4ed5e3877c 100644 --- a/tools/testing/selftests/kvm/coalesced_io_test.c +++ b/tools/testing/selftests/kvm/coalesced_io_test.c @@ -23,7 +23,7 @@ struct kvm_coalesced_io { * amount of #ifdeffery and complexity, without having to sacrifice * verbose error messages. */ - uint8_t pio_port; + u8 pio_port; }; static struct kvm_coalesced_io kvm_builtin_io_ring; diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c index f4644c9d2d3b..216f10644c1a 100644 --- a/tools/testing/selftests/kvm/get-reg-list.c +++ b/tools/testing/selftests/kvm/get-reg-list.c @@ -216,7 +216,7 @@ static void run_test(struct vcpu_reg_list *c) * since we don't know the capabilities of any new registers. */ for_each_present_blessed_reg(i) { - uint8_t addr[2048 / 8]; + u8 addr[2048 / 8]; struct kvm_one_reg reg = { .id = reg_list->reg[i], .addr = (__u64)&addr, diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index ad17ea62555f..9cbd3ad7f44a 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -470,7 +470,7 @@ static void test_guest_memfd(unsigned long vm_type) kvm_vm_free(vm); } -static void guest_code(uint8_t *mem, u64 size) +static void guest_code(u8 *mem, u64 size) { size_t i; @@ -494,7 +494,7 @@ static void test_guest_memfd_guest(void) struct kvm_vcpu *vcpu; struct kvm_vm *vm; - uint8_t *mem; + u8 *mem; size_t size; int fd, i; diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 34c8a7d94997..676e3ccb1462 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -214,8 +214,8 @@ enum vm_guest_mode { struct vm_shape { u32 type; - uint8_t mode; - uint8_t pad0; + u8 mode; + u8 pad0; u16 pad1; }; @@ -475,7 +475,7 @@ void kvm_vm_release(struct kvm_vm *vmp); void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); int kvm_memfd_alloc(size_t size, bool hugepages); -void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); +void vm_dump(FILE *stream, struct kvm_vm *vm, u8 indent); static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) { @@ -1155,10 +1155,10 @@ vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) void assert_on_unhandled_exception(struct kvm_vcpu *vcpu); void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, - uint8_t indent); + u8 indent); static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu, - uint8_t indent) + u8 indent) { vcpu_arch_dump(stream, vcpu, indent); } @@ -1263,9 +1263,9 @@ static inline gpa_t addr_gva2gpa(struct kvm_vm *vm, gva_t gva) * Dumps to the FILE stream given by @stream, the contents of all the * virtual translation tables for the VM given by @vm. */ -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent); -static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +static inline void virt_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { virt_arch_dump(stream, vm, indent); } diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index fb24347c6e6c..d9b433b834f1 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -119,7 +119,7 @@ struct guest_random_state new_guest_random_state(u32 seed); u32 guest_random_u32(struct guest_random_state *state); static inline bool __guest_random_bool(struct guest_random_state *state, - uint8_t percent) + u8 percent) { return (guest_random_u32(state) % 100) < percent; } diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h index 74eaa3bd335d..31887bdc3d6c 100644 --- a/tools/testing/selftests/kvm/include/x86/apic.h +++ b/tools/testing/selftests/kvm/include/x86/apic.h @@ -99,14 +99,14 @@ static inline u64 x2apic_read_reg(unsigned int reg) return rdmsr(APIC_BASE_MSR + (reg >> 4)); } -static inline uint8_t x2apic_write_reg_safe(unsigned int reg, u64 value) +static inline u8 x2apic_write_reg_safe(unsigned int reg, u64 value) { return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value); } static inline void x2apic_write_reg(unsigned int reg, u64 value) { - uint8_t fault = x2apic_write_reg_safe(reg, value); + u8 fault = x2apic_write_reg_safe(reg, value); __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n", fault, APIC_BASE_MSR + (reg >> 4), value); @@ -114,7 +114,7 @@ static inline void x2apic_write_reg(unsigned int reg, u64 value) static inline void x2apic_write_reg_fault(unsigned int reg, u64 value) { - uint8_t fault = x2apic_write_reg_safe(reg, value); + u8 fault = x2apic_write_reg_safe(reg, value); __GUEST_ASSERT(fault == GP_VECTOR, "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n", diff --git a/tools/testing/selftests/kvm/include/x86/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h index 2add2123e37b..78003f5a22f3 100644 --- a/tools/testing/selftests/kvm/include/x86/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86/hyperv.h @@ -254,12 +254,12 @@ * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status' * is set to the hypercall status (if no exception occurred). */ -static inline uint8_t __hyperv_hypercall(u64 control, gva_t input_address, - gva_t output_address, - u64 *hv_status) +static inline u8 __hyperv_hypercall(u64 control, gva_t input_address, + gva_t output_address, + u64 *hv_status) { u64 error_code; - uint8_t vector; + u8 vector; /* Note both the hypercall and the "asm safe" clobber r9-r11. */ asm volatile("mov %[output_address], %%r8\n\t" @@ -278,7 +278,7 @@ static inline void hyperv_hypercall(u64 control, gva_t input_address, gva_t output_address) { u64 hv_status; - uint8_t vector; + u8 vector; vector = __hyperv_hypercall(control, input_address, output_address, &hv_status); diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 8700d37a5727..4efa6c942192 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -724,8 +724,7 @@ static inline bool this_cpu_is_hygon(void) return this_cpu_vendor_string_is("HygonGenuine"); } -static inline u32 __this_cpu_has(u32 function, u32 index, - uint8_t reg, uint8_t lo, uint8_t hi) +static inline u32 __this_cpu_has(u32 function, u32 index, u8 reg, u8 lo, u8 hi) { u32 gprs[4]; @@ -1105,7 +1104,7 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_property property, u32 value); -void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr); +void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, u8 maxphyaddr); void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, u32 function); @@ -1262,8 +1261,8 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, #define kvm_asm_safe(insn, inputs...) \ ({ \ - u64 ign_error_code; \ - uint8_t vector; \ + u64 ign_error_code; \ + u8 vector; \ \ asm volatile(KVM_ASM_SAFE(insn) \ : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ @@ -1274,7 +1273,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, #define kvm_asm_safe_ec(insn, error_code, inputs...) \ ({ \ - uint8_t vector; \ + u8 vector; \ \ asm volatile(KVM_ASM_SAFE(insn) \ : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ @@ -1285,8 +1284,8 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, #define kvm_asm_safe_fep(insn, inputs...) \ ({ \ - u64 ign_error_code; \ - uint8_t vector; \ + u64 ign_error_code; \ + u8 vector; \ \ asm volatile(KVM_ASM_SAFE_FEP(insn) \ : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ @@ -1297,7 +1296,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, #define kvm_asm_safe_ec_fep(insn, error_code, inputs...) \ ({ \ - uint8_t vector; \ + u8 vector; \ \ asm volatile(KVM_ASM_SAFE_FEP(insn) \ : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ @@ -1307,10 +1306,10 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, }) #define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \ -static inline uint8_t insn##_safe ##_fep(u32 idx, u64 *val) \ +static inline u8 insn##_safe ##_fep(u32 idx, u64 *val) \ { \ - u64 error_code; \ - uint8_t vector; \ + u64 error_code; \ + u8 vector; \ u32 a, d; \ \ asm volatile(KVM_ASM_SAFE##_FEP(#insn) \ @@ -1335,12 +1334,12 @@ BUILD_READ_U64_SAFE_HELPERS(rdmsr) BUILD_READ_U64_SAFE_HELPERS(rdpmc) BUILD_READ_U64_SAFE_HELPERS(xgetbv) -static inline uint8_t wrmsr_safe(u32 msr, u64 val) +static inline u8 wrmsr_safe(u32 msr, u64 val) { return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr)); } -static inline uint8_t xsetbv_safe(u32 index, u64 value) +static inline u8 xsetbv_safe(u32 index, u64 value) { u32 eax = value; u32 edx = value >> 32; diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h index 4f91c1179416..1af44c151d60 100644 --- a/tools/testing/selftests/kvm/include/x86/sev.h +++ b/tools/testing/selftests/kvm/include/x86/sev.h @@ -47,7 +47,7 @@ static inline bool is_sev_vm(struct kvm_vm *vm) } void sev_vm_launch(struct kvm_vm *vm, u32 policy); -void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement); +void sev_vm_launch_measure(struct kvm_vm *vm, u8 *measurement); void sev_vm_launch_finish(struct kvm_vm *vm); void snp_vm_launch_start(struct kvm_vm *vm, u64 policy); void snp_vm_launch_update(struct kvm_vm *vm); @@ -55,7 +55,7 @@ void snp_vm_launch_finish(struct kvm_vm *vm); struct kvm_vm *vm_sev_create_with_one_vcpu(u32 type, void *guest_code, struct kvm_vcpu **cpu); -void vm_sev_launch(struct kvm_vm *vm, u64 policy, uint8_t *measurement); +void vm_sev_launch(struct kvm_vm *vm, u64 policy, u8 *measurement); kvm_static_assert(SEV_RET_SUCCESS == 0); @@ -132,7 +132,7 @@ static inline void sev_launch_update_data(struct kvm_vm *vm, gpa_t gpa, } static inline void snp_launch_update_data(struct kvm_vm *vm, gpa_t gpa, - u64 hva, u64 size, uint8_t type) + u64 hva, u64 size, u8 type) { struct kvm_sev_snp_launch_update update_data = { .uaddr = hva, diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index 6cd6bb7efbc2..90fffaf91595 100644 --- a/tools/testing/selftests/kvm/include/x86/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -294,7 +294,7 @@ struct vmx_msr_entry { static inline int vmxon(u64 phys) { - uint8_t ret; + u8 ret; __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]" : [ret]"=rm"(ret) @@ -311,7 +311,7 @@ static inline void vmxoff(void) static inline int vmclear(u64 vmcs_pa) { - uint8_t ret; + u8 ret; __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]" : [ret]"=rm"(ret) @@ -323,7 +323,7 @@ static inline int vmclear(u64 vmcs_pa) static inline int vmptrld(u64 vmcs_pa) { - uint8_t ret; + u8 ret; if (enable_evmcs) return -1; @@ -339,7 +339,7 @@ static inline int vmptrld(u64 vmcs_pa) static inline int vmptrst(u64 *value) { u64 tmp; - uint8_t ret; + u8 ret; if (enable_evmcs) return evmcs_vmptrst(value); @@ -450,7 +450,7 @@ static inline void vmcall(void) static inline int vmread(u64 encoding, u64 *value) { u64 tmp; - uint8_t ret; + u8 ret; if (enable_evmcs) return evmcs_vmread(encoding, value); @@ -477,7 +477,7 @@ static inline u64 vmreadz(u64 encoding) static inline int vmwrite(u64 encoding, u64 value) { - uint8_t ret; + u8 ret; if (enable_evmcs) return evmcs_vmwrite(encoding, value); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index f96513262c5b..7ba3a48911e3 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -124,7 +124,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) static void _virt_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr, u64 flags) { - uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); + u8 attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); u64 pg_attr; u64 *ptep; @@ -237,7 +237,7 @@ gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); } -static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, u64 page, int level) +static void pte_dump(FILE *stream, struct kvm_vm *vm, u8 indent, u64 page, int level) { #ifdef DEBUG static const char * const type[] = { "", "pud", "pmd", "pte" }; @@ -256,7 +256,7 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, u64 page, #endif } -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { int level = 4 - (vm->mmu.pgtable_levels - 1); u64 pgd, *ptep; @@ -397,7 +397,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) HCR_EL2_RW | HCR_EL2_TGE | HCR_EL2_E2H); } -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) { u64 pstate, pc; diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c index 2a3ab9c168f0..7a33965349a7 100644 --- a/tools/testing/selftests/kvm/lib/guest_sprintf.c +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -216,7 +216,7 @@ repeat: while (--field_width > 0) APPEND_BUFFER_SAFE(str, end, ' '); APPEND_BUFFER_SAFE(str, end, - (uint8_t)va_arg(args, int)); + (u8)va_arg(args, int)); while (--field_width > 0) APPEND_BUFFER_SAFE(str, end, ' '); continue; diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 9f80e2e03001..050ae9c92681 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1951,7 +1951,7 @@ void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) * Dumps the current state of the VM given by vm, to the FILE stream * given by stream. */ -void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void vm_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { int ctr; struct userspace_mem_region *region; diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c index 38ee62c6cbfb..2982196db3b2 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/processor.c +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -140,7 +140,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) WRITE_ONCE(*ptep, paddr | prot_bits); } -static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, u64 page, int level) +static void pte_dump(FILE *stream, struct kvm_vm *vm, u8 indent, u64 page, int level) { u64 pte, *ptep; static const char * const type[] = { "pte", "pmd", "pud", "pgd"}; @@ -158,7 +158,7 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, u64 page, } } -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { int level; @@ -169,7 +169,7 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) pte_dump(stream, vm, indent, vm->mmu.pgd, level); } -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) { } diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index d7646eebcfab..7336d5a20419 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -148,7 +148,7 @@ unmapped_gva: exit(1); } -static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, +static void pte_dump(FILE *stream, struct kvm_vm *vm, u8 indent, u64 page, int level) { #ifdef DEBUG @@ -170,7 +170,7 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, #endif } -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { struct kvm_mmu *mmu = &vm->mmu; int level = mmu->pgtable_levels - 1; @@ -233,7 +233,7 @@ void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu) vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp); } -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) { struct kvm_riscv_core core; diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 7591c5167927..d35f23a4db12 100644 --- a/tools/testing/selftests/kvm/lib/s390/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -111,7 +111,7 @@ gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) return (entry[idx] & ~0xffful) + (gva & 0xffful); } -static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent, +static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, u8 indent, u64 ptea_start) { u64 *pte, ptea; @@ -125,7 +125,7 @@ static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent, } } -static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent, +static void virt_dump_region(FILE *stream, struct kvm_vm *vm, u8 indent, u64 reg_tab_addr) { u64 addr, *entry; @@ -147,7 +147,7 @@ static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent, } } -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { if (!vm->mmu.pgd_created) return; @@ -212,7 +212,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) va_end(ap); } -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) { fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr); diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index 7e7734088f2f..4d845000de15 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -2074,7 +2074,7 @@ int main(void) { s = sparsebit_alloc(); for (;;) { - uint8_t op = get8() & 0xf; + u8 op = get8() & 0xf; u64 first = get64(); u64 last = get64(); diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 8e6393384fa4..723a5200c4bb 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -62,7 +62,7 @@ const char *ex_str(int vector) } } -static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) +static void regs_dump(FILE *stream, struct kvm_regs *regs, u8 indent) { fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " "rcx: 0x%.16llx rdx: 0x%.16llx\n", @@ -86,7 +86,7 @@ static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) } static void segment_dump(FILE *stream, struct kvm_segment *segment, - uint8_t indent) + u8 indent) { fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " "selector: 0x%.4x type: 0x%.2x\n", @@ -103,7 +103,7 @@ static void segment_dump(FILE *stream, struct kvm_segment *segment, } static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, - uint8_t indent) + u8 indent) { fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " "padding: 0x%.4x 0x%.4x 0x%.4x\n", @@ -111,7 +111,7 @@ static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, dtable->padding[0], dtable->padding[1], dtable->padding[2]); } -static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent) +static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, u8 indent) { unsigned int i; @@ -407,7 +407,7 @@ u64 *vm_get_pte(struct kvm_vm *vm, u64 vaddr) return __vm_get_page_table_entry(vm, &vm->mmu, vaddr, &level); } -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { struct kvm_mmu *mmu = &vm->mmu; u64 *pml4e, *pml4e_start; @@ -909,7 +909,7 @@ const struct kvm_cpuid2 *kvm_get_supported_cpuid(void) static u32 __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, u32 function, u32 index, - uint8_t reg, uint8_t lo, uint8_t hi) + u8 reg, u8 lo, u8 hi) { const struct kvm_cpuid_entry2 *entry; int i; @@ -1127,7 +1127,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) va_end(ap); } -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) { struct kvm_regs regs; struct kvm_sregs sregs; @@ -1378,7 +1378,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ unsigned long ht_gfn, max_gfn, max_pfn; - uint8_t maxphyaddr, guest_maxphyaddr; + u8 maxphyaddr, guest_maxphyaddr; /* * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c index d82f677b7c5e..93f916903461 100644 --- a/tools/testing/selftests/kvm/lib/x86/sev.c +++ b/tools/testing/selftests/kvm/lib/x86/sev.c @@ -15,7 +15,7 @@ * expression would cause us to quit the loop. */ static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region, - uint8_t page_type, bool private) + u8 page_type, bool private) { const struct sparsebit *protected_phy_pages = region->protected_phy_pages; const gpa_t gpa_base = region->region.guest_phys_addr; @@ -103,7 +103,7 @@ void sev_vm_launch(struct kvm_vm *vm, u32 policy) vm->arch.is_pt_protected = true; } -void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement) +void sev_vm_launch_measure(struct kvm_vm *vm, u8 *measurement) { struct kvm_sev_launch_measure launch_measure; struct kvm_sev_guest_status guest_status; @@ -174,7 +174,7 @@ struct kvm_vm *vm_sev_create_with_one_vcpu(u32 type, void *guest_code, return vm; } -void vm_sev_launch(struct kvm_vm *vm, u64 policy, uint8_t *measurement) +void vm_sev_launch(struct kvm_vm *vm, u64 policy, u8 *measurement) { if (is_sev_snp_vm(vm)) { vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, BIT(KVM_HC_MAP_GPA_RANGE)); diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index c2b36a4ac638..51f8be50c7e4 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -217,7 +217,7 @@ static void *vm_gpa2hva(struct vm_data *data, u64 gpa, u64 *rempages) } base = data->hva_slots[slot]; - return (uint8_t *)base + slotoffs * guest_page_size + pgoffs; + return (u8 *)base + slotoffs * guest_page_size + pgoffs; } static u64 vm_slot2gpa(struct vm_data *data, u32 slot) diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index c1f1e318e059..e0975a5dcff1 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -347,7 +347,7 @@ int main(int argc, char *argv[]) /* Pre-fault the memory to avoid taking mmap_sem on guest page faults. */ for (i = 0; i < slot_size; i += vm->page_size) - ((uint8_t *)mem)[i] = 0xaa; + ((u8 *)mem)[i] = 0xaa; gpa = 0; for (slot = first_slot; slot < max_slots; slot++) { diff --git a/tools/testing/selftests/kvm/s390/memop.c b/tools/testing/selftests/kvm/s390/memop.c index aa92fdf0664d..9855b5bfb5ed 100644 --- a/tools/testing/selftests/kvm/s390/memop.c +++ b/tools/testing/selftests/kvm/s390/memop.c @@ -48,13 +48,13 @@ struct mop_desc { void *buf; u32 sida_offset; void *old; - uint8_t old_value[16]; + u8 old_value[16]; bool *cmpxchg_success; - uint8_t ar; - uint8_t key; + u8 ar; + u8 key; }; -const uint8_t NO_KEY = 0xff; +const u8 NO_KEY = 0xff; static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc) { @@ -230,8 +230,8 @@ static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) -static uint8_t __aligned(PAGE_SIZE) mem1[65536]; -static uint8_t __aligned(PAGE_SIZE) mem2[65536]; +static u8 __aligned(PAGE_SIZE) mem1[65536]; +static u8 __aligned(PAGE_SIZE) mem2[65536]; struct test_default { struct kvm_vm *kvm_vm; @@ -296,7 +296,7 @@ static void prepare_mem12(void) TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!") static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu, - enum mop_target mop_target, u32 size, uint8_t key) + enum mop_target mop_target, u32 size, u8 key) { prepare_mem12(); CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, @@ -308,7 +308,7 @@ static void default_write_read(struct test_info copy_cpu, struct test_info mop_c } static void default_read(struct test_info copy_cpu, struct test_info mop_cpu, - enum mop_target mop_target, u32 size, uint8_t key) + enum mop_target mop_target, u32 size, u8 key) { prepare_mem12(); CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1)); @@ -318,12 +318,12 @@ static void default_read(struct test_info copy_cpu, struct test_info mop_cpu, ASSERT_MEM_EQ(mem1, mem2, size); } -static void default_cmpxchg(struct test_default *test, uint8_t key) +static void default_cmpxchg(struct test_default *test, u8 key) { for (int size = 1; size <= 16; size *= 2) { for (int offset = 0; offset < 16; offset += size) { - uint8_t __aligned(16) new[16] = {}; - uint8_t __aligned(16) old[16]; + u8 __aligned(16) new[16] = {}; + u8 __aligned(16) old[16]; bool succ; prepare_mem12(); @@ -400,7 +400,7 @@ static void test_copy_access_register(void) kvm_vm_free(t.kvm_vm); } -static void set_storage_key_range(void *addr, size_t len, uint8_t key) +static void set_storage_key_range(void *addr, size_t len, u8 key) { uintptr_t _addr, abs, i; int not_mapped = 0; @@ -483,7 +483,7 @@ static __uint128_t cut_to_size(int size, __uint128_t val) { switch (size) { case 1: - return (uint8_t)val; + return (u8)val; case 2: return (u16)val; case 4: @@ -553,7 +553,7 @@ static __uint128_t permutate_bits(bool guest, int i, int size, __uint128_t old) if (swap) { int i, j; __uint128_t new; - uint8_t byte0, byte1; + u8 byte0, byte1; rand = rand * 3 + 1; i = rand % size; diff --git a/tools/testing/selftests/kvm/s390/resets.c b/tools/testing/selftests/kvm/s390/resets.c index 7a81d07500bd..e3c7a2f148f9 100644 --- a/tools/testing/selftests/kvm/s390/resets.c +++ b/tools/testing/selftests/kvm/s390/resets.c @@ -20,7 +20,7 @@ struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS]; -static uint8_t regs_null[512]; +static u8 regs_null[512]; static void guest_code_initial(void) { diff --git a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c index bba0d9a6dcc8..a9e5a01200b8 100644 --- a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c +++ b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c @@ -13,7 +13,7 @@ #include "kselftest.h" #include "ucall_common.h" -static void set_storage_key(void *addr, uint8_t skey) +static void set_storage_key(void *addr, u8 skey) { asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); } diff --git a/tools/testing/selftests/kvm/s390/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c index e94a640f6f32..e021e198b28e 100644 --- a/tools/testing/selftests/kvm/s390/tprot.c +++ b/tools/testing/selftests/kvm/s390/tprot.c @@ -14,12 +14,12 @@ #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) -static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE]; -static uint8_t *const page_store_prot = pages[0]; -static uint8_t *const page_fetch_prot = pages[1]; +static __aligned(PAGE_SIZE) u8 pages[2][PAGE_SIZE]; +static u8 *const page_store_prot = pages[0]; +static u8 *const page_fetch_prot = pages[1]; /* Nonzero return value indicates that address not mapped */ -static int set_storage_key(void *addr, uint8_t key) +static int set_storage_key(void *addr, u8 key) { int not_mapped = 0; @@ -44,7 +44,7 @@ enum permission { TRANSL_UNAVAIL = 3, }; -static enum permission test_protection(void *addr, uint8_t key) +static enum permission test_protection(void *addr, u8 key) { u64 mask; @@ -72,7 +72,7 @@ enum stage { struct test { enum stage stage; void *addr; - uint8_t key; + u8 key; enum permission expected; } tests[] = { /* diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 59a6eae30946..5551dd0f9fad 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -556,7 +556,7 @@ static void guest_code_mmio_during_vectoring(void) set_idt(&idt_desc); /* Generate a #GP by dereferencing a non-canonical address */ - *((uint8_t *)NONCANONICAL) = 0x1; + *((u8 *)NONCANONICAL) = 0x1; GUEST_ASSERT(0); } diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index 85fabe262864..d46968f5579e 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -245,8 +245,8 @@ struct sta_struct { u32 sequence; u32 flags; u64 steal; - uint8_t preempted; - uint8_t pad[47]; + u8 preempted; + u8 pad[47]; } __packed; static void sta_set_shmem(gpa_t gpa, unsigned long flags) diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c index 620809cf35da..c91660103137 100644 --- a/tools/testing/selftests/kvm/x86/aperfmperf_test.c +++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c @@ -108,7 +108,7 @@ static void guest_code(void *nested_test_data) static void guest_no_aperfmperf(void) { u64 msr_val; - uint8_t vector; + u8 vector; vector = rdmsr_safe(MSR_IA32_APERF, &msr_val); GUEST_ASSERT(vector == GP_VECTOR); diff --git a/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c index 9ff24d4851f5..5b3aef109cfc 100644 --- a/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c +++ b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c @@ -29,7 +29,7 @@ * SMI handler: runs in real-address mode. * Reports SMRAM_STAGE via port IO, then does RSM. */ -static uint8_t smi_handler[] = { +static u8 smi_handler[] = { 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */ 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */ 0x0f, 0xaa, /* rsm */ diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c index 721f56d38f49..c0d30ccd8767 100644 --- a/tools/testing/selftests/kvm/x86/fastops_test.c +++ b/tools/testing/selftests/kvm/x86/fastops_test.c @@ -77,7 +77,7 @@ #define guest_test_fastop_cl(insn, type_t, __val1, __val2) \ ({ \ type_t output = __val2, ex_output = __val2, input = __val2; \ - uint8_t shift = __val1; \ + u8 shift = __val1; \ u64 flags, ex_flags; \ \ guest_execute_fastop_cl("", insn, shift, ex_output, ex_flags); \ @@ -95,7 +95,7 @@ #define guest_execute_fastop_div(__KVM_ASM_SAFE, insn, __a, __d, __rm, __flags) \ ({ \ u64 ign_error_code; \ - uint8_t vector; \ + u8 vector; \ \ __asm__ __volatile__(fastop(__KVM_ASM_SAFE(insn " %[denom]")) \ : "+a"(__a), "+d"(__d), flags_constraint(__flags), \ @@ -110,7 +110,7 @@ type_t _a = __val1, _d = __val1, rm = __val2; \ type_t a = _a, d = _d, ex_a = _a, ex_d = _d; \ u64 flags, ex_flags; \ - uint8_t v, ex_v; \ + u8 v, ex_v; \ \ ex_v = guest_execute_fastop_div(KVM_ASM_SAFE, insn, ex_a, ex_d, rm, ex_flags); \ v = guest_execute_fastop_div(KVM_ASM_SAFE_FEP, insn, a, d, rm, flags); \ @@ -185,7 +185,7 @@ if (sizeof(type_t) != 1) { \ static void guest_code(void) { - guest_test_fastops(uint8_t, "b"); + guest_test_fastops(u8, "b"); guest_test_fastops(u16, "w"); guest_test_fastops(u32, "l"); guest_test_fastops(u64, "q"); diff --git a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c index df7a94400d3d..753a0e730ea8 100644 --- a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c +++ b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c @@ -26,11 +26,11 @@ static void guest_ud_handler(struct ex_regs *regs) regs->rip += HYPERCALL_INSN_SIZE; } -static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 }; -static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 }; +static const u8 vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 }; +static const u8 svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 }; -extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE]; -static u64 do_sched_yield(uint8_t apic_id) +extern u8 hypercall_insn[HYPERCALL_INSN_SIZE]; +static u64 do_sched_yield(u8 apic_id) { u64 ret; @@ -45,8 +45,8 @@ static u64 do_sched_yield(uint8_t apic_id) static void guest_main(void) { - const uint8_t *native_hypercall_insn; - const uint8_t *other_hypercall_insn; + const u8 *native_hypercall_insn; + const u8 *other_hypercall_insn; u64 ret; if (host_cpu_is_intel) { diff --git a/tools/testing/selftests/kvm/x86/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h index c7e4f08765fb..fd6b6c67199a 100644 --- a/tools/testing/selftests/kvm/x86/flds_emulation.h +++ b/tools/testing/selftests/kvm/x86/flds_emulation.h @@ -21,7 +21,7 @@ static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; struct kvm_regs regs; - uint8_t *insn_bytes; + u8 *insn_bytes; u64 flags; TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 80588b7ea259..52dbd52ce606 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c @@ -41,7 +41,7 @@ static bool is_write_only_msr(u32 msr) static void guest_msr(struct msr_data *msr) { - uint8_t vector = 0; + u8 vector = 0; u64 msr_val = 0; GUEST_ASSERT(msr->idx); @@ -85,7 +85,7 @@ done: static void guest_hcall(gpa_t pgs_gpa, struct hcall_data *hcall) { u64 res, input, output; - uint8_t vector; + u8 vector; GUEST_ASSERT_NE(hcall->control, 0); diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c index babf0f95165a..8ed5fa635021 100644 --- a/tools/testing/selftests/kvm/x86/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c @@ -41,7 +41,7 @@ static struct msr_data msrs_to_test[] = { static void test_msr(struct msr_data *msr) { u64 ignored; - uint8_t vector; + u8 vector; PR_MSR(msr); diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c index 42fd24567e26..fb7dcbe53ac7 100644 --- a/tools/testing/selftests/kvm/x86/nested_emulation_test.c +++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c @@ -13,7 +13,7 @@ enum { struct emulated_instruction { const char name[32]; - uint8_t opcode[15]; + u8 opcode[15]; u32 exit_reason[NR_VIRTUALIZATION_FLAVORS]; }; @@ -32,9 +32,9 @@ static struct emulated_instruction instructions[] = { }, }; -static uint8_t kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kvm" */ -static uint8_t l2_guest_code[sizeof(kvm_fep) + 15]; -static uint8_t *l2_instruction = &l2_guest_code[sizeof(kvm_fep)]; +static u8 kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kvm" */ +static u8 l2_guest_code[sizeof(kvm_fep) + 15]; +static u8 *l2_instruction = &l2_guest_code[sizeof(kvm_fep)]; static u32 get_instruction_length(struct emulated_instruction *insn) { diff --git a/tools/testing/selftests/kvm/x86/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c index 86d1ab0db1e8..80bb07e6531c 100644 --- a/tools/testing/selftests/kvm/x86/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86/platform_info_test.c @@ -24,7 +24,7 @@ static void guest_code(void) { u64 msr_platform_info; - uint8_t vector; + u8 vector; GUEST_SYNC(true); msr_platform_info = rdmsr(MSR_PLATFORM_INFO); diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c index 2a12c2d42697..dc6afac3aa91 100644 --- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c @@ -32,7 +32,7 @@ /* Track which architectural events are supported by hardware. */ static u32 hardware_pmu_arch_events; -static uint8_t kvm_pmu_version; +static u8 kvm_pmu_version; static bool kvm_has_perf_caps; #define X86_PMU_FEATURE_NULL \ @@ -57,7 +57,7 @@ struct kvm_intel_pmu_event { * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the * compiler often thinks the feature definitions aren't compile-time constants. */ -static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx) +static struct kvm_intel_pmu_event intel_event_to_feature(u8 idx) { const struct kvm_intel_pmu_event __intel_event_to_feature[] = { [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED }, @@ -89,7 +89,7 @@ static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx) static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, void *guest_code, - uint8_t pmu_version, + u8 pmu_version, u64 perf_capabilities) { struct kvm_vm *vm; @@ -132,7 +132,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu) } while (uc.cmd != UCALL_DONE); } -static uint8_t guest_get_pmu_version(void) +static u8 guest_get_pmu_version(void) { /* * Return the effective PMU version, i.e. the minimum between what KVM @@ -141,7 +141,7 @@ static uint8_t guest_get_pmu_version(void) * supported by KVM to verify KVM doesn't freak out and do something * bizarre with an architecturally valid, but unsupported, version. */ - return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION)); + return min_t(u8, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION)); } /* @@ -153,7 +153,7 @@ static uint8_t guest_get_pmu_version(void) * Sanity check that in all cases, the event doesn't count when it's disabled, * and that KVM correctly emulates the write of an arbitrary value. */ -static void guest_assert_event_count(uint8_t idx, u32 pmc, u32 pmc_msr) +static void guest_assert_event_count(u8 idx, u32 pmc, u32 pmc_msr) { u64 count; @@ -255,7 +255,7 @@ do { \ guest_assert_event_count(_idx, _pmc, _pmc_msr); \ } while (0) -static void __guest_test_arch_event(uint8_t idx, u32 pmc, u32 pmc_msr, +static void __guest_test_arch_event(u8 idx, u32 pmc, u32 pmc_msr, u32 ctrl_msr, u64 ctrl_msr_value) { GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); @@ -264,7 +264,7 @@ static void __guest_test_arch_event(uint8_t idx, u32 pmc, u32 pmc_msr, GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP); } -static void guest_test_arch_event(uint8_t idx) +static void guest_test_arch_event(u8 idx) { u32 nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); u32 pmu_version = guest_get_pmu_version(); @@ -320,7 +320,7 @@ static void guest_test_arch_event(uint8_t idx) static void guest_test_arch_events(void) { - uint8_t i; + u8 i; for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) guest_test_arch_event(i); @@ -328,8 +328,8 @@ static void guest_test_arch_events(void) GUEST_DONE(); } -static void test_arch_events(uint8_t pmu_version, u64 perf_capabilities, - uint8_t length, u32 unavailable_mask) +static void test_arch_events(u8 pmu_version, u64 perf_capabilities, + u8 length, u32 unavailable_mask) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -376,7 +376,7 @@ __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \ static void guest_test_rdpmc(u32 rdpmc_idx, bool expect_success, u64 expected_val) { - uint8_t vector; + u8 vector; u64 val; vector = rdpmc_safe(rdpmc_idx, &val); @@ -393,11 +393,11 @@ static void guest_test_rdpmc(u32 rdpmc_idx, bool expect_success, GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val); } -static void guest_rd_wr_counters(u32 base_msr, uint8_t nr_possible_counters, - uint8_t nr_counters, u32 or_mask) +static void guest_rd_wr_counters(u32 base_msr, u8 nr_possible_counters, + u8 nr_counters, u32 or_mask) { const bool pmu_has_fast_mode = !guest_get_pmu_version(); - uint8_t i; + u8 i; for (i = 0; i < nr_possible_counters; i++) { /* @@ -422,7 +422,7 @@ static void guest_rd_wr_counters(u32 base_msr, uint8_t nr_possible_counters, const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 && msr != MSR_P6_PERFCTR1; u32 rdpmc_idx; - uint8_t vector; + u8 vector; u64 val; vector = wrmsr_safe(msr, test_val); @@ -461,8 +461,8 @@ static void guest_rd_wr_counters(u32 base_msr, uint8_t nr_possible_counters, static void guest_test_gp_counters(void) { - uint8_t pmu_version = guest_get_pmu_version(); - uint8_t nr_gp_counters = 0; + u8 pmu_version = guest_get_pmu_version(); + u8 nr_gp_counters = 0; u32 base_msr; if (pmu_version) @@ -495,8 +495,8 @@ static void guest_test_gp_counters(void) GUEST_DONE(); } -static void test_gp_counters(uint8_t pmu_version, u64 perf_capabilities, - uint8_t nr_gp_counters) +static void test_gp_counters(u8 pmu_version, u64 perf_capabilities, + u8 nr_gp_counters) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -515,8 +515,8 @@ static void test_gp_counters(uint8_t pmu_version, u64 perf_capabilities, static void guest_test_fixed_counters(void) { u64 supported_bitmask = 0; - uint8_t nr_fixed_counters = 0; - uint8_t i; + u8 nr_fixed_counters = 0; + u8 i; /* Fixed counters require Architectural vPMU Version 2+. */ if (guest_get_pmu_version() >= 2) @@ -533,7 +533,7 @@ static void guest_test_fixed_counters(void) nr_fixed_counters, supported_bitmask); for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) { - uint8_t vector; + u8 vector; u64 val; if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) { @@ -561,9 +561,8 @@ static void guest_test_fixed_counters(void) GUEST_DONE(); } -static void test_fixed_counters(uint8_t pmu_version, u64 perf_capabilities, - uint8_t nr_fixed_counters, - u32 supported_bitmask) +static void test_fixed_counters(u8 pmu_version, u64 perf_capabilities, + u8 nr_fixed_counters, u32 supported_bitmask) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -583,11 +582,11 @@ static void test_fixed_counters(uint8_t pmu_version, u64 perf_capabilities, static void test_intel_counters(void) { - uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); - uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); - uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); + u8 nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + u8 nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); + u8 pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); unsigned int i; - uint8_t v, j; + u8 v, j; u32 k; const u64 perf_caps[] = { @@ -620,7 +619,7 @@ static void test_intel_counters(void) * Intel, i.e. is the last version that is guaranteed to be backwards * compatible with KVM's existing behavior. */ - uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5); + u8 max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5); /* * Detect the existence of events that aren't supported by selftests. diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c index 5d607b114aeb..c1232344fda8 100644 --- a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c @@ -685,7 +685,7 @@ static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, u64 event, static void test_filter_ioctl(struct kvm_vcpu *vcpu) { - uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + u8 nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); struct __kvm_pmu_event_filter f; u64 e = ~0ul; int r; @@ -729,7 +729,7 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu) TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed"); } -static void intel_run_fixed_counter_guest_code(uint8_t idx) +static void intel_run_fixed_counter_guest_code(u8 idx) { for (;;) { wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); @@ -770,8 +770,8 @@ static u64 test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu, return run_vcpu_to_sync(vcpu); } -static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx, - uint8_t nr_fixed_counters) +static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, u8 idx, + u8 nr_fixed_counters) { unsigned int i; u32 bitmap; @@ -815,10 +815,10 @@ static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx, static void test_fixed_counter_bitmap(void) { - uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + u8 nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); struct kvm_vm *vm; struct kvm_vcpu *vcpu; - uint8_t idx; + u8 idx; /* * Check that pmu_event_filter works as expected when it's applied to diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c index 0bf86d822ee0..27675d7d04c0 100644 --- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c @@ -29,7 +29,7 @@ /* Horrific macro so that the line info is captured accurately :-( */ #define memcmp_g(gpa, pattern, size) \ do { \ - uint8_t *mem = (uint8_t *)gpa; \ + u8 *mem = (u8 *)gpa; \ size_t i; \ \ for (i = 0; i < size; i++) \ @@ -38,7 +38,7 @@ do { \ pattern, i, gpa + i, mem[i]); \ } while (0) -static void memcmp_h(uint8_t *mem, u64 gpa, uint8_t pattern, size_t size) +static void memcmp_h(u8 *mem, u64 gpa, u8 pattern, size_t size) { size_t i; @@ -71,12 +71,12 @@ enum ucall_syncs { }; static void guest_sync_shared(u64 gpa, u64 size, - uint8_t current_pattern, uint8_t new_pattern) + u8 current_pattern, u8 new_pattern) { GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern); } -static void guest_sync_private(u64 gpa, u64 size, uint8_t pattern) +static void guest_sync_private(u64 gpa, u64 size, u8 pattern) { GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern); } @@ -121,8 +121,8 @@ struct { static void guest_test_explicit_conversion(u64 base_gpa, bool do_fallocate) { - const uint8_t def_p = 0xaa; - const uint8_t init_p = 0xcc; + const u8 def_p = 0xaa; + const u8 init_p = 0xcc; u64 j; int i; @@ -136,10 +136,10 @@ static void guest_test_explicit_conversion(u64 base_gpa, bool do_fallocate) for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { u64 gpa = base_gpa + test_ranges[i].offset; u64 size = test_ranges[i].size; - uint8_t p1 = 0x11; - uint8_t p2 = 0x22; - uint8_t p3 = 0x33; - uint8_t p4 = 0x44; + u8 p1 = 0x11; + u8 p2 = 0x22; + u8 p3 = 0x33; + u8 p4 = 0x44; /* * Set the test region to pattern one to differentiate it from @@ -229,7 +229,7 @@ static void guest_punch_hole(u64 gpa, u64 size) */ static void guest_test_punch_hole(u64 base_gpa, bool precise) { - const uint8_t init_p = 0xcc; + const u8 init_p = 0xcc; int i; /* @@ -347,7 +347,7 @@ static void *__test_mem_conversions(void *__vcpu) for (i = 0; i < size; i += vm->page_size) { size_t nr_bytes = min_t(size_t, vm->page_size, size - i); - uint8_t *hva = addr_gpa2hva(vm, gpa + i); + u8 *hva = addr_gpa2hva(vm, gpa + i); /* In all cases, the host should observe the shared data. */ memcmp_h(hva, gpa + i, uc.args[3], nr_bytes); diff --git a/tools/testing/selftests/kvm/x86/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c index 39e89350c2e7..740051167dbd 100644 --- a/tools/testing/selftests/kvm/x86/smm_test.c +++ b/tools/testing/selftests/kvm/x86/smm_test.c @@ -34,7 +34,7 @@ * independent subset of asm here. * SMI handler always report back fixed stage SMRAM_STAGE. */ -uint8_t smi_handler[] = { +u8 smi_handler[] = { 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */ 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */ 0x0f, 0xaa, /* rsm */ diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c index 62e14843e5af..409c6cc9f921 100644 --- a/tools/testing/selftests/kvm/x86/state_test.c +++ b/tools/testing/selftests/kvm/x86/state_test.c @@ -145,7 +145,7 @@ static void __attribute__((__flatten__)) guest_code(void *arg) if (this_cpu_has(X86_FEATURE_XSAVE)) { u64 supported_xcr0 = this_cpu_supported_xcr0(); - uint8_t buffer[PAGE_SIZE]; + u8 buffer[PAGE_SIZE]; memset(buffer, 0xcc, sizeof(buffer)); @@ -331,7 +331,7 @@ int main(int argc, char *argv[]) * supported features, even if something goes awry in saving * the original snapshot. */ - xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512]; + xstate_bv = (void *)&((u8 *)state->xsave->region)[512]; saved_xstate_bv = *xstate_bv; vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); diff --git a/tools/testing/selftests/kvm/x86/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c index be7d72f3c029..9c5a87576c2e 100644 --- a/tools/testing/selftests/kvm/x86/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c @@ -10,7 +10,7 @@ #include "kvm_util.h" #include "processor.h" -static void guest_ins_port80(uint8_t *buffer, unsigned int count) +static void guest_ins_port80(u8 *buffer, unsigned int count) { unsigned long end; @@ -26,7 +26,7 @@ static void guest_ins_port80(uint8_t *buffer, unsigned int count) static void guest_code(void) { - uint8_t buffer[8192]; + u8 buffer[8192]; int i; /* diff --git a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c index 98b8d285dbb7..2808ce727e5f 100644 --- a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c @@ -23,21 +23,21 @@ struct kvm_msr_filter filter_allow = { .nmsrs = 1, /* Test an MSR the kernel knows about. */ .base = MSR_IA32_XSS, - .bitmap = (uint8_t*)&deny_bits, + .bitmap = (u8 *)&deny_bits, }, { .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE, .nmsrs = 1, /* Test an MSR the kernel doesn't know about. */ .base = MSR_IA32_FLUSH_CMD, - .bitmap = (uint8_t*)&deny_bits, + .bitmap = (u8 *)&deny_bits, }, { .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE, .nmsrs = 1, /* Test a fabricated MSR that no one knows about. */ .base = MSR_NON_EXISTENT, - .bitmap = (uint8_t*)&deny_bits, + .bitmap = (u8 *)&deny_bits, }, }, }; @@ -49,7 +49,7 @@ struct kvm_msr_filter filter_fs = { .flags = KVM_MSR_FILTER_READ, .nmsrs = 1, .base = MSR_FS_BASE, - .bitmap = (uint8_t*)&deny_bits, + .bitmap = (u8 *)&deny_bits, }, }, }; @@ -61,7 +61,7 @@ struct kvm_msr_filter filter_gs = { .flags = KVM_MSR_FILTER_READ, .nmsrs = 1, .base = MSR_GS_BASE, - .bitmap = (uint8_t*)&deny_bits, + .bitmap = (u8 *)&deny_bits, }, }, }; @@ -77,7 +77,7 @@ static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; static u8 bitmap_deadbeef[1] = { 0x1 }; -static void deny_msr(uint8_t *bitmap, u32 msr) +static void deny_msr(u8 *bitmap, u32 msr) { u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1); @@ -732,7 +732,7 @@ static void run_msr_filter_flag_test(struct kvm_vm *vm) .flags = KVM_MSR_FILTER_READ, .nmsrs = 1, .base = 0, - .bitmap = (uint8_t *)&deny_bits, + .bitmap = (u8 *)&deny_bits, }, }, }; diff --git a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c index 1f3638c6ee14..d004108dbdc6 100644 --- a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c @@ -54,7 +54,7 @@ static const union perf_capabilities format_caps = { static void guest_test_perf_capabilities_gp(u64 val) { - uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); + u8 vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); __GUEST_ASSERT(vector == GP_VECTOR, "Expected #GP for value '0x%lx', got %s", diff --git a/tools/testing/selftests/kvm/x86/xapic_tpr_test.c b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c index af1fe833ad4f..ab25db2235d5 100644 --- a/tools/testing/selftests/kvm/x86/xapic_tpr_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c @@ -69,7 +69,7 @@ static void tpr_guest_irq_queue(void) } } -static uint8_t tpr_guest_tpr_get(void) +static u8 tpr_guest_tpr_get(void) { u32 taskpri; @@ -81,7 +81,7 @@ static uint8_t tpr_guest_tpr_get(void) return GET_APIC_PRI(taskpri); } -static uint8_t tpr_guest_ppr_get(void) +static u8 tpr_guest_ppr_get(void) { u32 procpri; @@ -93,7 +93,7 @@ static uint8_t tpr_guest_ppr_get(void) return GET_APIC_PRI(procpri); } -static uint8_t tpr_guest_cr8_get(void) +static u8 tpr_guest_cr8_get(void) { u64 cr8; @@ -104,7 +104,7 @@ static uint8_t tpr_guest_cr8_get(void) static void tpr_guest_check_tpr_ppr_cr8_equal(void) { - uint8_t tpr; + u8 tpr; tpr = tpr_guest_tpr_get(); @@ -157,19 +157,19 @@ static void tpr_guest_code(void) GUEST_DONE(); } -static uint8_t lapic_tpr_get(struct kvm_lapic_state *xapic) +static u8 lapic_tpr_get(struct kvm_lapic_state *xapic) { return GET_APIC_PRI(*((u32 *)&xapic->regs[APIC_TASKPRI])); } -static void lapic_tpr_set(struct kvm_lapic_state *xapic, uint8_t val) +static void lapic_tpr_set(struct kvm_lapic_state *xapic, u8 val) { u32 *taskpri = (u32 *)&xapic->regs[APIC_TASKPRI]; *taskpri = SET_APIC_PRI(*taskpri, val); } -static uint8_t sregs_tpr(struct kvm_sregs *sregs) +static u8 sregs_tpr(struct kvm_sregs *sregs) { return sregs->cr8 & GENMASK(3, 0); } @@ -197,7 +197,7 @@ static void test_tpr_check_tpr_cr8_equal(struct kvm_vcpu *vcpu) static void test_tpr_set_tpr_for_irq(struct kvm_vcpu *vcpu, bool mask) { struct kvm_lapic_state xapic; - uint8_t tpr; + u8 tpr; static_assert(IRQ_VECTOR >= 16, "invalid IRQ vector number"); tpr = IRQ_VECTOR / 16; diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index c6d00205b59d..5076f6a75455 100644 --- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c @@ -133,8 +133,8 @@ struct arch_vcpu_info { }; struct vcpu_info { - uint8_t evtchn_upcall_pending; - uint8_t evtchn_upcall_mask; + u8 evtchn_upcall_pending; + u8 evtchn_upcall_mask; unsigned long evtchn_pending_sel; struct arch_vcpu_info arch; struct pvclock_vcpu_time_info time; -- cgit v1.2.3 From 85819fa0e3b98682b8c57c6d8ba57e7a9c6032ea Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 20 Apr 2026 14:19:56 -0700 Subject: KVM: selftests: Drop "vaddr_" from APIs that allocate memory for a given VM Now that KVM selftests use gva_t instead of vm_vaddr_t, drop "vaddr_" from the core memory allocation APIs as the information is extraneous and does more harm than good. E.g. the APIs don't _just_ allocate virtual memory, they allocate backing physical memory and install mappings in the guest page tables. And as proven by kmalloc() and malloc(), developers generally expect that allocations come with a working virtual address. Opportunistically clean up the function comment for vm_alloc(), and drop the misleading and superfluous comments for its wrappers. No functional change intended. Link: https://patch.msgid.link/20260420212004.3938325-12-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/arm64/vgic_irq.c | 4 +- tools/testing/selftests/kvm/include/kvm_util.h | 16 ++--- tools/testing/selftests/kvm/lib/arm64/processor.c | 10 +-- tools/testing/selftests/kvm/lib/elf.c | 3 +- tools/testing/selftests/kvm/lib/kvm_util.c | 84 ++++++---------------- .../selftests/kvm/lib/loongarch/processor.c | 13 ++-- tools/testing/selftests/kvm/lib/riscv/processor.c | 6 +- tools/testing/selftests/kvm/lib/s390/processor.c | 2 +- tools/testing/selftests/kvm/lib/ucall_common.c | 4 +- tools/testing/selftests/kvm/lib/x86/hyperv.c | 8 +-- tools/testing/selftests/kvm/lib/x86/processor.c | 12 ++-- tools/testing/selftests/kvm/lib/x86/svm.c | 8 +-- tools/testing/selftests/kvm/lib/x86/vmx.c | 16 ++--- tools/testing/selftests/kvm/s390/memop.c | 12 ++-- tools/testing/selftests/kvm/s390/tprot.c | 4 +- tools/testing/selftests/kvm/x86/amx_test.c | 6 +- tools/testing/selftests/kvm/x86/cpuid_test.c | 2 +- tools/testing/selftests/kvm/x86/hyperv_clock.c | 2 +- tools/testing/selftests/kvm/x86/hyperv_evmcs.c | 2 +- .../selftests/kvm/x86/hyperv_extended_hypercalls.c | 4 +- tools/testing/selftests/kvm/x86/hyperv_features.c | 6 +- tools/testing/selftests/kvm/x86/hyperv_ipi.c | 2 +- tools/testing/selftests/kvm/x86/hyperv_svm_test.c | 2 +- tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c | 6 +- tools/testing/selftests/kvm/x86/kvm_clock_test.c | 2 +- tools/testing/selftests/kvm/x86/sev_smoke_test.c | 4 +- .../kvm/x86/svm_nested_soft_inject_test.c | 2 +- tools/testing/selftests/kvm/x86/xapic_ipi_test.c | 2 +- 28 files changed, 102 insertions(+), 142 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index 8a9dd79123d4..5e231998617e 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -771,7 +771,7 @@ static void test_vgic(u32 nr_irqs, bool level_sensitive, bool eoi_split) vcpu_init_descriptor_tables(vcpu); /* Setup the guest args page (so it gets the args). */ - args_gva = vm_vaddr_alloc_page(vm); + args_gva = vm_alloc_page(vm); memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); vcpu_args_set(vcpu, 1, args_gva); @@ -997,7 +997,7 @@ static void test_vgic_two_cpus(void *gcode) vcpu_init_descriptor_tables(vcpus[1]); /* Setup the guest args page (so it gets the args). */ - args_gva = vm_vaddr_alloc_page(vm); + args_gva = vm_alloc_page(vm); memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); vcpu_args_set(vcpus[0], 2, args_gva, 0); vcpu_args_set(vcpus[1], 2, args_gva, 1); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 676e3ccb1462..8f7afc34ea8d 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -716,14 +716,14 @@ void vm_mem_region_delete(struct kvm_vm *vm, u32 slot); struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id); void vm_populate_vaddr_bitmap(struct kvm_vm *vm); gva_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, gva_t vaddr_min); -gva_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min); -gva_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, - enum kvm_mem_region_type type); -gva_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, - enum kvm_mem_region_type type); -gva_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); -gva_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type); -gva_t vm_vaddr_alloc_page(struct kvm_vm *vm); +gva_t vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min); +gva_t __vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, + enum kvm_mem_region_type type); +gva_t vm_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, + enum kvm_mem_region_type type); +gva_t vm_alloc_pages(struct kvm_vm *vm, int nr_pages); +gva_t __vm_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type); +gva_t vm_alloc_page(struct kvm_vm *vm); void virt_map(struct kvm_vm *vm, u64 vaddr, u64 paddr, unsigned int npages); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 7ba3a48911e3..c4f0e37f2907 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -422,9 +422,9 @@ static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, u32 vcpu_id, stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : vm->page_size; - stack_vaddr = __vm_vaddr_alloc(vm, stack_size, - DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); + stack_vaddr = __vm_alloc(vm, stack_size, + DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); aarch64_vcpu_setup(vcpu, init); @@ -536,8 +536,8 @@ unexpected_exception: void vm_init_descriptor_tables(struct kvm_vm *vm) { - vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), - vm->page_size, MEM_REGION_DATA); + vm->handlers = __vm_alloc(vm, sizeof(struct handlers), vm->page_size, + MEM_REGION_DATA); *(gva_t *)addr_gva2hva(vm, (gva_t)(&exception_handlers)) = vm->handlers; } diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index 0f2710cda9d8..2288480f4e1e 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -162,8 +162,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) seg_vend |= vm->page_size - 1; size_t seg_size = seg_vend - seg_vstart + 1; - gva_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart, - MEM_REGION_CODE); + gva_t vaddr = __vm_alloc(vm, seg_size, seg_vstart, MEM_REGION_CODE); TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate " "virtual memory for segment at requested min addr,\n" " segment idx: %u\n" diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 050ae9c92681..b304c0e54837 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1450,8 +1450,8 @@ va_found: return pgidx_start * vm->page_size; } -static gva_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, - enum kvm_mem_region_type type, bool protected) +static gva_t ____vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, + enum kvm_mem_region_type type, bool protected) { u64 pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); @@ -1476,84 +1476,44 @@ static gva_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, return vaddr_start; } -gva_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, - enum kvm_mem_region_type type) +gva_t __vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, + enum kvm_mem_region_type type) { - return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, - vm_arch_has_protected_memory(vm)); + return ____vm_alloc(vm, sz, vaddr_min, type, + vm_arch_has_protected_memory(vm)); } -gva_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, - enum kvm_mem_region_type type) +gva_t vm_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, + enum kvm_mem_region_type type) { - return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, false); + return ____vm_alloc(vm, sz, vaddr_min, type, false); } /* - * VM Virtual Address Allocate - * - * Input Args: - * vm - Virtual Machine - * sz - Size in bytes - * vaddr_min - Minimum starting virtual address - * - * Output Args: None - * - * Return: - * Starting guest virtual address - * - * Allocates at least sz bytes within the virtual address space of the vm - * given by vm. The allocated bytes are mapped to a virtual address >= - * the address given by vaddr_min. Note that each allocation uses a - * a unique set of pages, with the minimum real allocation being at least - * a page. The allocated physical space comes from the TEST_DATA memory region. + * Allocates at least sz bytes within the virtual address space of the VM + * given by @vm. The allocated bytes are mapped to a virtual address >= the + * address given by @vaddr_min. Note that each allocation uses a a unique set + * of pages, with the minimum real allocation being at least a page. The + * allocated physical space comes from the TEST_DATA memory region. */ -gva_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min) +gva_t vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min) { - return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA); + return __vm_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA); } -/* - * VM Virtual Address Allocate Pages - * - * Input Args: - * vm - Virtual Machine - * - * Output Args: None - * - * Return: - * Starting guest virtual address - * - * Allocates at least N system pages worth of bytes within the virtual address - * space of the vm. - */ -gva_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) +gva_t vm_alloc_pages(struct kvm_vm *vm, int nr_pages) { - return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); + return vm_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); } -gva_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type) +gva_t __vm_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type) { - return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type); + return __vm_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type); } -/* - * VM Virtual Address Allocate Page - * - * Input Args: - * vm - Virtual Machine - * - * Output Args: None - * - * Return: - * Starting guest virtual address - * - * Allocates at least one system page worth of bytes within the virtual address - * space of the vm. - */ -gva_t vm_vaddr_alloc_page(struct kvm_vm *vm) +gva_t vm_alloc_page(struct kvm_vm *vm) { - return vm_vaddr_alloc_pages(vm, 1); + return vm_alloc_pages(vm, 1); } /* diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c index 2982196db3b2..318520f1f1b9 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/processor.c +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -206,8 +206,9 @@ void vm_init_descriptor_tables(struct kvm_vm *vm) { void *addr; - vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), - LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + vm->handlers = __vm_alloc(vm, sizeof(struct handlers), + LOONGARCH_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); addr = addr_gva2hva(vm, vm->handlers); memset(addr, 0, vm->page_size); @@ -354,8 +355,8 @@ void loongarch_vcpu_setup(struct kvm_vcpu *vcpu) loongarch_set_csr(vcpu, LOONGARCH_CSR_STLBPGSIZE, PS_DEFAULT_SIZE); /* LOONGARCH_CSR_KS1 is used for exception stack */ - val = __vm_vaddr_alloc(vm, vm->page_size, - LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + val = __vm_alloc(vm, vm->page_size, LOONGARCH_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); TEST_ASSERT(val != 0, "No memory for exception stack"); val = val + vm->page_size; loongarch_set_csr(vcpu, LOONGARCH_CSR_KS1, val); @@ -378,8 +379,8 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) vcpu = __vm_vcpu_add(vm, vcpu_id); stack_size = vm->page_size; - stack_vaddr = __vm_vaddr_alloc(vm, stack_size, - LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + stack_vaddr = __vm_alloc(vm, stack_size, + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); TEST_ASSERT(stack_vaddr != 0, "No memory for vm stack"); loongarch_vcpu_setup(vcpu); diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 7336d5a20419..38eb8302922a 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -322,7 +322,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : vm->page_size; - stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + stack_vaddr = __vm_alloc(vm, stack_size, DEFAULT_RISCV_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); @@ -449,8 +449,8 @@ void vcpu_init_vector_tables(struct kvm_vcpu *vcpu) void vm_init_vector_tables(struct kvm_vm *vm) { - vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), - vm->page_size, MEM_REGION_DATA); + vm->handlers = __vm_alloc(vm, sizeof(struct handlers), vm->page_size, + MEM_REGION_DATA); *(gva_t *)addr_gva2hva(vm, (gva_t)(&exception_handlers)) = vm->handlers; } diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index d35f23a4db12..4ae0a39f426f 100644 --- a/tools/testing/selftests/kvm/lib/s390/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -171,7 +171,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); - stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + stack_vaddr = __vm_alloc(vm, stack_size, DEFAULT_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index b16b5c5b3a1e..4a8a5bc40a45 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -32,8 +32,8 @@ void ucall_init(struct kvm_vm *vm, gpa_t mmio_gpa) gva_t vaddr; int i; - vaddr = vm_vaddr_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, - MEM_REGION_DATA); + vaddr = vm_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, + MEM_REGION_DATA); hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr); memset(hdr, 0, sizeof(*hdr)); diff --git a/tools/testing/selftests/kvm/lib/x86/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c index c2806bed43c9..d200c5c26e2e 100644 --- a/tools/testing/selftests/kvm/lib/x86/hyperv.c +++ b/tools/testing/selftests/kvm/lib/x86/hyperv.c @@ -78,21 +78,21 @@ bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature) struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, gva_t *p_hv_pages_gva) { - gva_t hv_pages_gva = vm_vaddr_alloc_page(vm); + gva_t hv_pages_gva = vm_alloc_page(vm); struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva); /* Setup of a region of guest memory for the VP Assist page. */ - hv->vp_assist = (void *)vm_vaddr_alloc_page(vm); + hv->vp_assist = (void *)vm_alloc_page(vm); hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist); hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist); /* Setup of a region of guest memory for the partition assist page. */ - hv->partition_assist = (void *)vm_vaddr_alloc_page(vm); + hv->partition_assist = (void *)vm_alloc_page(vm); hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist); hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist); /* Setup of a region of guest memory for the enlightened VMCS. */ - hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); + hv->enlightened_vmcs = (void *)vm_alloc_page(vm); hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs); hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs); diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 723a5200c4bb..50848112932c 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -746,10 +746,10 @@ static void vm_init_descriptor_tables(struct kvm_vm *vm) struct kvm_segment seg; int i; - vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->arch.gdt = __vm_alloc_page(vm, MEM_REGION_DATA); + vm->arch.idt = __vm_alloc_page(vm, MEM_REGION_DATA); + vm->handlers = __vm_alloc_page(vm, MEM_REGION_DATA); + vm->arch.tss = __vm_alloc_page(vm, MEM_REGION_DATA); /* Handlers have the same address in both address spaces.*/ for (i = 0; i < NUM_INTERRUPTS; i++) @@ -828,7 +828,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) gva_t stack_vaddr; struct kvm_vcpu *vcpu; - stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), + stack_vaddr = __vm_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), DEFAULT_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); @@ -844,7 +844,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) * may need to subtract 4 bytes instead of 8 bytes. */ TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE), - "__vm_vaddr_alloc() did not provide a page-aligned address"); + "__vm_alloc() did not provide a page-aligned address"); stack_vaddr -= 8; vcpu = __vm_vcpu_add(vm, vcpu_id); diff --git a/tools/testing/selftests/kvm/lib/x86/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c index 620bdc5d3cc2..3b01605ab016 100644 --- a/tools/testing/selftests/kvm/lib/x86/svm.c +++ b/tools/testing/selftests/kvm/lib/x86/svm.c @@ -30,18 +30,18 @@ u64 rflags; struct svm_test_data * vcpu_alloc_svm(struct kvm_vm *vm, gva_t *p_svm_gva) { - gva_t svm_gva = vm_vaddr_alloc_page(vm); + gva_t svm_gva = vm_alloc_page(vm); struct svm_test_data *svm = addr_gva2hva(vm, svm_gva); - svm->vmcb = (void *)vm_vaddr_alloc_page(vm); + svm->vmcb = (void *)vm_alloc_page(vm); svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb); svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb); - svm->save_area = (void *)vm_vaddr_alloc_page(vm); + svm->save_area = (void *)vm_alloc_page(vm); svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area); svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area); - svm->msr = (void *)vm_vaddr_alloc_page(vm); + svm->msr = (void *)vm_alloc_page(vm); svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr); svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr); memset(svm->msr_hva, 0, getpagesize()); diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index b2f83c3f7f16..67642759e4a0 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -81,37 +81,37 @@ void vm_enable_ept(struct kvm_vm *vm) struct vmx_pages * vcpu_alloc_vmx(struct kvm_vm *vm, gva_t *p_vmx_gva) { - gva_t vmx_gva = vm_vaddr_alloc_page(vm); + gva_t vmx_gva = vm_alloc_page(vm); struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva); /* Setup of a region of guest memory for the vmxon region. */ - vmx->vmxon = (void *)vm_vaddr_alloc_page(vm); + vmx->vmxon = (void *)vm_alloc_page(vm); vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon); vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon); /* Setup of a region of guest memory for a vmcs. */ - vmx->vmcs = (void *)vm_vaddr_alloc_page(vm); + vmx->vmcs = (void *)vm_alloc_page(vm); vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs); vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs); /* Setup of a region of guest memory for the MSR bitmap. */ - vmx->msr = (void *)vm_vaddr_alloc_page(vm); + vmx->msr = (void *)vm_alloc_page(vm); vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr); vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr); memset(vmx->msr_hva, 0, getpagesize()); /* Setup of a region of guest memory for the shadow VMCS. */ - vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm); + vmx->shadow_vmcs = (void *)vm_alloc_page(vm); vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs); vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs); /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */ - vmx->vmread = (void *)vm_vaddr_alloc_page(vm); + vmx->vmread = (void *)vm_alloc_page(vm); vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread); vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread); memset(vmx->vmread_hva, 0, getpagesize()); - vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm); + vmx->vmwrite = (void *)vm_alloc_page(vm); vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite); vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); memset(vmx->vmwrite_hva, 0, getpagesize()); @@ -390,7 +390,7 @@ bool kvm_cpu_has_ept(void) void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm) { - vmx->apic_access = (void *)vm_vaddr_alloc_page(vm); + vmx->apic_access = (void *)vm_alloc_page(vm); vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access); vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access); } diff --git a/tools/testing/selftests/kvm/s390/memop.c b/tools/testing/selftests/kvm/s390/memop.c index 9855b5bfb5ed..0244848621b3 100644 --- a/tools/testing/selftests/kvm/s390/memop.c +++ b/tools/testing/selftests/kvm/s390/memop.c @@ -880,8 +880,8 @@ static void test_copy_key_fetch_prot_override(void) struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); gva_t guest_0_page, guest_last_page; - guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); - guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); + guest_0_page = vm_alloc(t.kvm_vm, PAGE_SIZE, 0); + guest_last_page = vm_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); if (guest_0_page != 0 || guest_last_page != last_page_addr) { print_skip("did not allocate guest pages at required positions"); goto out; @@ -919,8 +919,8 @@ static void test_errors_key_fetch_prot_override_not_enabled(void) struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); gva_t guest_0_page, guest_last_page; - guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); - guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); + guest_0_page = vm_alloc(t.kvm_vm, PAGE_SIZE, 0); + guest_last_page = vm_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); if (guest_0_page != 0 || guest_last_page != last_page_addr) { print_skip("did not allocate guest pages at required positions"); goto out; @@ -940,8 +940,8 @@ static void test_errors_key_fetch_prot_override_enabled(void) struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); gva_t guest_0_page, guest_last_page; - guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); - guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); + guest_0_page = vm_alloc(t.kvm_vm, PAGE_SIZE, 0); + guest_last_page = vm_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); if (guest_0_page != 0 || guest_last_page != last_page_addr) { print_skip("did not allocate guest pages at required positions"); goto out; diff --git a/tools/testing/selftests/kvm/s390/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c index e021e198b28e..8054d2b178f0 100644 --- a/tools/testing/selftests/kvm/s390/tprot.c +++ b/tools/testing/selftests/kvm/s390/tprot.c @@ -146,7 +146,7 @@ static enum stage perform_next_stage(int *i, bool mapped_0) /* * Some fetch protection override tests require that page 0 * be mapped, however, when the hosts tries to map that page via - * vm_vaddr_alloc, it may happen that some other page gets mapped + * vm_alloc, it may happen that some other page gets mapped * instead. * In order to skip these tests we detect this inside the guest */ @@ -219,7 +219,7 @@ int main(int argc, char *argv[]) mprotect(addr_gva2hva(vm, (gva_t)pages), PAGE_SIZE * 2, PROT_READ); HOST_SYNC(vcpu, TEST_SIMPLE); - guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0); + guest_0_page = vm_alloc(vm, PAGE_SIZE, 0); if (guest_0_page != 0) { /* Use NO_TAP so we don't get a PASS print */ HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE); diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c index 9ecf7515442b..4e63da2b1889 100644 --- a/tools/testing/selftests/kvm/x86/amx_test.c +++ b/tools/testing/selftests/kvm/x86/amx_test.c @@ -263,15 +263,15 @@ int main(int argc, char *argv[]) vcpu_regs_get(vcpu, ®s1); /* amx cfg for guest_code */ - amx_cfg = vm_vaddr_alloc_page(vm); + amx_cfg = vm_alloc_page(vm); memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize()); /* amx tiledata for guest_code */ - tiledata = vm_vaddr_alloc_pages(vm, 2); + tiledata = vm_alloc_pages(vm, 2); memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize()); /* XSAVE state for guest_code */ - xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE)); + xstate = vm_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE)); memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE)); vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate); diff --git a/tools/testing/selftests/kvm/x86/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c index 3c45249a42c4..ef0ddd240887 100644 --- a/tools/testing/selftests/kvm/x86/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/cpuid_test.c @@ -143,7 +143,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage) struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, gva_t *p_gva, struct kvm_cpuid2 *cpuid) { int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]); - gva_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR); + gva_t gva = vm_alloc(vm, size, KVM_UTIL_MIN_VADDR); struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva); memcpy(guest_cpuids, cpuid, size); diff --git a/tools/testing/selftests/kvm/x86/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c index 6bb1ca11256f..c083cea546dc 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86/hyperv_clock.c @@ -218,7 +218,7 @@ int main(void) vcpu_set_hv_cpuid(vcpu); - tsc_page_gva = vm_vaddr_alloc_page(vm); + tsc_page_gva = vm_alloc_page(vm); memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, "TSC page has to be page aligned"); diff --git a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c index 061d9e1f02c0..c7fa114aee20 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c @@ -246,7 +246,7 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, guest_code); - hcall_page = vm_vaddr_alloc_pages(vm, 1); + hcall_page = vm_alloc_pages(vm, 1); memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); vcpu_set_hv_cpuid(vcpu); diff --git a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c index be7a2a631789..ae047db7b1be 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c +++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c @@ -57,11 +57,11 @@ int main(void) vcpu_set_hv_cpuid(vcpu); /* Hypercall input */ - hcall_in_page = vm_vaddr_alloc_pages(vm, 1); + hcall_in_page = vm_alloc_pages(vm, 1); memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size); /* Hypercall output */ - hcall_out_page = vm_vaddr_alloc_pages(vm, 1); + hcall_out_page = vm_alloc_pages(vm, 1); memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size); vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page), diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 52dbd52ce606..7347f1fe5157 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c @@ -141,7 +141,7 @@ static void guest_test_msrs_access(void) while (true) { vm = vm_create_with_one_vcpu(&vcpu, guest_msr); - msr_gva = vm_vaddr_alloc_page(vm); + msr_gva = vm_alloc_page(vm); memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize()); msr = addr_gva2hva(vm, msr_gva); @@ -530,10 +530,10 @@ static void guest_test_hcalls_access(void) vm = vm_create_with_one_vcpu(&vcpu, guest_hcall); /* Hypercall input/output */ - hcall_page = vm_vaddr_alloc_pages(vm, 2); + hcall_page = vm_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); - hcall_params = vm_vaddr_alloc_page(vm); + hcall_params = vm_alloc_page(vm); memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize()); hcall = addr_gva2hva(vm, hcall_params); diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c index beafcfa4043a..771535f9aad3 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c @@ -253,7 +253,7 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); /* Hypercall input/output */ - hcall_page = vm_vaddr_alloc_pages(vm, 2); + hcall_page = vm_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); diff --git a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c index 77b774b5041c..7a62f6a9d606 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) vcpu_alloc_svm(vm, &nested_gva); vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); - hcall_page = vm_vaddr_alloc_pages(vm, 1); + hcall_page = vm_alloc_pages(vm, 1); memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c index a4fb63112cac..6adf76574921 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c @@ -593,11 +593,11 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); /* Test data page */ - test_data_page = vm_vaddr_alloc_page(vm); + test_data_page = vm_alloc_page(vm); data = (struct test_data *)addr_gva2hva(vm, test_data_page); /* Hypercall input/output */ - data->hcall_gva = vm_vaddr_alloc_pages(vm, 2); + data->hcall_gva = vm_alloc_pages(vm, 2); data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva); memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE); @@ -606,7 +606,7 @@ int main(int argc, char *argv[]) * and the test will swap their mappings. The third page keeps the indication * about the current state of mappings. */ - data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1); + data->test_pages = vm_alloc_pages(vm, NTEST_PAGES + 1); for (i = 0; i < NTEST_PAGES; i++) memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i), (u8)(i + 1), PAGE_SIZE); diff --git a/tools/testing/selftests/kvm/x86/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c index 2b8a3feee1f8..5ad4aeb8e373 100644 --- a/tools/testing/selftests/kvm/x86/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c @@ -147,7 +147,7 @@ int main(void) vm = vm_create_with_one_vcpu(&vcpu, guest_main); - pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000); + pvti_gva = vm_alloc(vm, getpagesize(), 0x10000); pvti_gpa = addr_gva2gpa(vm, pvti_gva); vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva); diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index 4e037795dc33..1a49ee391586 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -115,8 +115,8 @@ static void test_sync_vmsa(u32 type, u64 policy) struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 }; vm = vm_sev_create_with_one_vcpu(type, guest_code_xsave, &vcpu); - gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, - MEM_REGION_TEST_DATA); + gva = vm_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, + MEM_REGION_TEST_DATA); hva = addr_gva2hva(vm, gva); vcpu_args_set(vcpu, 1, gva); diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c index 5fefb319d9be..f72f11d4c4f8 100644 --- a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c @@ -161,7 +161,7 @@ static void run_test(bool is_nmi) if (!is_nmi) { void *idt, *idt_alt; - idt_alt_vm = vm_vaddr_alloc_page(vm); + idt_alt_vm = vm_alloc_page(vm); idt_alt = addr_gva2hva(vm, idt_alt_vm); idt = addr_gva2hva(vm, vm->arch.idt); memcpy(idt_alt, idt, getpagesize()); diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c index 3df6df2a1b55..d2e2410f748b 100644 --- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c @@ -414,7 +414,7 @@ int main(int argc, char *argv[]) params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code); - test_data_page_vaddr = vm_vaddr_alloc_page(vm); + test_data_page_vaddr = vm_alloc_page(vm); data = addr_gva2hva(vm, test_data_page_vaddr); memset(data, 0, sizeof(*data)); params[0].data = data; -- cgit v1.2.3 From 48321f609a73e37c26e29e0b38e38741263e7e7d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 20 Apr 2026 14:19:57 -0700 Subject: KVM: selftests: Rename vm_vaddr_unused_gap() => vm_unused_gva_gap() Now that KVM selftests use gva_t instead of vm_vaddr_t, rename the API for finding an unused range of virtual memory to drop the defunct terminology and use "vm" for the scope. Opportunistically clean up the function comment to drop superfluous and redundant information. No functional change intended. Link: https://patch.msgid.link/20260420212004.3938325-13-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/kvm_util.h | 2 +- tools/testing/selftests/kvm/lib/arm64/ucall.c | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 24 +++++----------------- tools/testing/selftests/kvm/lib/loongarch/ucall.c | 2 +- tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c | 2 +- 5 files changed, 9 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 8f7afc34ea8d..0239e89320e5 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -715,7 +715,7 @@ void vm_mem_region_move(struct kvm_vm *vm, u32 slot, u64 new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, u32 slot); struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id); void vm_populate_vaddr_bitmap(struct kvm_vm *vm); -gva_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, gva_t vaddr_min); +gva_t vm_unused_gva_gap(struct kvm_vm *vm, size_t sz, gva_t vaddr_min); gva_t vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min); gva_t __vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, enum kvm_mem_region_type type); diff --git a/tools/testing/selftests/kvm/lib/arm64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c index 8257dc4ae106..e0550ad5aa75 100644 --- a/tools/testing/selftests/kvm/lib/arm64/ucall.c +++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c @@ -10,7 +10,7 @@ gva_t *ucall_exit_mmio_addr; void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa) { - gva_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); + gva_t mmio_gva = vm_unused_gva_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); virt_map(vm, mmio_gva, mmio_gpa, 1); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index b304c0e54837..8c82b40a7448 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1366,26 +1366,12 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) } /* - * VM Virtual Address Unused Gap - * - * Input Args: - * vm - Virtual Machine - * sz - Size (bytes) - * vaddr_min - Minimum Virtual Address - * - * Output Args: None - * - * Return: - * Lowest virtual address at or above vaddr_min, with at least - * sz unused bytes. TEST_ASSERT failure if no area of at least - * size sz is available. - * - * Within the VM specified by vm, locates the lowest starting virtual - * address >= vaddr_min, that has at least sz unallocated bytes. A + * Within the VM specified by @vm, locates the lowest starting guest virtual + * address >= @vaddr_min, that has at least @sz unallocated bytes. A * TEST_ASSERT failure occurs for invalid input or no area of at least - * sz unallocated bytes >= vaddr_min is available. + * @sz unallocated bytes >= @min_gva is available. */ -gva_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, gva_t vaddr_min) +gva_t vm_unused_gva_gap(struct kvm_vm *vm, size_t sz, gva_t vaddr_min) { u64 pages = (sz + vm->page_size - 1) >> vm->page_shift; @@ -1464,7 +1450,7 @@ static gva_t ____vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, * Find an unused range of virtual page addresses of at least * pages in length. */ - gva_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); + gva_t vaddr_start = vm_unused_gva_gap(vm, sz, vaddr_min); /* Map the virtual pages. */ for (gva_t vaddr = vaddr_start; pages > 0; diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c index eb9f714a535c..cd49a3440ead 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/ucall.c +++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c @@ -13,7 +13,7 @@ gva_t *ucall_exit_mmio_addr; void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa) { - gva_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); + gva_t mmio_gva = vm_unused_gva_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); virt_map(vm, mmio_gva, mmio_gpa, 1); diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c index 6adf76574921..15ee8b7bfc11 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c @@ -617,7 +617,7 @@ int main(int argc, char *argv[]) * Get PTE pointers for test pages and map them inside the guest. * Use separate page for each PTE for simplicity. */ - gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR); + gva = vm_unused_gva_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR); for (i = 0; i < NTEST_PAGES; i++) { pte = vm_get_pte(vm, data->test_pages + i * PAGE_SIZE); gpa = addr_hva2gpa(vm, pte); -- cgit v1.2.3 From 3fd995905b71dac9bd77a4cc770524bdc5606212 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 20 Apr 2026 14:19:58 -0700 Subject: KVM: selftests: Rename vm_vaddr_populate_bitmap() => vm_populate_gva_bitmap() Now that KVM selftests use gva_t instead of vm_vaddr_t, rename the helper for populating the initial GVA bitmap to drop the defunct terminology and use "vm" for the scope. Opportunistically fixup the declaration of the API, which has been broken since day 1. The flaw went unnoticed because the sole caller is defined after the weak version, i.e. can see the prototype without a previous declaration. No functional change intended. Fixes: e8b9a055fa04 ("KVM: arm64: selftests: Align VA space allocator with TTBR0") Link: https://patch.msgid.link/20260420212004.3938325-14-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/kvm_util.h | 2 +- tools/testing/selftests/kvm/lib/arm64/processor.c | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 0239e89320e5..0fbfb2a28767 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -714,7 +714,7 @@ void vm_mem_region_reload(struct kvm_vm *vm, u32 slot); void vm_mem_region_move(struct kvm_vm *vm, u32 slot, u64 new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, u32 slot); struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id); -void vm_populate_vaddr_bitmap(struct kvm_vm *vm); +void vm_populate_gva_bitmap(struct kvm_vm *vm); gva_t vm_unused_gva_gap(struct kvm_vm *vm, size_t sz, gva_t vaddr_min); gva_t vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min); gva_t __vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index c4f0e37f2907..384b6c80b1e7 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -671,7 +671,7 @@ void kvm_selftest_arch_init(void) guest_modes_append_default(); } -void vm_vaddr_populate_bitmap(struct kvm_vm *vm) +void vm_populate_gva_bitmap(struct kvm_vm *vm) { /* * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 8c82b40a7448..1a1b41021cc7 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -267,7 +267,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) * based on the MSB of the VA. On architectures with this behavior * the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1), -1]. */ -__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm) +__weak void vm_populate_gva_bitmap(struct kvm_vm *vm) { sparsebit_set_num(vm->vpages_valid, 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); @@ -385,7 +385,7 @@ struct kvm_vm *____vm_create(struct vm_shape shape) /* Limit to VA-bit canonical virtual addresses. */ vm->vpages_valid = sparsebit_alloc(); - vm_vaddr_populate_bitmap(vm); + vm_populate_gva_bitmap(vm); /* Limit physical addresses to PA-bits. */ vm->max_gfn = vm_compute_max_gfn(vm); -- cgit v1.2.3 From 4babae4ca10a6ba642373c45734d9df93852e6ed Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 20 Apr 2026 14:19:59 -0700 Subject: KVM: selftests: Rename translate_to_host_paddr() => translate_hva_to_hpa() Rename arm64's translate_to_host_paddr() to translate_hva_to_hpa() and update variable names to match, as using "vaddr" and "paddr" terminology is super confusing due to selftests using those exact names for *guest* addresses. Opportunisitically drop superfluous local page_addr and paddr variables. No functional change intended. Link: https://patch.msgid.link/20260420212004.3938325-15-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/arm64/sea_to_user.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c index 7285eade4acf..fb06b9dcb3d9 100644 --- a/tools/testing/selftests/kvm/arm64/sea_to_user.c +++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c @@ -56,13 +56,11 @@ static void *einj_hva; static u64 einj_hpa; static bool far_invalid; -static u64 translate_to_host_paddr(unsigned long vaddr) +static u64 translate_hva_to_hpa(unsigned long hva) { u64 pinfo; - s64 offset = vaddr / getpagesize() * sizeof(pinfo); + s64 offset = hva / getpagesize() * sizeof(pinfo); int fd; - u64 page_addr; - u64 paddr; fd = open("/proc/self/pagemap", O_RDONLY); if (fd < 0) @@ -77,9 +75,8 @@ static u64 translate_to_host_paddr(unsigned long vaddr) if ((pinfo & PAGE_PRESENT) == 0) ksft_exit_fail_perror("Page not present"); - page_addr = (pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT; - paddr = page_addr + (vaddr & (getpagesize() - 1)); - return paddr; + return ((pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT) + + (hva & (getpagesize() - 1)); } static void write_einj_entry(const char *einj_path, u64 val) @@ -303,7 +300,7 @@ static void vm_inject_memory_uer(struct kvm_vm *vm) ksft_print_msg("Before EINJect: data=%#lx\n", guest_data); - einj_hpa = translate_to_host_paddr((unsigned long)einj_hva); + einj_hpa = translate_hva_to_hpa((unsigned long)einj_hva); ksft_print_msg("EINJ_GVA=%#lx, einj_gpa=%#lx, einj_hva=%p, einj_hpa=%#lx\n", EINJ_GVA, einj_gpa, einj_hva, einj_hpa); -- cgit v1.2.3 From a662c4e03853304ff0967c756659366efdc9ea49 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 20 Apr 2026 14:20:00 -0700 Subject: KVM: selftests: Clarify that arm64's inject_uer() takes a host PA, not a guest PA Rename inject_uer()'s @paddr to @hpa to make it more obvious that it injects an error using a host PA, not a guest PA. No functional change intended. Link: https://patch.msgid.link/20260420212004.3938325-16-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/arm64/sea_to_user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c index fb06b9dcb3d9..e16034852470 100644 --- a/tools/testing/selftests/kvm/arm64/sea_to_user.c +++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c @@ -93,7 +93,7 @@ static void write_einj_entry(const char *einj_path, u64 val) ksft_exit_fail_perror("Failed to write EINJ entry"); } -static void inject_uer(u64 paddr) +static void inject_uer(u64 hpa) { if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1) ksft_test_result_skip("EINJ table no available in firmware"); @@ -103,7 +103,7 @@ static void inject_uer(u64 paddr) write_einj_entry(EINJ_ETYPE, ERROR_TYPE_MEMORY_UER); write_einj_entry(EINJ_FLAGS, MASK_MEMORY_UER); - write_einj_entry(EINJ_ADDR, paddr); + write_einj_entry(EINJ_ADDR, hpa); write_einj_entry(EINJ_MASK, ~0x0UL); write_einj_entry(EINJ_NOTRIGGER, 1); write_einj_entry(EINJ_DOIT, 1); -- cgit v1.2.3 From 014dfb7b9bf3ff49261b47fbe56b42fc8ed06fc5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 20 Apr 2026 14:20:01 -0700 Subject: KVM: selftests: Replace "vaddr" with "gva" throughout Replace all variations of "vaddr" variables in KVM selftests with "gva", with the exception of the ELF structures, as those fields are not specific to guest virtual addresses, to complete the conversion from vm_vaddr_t to gva_t. Opportunistically use gva_t instead of u64 for relevant variables, and fixup indentation as appropriate. No functional change intended. Link: https://patch.msgid.link/20260420212004.3938325-17-seanjc@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/access_tracking_perf_test.c | 6 +- .../testing/selftests/kvm/arm64/page_fault_test.c | 4 +- tools/testing/selftests/kvm/include/kvm_util.h | 32 +++------ tools/testing/selftests/kvm/include/memstress.h | 2 +- .../testing/selftests/kvm/include/x86/processor.h | 6 +- tools/testing/selftests/kvm/lib/arm64/processor.c | 33 +++++---- tools/testing/selftests/kvm/lib/elf.c | 10 +-- tools/testing/selftests/kvm/lib/kvm_util.c | 60 +++++++--------- .../selftests/kvm/lib/loongarch/processor.c | 25 ++++--- tools/testing/selftests/kvm/lib/memstress.c | 2 +- tools/testing/selftests/kvm/lib/riscv/processor.c | 25 ++++--- tools/testing/selftests/kvm/lib/s390/processor.c | 23 +++--- tools/testing/selftests/kvm/lib/ucall_common.c | 8 +-- tools/testing/selftests/kvm/lib/x86/processor.c | 82 ++++++++++------------ tools/testing/selftests/kvm/s390/ucontrol_test.c | 4 +- tools/testing/selftests/kvm/x86/xapic_ipi_test.c | 10 +-- 16 files changed, 150 insertions(+), 182 deletions(-) diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 4479aed94625..e5bbdb5bbdc3 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -123,7 +123,7 @@ static u64 pread_u64(int fd, const char *filename, u64 index) #define PAGEMAP_PRESENT (1ULL << 63) #define PAGEMAP_PFN_MASK ((1ULL << 55) - 1) -static u64 lookup_pfn(int pagemap_fd, struct kvm_vm *vm, u64 gva) +static u64 lookup_pfn(int pagemap_fd, struct kvm_vm *vm, gva_t gva) { u64 hva = (u64)addr_gva2hva(vm, gva); u64 entry; @@ -174,7 +174,7 @@ static void pageidle_mark_vcpu_memory_idle(struct kvm_vm *vm, struct memstress_vcpu_args *vcpu_args) { int vcpu_idx = vcpu_args->vcpu_idx; - u64 base_gva = vcpu_args->gva; + gva_t base_gva = vcpu_args->gva; u64 pages = vcpu_args->pages; u64 page; u64 still_idle = 0; @@ -193,7 +193,7 @@ static void pageidle_mark_vcpu_memory_idle(struct kvm_vm *vm, TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap."); for (page = 0; page < pages; page++) { - u64 gva = base_gva + page * memstress_args.guest_page_size; + gva_t gva = base_gva + page * memstress_args.guest_page_size; u64 pfn = lookup_pfn(pagemap_fd, vm, gva); if (!pfn) { diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index b92a9614d7d2..6bb3d82906b2 100644 --- a/tools/testing/selftests/kvm/arm64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -70,9 +70,9 @@ struct test_params { struct test_desc *test_desc; }; -static inline void flush_tlb_page(u64 vaddr) +static inline void flush_tlb_page(gva_t gva) { - u64 page = vaddr >> 12; + gva_t page = gva >> 12; dsb(ishst); asm volatile("tlbi vaae1is, %0" :: "r" (page)); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 0fbfb2a28767..0dcfad728edd 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -715,17 +715,17 @@ void vm_mem_region_move(struct kvm_vm *vm, u32 slot, u64 new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, u32 slot); struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id); void vm_populate_gva_bitmap(struct kvm_vm *vm); -gva_t vm_unused_gva_gap(struct kvm_vm *vm, size_t sz, gva_t vaddr_min); -gva_t vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min); -gva_t __vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, +gva_t vm_unused_gva_gap(struct kvm_vm *vm, size_t sz, gva_t min_gva); +gva_t vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva); +gva_t __vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva, enum kvm_mem_region_type type); -gva_t vm_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, +gva_t vm_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t min_gva, enum kvm_mem_region_type type); gva_t vm_alloc_pages(struct kvm_vm *vm, int nr_pages); gva_t __vm_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type); gva_t vm_alloc_page(struct kvm_vm *vm); -void virt_map(struct kvm_vm *vm, u64 vaddr, u64 paddr, +void virt_map(struct kvm_vm *vm, gva_t gva, u64 paddr, unsigned int npages); void *addr_gpa2hva(struct kvm_vm *vm, gpa_t gpa); void *addr_gva2hva(struct kvm_vm *vm, gva_t gva); @@ -1202,27 +1202,15 @@ static inline void virt_pgd_alloc(struct kvm_vm *vm) } /* - * VM Virtual Page Map - * - * Input Args: - * vm - Virtual Machine - * vaddr - VM Virtual Address - * paddr - VM Physical Address - * memslot - Memory region slot for new virtual translation tables - * - * Output Args: None - * - * Return: None - * * Within @vm, creates a virtual translation for the page starting - * at @vaddr to the page starting at @paddr. + * at @gva to the page starting at @paddr. */ -void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr); +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr); -static inline void virt_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) +static inline void virt_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr) { - virt_arch_pg_map(vm, vaddr, paddr); - sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); + virt_arch_pg_map(vm, gva, paddr); + sparsebit_set(vm->vpages_mapped, gva >> vm->page_shift); } diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h index e3e4b4d6a27a..abd0dca10283 100644 --- a/tools/testing/selftests/kvm/include/memstress.h +++ b/tools/testing/selftests/kvm/include/memstress.h @@ -21,7 +21,7 @@ struct memstress_vcpu_args { u64 gpa; - u64 gva; + gva_t gva; u64 pages; /* Only used by the host userspace part of the vCPU thread */ diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 4efa6c942192..15252e75aaf1 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1394,7 +1394,7 @@ static inline bool kvm_is_lbrv_enabled(void) return !!get_kvm_amd_param_integer("lbrv"); } -u64 *vm_get_pte(struct kvm_vm *vm, u64 vaddr); +u64 *vm_get_pte(struct kvm_vm *vm, gva_t gva); u64 kvm_hypercall(u64 nr, u64 a0, u64 a1, u64 a2, u64 a3); u64 __xen_hypercall(u64 nr, u64 a0, void *a1); @@ -1507,9 +1507,9 @@ enum pg_level { void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels, struct pte_masks *pte_masks); -void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, u64 vaddr, +void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, gva_t gva, u64 paddr, int level); -void virt_map_level(struct kvm_vm *vm, u64 vaddr, u64 paddr, +void virt_map_level(struct kvm_vm *vm, gva_t gva, u64 paddr, u64 nr_bytes, int level); void vm_enable_tdp(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 384b6c80b1e7..0f693d8891d2 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -121,19 +121,18 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) vm->mmu.pgd_created = true; } -static void _virt_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr, +static void _virt_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr, u64 flags) { u8 attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); u64 pg_attr; u64 *ptep; - TEST_ASSERT((vaddr % vm->page_size) == 0, + TEST_ASSERT((gva % vm->page_size) == 0, "Virtual address not on page boundary,\n" - " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", vaddr); + " gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); TEST_ASSERT((paddr % vm->page_size) == 0, "Physical address not on page boundary,\n" " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); @@ -142,26 +141,26 @@ static void _virt_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr, " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", paddr, vm->max_gfn, vm->page_size); - ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, vaddr) * 8; + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, gva) * 8; if (!*ptep) *ptep = addr_pte(vm, vm_alloc_page_table(vm), PGD_TYPE_TABLE | PTE_VALID); switch (vm->mmu.pgtable_levels) { case 4: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; if (!*ptep) *ptep = addr_pte(vm, vm_alloc_page_table(vm), PUD_TYPE_TABLE | PTE_VALID); /* fall through */ case 3: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; if (!*ptep) *ptep = addr_pte(vm, vm_alloc_page_table(vm), PMD_TYPE_TABLE | PTE_VALID); /* fall through */ case 2: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; break; default: TEST_FAIL("Page table levels must be 2, 3, or 4"); @@ -174,11 +173,11 @@ static void _virt_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr, *ptep = addr_pte(vm, paddr, pg_attr); } -void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr) { u64 attr_idx = MT_NORMAL; - _virt_pg_map(vm, vaddr, paddr, attr_idx); + _virt_pg_map(vm, gva, paddr, attr_idx); } u64 *virt_get_pte_hva_at_level(struct kvm_vm *vm, gva_t gva, int level) @@ -417,18 +416,18 @@ static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, u32 vcpu_id, struct kvm_vcpu_init *init) { size_t stack_size; - u64 stack_vaddr; + gva_t stack_gva; struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : vm->page_size; - stack_vaddr = __vm_alloc(vm, stack_size, - DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); + stack_gva = __vm_alloc(vm, stack_size, + DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); aarch64_vcpu_setup(vcpu, init); - vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), stack_vaddr + stack_size); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), stack_gva + stack_size); return vcpu; } diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index 2288480f4e1e..b689c4df4a01 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -162,14 +162,14 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) seg_vend |= vm->page_size - 1; size_t seg_size = seg_vend - seg_vstart + 1; - gva_t vaddr = __vm_alloc(vm, seg_size, seg_vstart, MEM_REGION_CODE); - TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate " + gva_t gva = __vm_alloc(vm, seg_size, seg_vstart, MEM_REGION_CODE); + TEST_ASSERT(gva == seg_vstart, "Unable to allocate " "virtual memory for segment at requested min addr,\n" " segment idx: %u\n" " seg_vstart: 0x%lx\n" - " vaddr: 0x%lx", - n1, seg_vstart, vaddr); - memset(addr_gva2hva(vm, vaddr), 0, seg_size); + " gva: 0x%lx", + n1, seg_vstart, gva); + memset(addr_gva2hva(vm, gva), 0, seg_size); /* TODO(lhuemill): Set permissions of each memory segment * based on the least-significant 3 bits of phdr.p_flags. */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 1a1b41021cc7..e282f9abd4c7 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1367,17 +1367,17 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) /* * Within the VM specified by @vm, locates the lowest starting guest virtual - * address >= @vaddr_min, that has at least @sz unallocated bytes. A + * address >= @min_gva, that has at least @sz unallocated bytes. A * TEST_ASSERT failure occurs for invalid input or no area of at least * @sz unallocated bytes >= @min_gva is available. */ -gva_t vm_unused_gva_gap(struct kvm_vm *vm, size_t sz, gva_t vaddr_min) +gva_t vm_unused_gva_gap(struct kvm_vm *vm, size_t sz, gva_t min_gva) { u64 pages = (sz + vm->page_size - 1) >> vm->page_shift; /* Determine lowest permitted virtual page index. */ - u64 pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; - if ((pgidx_start * vm->page_size) < vaddr_min) + u64 pgidx_start = (min_gva + vm->page_size - 1) >> vm->page_shift; + if ((pgidx_start * vm->page_size) < min_gva) goto no_va_found; /* Loop over section with enough valid virtual page indexes. */ @@ -1414,7 +1414,7 @@ gva_t vm_unused_gva_gap(struct kvm_vm *vm, size_t sz, gva_t vaddr_min) } while (pgidx_start != 0); no_va_found: - TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages); + TEST_FAIL("No gva of specified pages available, pages: 0x%lx", pages); /* NOT REACHED */ return -1; @@ -1436,7 +1436,7 @@ va_found: return pgidx_start * vm->page_size; } -static gva_t ____vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, +static gva_t ____vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva, enum kvm_mem_region_type type, bool protected) { u64 pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); @@ -1450,41 +1450,41 @@ static gva_t ____vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, * Find an unused range of virtual page addresses of at least * pages in length. */ - gva_t vaddr_start = vm_unused_gva_gap(vm, sz, vaddr_min); + gva_t gva_start = vm_unused_gva_gap(vm, sz, min_gva); /* Map the virtual pages. */ - for (gva_t vaddr = vaddr_start; pages > 0; - pages--, vaddr += vm->page_size, paddr += vm->page_size) { + for (gva_t gva = gva_start; pages > 0; + pages--, gva += vm->page_size, paddr += vm->page_size) { - virt_pg_map(vm, vaddr, paddr); + virt_pg_map(vm, gva, paddr); } - return vaddr_start; + return gva_start; } -gva_t __vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, +gva_t __vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva, enum kvm_mem_region_type type) { - return ____vm_alloc(vm, sz, vaddr_min, type, + return ____vm_alloc(vm, sz, min_gva, type, vm_arch_has_protected_memory(vm)); } -gva_t vm_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t vaddr_min, +gva_t vm_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t min_gva, enum kvm_mem_region_type type) { - return ____vm_alloc(vm, sz, vaddr_min, type, false); + return ____vm_alloc(vm, sz, min_gva, type, false); } /* * Allocates at least sz bytes within the virtual address space of the VM * given by @vm. The allocated bytes are mapped to a virtual address >= the - * address given by @vaddr_min. Note that each allocation uses a a unique set + * address given by @min_gva. Note that each allocation uses a a unique set * of pages, with the minimum real allocation being at least a page. The * allocated physical space comes from the TEST_DATA memory region. */ -gva_t vm_alloc(struct kvm_vm *vm, size_t sz, gva_t vaddr_min) +gva_t vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva) { - return __vm_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA); + return __vm_alloc(vm, sz, min_gva, MEM_REGION_TEST_DATA); } gva_t vm_alloc_pages(struct kvm_vm *vm, int nr_pages) @@ -1503,34 +1503,24 @@ gva_t vm_alloc_page(struct kvm_vm *vm) } /* - * Map a range of VM virtual address to the VM's physical address + * Map a range of VM virtual address to the VM's physical address. * - * Input Args: - * vm - Virtual Machine - * vaddr - Virtuall address to map - * paddr - VM Physical Address - * npages - The number of pages to map - * - * Output Args: None - * - * Return: None - * - * Within the VM given by @vm, creates a virtual translation for - * @npages starting at @vaddr to the page range starting at @paddr. + * Within the VM given by @vm, creates a virtual translation for @npages + * starting at @gva to the page range starting at @paddr. */ -void virt_map(struct kvm_vm *vm, u64 vaddr, u64 paddr, +void virt_map(struct kvm_vm *vm, gva_t gva, u64 paddr, unsigned int npages) { size_t page_size = vm->page_size; size_t size = npages * page_size; - TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow"); + TEST_ASSERT(gva + size > gva, "Vaddr overflow"); TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); while (npages--) { - virt_pg_map(vm, vaddr, paddr); + virt_pg_map(vm, gva, paddr); - vaddr += page_size; + gva += page_size; paddr += page_size; } } diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c index 318520f1f1b9..47e782056196 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/processor.c +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -111,22 +111,21 @@ gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) u64 *ptep; ptep = virt_populate_pte(vm, gva, 0); - TEST_ASSERT(*ptep != 0, "Virtual address vaddr: 0x%lx not mapped\n", gva); + TEST_ASSERT(*ptep != 0, "Virtual address gva: 0x%lx not mapped\n", gva); return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); } -void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr) { u32 prot_bits; u64 *ptep; - TEST_ASSERT((vaddr % vm->page_size) == 0, + TEST_ASSERT((gva % vm->page_size) == 0, "Virtual address not on page boundary,\n" - "vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", vaddr); + "gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); TEST_ASSERT((paddr % vm->page_size) == 0, "Physical address not on page boundary,\n" "paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); @@ -135,7 +134,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) "paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", paddr, vm->max_gfn, vm->page_size); - ptep = virt_populate_pte(vm, vaddr, 1); + ptep = virt_populate_pte(vm, gva, 1); prot_bits = _PAGE_PRESENT | __READABLE | __WRITEABLE | _CACHE_CC | _PAGE_USER; WRITE_ONCE(*ptep, paddr | prot_bits); } @@ -373,20 +372,20 @@ void loongarch_vcpu_setup(struct kvm_vcpu *vcpu) struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { size_t stack_size; - u64 stack_vaddr; + u64 stack_gva; struct kvm_regs regs; struct kvm_vcpu *vcpu; vcpu = __vm_vcpu_add(vm, vcpu_id); stack_size = vm->page_size; - stack_vaddr = __vm_alloc(vm, stack_size, - LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); - TEST_ASSERT(stack_vaddr != 0, "No memory for vm stack"); + stack_gva = __vm_alloc(vm, stack_size, + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + TEST_ASSERT(stack_gva != 0, "No memory for vm stack"); loongarch_vcpu_setup(vcpu); /* Setup guest general purpose registers */ vcpu_regs_get(vcpu, ®s); - regs.gpr[3] = stack_vaddr + stack_size; + regs.gpr[3] = stack_gva + stack_size; vcpu_regs_set(vcpu, ®s); return vcpu; diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index b59dc3344ff3..6dcd15910a06 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -49,7 +49,7 @@ void memstress_guest_code(u32 vcpu_idx) struct memstress_args *args = &memstress_args; struct memstress_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx]; struct guest_random_state rand_state; - u64 gva; + gva_t gva; u64 pages; u64 addr; u64 page; diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 38eb8302922a..108144fb858b 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -75,17 +75,16 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) vm->mmu.pgd_created = true; } -void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr) { u64 *ptep, next_ppn; int level = vm->mmu.pgtable_levels - 1; - TEST_ASSERT((vaddr % vm->page_size) == 0, + TEST_ASSERT((gva % vm->page_size) == 0, "Virtual address not on page boundary,\n" - " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", vaddr); + " gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); TEST_ASSERT((paddr % vm->page_size) == 0, "Physical address not on page boundary,\n" " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); @@ -94,7 +93,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", paddr, vm->max_gfn, vm->page_size); - ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, vaddr, level) * 8; + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, gva, level) * 8; if (!*ptep) { next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT; *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | @@ -104,7 +103,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) while (level > -1) { ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + - pte_index(vm, vaddr, level) * 8; + pte_index(vm, gva, level) * 8; if (!*ptep && level > 0) { next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT; @@ -315,16 +314,16 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { int r; size_t stack_size; - unsigned long stack_vaddr; + unsigned long stack_gva; unsigned long current_gp = 0; struct kvm_mp_state mps; struct kvm_vcpu *vcpu; stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : vm->page_size; - stack_vaddr = __vm_alloc(vm, stack_size, - DEFAULT_RISCV_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); + stack_gva = __vm_alloc(vm, stack_size, + DEFAULT_RISCV_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); riscv_vcpu_mmu_setup(vcpu); @@ -344,7 +343,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp); /* Setup stack pointer and program counter of guest */ - vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size); + vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_gva + stack_size); /* Setup sscratch for guest_get_vcpuid() */ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(sscratch), vcpu_id); diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 4ae0a39f426f..643e583c804c 100644 --- a/tools/testing/selftests/kvm/lib/s390/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -47,19 +47,17 @@ static u64 virt_alloc_region(struct kvm_vm *vm, int ri) | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH); } -void virt_arch_pg_map(struct kvm_vm *vm, u64 gva, u64 gpa) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 gpa) { int ri, idx; u64 *entry; TEST_ASSERT((gva % vm->page_size) == 0, - "Virtual address not on page boundary,\n" - " vaddr: 0x%lx vm->page_size: 0x%x", - gva, vm->page_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (gva >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", - gva); + "Virtual address not on page boundary,\n" + " gva: 0x%lx vm->page_size: 0x%x", + gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); TEST_ASSERT((gpa % vm->page_size) == 0, "Physical address not on page boundary,\n" " paddr: 0x%lx vm->page_size: 0x%x", @@ -163,7 +161,7 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); - u64 stack_vaddr; + u64 stack_gva; struct kvm_regs regs; struct kvm_sregs sregs; struct kvm_vcpu *vcpu; @@ -171,15 +169,14 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); - stack_vaddr = __vm_alloc(vm, stack_size, - DEFAULT_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); + stack_gva = __vm_alloc(vm, stack_size, DEFAULT_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); /* Setup guest registers */ vcpu_regs_get(vcpu, ®s); - regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160; + regs.gprs[15] = stack_gva + (DEFAULT_STACK_PGS * getpagesize()) - 160; vcpu_regs_set(vcpu, ®s); vcpu_sregs_get(vcpu, &sregs); diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index 4a8a5bc40a45..029ce21f9f2f 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -29,12 +29,12 @@ void ucall_init(struct kvm_vm *vm, gpa_t mmio_gpa) { struct ucall_header *hdr; struct ucall *uc; - gva_t vaddr; + gva_t gva; int i; - vaddr = vm_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, + gva = vm_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, MEM_REGION_DATA); - hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr); + hdr = (struct ucall_header *)addr_gva2hva(vm, gva); memset(hdr, 0, sizeof(*hdr)); for (i = 0; i < KVM_MAX_VCPUS; ++i) { @@ -42,7 +42,7 @@ void ucall_init(struct kvm_vm *vm, gpa_t mmio_gpa) uc->hva = uc; } - write_guest_global(vm, ucall_pool, (struct ucall_header *)vaddr); + write_guest_global(vm, ucall_pool, (struct ucall_header *)gva); ucall_arch_init(vm, mmio_gpa); } diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 50848112932c..3c55980c81b2 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -207,15 +207,15 @@ void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels, } static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu, - u64 *parent_pte, u64 vaddr, int level) + u64 *parent_pte, gva_t gva, int level) { u64 pt_gpa = PTE_GET_PA(*parent_pte); u64 *page_table = addr_gpa2hva(vm, pt_gpa); - int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; + int index = (gva >> PG_LEVEL_SHIFT(level)) & 0x1ffu; TEST_ASSERT((*parent_pte == mmu->pgd) || is_present_pte(mmu, parent_pte), "Parent PTE (level %d) not PRESENT for gva: 0x%08lx", - level + 1, vaddr); + level + 1, gva); return &page_table[index]; } @@ -223,12 +223,12 @@ static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu, static u64 *virt_create_upper_pte(struct kvm_vm *vm, struct kvm_mmu *mmu, u64 *parent_pte, - u64 vaddr, + gva_t gva, u64 paddr, int current_level, int target_level) { - u64 *pte = virt_get_pte(vm, mmu, parent_pte, vaddr, current_level); + u64 *pte = virt_get_pte(vm, mmu, parent_pte, gva, current_level); paddr = vm_untag_gpa(vm, paddr); @@ -247,16 +247,16 @@ static u64 *virt_create_upper_pte(struct kvm_vm *vm, * this level. */ TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, vaddr: 0x%lx", - current_level, vaddr); + "Cannot create hugepage at level: %u, gva: 0x%lx", + current_level, gva); TEST_ASSERT(!is_huge_pte(mmu, pte), - "Cannot create page table at level: %u, vaddr: 0x%lx", - current_level, vaddr); + "Cannot create page table at level: %u, gva: 0x%lx", + current_level, gva); } return pte; } -void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, u64 vaddr, +void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, gva_t gva, u64 paddr, int level) { const u64 pg_size = PG_LEVEL_SIZE(level); @@ -266,11 +266,11 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, u64 vaddr, TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, "Unknown or unsupported guest mode: 0x%x", vm->mode); - TEST_ASSERT((vaddr % pg_size) == 0, + TEST_ASSERT((gva % pg_size) == 0, "Virtual address not aligned,\n" - "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", vaddr); + "gva: 0x%lx page size: 0x%lx", gva, pg_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); TEST_ASSERT((paddr % pg_size) == 0, "Physical address not aligned,\n" " paddr: 0x%lx page size: 0x%lx", paddr, pg_size); @@ -291,16 +291,16 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, u64 vaddr, for (current_level = mmu->pgtable_levels; current_level > PG_LEVEL_4K; current_level--) { - pte = virt_create_upper_pte(vm, mmu, pte, vaddr, paddr, + pte = virt_create_upper_pte(vm, mmu, pte, gva, paddr, current_level, level); if (is_huge_pte(mmu, pte)) return; } /* Fill in page table entry. */ - pte = virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K); + pte = virt_get_pte(vm, mmu, pte, gva, PG_LEVEL_4K); TEST_ASSERT(!is_present_pte(mmu, pte), - "PTE already present for 4k page at vaddr: 0x%lx", vaddr); + "PTE already present for 4k page at gva: 0x%lx", gva); *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) | PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) | PTE_ALWAYS_SET_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK); @@ -315,12 +315,12 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, u64 vaddr, *pte |= PTE_S_BIT_MASK(mmu); } -void virt_arch_pg_map(struct kvm_vm *vm, u64 vaddr, u64 paddr) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr) { - __virt_pg_map(vm, &vm->mmu, vaddr, paddr, PG_LEVEL_4K); + __virt_pg_map(vm, &vm->mmu, gva, paddr, PG_LEVEL_4K); } -void virt_map_level(struct kvm_vm *vm, u64 vaddr, u64 paddr, +void virt_map_level(struct kvm_vm *vm, gva_t gva, u64 paddr, u64 nr_bytes, int level) { u64 pg_size = PG_LEVEL_SIZE(level); @@ -332,11 +332,11 @@ void virt_map_level(struct kvm_vm *vm, u64 vaddr, u64 paddr, nr_bytes, pg_size); for (i = 0; i < nr_pages; i++) { - __virt_pg_map(vm, &vm->mmu, vaddr, paddr, level); - sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift, + __virt_pg_map(vm, &vm->mmu, gva, paddr, level); + sparsebit_set_num(vm->vpages_mapped, gva >> vm->page_shift, nr_bytes / PAGE_SIZE); - vaddr += pg_size; + gva += pg_size; paddr += pg_size; } } @@ -356,7 +356,7 @@ static bool vm_is_target_pte(struct kvm_mmu *mmu, u64 *pte, static u64 *__vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_mmu *mmu, - u64 vaddr, + gva_t gva, int *level) { int va_width = 12 + (mmu->pgtable_levels) * 9; @@ -371,26 +371,23 @@ static u64 *__vm_get_page_table_entry(struct kvm_vm *vm, TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, "Unknown or unsupported guest mode: 0x%x", vm->mode); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", - vaddr); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); /* - * Check that the vaddr is a sign-extended va_width value. + * Check that the gva is a sign-extended va_width value. */ - TEST_ASSERT(vaddr == - (((s64)vaddr << (64 - va_width) >> (64 - va_width))), + TEST_ASSERT(gva == (((s64)gva << (64 - va_width) >> (64 - va_width))), "Canonical check failed. The virtual address is invalid."); for (current_level = mmu->pgtable_levels; current_level > PG_LEVEL_4K; current_level--) { - pte = virt_get_pte(vm, mmu, pte, vaddr, current_level); + pte = virt_get_pte(vm, mmu, pte, gva, current_level); if (vm_is_target_pte(mmu, pte, level, current_level)) return pte; } - return virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K); + return virt_get_pte(vm, mmu, pte, gva, PG_LEVEL_4K); } u64 *tdp_get_pte(struct kvm_vm *vm, u64 l2_gpa) @@ -400,11 +397,11 @@ u64 *tdp_get_pte(struct kvm_vm *vm, u64 l2_gpa) return __vm_get_page_table_entry(vm, &vm->stage2_mmu, l2_gpa, &level); } -u64 *vm_get_pte(struct kvm_vm *vm, u64 vaddr) +u64 *vm_get_pte(struct kvm_vm *vm, gva_t gva) { int level = PG_LEVEL_4K; - return __vm_get_page_table_entry(vm, &vm->mmu, vaddr, &level); + return __vm_get_page_table_entry(vm, &vm->mmu, gva, &level); } void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) @@ -825,14 +822,13 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { struct kvm_mp_state mp_state; struct kvm_regs regs; - gva_t stack_vaddr; + gva_t stack_gva; struct kvm_vcpu *vcpu; - stack_vaddr = __vm_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), - DEFAULT_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); + stack_gva = __vm_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), + DEFAULT_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); - stack_vaddr += DEFAULT_STACK_PGS * getpagesize(); + stack_gva += DEFAULT_STACK_PGS * getpagesize(); /* * Align stack to match calling sequence requirements in section "The @@ -843,9 +839,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) * If this code is ever used to launch a vCPU with 32-bit entry point it * may need to subtract 4 bytes instead of 8 bytes. */ - TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE), + TEST_ASSERT(IS_ALIGNED(stack_gva, PAGE_SIZE), "__vm_alloc() did not provide a page-aligned address"); - stack_vaddr -= 8; + stack_gva -= 8; vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); @@ -855,7 +851,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) /* Setup guest general purpose registers */ vcpu_regs_get(vcpu, ®s); regs.rflags = regs.rflags | 0x2; - regs.rsp = stack_vaddr; + regs.rsp = stack_gva; vcpu_regs_set(vcpu, ®s); /* Setup the MP state */ diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c index f773ba0f4641..dbdee4c39d47 100644 --- a/tools/testing/selftests/kvm/s390/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -571,7 +571,7 @@ TEST_F(uc_kvm, uc_skey) { struct kvm_s390_sie_block *sie_block = self->sie_block; struct kvm_sync_regs *sync_regs = &self->run->s.regs; - u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2); + u64 test_gva = VM_MEM_SIZE - (SZ_1M / 2); struct kvm_run *run = self->run; const u8 skeyvalue = 0x34; @@ -583,7 +583,7 @@ TEST_F(uc_kvm, uc_skey) /* set register content for test_skey_asm to access not mapped memory */ sync_regs->gprs[1] = skeyvalue; sync_regs->gprs[5] = self->base_gpa; - sync_regs->gprs[6] = test_vaddr; + sync_regs->gprs[6] = test_gva; run->kvm_dirty_regs |= KVM_SYNC_GPRS; /* DAT disabled + 64 bit mode */ diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c index d2e2410f748b..39ce9a9369f5 100644 --- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c @@ -393,7 +393,7 @@ int main(int argc, char *argv[]) int run_secs = 0; int delay_usecs = 0; struct test_data_page *data; - gva_t test_data_page_vaddr; + gva_t test_data_page_gva; bool migrate = false; pthread_t threads[2]; struct thread_params params[2]; @@ -414,14 +414,14 @@ int main(int argc, char *argv[]) params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code); - test_data_page_vaddr = vm_alloc_page(vm); - data = addr_gva2hva(vm, test_data_page_vaddr); + test_data_page_gva = vm_alloc_page(vm); + data = addr_gva2hva(vm, test_data_page_gva); memset(data, 0, sizeof(*data)); params[0].data = data; params[1].data = data; - vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr); - vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr); + vcpu_args_set(params[0].vcpu, 1, test_data_page_gva); + vcpu_args_set(params[1].vcpu, 1, test_data_page_gva); pipis_rcvd = (u64 *)addr_gva2hva(vm, (u64)&ipis_rcvd); params[0].pipis_rcvd = pipis_rcvd; -- cgit v1.2.3 From df079910f9814ddb4239b4f9f70a2272a7e4116a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 20 Apr 2026 14:20:02 -0700 Subject: KVM: selftests: Replace "u64 gpa" with "gpa_t" throughout Use gpa_t instead of u64 for obvious declarations of GPA variables. No functional change intended. Link: https://patch.msgid.link/20260420212004.3938325-18-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/guest_memfd_test.c | 2 +- tools/testing/selftests/kvm/include/kvm_util.h | 26 +++++++++++----------- tools/testing/selftests/kvm/include/memstress.h | 4 ++-- .../testing/selftests/kvm/include/x86/processor.h | 4 ++-- tools/testing/selftests/kvm/lib/kvm_util.c | 14 ++++++------ tools/testing/selftests/kvm/lib/s390/processor.c | 2 +- .../kvm/memslot_modification_stress_test.c | 2 +- tools/testing/selftests/kvm/memslot_perf_test.c | 10 ++++----- tools/testing/selftests/kvm/mmu_stress_test.c | 4 ++-- .../testing/selftests/kvm/pre_fault_memory_test.c | 4 ++-- tools/testing/selftests/kvm/s390/ucontrol_test.c | 2 +- .../testing/selftests/kvm/set_memory_region_test.c | 2 +- .../kvm/x86/private_mem_conversions_test.c | 24 ++++++++++---------- .../kvm/x86/smaller_maxphyaddr_emulation_test.c | 2 +- .../selftests/kvm/x86/svm_nested_vmcb12_gpa.c | 8 +++---- 15 files changed, 55 insertions(+), 55 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 9cbd3ad7f44a..d6528c6f5e03 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -489,7 +489,7 @@ static void test_guest_memfd_guest(void) * the guest's code, stack, and page tables, and low memory contains * the PCI hole and other MMIO regions that need to be avoided. */ - const u64 gpa = SZ_4G; + const gpa_t gpa = SZ_4G; const int slot = 1; struct kvm_vcpu *vcpu; diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 0dcfad728edd..0d9f11be9806 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -114,7 +114,7 @@ struct kvm_vm { gpa_t ucall_mmio_addr; gva_t handlers; u32 dirty_ring_size; - u64 gpa_tag_mask; + gpa_t gpa_tag_mask; /* * "mmu" is the guest's stage-1, with a short name because the vast @@ -418,7 +418,7 @@ static inline void vm_enable_cap(struct kvm_vm *vm, u32 cap, u64 arg0) vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); } -static inline void vm_set_memory_attributes(struct kvm_vm *vm, u64 gpa, +static inline void vm_set_memory_attributes(struct kvm_vm *vm, gpa_t gpa, u64 size, u64 attributes) { struct kvm_memory_attributes attr = { @@ -439,28 +439,28 @@ static inline void vm_set_memory_attributes(struct kvm_vm *vm, u64 gpa, } -static inline void vm_mem_set_private(struct kvm_vm *vm, u64 gpa, +static inline void vm_mem_set_private(struct kvm_vm *vm, gpa_t gpa, u64 size) { vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); } -static inline void vm_mem_set_shared(struct kvm_vm *vm, u64 gpa, +static inline void vm_mem_set_shared(struct kvm_vm *vm, gpa_t gpa, u64 size) { vm_set_memory_attributes(vm, gpa, size, 0); } -void vm_guest_mem_fallocate(struct kvm_vm *vm, u64 gpa, u64 size, +void vm_guest_mem_fallocate(struct kvm_vm *vm, gpa_t gpa, u64 size, bool punch_hole); -static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, u64 gpa, +static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, gpa_t gpa, u64 size) { vm_guest_mem_fallocate(vm, gpa, size, true); } -static inline void vm_guest_mem_allocate(struct kvm_vm *vm, u64 gpa, +static inline void vm_guest_mem_allocate(struct kvm_vm *vm, gpa_t gpa, u64 size) { vm_guest_mem_fallocate(vm, gpa, size, false); @@ -685,21 +685,21 @@ static inline int vm_create_guest_memfd(struct kvm_vm *vm, u64 size, } void vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, - u64 gpa, u64 size, void *hva); + gpa_t gpa, u64 size, void *hva); int __vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, - u64 gpa, u64 size, void *hva); + gpa_t gpa, u64 size, void *hva); void vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, - u64 gpa, u64 size, void *hva, + gpa_t gpa, u64 size, void *hva, u32 guest_memfd, u64 guest_memfd_offset); int __vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, - u64 gpa, u64 size, void *hva, + gpa_t gpa, u64 size, void *hva, u32 guest_memfd, u64 guest_memfd_offset); void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - u64 gpa, u32 slot, u64 npages, u32 flags); + gpa_t gpa, u32 slot, u64 npages, u32 flags); void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - u64 gpa, u32 slot, u64 npages, u32 flags, + gpa_t gpa, u32 slot, u64 npages, u32 flags, int guest_memfd_fd, u64 guest_memfd_offset); #ifndef vm_arch_has_protected_memory diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h index abd0dca10283..0d1d6230cc05 100644 --- a/tools/testing/selftests/kvm/include/memstress.h +++ b/tools/testing/selftests/kvm/include/memstress.h @@ -20,7 +20,7 @@ #define MEMSTRESS_MEM_SLOT_INDEX 1 struct memstress_vcpu_args { - u64 gpa; + gpa_t gpa; gva_t gva; u64 pages; @@ -32,7 +32,7 @@ struct memstress_vcpu_args { struct memstress_args { struct kvm_vm *vm; /* The starting address and size of the guest test region. */ - u64 gpa; + gpa_t gpa; u64 size; u64 guest_page_size; u32 random_seed; diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 15252e75aaf1..fc7efd722229 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1400,12 +1400,12 @@ u64 kvm_hypercall(u64 nr, u64 a0, u64 a1, u64 a2, u64 a3); u64 __xen_hypercall(u64 nr, u64 a0, void *a1); void xen_hypercall(u64 nr, u64 a0, void *a1); -static inline u64 __kvm_hypercall_map_gpa_range(u64 gpa, u64 size, u64 flags) +static inline u64 __kvm_hypercall_map_gpa_range(gpa_t gpa, u64 size, u64 flags) { return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0); } -static inline void kvm_hypercall_map_gpa_range(u64 gpa, u64 size, u64 flags) +static inline void kvm_hypercall_map_gpa_range(gpa_t gpa, u64 size, u64 flags) { u64 ret = __kvm_hypercall_map_gpa_range(gpa, size, flags); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index e282f9abd4c7..905fa214099d 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -919,7 +919,7 @@ static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree, int __vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, - u64 gpa, u64 size, void *hva) + gpa_t gpa, u64 size, void *hva) { struct kvm_userspace_memory_region region = { .slot = slot, @@ -933,7 +933,7 @@ int __vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, } void vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, - u64 gpa, u64 size, void *hva) + gpa_t gpa, u64 size, void *hva) { int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva); @@ -946,7 +946,7 @@ void vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)") int __vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, - u64 gpa, u64 size, void *hva, + gpa_t gpa, u64 size, void *hva, u32 guest_memfd, u64 guest_memfd_offset) { struct kvm_userspace_memory_region2 region = { @@ -965,7 +965,7 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, } void vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, - u64 gpa, u64 size, void *hva, + gpa_t gpa, u64 size, void *hva, u32 guest_memfd, u64 guest_memfd_offset) { int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva, @@ -978,7 +978,7 @@ void vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, /* FIXME: This thing needs to be ripped apart and rewritten. */ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - u64 gpa, u32 slot, u64 npages, u32 flags, + gpa_t gpa, u32 slot, u64 npages, u32 flags, int guest_memfd, u64 guest_memfd_offset) { int ret; @@ -1141,7 +1141,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - u64 gpa, u32 slot, u64 npages, u32 flags) + gpa_t gpa, u32 slot, u64 npages, u32 flags) { vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0); } @@ -1278,7 +1278,7 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, u64 base, u64 size, const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0); struct userspace_mem_region *region; u64 end = base + size; - u64 gpa, len; + gpa_t gpa, len; off_t fd_offset; int ret; diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 643e583c804c..77a7b6965812 100644 --- a/tools/testing/selftests/kvm/lib/s390/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -47,7 +47,7 @@ static u64 virt_alloc_region(struct kvm_vm *vm, int ri) | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH); } -void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 gpa) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { int ri, idx; u64 *entry; diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 9d7c4afab961..9c7578a098c3 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -58,7 +58,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, u64 nr_modifications) { u64 pages = max_t(int, vm->page_size, getpagesize()) / vm->page_size; - u64 gpa; + gpa_t gpa; int i; /* diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 51f8be50c7e4..3d02db371422 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -186,9 +186,9 @@ static void wait_for_vcpu(void) "sem_timedwait() failed: %d", errno); } -static void *vm_gpa2hva(struct vm_data *data, u64 gpa, u64 *rempages) +static void *vm_gpa2hva(struct vm_data *data, gpa_t gpa, u64 *rempages) { - u64 gpage, pgoffs; + gpa_t gpage, pgoffs; u32 slot, slotoffs; void *base; u32 guest_page_size = data->vm->page_size; @@ -332,7 +332,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, u64 *maxslots, for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { u64 npages; - u64 gpa; + gpa_t gpa; npages = data->pages_per_slot; if (slot == data->nslots) @@ -638,7 +638,7 @@ static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync) static void test_memslot_do_unmap(struct vm_data *data, u64 offsp, u64 count) { - u64 gpa, ctr; + gpa_t gpa, ctr; u32 guest_page_size = data->vm->page_size; for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) { @@ -663,7 +663,7 @@ static void test_memslot_do_unmap(struct vm_data *data, static void test_memslot_map_unmap_check(struct vm_data *data, u64 offsp, u64 valexp) { - u64 gpa; + gpa_t gpa; u64 *val; u32 guest_page_size = data->vm->page_size; diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index e0975a5dcff1..54d281419d31 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -22,7 +22,7 @@ static bool all_vcpus_hit_ro_fault; static void guest_code(u64 start_gpa, u64 end_gpa, u64 stride) { - u64 gpa; + gpa_t gpa; int i; for (i = 0; i < 2; i++) { @@ -206,7 +206,7 @@ static pthread_t *spawn_workers(struct kvm_vm *vm, struct kvm_vcpu **vcpus, u64 start_gpa, u64 end_gpa) { struct vcpu_info *info; - u64 gpa, nr_bytes; + gpa_t gpa, nr_bytes; pthread_t *threads; int i; diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c index bfdaaeed3a8c..fcb57fd034e6 100644 --- a/tools/testing/selftests/kvm/pre_fault_memory_test.c +++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c @@ -33,7 +33,7 @@ static void guest_code(u64 base_gva) struct slot_worker_data { struct kvm_vm *vm; - u64 gpa; + gpa_t gpa; u32 flags; bool worker_ready; bool prefault_ready; @@ -161,7 +161,7 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset, static void __test_pre_fault_memory(unsigned long vm_type, bool private) { - u64 gpa, gva, alignment, guest_page_size; + gpa_t gpa, gva, alignment, guest_page_size; const struct vm_shape shape = { .mode = VM_MODE_DEFAULT, .type = vm_type, diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c index dbdee4c39d47..b8c6f37b53e0 100644 --- a/tools/testing/selftests/kvm/s390/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -269,7 +269,7 @@ TEST(uc_cap_hpage) } /* calculate host virtual addr from guest physical addr */ -static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa) +static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, gpa_t gpa) { return (void *)(self->base_hva - self->base_gpa + gpa); } diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 5551dd0f9fad..9b919a231c93 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -112,7 +112,7 @@ static struct kvm_vm *spawn_vm(struct kvm_vcpu **vcpu, pthread_t *vcpu_thread, { struct kvm_vm *vm; u64 *hva; - u64 gpa; + gpa_t gpa; vm = vm_create_with_one_vcpu(vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c index 27675d7d04c0..1d2f5d4fd45d 100644 --- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c @@ -38,7 +38,7 @@ do { \ pattern, i, gpa + i, mem[i]); \ } while (0) -static void memcmp_h(u8 *mem, u64 gpa, u8 pattern, size_t size) +static void memcmp_h(u8 *mem, gpa_t gpa, u8 pattern, size_t size) { size_t i; @@ -70,13 +70,13 @@ enum ucall_syncs { SYNC_PRIVATE, }; -static void guest_sync_shared(u64 gpa, u64 size, +static void guest_sync_shared(gpa_t gpa, u64 size, u8 current_pattern, u8 new_pattern) { GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern); } -static void guest_sync_private(u64 gpa, u64 size, u8 pattern) +static void guest_sync_private(gpa_t gpa, u64 size, u8 pattern) { GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern); } @@ -86,7 +86,7 @@ static void guest_sync_private(u64 gpa, u64 size, u8 pattern) #define MAP_GPA_SHARED BIT(1) #define MAP_GPA_DO_FALLOCATE BIT(2) -static void guest_map_mem(u64 gpa, u64 size, bool map_shared, +static void guest_map_mem(gpa_t gpa, u64 size, bool map_shared, bool do_fallocate) { u64 flags = MAP_GPA_SET_ATTRIBUTES; @@ -98,12 +98,12 @@ static void guest_map_mem(u64 gpa, u64 size, bool map_shared, kvm_hypercall_map_gpa_range(gpa, size, flags); } -static void guest_map_shared(u64 gpa, u64 size, bool do_fallocate) +static void guest_map_shared(gpa_t gpa, u64 size, bool do_fallocate) { guest_map_mem(gpa, size, true, do_fallocate); } -static void guest_map_private(u64 gpa, u64 size, bool do_fallocate) +static void guest_map_private(gpa_t gpa, u64 size, bool do_fallocate) { guest_map_mem(gpa, size, false, do_fallocate); } @@ -134,7 +134,7 @@ static void guest_test_explicit_conversion(u64 base_gpa, bool do_fallocate) memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE); for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { - u64 gpa = base_gpa + test_ranges[i].offset; + gpa_t gpa = base_gpa + test_ranges[i].offset; u64 size = test_ranges[i].size; u8 p1 = 0x11; u8 p2 = 0x22; @@ -214,7 +214,7 @@ skip: } } -static void guest_punch_hole(u64 gpa, u64 size) +static void guest_punch_hole(gpa_t gpa, u64 size) { /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */ u64 flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE; @@ -239,7 +239,7 @@ static void guest_test_punch_hole(u64 base_gpa, bool precise) guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false); for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { - u64 gpa = base_gpa + test_ranges[i].offset; + gpa_t gpa = base_gpa + test_ranges[i].offset; u64 size = test_ranges[i].size; /* @@ -289,7 +289,7 @@ static void guest_code(u64 base_gpa) static void handle_exit_hypercall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - u64 gpa = run->hypercall.args[0]; + gpa_t gpa = run->hypercall.args[0]; u64 size = run->hypercall.args[1] * PAGE_SIZE; bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES; bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED; @@ -337,7 +337,7 @@ static void *__test_mem_conversions(void *__vcpu) case UCALL_ABORT: REPORT_GUEST_ASSERT(uc); case UCALL_SYNC: { - u64 gpa = uc.args[1]; + gpa_t gpa = uc.args[1]; size_t size = uc.args[2]; size_t i; @@ -402,7 +402,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, u32 nr_v KVM_MEM_GUEST_MEMFD, memfd, slot_size * i); for (i = 0; i < nr_vcpus; i++) { - u64 gpa = BASE_DATA_GPA + i * per_cpu_size; + gpa_t gpa = BASE_DATA_GPA + i * per_cpu_size; vcpu_args_set(vcpus[i], 1, gpa); diff --git a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c index 27cded643699..3dca85e95478 100644 --- a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c @@ -48,7 +48,7 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct ucall uc; u64 *hva; - u64 gpa; + gpa_t gpa; int rc; TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR)); diff --git a/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c b/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c index ae8a10913af7..a4935ce2fb99 100644 --- a/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c @@ -28,28 +28,28 @@ static void l2_code(void) vmcall(); } -static void l1_vmrun(struct svm_test_data *svm, u64 gpa) +static void l1_vmrun(struct svm_test_data *svm, gpa_t gpa) { generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); asm volatile ("vmrun %[gpa]" : : [gpa] "a" (gpa) : "memory"); } -static void l1_vmload(struct svm_test_data *svm, u64 gpa) +static void l1_vmload(struct svm_test_data *svm, gpa_t gpa) { generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); asm volatile ("vmload %[gpa]" : : [gpa] "a" (gpa) : "memory"); } -static void l1_vmsave(struct svm_test_data *svm, u64 gpa) +static void l1_vmsave(struct svm_test_data *svm, gpa_t gpa) { generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); asm volatile ("vmsave %[gpa]" : : [gpa] "a" (gpa) : "memory"); } -static void l1_vmexit(struct svm_test_data *svm, u64 gpa) +static void l1_vmexit(struct svm_test_data *svm, gpa_t gpa) { generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); -- cgit v1.2.3 From abc374191dc22c4b36d01c699d9122588ce80101 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 20 Apr 2026 14:20:03 -0700 Subject: KVM: selftests: Replace "u64 nested_paddr" with "gpa_t l2_gpa" In x86's nested TDP APIs, use the appropriate gpa_t typedef and rename variables from nested_paddr to l2_gpa to match KVM x86's nomenclature. No functional change intended. Link: https://patch.msgid.link/20260420212004.3938325-19-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/x86/processor.h | 2 +- tools/testing/selftests/kvm/lib/x86/processor.c | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index fc7efd722229..97dc887658c3 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1514,7 +1514,7 @@ void virt_map_level(struct kvm_vm *vm, gva_t gva, u64 paddr, void vm_enable_tdp(struct kvm_vm *vm); bool kvm_cpu_has_tdp(void); -void tdp_map(struct kvm_vm *vm, u64 nested_paddr, u64 paddr, u64 size); +void tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, u64 paddr, u64 size); void tdp_identity_map_default_memslots(struct kvm_vm *vm); void tdp_identity_map_1g(struct kvm_vm *vm, u64 addr, u64 size); u64 *tdp_get_pte(struct kvm_vm *vm, u64 l2_gpa); diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 3c55980c81b2..892cc517d9f1 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -495,26 +495,24 @@ bool kvm_cpu_has_tdp(void) return kvm_cpu_has_ept() || kvm_cpu_has_npt(); } -void __tdp_map(struct kvm_vm *vm, u64 nested_paddr, u64 paddr, - u64 size, int level) +void __tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, u64 paddr, u64 size, int level) { size_t page_size = PG_LEVEL_SIZE(level); size_t npages = size / page_size; - TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); + TEST_ASSERT(l2_gpa + size > l2_gpa, "L2 GPA overflow"); TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); while (npages--) { - __virt_pg_map(vm, &vm->stage2_mmu, nested_paddr, paddr, level); - nested_paddr += page_size; + __virt_pg_map(vm, &vm->stage2_mmu, l2_gpa, paddr, level); + l2_gpa += page_size; paddr += page_size; } } -void tdp_map(struct kvm_vm *vm, u64 nested_paddr, u64 paddr, - u64 size) +void tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, u64 paddr, u64 size) { - __tdp_map(vm, nested_paddr, paddr, size, PG_LEVEL_4K); + __tdp_map(vm, l2_gpa, paddr, size, PG_LEVEL_4K); } /* Prepare an identity extended page table that maps all the -- cgit v1.2.3 From dfd2a8b07c6cc94145e11d87d2f11137d6444854 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 20 Apr 2026 14:20:04 -0700 Subject: KVM: selftests: Replace "paddr" with "gpa" throughout Replace all variations of "paddr" variables in KVM selftests with "gpa", with the exception of the ELF structures, as those fields are not specific to guest virtual addresses, to complete the conversion from vm_paddr_t to gpa_t. No functional change intended. Link: https://patch.msgid.link/20260420212004.3938325-20-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/arm64/sea_to_user.c | 2 +- tools/testing/selftests/kvm/include/kvm_util.h | 23 +++++----- .../testing/selftests/kvm/include/x86/processor.h | 6 +-- tools/testing/selftests/kvm/lib/arm64/processor.c | 22 ++++----- tools/testing/selftests/kvm/lib/kvm_util.c | 53 +++++++++++----------- .../selftests/kvm/lib/loongarch/processor.c | 14 +++--- tools/testing/selftests/kvm/lib/riscv/processor.c | 16 +++---- tools/testing/selftests/kvm/lib/s390/processor.c | 12 ++--- tools/testing/selftests/kvm/lib/x86/processor.c | 50 ++++++++++---------- 9 files changed, 98 insertions(+), 100 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c index e16034852470..e96d8982c28b 100644 --- a/tools/testing/selftests/kvm/arm64/sea_to_user.c +++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c @@ -275,7 +275,7 @@ static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu) vm_userspace_mem_region_add( /*vm=*/vm, /*src_type=*/src_type, - /*guest_paddr=*/start_gpa, + /*gpa=*/start_gpa, /*slot=*/1, /*npages=*/num_guest_pages, /*flags=*/0); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 0d9f11be9806..2ecaaa0e9965 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -725,7 +725,7 @@ gva_t vm_alloc_pages(struct kvm_vm *vm, int nr_pages); gva_t __vm_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type); gva_t vm_alloc_page(struct kvm_vm *vm); -void virt_map(struct kvm_vm *vm, gva_t gva, u64 paddr, +void virt_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa, unsigned int npages); void *addr_gpa2hva(struct kvm_vm *vm, gpa_t gpa); void *addr_gva2hva(struct kvm_vm *vm, gva_t gva); @@ -990,21 +990,20 @@ void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); const char *exit_reason_str(unsigned int exit_reason); -gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t paddr_min, u32 memslot); -gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - gpa_t paddr_min, u32 memslot, - bool protected); +gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t min_gpa, u32 memslot); +gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, gpa_t min_gpa, + u32 memslot, bool protected); gpa_t vm_alloc_page_table(struct kvm_vm *vm); static inline gpa_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - gpa_t paddr_min, u32 memslot) + gpa_t min_gpa, u32 memslot) { /* * By default, allocate memory as protected for VMs that support * protected memory, as the majority of memory for such VMs is * protected, i.e. using shared memory is effectively opt-in. */ - return __vm_phy_pages_alloc(vm, num, paddr_min, memslot, + return __vm_phy_pages_alloc(vm, num, min_gpa, memslot, vm_arch_has_protected_memory(vm)); } @@ -1203,13 +1202,13 @@ static inline void virt_pgd_alloc(struct kvm_vm *vm) /* * Within @vm, creates a virtual translation for the page starting - * at @gva to the page starting at @paddr. + * at @gva to the page starting at @gpa. */ -void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr); +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa); -static inline void virt_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr) +static inline void virt_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { - virt_arch_pg_map(vm, gva, paddr); + virt_arch_pg_map(vm, gva, gpa); sparsebit_set(vm->vpages_mapped, gva >> vm->page_shift); } @@ -1280,7 +1279,7 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus); void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm); void kvm_arch_vm_release(struct kvm_vm *vm); -bool vm_is_gpa_protected(struct kvm_vm *vm, gpa_t paddr); +bool vm_is_gpa_protected(struct kvm_vm *vm, gpa_t gpa); u32 guest_get_vcpuid(void); diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 97dc887658c3..77f576ee7789 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1508,13 +1508,13 @@ void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels, struct pte_masks *pte_masks); void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, gva_t gva, - u64 paddr, int level); -void virt_map_level(struct kvm_vm *vm, gva_t gva, u64 paddr, + gpa_t gpa, int level); +void virt_map_level(struct kvm_vm *vm, gva_t gva, gpa_t gpa, u64 nr_bytes, int level); void vm_enable_tdp(struct kvm_vm *vm); bool kvm_cpu_has_tdp(void); -void tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, u64 paddr, u64 size); +void tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, gpa_t gpa, u64 size); void tdp_identity_map_default_memslots(struct kvm_vm *vm); void tdp_identity_map_1g(struct kvm_vm *vm, u64 addr, u64 size); u64 *tdp_get_pte(struct kvm_vm *vm, u64 l2_gpa); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 0f693d8891d2..01325bf4d36f 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -121,7 +121,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) vm->mmu.pgd_created = true; } -static void _virt_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr, +static void _virt_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa, u64 flags) { u8 attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); @@ -133,13 +133,13 @@ static void _virt_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr, " gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), "Invalid virtual address, gva: 0x%lx", gva); - TEST_ASSERT((paddr % vm->page_size) == 0, - "Physical address not on page boundary,\n" - " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); + TEST_ASSERT((gpa % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " gpa: 0x%lx vm->page_size: 0x%x", gpa, vm->page_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gpa, vm->max_gfn, vm->page_size); ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, gva) * 8; if (!*ptep) @@ -170,14 +170,14 @@ static void _virt_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr, if (!use_lpa2_pte_format(vm)) pg_attr |= PTE_SHARED; - *ptep = addr_pte(vm, paddr, pg_attr); + *ptep = addr_pte(vm, gpa, pg_attr); } -void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { u64 attr_idx = MT_NORMAL; - _virt_pg_map(vm, gva, paddr, attr_idx); + _virt_pg_map(vm, gva, gpa, attr_idx); } u64 *virt_get_pte_hva_at_level(struct kvm_vm *vm, gva_t gva, int level) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 905fa214099d..2a76eca7029d 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1027,8 +1027,8 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, TEST_FAIL("A mem region with the requested slot " "already exists.\n" - " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" - " existing slot: %u paddr: 0x%lx size: 0x%lx", + " requested slot: %u gpa: 0x%lx npages: 0x%lx\n" + " existing slot: %u gpa: 0x%lx size: 0x%lx", slot, gpa, npages, region->region.slot, (u64)region->region.guest_phys_addr, (u64)region->region.memory_size); @@ -1442,7 +1442,7 @@ static gva_t ____vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva, u64 pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); virt_pgd_alloc(vm); - gpa_t paddr = __vm_phy_pages_alloc(vm, pages, + gpa_t gpa = __vm_phy_pages_alloc(vm, pages, KVM_UTIL_MIN_PFN * vm->page_size, vm->memslots[type], protected); @@ -1454,9 +1454,9 @@ static gva_t ____vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva, /* Map the virtual pages. */ for (gva_t gva = gva_start; pages > 0; - pages--, gva += vm->page_size, paddr += vm->page_size) { + pages--, gva += vm->page_size, gpa += vm->page_size) { - virt_pg_map(vm, gva, paddr); + virt_pg_map(vm, gva, gpa); } return gva_start; @@ -1506,22 +1506,21 @@ gva_t vm_alloc_page(struct kvm_vm *vm) * Map a range of VM virtual address to the VM's physical address. * * Within the VM given by @vm, creates a virtual translation for @npages - * starting at @gva to the page range starting at @paddr. + * starting at @gva to the page range starting at @gpa. */ -void virt_map(struct kvm_vm *vm, gva_t gva, u64 paddr, - unsigned int npages) +void virt_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa, unsigned int npages) { size_t page_size = vm->page_size; size_t size = npages * page_size; TEST_ASSERT(gva + size > gva, "Vaddr overflow"); - TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); + TEST_ASSERT(gpa + size > gpa, "Paddr overflow"); while (npages--) { - virt_pg_map(vm, gva, paddr); + virt_pg_map(vm, gva, gpa); gva += page_size; - paddr += page_size; + gpa += page_size; } } @@ -2008,7 +2007,7 @@ const char *exit_reason_str(unsigned int exit_reason) * Input Args: * vm - Virtual Machine * num - number of pages - * paddr_min - Physical address minimum + * min_gpa - Physical address minimum * memslot - Memory region to allocate page from * protected - True if the pages will be used as protected/private memory * @@ -2018,12 +2017,12 @@ const char *exit_reason_str(unsigned int exit_reason) * Starting physical address * * Within the VM specified by vm, locates a range of available physical - * pages at or above paddr_min. If found, the pages are marked as in use + * pages at or above min_gpa. If found, the pages are marked as in use * and their base address is returned. A TEST_ASSERT failure occurs if - * not enough pages are available at or above paddr_min. + * not enough pages are available at or above min_gpa. */ gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - gpa_t paddr_min, u32 memslot, + gpa_t min_gpa, u32 memslot, bool protected) { struct userspace_mem_region *region; @@ -2031,16 +2030,16 @@ gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, TEST_ASSERT(num > 0, "Must allocate at least one page"); - TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " + TEST_ASSERT((min_gpa % vm->page_size) == 0, "Min physical address " "not divisible by page size.\n" - " paddr_min: 0x%lx page_size: 0x%x", - paddr_min, vm->page_size); + " min_gpa: 0x%lx page_size: 0x%x", + min_gpa, vm->page_size); region = memslot2region(vm, memslot); TEST_ASSERT(!protected || region->protected_phy_pages, "Region doesn't support protected memory"); - base = pg = paddr_min >> vm->page_shift; + base = pg = min_gpa >> vm->page_shift; do { for (; pg < base + num; ++pg) { if (!sparsebit_is_set(region->unused_phy_pages, pg)) { @@ -2052,8 +2051,8 @@ gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, if (pg == 0) { fprintf(stderr, "No guest physical page available, " - "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n", - paddr_min, vm->page_size, memslot); + "min_gpa: 0x%lx page_size: 0x%x memslot: %u\n", + min_gpa, vm->page_size, memslot); fputs("---- vm dump ----\n", stderr); vm_dump(stderr, vm, 2); abort(); @@ -2068,9 +2067,9 @@ gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, return base * vm->page_size; } -gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t paddr_min, u32 memslot) +gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t min_gpa, u32 memslot) { - return vm_phy_pages_alloc(vm, 1, paddr_min, memslot); + return vm_phy_pages_alloc(vm, 1, min_gpa, memslot); } gpa_t vm_alloc_page_table(struct kvm_vm *vm) @@ -2287,7 +2286,7 @@ void __attribute((constructor)) kvm_selftest_init(void) kvm_selftest_arch_init(); } -bool vm_is_gpa_protected(struct kvm_vm *vm, gpa_t paddr) +bool vm_is_gpa_protected(struct kvm_vm *vm, gpa_t gpa) { sparsebit_idx_t pg = 0; struct userspace_mem_region *region; @@ -2295,10 +2294,10 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, gpa_t paddr) if (!vm_arch_has_protected_memory(vm)) return false; - region = userspace_mem_region_find(vm, paddr, paddr); - TEST_ASSERT(region, "No vm physical memory at 0x%lx", paddr); + region = userspace_mem_region_find(vm, gpa, gpa); + TEST_ASSERT(region, "No vm physical memory at 0x%lx", gpa); - pg = paddr >> vm->page_shift; + pg = gpa >> vm->page_shift; return sparsebit_is_set(region->protected_phy_pages, pg); } diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c index 47e782056196..64d91fb76522 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/processor.c +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -116,7 +116,7 @@ gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); } -void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { u32 prot_bits; u64 *ptep; @@ -126,17 +126,17 @@ void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr) "gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), "Invalid virtual address, gva: 0x%lx", gva); - TEST_ASSERT((paddr % vm->page_size) == 0, + TEST_ASSERT((gpa % vm->page_size) == 0, "Physical address not on page boundary,\n" - "paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "gpa: 0x%lx vm->page_size: 0x%x", gpa, vm->page_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, "Physical address beyond maximum supported,\n" - "paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); + "gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gpa, vm->max_gfn, vm->page_size); ptep = virt_populate_pte(vm, gva, 1); prot_bits = _PAGE_PRESENT | __READABLE | __WRITEABLE | _CACHE_CC | _PAGE_USER; - WRITE_ONCE(*ptep, paddr | prot_bits); + WRITE_ONCE(*ptep, gpa | prot_bits); } static void pte_dump(FILE *stream, struct kvm_vm *vm, u8 indent, u64 page, int level) diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 108144fb858b..ded5429f3448 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -75,7 +75,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) vm->mmu.pgd_created = true; } -void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { u64 *ptep, next_ppn; int level = vm->mmu.pgtable_levels - 1; @@ -85,13 +85,13 @@ void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr) " gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), "Invalid virtual address, gva: 0x%lx", gva); - TEST_ASSERT((paddr % vm->page_size) == 0, + TEST_ASSERT((gpa % vm->page_size) == 0, "Physical address not on page boundary,\n" - " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + " gpa: 0x%lx vm->page_size: 0x%x", gpa, vm->page_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, "Physical address beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); + " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gpa, vm->max_gfn, vm->page_size); ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, gva, level) * 8; if (!*ptep) { @@ -113,8 +113,8 @@ void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr) level--; } - paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT; - *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) | + gpa = gpa >> PGTBL_PAGE_SIZE_SHIFT; + *ptep = (gpa << PGTBL_PTE_ADDR_SHIFT) | PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK; } diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 77a7b6965812..a9adb3782b35 100644 --- a/tools/testing/selftests/kvm/lib/s390/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -12,7 +12,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) { - gpa_t paddr; + gpa_t gpa; TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); @@ -20,12 +20,12 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) if (vm->mmu.pgd_created) return; - paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, + gpa = vm_phy_pages_alloc(vm, PAGES_PER_REGION, KVM_GUEST_PAGE_TABLE_MIN_PADDR, vm->memslots[MEM_REGION_PT]); - memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); + memset(addr_gpa2hva(vm, gpa), 0xff, PAGES_PER_REGION * vm->page_size); - vm->mmu.pgd = paddr; + vm->mmu.pgd = gpa; vm->mmu.pgd_created = true; } @@ -60,11 +60,11 @@ void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) "Invalid virtual address, gva: 0x%lx", gva); TEST_ASSERT((gpa % vm->page_size) == 0, "Physical address not on page boundary,\n" - " paddr: 0x%lx vm->page_size: 0x%x", + " gpa: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", gva, vm->max_gfn, vm->page_size); /* Walk through region and segment tables */ diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 892cc517d9f1..b51467d70f6e 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -224,20 +224,20 @@ static u64 *virt_create_upper_pte(struct kvm_vm *vm, struct kvm_mmu *mmu, u64 *parent_pte, gva_t gva, - u64 paddr, + gpa_t gpa, int current_level, int target_level) { u64 *pte = virt_get_pte(vm, mmu, parent_pte, gva, current_level); - paddr = vm_untag_gpa(vm, paddr); + gpa = vm_untag_gpa(vm, gpa); if (!is_present_pte(mmu, pte)) { *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) | PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) | PTE_ALWAYS_SET_MASK(mmu); if (current_level == target_level) - *pte |= PTE_HUGE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK); + *pte |= PTE_HUGE_MASK(mmu) | (gpa & PHYSICAL_PAGE_MASK); else *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; } else { @@ -257,7 +257,7 @@ static u64 *virt_create_upper_pte(struct kvm_vm *vm, } void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, gva_t gva, - u64 paddr, int level) + gpa_t gpa, int level) { const u64 pg_size = PG_LEVEL_SIZE(level); u64 *pte = &mmu->pgd; @@ -271,15 +271,15 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, gva_t gva, "gva: 0x%lx page size: 0x%lx", gva, pg_size); TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), "Invalid virtual address, gva: 0x%lx", gva); - TEST_ASSERT((paddr % pg_size) == 0, + TEST_ASSERT((gpa % pg_size) == 0, "Physical address not aligned,\n" - " paddr: 0x%lx page size: 0x%lx", paddr, pg_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + " gpa: 0x%lx page size: 0x%lx", gpa, pg_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, "Physical address beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr, - "Unexpected bits in paddr: %lx", paddr); + " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gpa, vm->max_gfn, vm->page_size); + TEST_ASSERT(vm_untag_gpa(vm, gpa) == gpa, + "Unexpected bits in gpa: %lx", gpa); TEST_ASSERT(!PTE_EXECUTABLE_MASK(mmu) || !PTE_NX_MASK(mmu), "X and NX bit masks cannot be used simultaneously"); @@ -291,7 +291,7 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, gva_t gva, for (current_level = mmu->pgtable_levels; current_level > PG_LEVEL_4K; current_level--) { - pte = virt_create_upper_pte(vm, mmu, pte, gva, paddr, + pte = virt_create_upper_pte(vm, mmu, pte, gva, gpa, current_level, level); if (is_huge_pte(mmu, pte)) return; @@ -303,24 +303,24 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, gva_t gva, "PTE already present for 4k page at gva: 0x%lx", gva); *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) | PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) | - PTE_ALWAYS_SET_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK); + PTE_ALWAYS_SET_MASK(mmu) | (gpa & PHYSICAL_PAGE_MASK); /* * Neither SEV nor TDX supports shared page tables, so only the final * leaf PTE needs manually set the C/S-bit. */ - if (vm_is_gpa_protected(vm, paddr)) + if (vm_is_gpa_protected(vm, gpa)) *pte |= PTE_C_BIT_MASK(mmu); else *pte |= PTE_S_BIT_MASK(mmu); } -void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, u64 paddr) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { - __virt_pg_map(vm, &vm->mmu, gva, paddr, PG_LEVEL_4K); + __virt_pg_map(vm, &vm->mmu, gva, gpa, PG_LEVEL_4K); } -void virt_map_level(struct kvm_vm *vm, gva_t gva, u64 paddr, +void virt_map_level(struct kvm_vm *vm, gva_t gva, gpa_t gpa, u64 nr_bytes, int level) { u64 pg_size = PG_LEVEL_SIZE(level); @@ -332,12 +332,12 @@ void virt_map_level(struct kvm_vm *vm, gva_t gva, u64 paddr, nr_bytes, pg_size); for (i = 0; i < nr_pages; i++) { - __virt_pg_map(vm, &vm->mmu, gva, paddr, level); + __virt_pg_map(vm, &vm->mmu, gva, gpa, level); sparsebit_set_num(vm->vpages_mapped, gva >> vm->page_shift, nr_bytes / PAGE_SIZE); gva += pg_size; - paddr += pg_size; + gpa += pg_size; } } @@ -495,24 +495,24 @@ bool kvm_cpu_has_tdp(void) return kvm_cpu_has_ept() || kvm_cpu_has_npt(); } -void __tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, u64 paddr, u64 size, int level) +void __tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, gpa_t gpa, u64 size, int level) { size_t page_size = PG_LEVEL_SIZE(level); size_t npages = size / page_size; TEST_ASSERT(l2_gpa + size > l2_gpa, "L2 GPA overflow"); - TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); + TEST_ASSERT(gpa + size > gpa, "GPA overflow"); while (npages--) { - __virt_pg_map(vm, &vm->stage2_mmu, l2_gpa, paddr, level); + __virt_pg_map(vm, &vm->stage2_mmu, l2_gpa, gpa, level); l2_gpa += page_size; - paddr += page_size; + gpa += page_size; } } -void tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, u64 paddr, u64 size) +void tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, gpa_t gpa, u64 size) { - __tdp_map(vm, l2_gpa, paddr, size, PG_LEVEL_4K); + __tdp_map(vm, l2_gpa, gpa, size, PG_LEVEL_4K); } /* Prepare an identity extended page table that maps all the -- cgit v1.2.3 From 68a135013bf73dfd6a277f76fc4e088b0f3dfa79 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Mon, 23 Mar 2026 12:59:47 +0000 Subject: btrfs: fix bytes_may_use leak in move_existing_remap() If the call to btrfs_reserve_extent() in move_existing_remap() returns a smaller extent than we asked for, currently we're not undoing the bytes_may_use change that we made. Fix this by calling btrfs_space_info_update_bytes_may_use() again for the difference. Fixes: bbea42dfb91f ("btrfs: move existing remaps before relocating block group") Reviewed-by: Boris Burkov Signed-off-by: Mark Harmstone Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 1c42c5180bdd..3d1756b61162 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4174,6 +4174,12 @@ static int move_existing_remap(struct btrfs_fs_info *fs_info, return ret; } + if (ins.offset < length) { + spin_lock(&sinfo->lock); + btrfs_space_info_update_bytes_may_use(sinfo, ins.offset - length); + spin_unlock(&sinfo->lock); + } + dest_addr = ins.objectid; dest_length = ins.offset; -- cgit v1.2.3 From 9b8824533d75fb199a3fb0f6147ffcca64b5caf8 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Mon, 23 Mar 2026 12:59:57 +0000 Subject: btrfs: fix bytes_may_use leak in do_remap_reloc_trans() If the call to btrfs_reserve_extent() in do_remap_reloc_trans() returns a smaller extent than we asked for, currently we're not undoing the bytes_may_use change that we made. Fix this by calling btrfs_space_info_update_bytes_may_use() again for the difference. Fixes: fd6594b1446c ("btrfs: replace identity remaps with actual remaps when doing relocations") Reviewed-by: Boris Burkov Signed-off-by: Mark Harmstone Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 3d1756b61162..ad433b7ca919 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -5006,6 +5006,12 @@ static int do_remap_reloc_trans(struct btrfs_fs_info *fs_info, return ret; } + if (ins.offset < remap_length) { + spin_lock(&sinfo->lock); + btrfs_space_info_update_bytes_may_use(sinfo, ins.offset - remap_length); + spin_unlock(&sinfo->lock); + } + made_reservation = true; new_addr = ins.objectid; -- cgit v1.2.3 From 73db0fad673af844772de964eebecae60eda0496 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Mon, 23 Mar 2026 17:16:43 +0000 Subject: btrfs: abort transaction in do_remap_reloc_trans() on failure If one of the calls made by do_remap_reloc_trans() fails, we can leave the remap tree in an inconsistent state. Abort the transaction if this happens, to prevent the corrupt state from reaching the disk. Reviewed-by: Johannes Thumshirn Signed-off-by: Mark Harmstone Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ad433b7ca919..3e48d1a59fb3 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -5035,21 +5035,27 @@ static int do_remap_reloc_trans(struct btrfs_fs_info *fs_info, if (bg_needs_free_space) { ret = btrfs_add_block_group_free_space(trans, dest_bg); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, ret); goto fail; + } } ret = copy_remapped_data(fs_info, start, new_addr, length); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, ret); goto fail; + } ret = btrfs_remove_from_free_space_tree(trans, new_addr, length); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, ret); goto fail; + } ret = add_remap_entry(trans, path, src_bg, start, new_addr, length); if (ret) { - btrfs_add_to_free_space_tree(trans, new_addr, length); + btrfs_abort_transaction(trans, ret); goto fail; } -- cgit v1.2.3 From a86a283430e1a44907b142c4f53e1f3ad24e87ae Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 12 Apr 2026 20:31:05 +0930 Subject: btrfs: apply first key check for readahead when possible Currently for tree block readahead we never pass a btrfs_tree_parent_check with @has_first_key set. Without @has_first_key set, btrfs will skip the following extra checks: - Header generation check This is a minor one. - Empty leaf/node checks This is more serious, for certain trees like the csum tree, they are allowed to be empty, thus an empty leaf can pass the tree checker. But if there is a parent node for such an empty leaf, it indicates corruption. Without @has_first_key set, we can no longer detect such a problem. In fact there is already a fuzzed image report that a corrupted csum leaf which has zero nritems but still has a parent node can trigger a BUG_ON() during csum deletion. However there are only two call sites of btrfs_readahead_tree_block(): - Inside relocate_tree_blocks() At this call site we are trying to grab the first key of the tree block, thus we are not able to pass a @first_key parameter. - Inside btrfs_readahead_node_child() This is the more common call site, where we have the parent node and want to readahead the child tree blocks. In this case we can easily grab the node key and pass it for checks. Add a new parameter @first_key to btrfs_readahead_tree_block() and pass the node key to it inside btrfs_readahead_node_child(). This should plug the gap in empty leaf detection during readahead. Link: https://lore.kernel.org/linux-btrfs/20260409071255.3358044-1-gality369@gmail.com/ Reviewed-by: David Sterba Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 14 ++++++++++++-- fs/btrfs/extent_io.h | 3 ++- fs/btrfs/relocation.c | 2 +- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1ba8a7d3587b..45d56421ac50 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4641,7 +4641,8 @@ int try_release_extent_buffer(struct folio *folio) * to read the block we will not block on anything. */ void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info, - u64 bytenr, u64 owner_root, u64 gen, int level) + u64 bytenr, u64 owner_root, u64 gen, int level, + const struct btrfs_key *first_key) { struct btrfs_tree_parent_check check = { .level = level, @@ -4650,6 +4651,11 @@ void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info, struct extent_buffer *eb; int ret; + if (first_key) { + memcpy(&check.first_key, first_key, sizeof(struct btrfs_key)); + check.has_first_key = true; + } + eb = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level); if (IS_ERR(eb)) return; @@ -4677,9 +4683,13 @@ void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info, */ void btrfs_readahead_node_child(struct extent_buffer *node, int slot) { + struct btrfs_key node_key; + + btrfs_node_key_to_cpu(node, &node_key, slot); btrfs_readahead_tree_block(node->fs_info, btrfs_node_blockptr(node, slot), btrfs_header_owner(node), btrfs_node_ptr_generation(node, slot), - btrfs_header_level(node) - 1); + btrfs_header_level(node) - 1, + &node_key); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index fd209233317f..b310a5145cf6 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -287,7 +287,8 @@ static inline void wait_on_extent_buffer_writeback(struct extent_buffer *eb) } void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info, - u64 bytenr, u64 owner_root, u64 gen, int level); + u64 bytenr, u64 owner_root, u64 gen, int level, + const struct btrfs_key *first_key); void btrfs_readahead_node_child(struct extent_buffer *node, int slot); /* Note: this can be used in for loops without caching the value in a variable. */ diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 3e48d1a59fb3..d21742750b69 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2607,7 +2607,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, if (!block->key_ready) btrfs_readahead_tree_block(fs_info, block->bytenr, block->owner, 0, - block->level); + block->level, NULL); } /* Get first keys */ -- cgit v1.2.3 From 41e706c07ef9f752a08f0b9567176ac79441895f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 14 Apr 2026 11:51:15 +0930 Subject: btrfs: enable shutdown ioctl for non-experimental builds Although commit 304076527c38 ("btrfs: move shutdown and remove_bdev callbacks out of experimental features") tries to move both shutdown and remove_bdev out of experimental features, that commit has only addressed the super block operation callback, the ioctl one is left untouched. Fix that missing aspect by also moving shutdown ioctl out of experimental features. Since we're here, also add unknown flag detection to reject any unsupported shutdown flags. Fixes: 304076527c38 ("btrfs: move shutdown and remove_bdev callbacks out of experimental features") Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b2e447f5005c..a39460bf68a7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -5102,7 +5102,6 @@ static int btrfs_ioctl_subvol_sync(struct btrfs_fs_info *fs_info, void __user *a return 0; } -#ifdef CONFIG_BTRFS_EXPERIMENTAL static int btrfs_ioctl_shutdown(struct btrfs_fs_info *fs_info, unsigned long arg) { int ret = 0; @@ -5134,10 +5133,12 @@ static int btrfs_ioctl_shutdown(struct btrfs_fs_info *fs_info, unsigned long arg case BTRFS_SHUTDOWN_FLAGS_NOLOGFLUSH: btrfs_force_shutdown(fs_info); break; + default: + ret = -EINVAL; + break; } return ret; } -#endif long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -5294,10 +5295,8 @@ long btrfs_ioctl(struct file *file, unsigned int #endif case BTRFS_IOC_SUBVOL_SYNC_WAIT: return btrfs_ioctl_subvol_sync(fs_info, argp); -#ifdef CONFIG_BTRFS_EXPERIMENTAL case BTRFS_IOC_SHUTDOWN: return btrfs_ioctl_shutdown(fs_info, arg); -#endif } return -ENOTTY; -- cgit v1.2.3 From 44366af74061793ee5ceef455a4f0e465892d0de Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Mon, 23 Mar 2026 17:17:01 +0000 Subject: btrfs: don't clobber errors in add_remap_tree_entries() In add_remap_tree_entries(), we only process a certain number of entries at a time, meaning we may need to loop. But because we weren't checking the return value of btrfs_insert_empty_items() within the loop, this meant that if the last iteration of the loop succeeded but a previous iteration failed, we were erroneously returning 0. Fix this by breaking the loop early if btrfs_insert_empty_items() fails. Fixes: b56f35560b82 ("btrfs: handle setting up relocation of block group with remap-tree") Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d21742750b69..3ebaf5880125 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3876,7 +3876,7 @@ static int add_remap_tree_entries(struct btrfs_trans_handle *trans, struct btrfs ret = btrfs_insert_empty_items(trans, fs_info->remap_root, path, &batch); btrfs_release_path(path); - if (num_entries <= max_items) + if (ret || num_entries <= max_items) break; num_entries -= max_items; -- cgit v1.2.3 From 999757231c49376cd1a37308d2c8c4c9932571e1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 9 Apr 2026 15:46:51 +0100 Subject: btrfs: fix missing last_unlink_trans update when removing a directory When removing a directory we are not updating its last_unlink_trans field, which can result in incorrect fsync behaviour in case some one fsyncs the directory after it was removed because it's holding a file descriptor on it. Example scenario: mkdir /mnt/dir1 mkdir /mnt/dir1/dir2 mkdir /mnt/dir3 sync -f /mnt # Do some change to the directory and fsync it. chmod 700 /mnt/dir1 xfs_io -c fsync /mnt/dir1 # Move dir2 out of dir1 so that dir1 becomes empty. mv /mnt/dir1/dir2 /mnt/dir3/ open fd on /mnt/dir1 call rmdir(2) on path "/mnt/dir1" fsync fd When attempting to mount the filesystem, the log replay will fail with an -EIO error and dmesg/syslog has the following: [445771.626482] BTRFS info (device dm-0): first mount of filesystem 0368bbea-6c5e-44b5-b409-09abe496e650 [445771.626486] BTRFS info (device dm-0): using crc32c checksum algorithm [445771.627912] BTRFS info (device dm-0): start tree-log replay [445771.628335] page: refcount:2 mapcount:0 mapping:0000000061443ddc index:0x1d00 pfn:0x7072a5 [445771.629453] memcg:ffff89f400351b00 [445771.629892] aops:btree_aops [btrfs] ino:1 [445771.630737] flags: 0x17fffc00000402a(uptodate|lru|private|writeback|node=0|zone=2|lastcpupid=0x1ffff) [445771.632359] raw: 017fffc00000402a fffff47284d950c8 fffff472907b7c08 ffff89f458e412b8 [445771.633713] raw: 0000000000001d00 ffff89f6c51d1a90 00000002ffffffff ffff89f400351b00 [445771.635029] page dumped because: eb page dump [445771.635825] BTRFS critical (device dm-0): corrupt leaf: root=5 block=30408704 slot=10 ino=258, invalid nlink: has 2 expect no more than 1 for dir [445771.638088] BTRFS info (device dm-0): leaf 30408704 gen 10 total ptrs 17 free space 14878 owner 5 [445771.638091] BTRFS info (device dm-0): refs 4 lock_owner 0 current 3581087 [445771.638094] item 0 key (256 INODE_ITEM 0) itemoff 16123 itemsize 160 [445771.638097] inode generation 3 transid 9 size 16 nbytes 16384 [445771.638098] block group 0 mode 40755 links 1 uid 0 gid 0 [445771.638100] rdev 0 sequence 2 flags 0x0 [445771.638102] atime 1775744884.0 [445771.660056] ctime 1775744885.645502983 [445771.660058] mtime 1775744885.645502983 [445771.660060] otime 1775744884.0 [445771.660062] item 1 key (256 INODE_REF 256) itemoff 16111 itemsize 12 [445771.660064] index 0 name_len 2 [445771.660066] item 2 key (256 DIR_ITEM 1843588421) itemoff 16077 itemsize 34 [445771.660068] location key (259 1 0) type 2 [445771.660070] transid 9 data_len 0 name_len 4 [445771.660075] item 3 key (256 DIR_ITEM 2363071922) itemoff 16043 itemsize 34 [445771.660076] location key (257 1 0) type 2 [445771.660077] transid 9 data_len 0 name_len 4 [445771.660078] item 4 key (256 DIR_INDEX 2) itemoff 16009 itemsize 34 [445771.660079] location key (257 1 0) type 2 [445771.660080] transid 9 data_len 0 name_len 4 [445771.660081] item 5 key (256 DIR_INDEX 3) itemoff 15975 itemsize 34 [445771.660082] location key (259 1 0) type 2 [445771.660083] transid 9 data_len 0 name_len 4 [445771.660084] item 6 key (257 INODE_ITEM 0) itemoff 15815 itemsize 160 [445771.660086] inode generation 9 transid 9 size 8 nbytes 0 [445771.660087] block group 0 mode 40777 links 1 uid 0 gid 0 [445771.660088] rdev 0 sequence 2 flags 0x0 [445771.660089] atime 1775744885.641174097 [445771.660090] ctime 1775744885.645502983 [445771.660091] mtime 1775744885.645502983 [445771.660105] otime 1775744885.641174097 [445771.660106] item 7 key (257 INODE_REF 256) itemoff 15801 itemsize 14 [445771.660107] index 2 name_len 4 [445771.660108] item 8 key (257 DIR_ITEM 2676584006) itemoff 15767 itemsize 34 [445771.660109] location key (258 1 0) type 2 [445771.660110] transid 9 data_len 0 name_len 4 [445771.660111] item 9 key (257 DIR_INDEX 2) itemoff 15733 itemsize 34 [445771.660112] location key (258 1 0) type 2 [445771.660113] transid 9 data_len 0 name_len 4 [445771.660114] item 10 key (258 INODE_ITEM 0) itemoff 15573 itemsize 160 [445771.660115] inode generation 9 transid 10 size 0 nbytes 0 [445771.660116] block group 0 mode 40755 links 2 uid 0 gid 0 [445771.660117] rdev 0 sequence 0 flags 0x0 [445771.660118] atime 1775744885.645502983 [445771.660119] ctime 1775744885.645502983 [445771.660120] mtime 1775744885.645502983 [445771.660121] otime 1775744885.645502983 [445771.660122] item 11 key (258 INODE_REF 257) itemoff 15559 itemsize 14 [445771.660123] index 2 name_len 4 [445771.660124] item 12 key (258 INODE_REF 259) itemoff 15545 itemsize 14 [445771.660125] index 2 name_len 4 [445771.660126] item 13 key (259 INODE_ITEM 0) itemoff 15385 itemsize 160 [445771.660127] inode generation 9 transid 10 size 8 nbytes 0 [445771.660128] block group 0 mode 40755 links 1 uid 0 gid 0 [445771.660129] rdev 0 sequence 1 flags 0x0 [445771.660130] atime 1775744885.645502983 [445771.660130] ctime 1775744885.645502983 [445771.660131] mtime 1775744885.645502983 [445771.660132] otime 1775744885.645502983 [445771.660133] item 14 key (259 INODE_REF 256) itemoff 15371 itemsize 14 [445771.660134] index 3 name_len 4 [445771.660135] item 15 key (259 DIR_ITEM 2676584006) itemoff 15337 itemsize 34 [445771.660136] location key (258 1 0) type 2 [445771.660137] transid 10 data_len 0 name_len 4 [445771.660138] item 16 key (259 DIR_INDEX 2) itemoff 15303 itemsize 34 [445771.660139] location key (258 1 0) type 2 [445771.660140] transid 10 data_len 0 name_len 4 [445771.660144] BTRFS error (device dm-0): block=30408704 write time tree block corruption detected [445771.661650] ------------[ cut here ]------------ [445771.662358] WARNING: fs/btrfs/disk-io.c:326 at btree_csum_one_bio+0x217/0x230 [btrfs], CPU#8: mount/3581087 [445771.663588] Modules linked in: btrfs f2fs xfs (...) [445771.671229] CPU: 8 UID: 0 PID: 3581087 Comm: mount Tainted: G W 7.0.0-rc6-btrfs-next-230+ #2 PREEMPT(full) [445771.672575] Tainted: [W]=WARN [445771.672987] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014 [445771.674460] RIP: 0010:btree_csum_one_bio+0x217/0x230 [btrfs] [445771.675222] Code: 89 44 24 (...) [445771.677364] RSP: 0018:ffffd23882247660 EFLAGS: 00010246 [445771.678029] RAX: 0000000000000000 RBX: ffff89f6c51d1a90 RCX: 0000000000000000 [445771.678975] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff89f406020000 [445771.679983] RBP: ffff89f821204000 R08: 0000000000000000 R09: 00000000ffefffff [445771.680905] R10: ffffd23882247448 R11: 0000000000000003 R12: ffffd23882247668 [445771.681978] R13: ffff89f458e40fc0 R14: ffff89f737f4f500 R15: ffff89f737f4f500 [445771.682912] FS: 00007f0447a98840(0000) GS:ffff89fb9771d000(0000) knlGS:0000000000000000 [445771.684393] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [445771.685230] CR2: 00007f0447bf1330 CR3: 000000017cb02002 CR4: 0000000000370ef0 [445771.686273] Call Trace: [445771.686646] [445771.686969] btrfs_submit_bbio+0x83f/0x860 [btrfs] [445771.687750] ? write_one_eb+0x28f/0x340 [btrfs] [445771.688428] btree_writepages+0x2e3/0x550 [btrfs] [445771.689180] ? kmem_cache_alloc_noprof+0x12a/0x490 [445771.689963] ? alloc_extent_state+0x19/0x120 [btrfs] [445771.690801] ? kmem_cache_free+0x135/0x380 [445771.691328] ? preempt_count_add+0x69/0xa0 [445771.691831] ? set_extent_bit+0x252/0x8e0 [btrfs] [445771.692468] ? xas_load+0x9/0xc0 [445771.692873] ? xas_find+0x14d/0x1a0 [445771.693304] do_writepages+0xc6/0x160 [445771.693756] filemap_writeback+0xb8/0xe0 [445771.694274] btrfs_write_marked_extents+0x61/0x170 [btrfs] [445771.694999] btrfs_write_and_wait_transaction+0x4e/0xc0 [btrfs] [445771.695818] btrfs_commit_transaction+0x5c8/0xd10 [btrfs] [445771.696530] ? kmem_cache_free+0x135/0x380 [445771.697120] ? release_extent_buffer+0x34/0x160 [btrfs] [445771.697786] btrfs_recover_log_trees+0x7be/0x7e0 [btrfs] [445771.698525] ? __pfx_replay_one_buffer+0x10/0x10 [btrfs] [445771.699206] open_ctree+0x11e5/0x1810 [btrfs] [445771.699776] btrfs_get_tree.cold+0xb/0x162 [btrfs] [445771.700463] ? fscontext_read+0x165/0x180 [445771.701146] ? rw_verify_area+0x50/0x180 [445771.701866] vfs_get_tree+0x25/0xd0 [445771.702491] vfs_cmd_create+0x59/0xe0 [445771.703125] __do_sys_fsconfig+0x303/0x610 [445771.703603] do_syscall_64+0xe9/0xf20 [445771.703974] entry_SYSCALL_64_after_hwframe+0x76/0x7e [445771.704700] RIP: 0033:0x7f0447cbd4aa [445771.705108] Code: 73 01 c3 (...) [445771.707263] RSP: 002b:00007ffc4e528318 EFLAGS: 00000246 ORIG_RAX: 00000000000001af [445771.708107] RAX: ffffffffffffffda RBX: 00005561585d8c20 RCX: 00007f0447cbd4aa [445771.708931] RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000003 [445771.709744] RBP: 00005561585d9120 R08: 0000000000000000 R09: 0000000000000000 [445771.710674] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [445771.711477] R13: 00007f0447e4f580 R14: 00007f0447e5126c R15: 00007f0447e36a23 [445771.712277] [445771.712541] ---[ end trace 0000000000000000 ]--- [445771.713382] BTRFS error (device dm-0): error while writing out transaction: -5 [445771.714679] BTRFS warning (device dm-0): Skipping commit of aborted transaction. [445771.715562] BTRFS error (device dm-0 state A): Transaction aborted (error -5) [445771.716459] BTRFS: error (device dm-0 state A) in cleanup_transaction:2068: errno=-5 IO failure [445771.717936] BTRFS error (device dm-0 state EA): failed to recover log trees with error: -5 [445771.719681] BTRFS error (device dm-0 state EA): open_ctree failed: -5 The problem is that such a fsync should have result in a fallback to a transaction commit, but that did not happen because through the btrfs_rmdir() we never update the directory's last_unlink_trans field. Any inode that had a link removed must have its last_unlink_trans updated to the ID of transaction used for the operation, otherwise fsync and log replay will not work correctly. btrfs_rmdir() calls btrfs_unlink_inode() and through that call chain we never call btrfs_record_unlink_dir() in order to update last_unlink_trans. However btrfs_unlink(), which is used for unlinking regular files, calls btrfs_record_unlink_dir() and then calls btrfs_unlink_inode(). So fix this by moving the call to btrfs_record_unlink_dir() from btrfs_unlink() to btrfs_unlink_inode(). A test case for fstests will follow soon. Reported-by: Slava0135 Link: https://lore.kernel.org/linux-btrfs/CAAJYhww5ov62Hm+n+tmhcL-e_4cBobg+OWogKjOJxVUXivC=MQ@mail.gmail.com/ CC: stable@vger.kernel.org Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 40474014c03f..55133a364305 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4959,6 +4959,8 @@ static int btrfs_rmdir(struct inode *vfs_dir, struct dentry *dentry) if (ret) goto out; + btrfs_record_unlink_dir(trans, dir, inode, false); + /* now the directory is empty */ ret = btrfs_unlink_inode(trans, dir, inode, &fname.disk_name); if (!ret) -- cgit v1.2.3 From 4d95b9efd783adca472e957b2f576983e789b839 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Apr 2026 17:30:31 +0200 Subject: btrfs: handle unexpected free-space-tree key types Replace the conditional assertions with proper error handling and transaction abort if we find an unexpected key type in the free space tree. Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/free-space-tree.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 9efd1ec90f03..472b3060e5ac 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -259,7 +259,11 @@ int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, nr++; path->slots[0]--; } else { - ASSERT(0); + btrfs_err(fs_info, "unexpected free space tree key type %u", + found_key.type); + ret = -EUCLEAN; + btrfs_abort_transaction(trans, ret); + goto out; } } @@ -405,7 +409,11 @@ int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans, nr++; } else { - ASSERT(0); + btrfs_err(fs_info, "unexpected free space tree key type %u", + found_key.type); + ret = -EUCLEAN; + btrfs_abort_transaction(trans, ret); + goto out; } } @@ -1518,7 +1526,11 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans, nr++; path->slots[0]--; } else { - ASSERT(0); + btrfs_err(trans->fs_info, "unexpected free space tree key type %u", + found_key.type); + ret = -EUCLEAN; + btrfs_abort_transaction(trans, ret); + return ret; } } -- cgit v1.2.3 From 513f8a52eed880ea525dbb139b2127bd9bb793f1 Mon Sep 17 00:00:00 2001 From: robbieko Date: Mon, 13 Apr 2026 14:52:32 +0800 Subject: btrfs: copy devid in btrfs_partially_delete_raid_extent() When btrfs_partially_delete_raid_extent() rebuilds a truncated/shifted stripe extent into newitem, the loop copies the physical address for each stride but forgets to copy the devid. The resulting item written back to the stripe tree has zeroed-out devids, corrupting the stripe mapping. Fix this by reading the devid with btrfs_raid_stride_devid() and writing it into the new item with btrfs_set_stack_raid_stride_devid() before copying the physical address. Reviewed-by: Johannes Thumshirn Signed-off-by: robbieko Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid-stripe-tree.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c index 638c4ad572c9..ac8cec3ce6d3 100644 --- a/fs/btrfs/raid-stripe-tree.c +++ b/fs/btrfs/raid-stripe-tree.c @@ -45,8 +45,11 @@ static int btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans, for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) { struct btrfs_raid_stride *stride = &extent->strides[i]; + u64 devid; u64 phys; + devid = btrfs_raid_stride_devid(leaf, stride); + btrfs_set_stack_raid_stride_devid(&newitem->strides[i], devid); phys = btrfs_raid_stride_physical(leaf, stride) + frontpad; btrfs_set_stack_raid_stride_physical(&newitem->strides[i], phys); } -- cgit v1.2.3 From 2aef5cb1dcf9b3e1be3895a6477dc065e618aab8 Mon Sep 17 00:00:00 2001 From: robbieko Date: Mon, 13 Apr 2026 14:52:33 +0800 Subject: btrfs: fix raid stripe search missing entries at leaf boundaries In btrfs_delete_raid_extent(), the search key uses offset=0. When the target stripe entry is the first item on a leaf, btrfs_search_slot() may land on the previous leaf and decrementing the slot from nritems still points to the wrong entry, causing the stripe extent to be silently missed. Fix this by searching with offset=(u64)-1 instead. Since no real stripe entry has this offset, btrfs_search_slot() always returns 1 with the slot pointing past the last matching objectid entry. Then unconditionally decrement the slot with a proper slots[0]==0 early-exit check to handle the case where no matching entry exists. Reviewed-by: Johannes Thumshirn Signed-off-by: robbieko Signed-off-by: David Sterba --- fs/btrfs/raid-stripe-tree.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c index ac8cec3ce6d3..4937b08da9de 100644 --- a/fs/btrfs/raid-stripe-tree.c +++ b/fs/btrfs/raid-stripe-tree.c @@ -98,14 +98,26 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le while (1) { key.objectid = start; key.type = BTRFS_RAID_STRIPE_KEY; - key.offset = 0; + key.offset = (u64)-1; ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1); if (ret < 0) break; - if (path->slots[0] == btrfs_header_nritems(path->nodes[0])) - path->slots[0]--; + /* + * Search with offset=(u64)-1 ensures we land on the correct + * leaf even when the target entry is the first item on a leaf. + * Since no real entry has offset=(u64)-1, ret is always 1 and + * slot points past the last entry with objectid==start (or + * past the end of the leaf if that entry is the last item). + * Back up one slot to find the actual entry. + */ + if (path->slots[0] == 0) { + /* No entry with objectid <= start exists. */ + ret = 0; + break; + } + path->slots[0]--; leaf = path->nodes[0]; slot = path->slots[0]; -- cgit v1.2.3 From 1871ae78ffa5ce7c0458e9ba5867958c1753e425 Mon Sep 17 00:00:00 2001 From: robbieko Date: Mon, 13 Apr 2026 14:52:34 +0800 Subject: btrfs: fix wrong min_objectid in btrfs_previous_item() call When found_start > start and slot == 0, btrfs_previous_item() is called with min_objectid=start to find the previous stripe extent. However, the previous stripe extent we are looking for has objectid < start (it starts before our deletion range), so passing start as min_objectid prevents finding it. Fix by passing 0 as min_objectid to allow finding any preceding stripe extent regardless of its objectid. Reviewed-by: Johannes Thumshirn Signed-off-by: robbieko Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid-stripe-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c index 4937b08da9de..1a0ea2107688 100644 --- a/fs/btrfs/raid-stripe-tree.c +++ b/fs/btrfs/raid-stripe-tree.c @@ -138,7 +138,7 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le */ if (found_start > start) { if (slot == 0) { - ret = btrfs_previous_item(stripe_root, path, start, + ret = btrfs_previous_item(stripe_root, path, 0, BTRFS_RAID_STRIPE_KEY); if (ret) { if (ret > 0) -- cgit v1.2.3 From 653361585d251fbca0e19ac58b04ba95dd01e378 Mon Sep 17 00:00:00 2001 From: robbieko Date: Mon, 13 Apr 2026 14:52:35 +0800 Subject: btrfs: replace ASSERT with proper error handling in stripe lookup fallback After falling back to the previous item in btrfs_delete_raid_extent(), the code uses ASSERT(found_start <= start) to verify the found extent actually precedes our target range. If the B-tree state is unexpected (e.g. no overlapping extent exists), this triggers a kernel BUG/panic in debug builds, or silently continues with wrong data otherwise. Replace the ASSERT with a proper bounds check that returns -ENOENT if the found extent does not actually overlap with the start position. Signed-off-by: robbieko Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid-stripe-tree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c index 1a0ea2107688..d454894b9e66 100644 --- a/fs/btrfs/raid-stripe-tree.c +++ b/fs/btrfs/raid-stripe-tree.c @@ -154,7 +154,10 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le btrfs_item_key_to_cpu(leaf, &key, slot); found_start = key.objectid; found_end = found_start + key.offset; - ASSERT(found_start <= start); + if (found_start > start || found_end <= start) { + ret = -ENOENT; + break; + } } if (key.type != BTRFS_RAID_STRIPE_KEY) -- cgit v1.2.3 From fe0cdfd7118d8b40a21bfac221bb4982c5e10e10 Mon Sep 17 00:00:00 2001 From: robbieko Date: Mon, 13 Apr 2026 14:52:36 +0800 Subject: btrfs: handle -EAGAIN from btrfs_duplicate_item and refresh stale leaf pointer In the 'punch a hole' case of btrfs_delete_raid_extent(), btrfs_duplicate_item() can return -EAGAIN when the leaf needs to be split and the path becomes invalid. The old code treats any error as fatal and breaks out of the loop. Additionally, btrfs_duplicate_item() may trigger setup_leaf_for_split() which can reallocate the leaf node. The code continues using the old leaf pointer, leading to use-after-free or stale data access. Fix both issues by: - Handling -EAGAIN specifically: release the path and retry the loop. - Refreshing leaf = path->nodes[0] after successful duplication. Reviewed-by: Johannes Thumshirn Signed-off-by: robbieko Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid-stripe-tree.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c index d454894b9e66..2e0d2f83c651 100644 --- a/fs/btrfs/raid-stripe-tree.c +++ b/fs/btrfs/raid-stripe-tree.c @@ -194,9 +194,19 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le /* The "right" item. */ ret = btrfs_duplicate_item(trans, stripe_root, path, &newkey); + if (ret == -EAGAIN) { + btrfs_release_path(path); + continue; + } if (ret) break; + /* + * btrfs_duplicate_item() may have triggered a leaf + * split via setup_leaf_for_split(), so we must refresh + * our leaf pointer from the path. + */ + leaf = path->nodes[0]; item_size = btrfs_item_size(leaf, path->slots[0]); extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_stripe_extent); -- cgit v1.2.3 From a8d58a7c0200904ff24ca7f0d7c147017e25aa99 Mon Sep 17 00:00:00 2001 From: robbieko Date: Mon, 13 Apr 2026 14:52:37 +0800 Subject: btrfs: check return value of btrfs_partially_delete_raid_extent() btrfs_partially_delete_raid_extent() returns an error code (e.g. -ENOMEM from kzalloc(), or errors from btrfs_del_item/btrfs_insert_item()), but all three call sites in btrfs_delete_raid_extent() discard the return value, silently losing errors and potentially leaving the stripe tree in an inconsistent state. Fix by capturing the return value into ret at all three call sites and breaking out of the loop on error where appropriate. Reviewed-by: Johannes Thumshirn Signed-off-by: robbieko Signed-off-by: David Sterba --- fs/btrfs/raid-stripe-tree.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c index 2e0d2f83c651..4b0186c83ad1 100644 --- a/fs/btrfs/raid-stripe-tree.c +++ b/fs/btrfs/raid-stripe-tree.c @@ -223,8 +223,9 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le /* The "left" item. */ path->slots[0]--; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - btrfs_partially_delete_raid_extent(trans, path, &key, - diff_start, 0); + ret = btrfs_partially_delete_raid_extent(trans, path, + &key, + diff_start, 0); break; } @@ -240,8 +241,11 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le if (found_start < start) { u64 diff_start = start - found_start; - btrfs_partially_delete_raid_extent(trans, path, &key, - diff_start, 0); + ret = btrfs_partially_delete_raid_extent(trans, path, + &key, + diff_start, 0); + if (ret) + break; start += (key.offset - diff_start); length -= (key.offset - diff_start); @@ -264,9 +268,10 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le if (found_end > end) { u64 diff_end = found_end - end; - btrfs_partially_delete_raid_extent(trans, path, &key, - key.offset - length, - length); + ret = btrfs_partially_delete_raid_extent(trans, path, + &key, + key.offset - length, + length); ASSERT(key.offset - diff_end == length); break; } -- cgit v1.2.3 From 82323b1a7088b7a5c3e528a5d634bff447fa286f Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Thu, 16 Apr 2026 18:15:23 +0100 Subject: btrfs: fix double-decrement of bytes_may_use in submit_one_async_extent() submit_one_async_extent() calls btrfs_reserve_extent(), which decrements bytes_may_use. If the call btrfs_create_io_em() fails, we jump to out_free_reserve, which calls extent_clear_unlock_delalloc(). Because we're specifying EXTENT_DO_ACCOUNTING, i.e. EXTENT_CLEAR_META_RESV | EXTENT_CLEAR_DATA_RESV, this decreases bytes_may_use again. This can lead to problems later on, as an initial write can fail only for the writeback to silently ENOSPC. Fix this by replacing EXTENT_DO_ACCOUNTING with EXTENT_CLEAR_META_RESV. This parallels a4fe134fc1d8eb ("btrfs: fix a double release on reserved extents in cow_one_range()"), which is the same fix in cow_one_range(). Fixes: 151a41bc46df ("Btrfs: fix what bits we clear when erroring out from delalloc") Reviewed-by: Qu Wenruo Signed-off-by: Mark Harmstone Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 55133a364305..906d5c21ebc4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1153,7 +1153,7 @@ out_free_reserve: NULL, &cached, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | - EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, + EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); if (async_extent->cb) -- cgit v1.2.3 From 7b03c93d2beb91c6abae322a1f25447b5b3bb9e6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Apr 2026 08:08:06 +0200 Subject: scsi: sg: Don't use GFP_ATOMIC in sg_start_req() sg_start_req() is called from normal user context and can sleep when waiting for memory. Switch it to use GFP_KERNEL, which fixes allocation failures seen with the bio_alloc rework. Fixes: b520c4eef83d ("block: split bio_alloc_bioset more clearly into a fast and slowpath") Reported-by: Shin'ichiro Kawasaki Signed-off-by: Christoph Hellwig Tested-by: Shin'ichiro Kawasaki Reviewed-by: John Garry Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20260415060813.807659-2-hch@lst.de Signed-off-by: Martin K. Petersen --- drivers/scsi/sg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 2b4b2a1a8e44..74cd4e8a61c2 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1801,7 +1801,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) } res = blk_rq_map_user_io(rq, md, hp->dxferp, hp->dxfer_len, - GFP_ATOMIC, iov_count, iov_count, 1, rw); + GFP_KERNEL, iov_count, iov_count, 1, rw); if (!res) { srp->bio = rq->bio; -- cgit v1.2.3 From 04631f55afc543d5431a2bdee7f6cc0f2c0debe7 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Tue, 14 Apr 2026 16:38:11 +0530 Subject: scsi: mpt3sas: Limit NVMe request size to 2 MiB The HBA firmware reports NVMe MDTS values based on the underlying drive capability. However, because the driver allocates a fixed 4K buffer for the PRP list, accommodating at most 512 entries, the driver supports a maximum I/O transfer size of 2 MiB. Limit max_hw_sectors to the smaller of the reported MDTS and the 2 MiB driver limit to prevent issuing oversized I/O that may lead to a kernel oops. Cc: stable@vger.kernel.org Fixes: 9b8b84879d4a ("block: Increase BLK_DEF_MAX_SECTORS_CAP") Reported-by: Mira Limbeck Closes: https://lore.kernel.org/r/291f78bf-4b4a-40dd-867d-053b36c564b3@proxmox.com Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9b8b84879d4a Suggested-by: Keith Busch Signed-off-by: Ranjan Kumar Tested-by: Mira Limbeck Link: https://patch.msgid.link/20260414110811.85156-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 6ff788557294..12caffeed3a0 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2738,8 +2738,20 @@ scsih_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim) pcie_device->enclosure_level, pcie_device->connector_name); + /* + * The HBA firmware passes the NVMe drive's MDTS + * (Maximum Data Transfer Size) up to the driver. However, + * the driver hardcodes a 4K buffer size for the PRP list, + * accommodating at most 512 entries. This strictly limits + * the maximum supported NVMe I/O transfer to 2 MiB. + * + * Cap max_hw_sectors to the smaller of the drive's reported + * MDTS or the 2 MiB driver limit to prevent kernel oopses. + */ + lim->max_hw_sectors = SZ_2M >> SECTOR_SHIFT; if (pcie_device->nvme_mdts) - lim->max_hw_sectors = pcie_device->nvme_mdts / 512; + lim->max_hw_sectors = min(lim->max_hw_sectors, + pcie_device->nvme_mdts >> SECTOR_SHIFT); pcie_device_put(pcie_device); spin_unlock_irqrestore(&ioc->pcie_device_lock, flags); -- cgit v1.2.3 From d65efdf467ff935e35dfe6aa9a7ab93f17ac07ee Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Tue, 14 Apr 2026 14:41:18 +0200 Subject: scsi: smartpqi: Silence a recursive lock warning On systems with multiple controllers debug kernel shows WARNING: possible recursive locking detected during shutdown. Each controller does have its own ctrl_info (and mutex) and that isn't correctly recognized by debug kernel. Suppress the warning by releasing the mutex at the end of pqi_shutdown(). Signed-off-by: Tomas Henzl Acked-by: Don Brace Link: https://patch.msgid.link/20260414124118.23661-1-thenzl@redhat.com Signed-off-by: Martin K. Petersen --- drivers/scsi/smartpqi/smartpqi_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index b4ed991976d0..2026ac645d6a 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -9427,6 +9427,7 @@ static void pqi_shutdown(struct pci_dev *pci_dev) pqi_crash_if_pending_command(ctrl_info); pqi_reset(ctrl_info); + pqi_ctrl_unblock_device_reset(ctrl_info); } static void pqi_process_lockup_action_param(void) -- cgit v1.2.3 From 68c3a65a5a8e85643745fdde02cb63904e165620 Mon Sep 17 00:00:00 2001 From: Brian Bunker Date: Thu, 16 Apr 2026 09:55:12 -0700 Subject: scsi: scsi_dh_alua: Increase default ALUA timeout to maximum spec value The ALUA handler maps a 0 value (no implicit transition timeout provided by the target) to the ALUA_FAILOVER_TIMEOUT constant, currently 60 seconds. This means the kernel already does not accept an infinite transition time. However, 60 seconds is insufficient for some arrays that may take longer to complete ALUA transitions. Since the highest value allowed by the SCSI specification for the implicit transition timeout is a single byte (255 seconds), change the default to 255. This way, when a target does not provide an explicit transition timeout, we default to the maximum value the spec allows rather than an arbitrary 60 second limit. Co-developed-by: Krishna Kant Signed-off-by: Krishna Kant Co-developed-by: Riya Savla Signed-off-by: Riya Savla Signed-off-by: Brian Bunker Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20260416165512.26497-2-brian@purestorage.com Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index efb08b9b145a..80ab0ff921d4 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -37,7 +37,7 @@ #define TPGS_MODE_EXPLICIT 0x2 #define ALUA_RTPG_SIZE 128 -#define ALUA_FAILOVER_TIMEOUT 60 +#define ALUA_FAILOVER_TIMEOUT 255 /* max 255 (8-bit value) */ #define ALUA_FAILOVER_RETRIES 5 #define ALUA_RTPG_DELAY_MSECS 5 #define ALUA_RTPG_RETRY_DELAY 2 -- cgit v1.2.3 From 1dc39ed655750d6c679d3ada4adf4a937f2a63fc Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Fri, 17 Apr 2026 16:07:31 -0400 Subject: scsi: pmcraid: Fix typo in comments Fix typo in structure comment. Signed-off-by: Hugo Villeneuve Link: https://patch.msgid.link/20260417200738.3920001-1-hugo@hugovil.com Signed-off-by: Martin K. Petersen --- drivers/scsi/pmcraid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/pmcraid.h b/drivers/scsi/pmcraid.h index 9f59930e8b4f..cd059b7599b4 100644 --- a/drivers/scsi/pmcraid.h +++ b/drivers/scsi/pmcraid.h @@ -657,7 +657,7 @@ struct pmcraid_hostrcb { */ struct pmcraid_instance { /* Array of allowed-to-be-exposed resources, initialized from - * Configutation Table, later updated with CCNs + * Configuration Table, later updated with CCNs */ struct pmcraid_resource_entry *res_entries; -- cgit v1.2.3 From 47e66bec3edaebd7c52d8ee981065a4c83b3072f Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Mon, 20 Apr 2026 10:10:44 +0800 Subject: scsi: hisi_sas: Fix sparse warnings in prep_ata_v3_hw() In prep_ata_v3_hw(), add cpu_to_le32() to fix warning: drivers/scsi/hisi_sas/hisi_sas_v3_hw.c:1448:26: sparse: sparse: invalid assignment: |= drivers/scsi/hisi_sas/hisi_sas_v3_hw.c:1448:26: sparse: left side has type restricted __le32 drivers/scsi/hisi_sas/hisi_sas_v3_hw.c:1448:26: sparse: right side has type unsigned int Fixes: 8aa580cd9284 ("scsi: hisi_sas: Enable force phy when SATA disk directly connected") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202604191850.IVYPTaML-lkp@intel.com/ Signed-off-by: Yihang Li Link: https://patch.msgid.link/20260420021044.3339459-1-liyihang9@huawei.com Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index fda07b193137..14d563e82d20 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -1491,7 +1491,7 @@ static void prep_ata_v3_hw(struct hisi_hba *hisi_hba, phy_id = device->phy->identify.phy_identifier; hdr->dw0 |= cpu_to_le32((1U << phy_id) << CMD_HDR_PHY_ID_OFF); - hdr->dw0 |= CMD_HDR_FORCE_PHY_MSK; + hdr->dw0 |= cpu_to_le32(CMD_HDR_FORCE_PHY_MSK); hdr->dw0 |= cpu_to_le32(4U << CMD_HDR_CMD_OFF); } -- cgit v1.2.3 From 4fe985292709eeb6a4653c71660f893e26c2f2dd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 20 Apr 2026 20:03:26 -1000 Subject: rhashtable: Bounce deferred worker kick through irq_work Inserts past 75% load call schedule_work(&ht->run_work) to kick an async resize. If a caller holds a raw spinlock (e.g. an insecure_elasticity user), schedule_work() under that lock records caller_lock -> pool->lock -> pi_lock -> rq->__lock A cycle forms if any of these locks is acquired in the reverse direction elsewhere. sched_ext, the only current insecure_elasticity user, hits this: it holds scx_sched_lock across rhashtable inserts of sub-schedulers, while scx_bypass() takes rq->__lock -> scx_sched_lock. Exercising the resize path produces: Chain exists of: &pool->lock --> &rq->__lock --> scx_sched_lock Bounce the kick from the insert paths through irq_work so schedule_work() runs from hard IRQ context with the caller's lock no longer held. rht_deferred_worker()'s self-rearm on error stays on schedule_work(&ht->run_work) - the worker runs in process context with no caller lock held, and keeping the self-requeue on @run_work lets cancel_work_sync() in rhashtable_free_and_destroy() drain it. v3: Keep rht_deferred_worker()'s self-rearm on schedule_work(&run_work). Routing it through irq_work in v2 broke cancel_work_sync()'s self-requeue handling - an irq_work queued after irq_work_sync() returned but while cancel_work_sync() was still waiting could fire post-teardown. v2: Bounce unconditionally instead of gating on insecure_elasticity, as suggested by Herbert. Signed-off-by: Tejun Heo Acked-by: Herbert Xu --- include/linux/rhashtable-types.h | 3 +++ include/linux/rhashtable.h | 3 ++- lib/rhashtable.c | 31 ++++++++++++++++++++++++++++--- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h index 72082428d6c6..fc2f596a6df1 100644 --- a/include/linux/rhashtable-types.h +++ b/include/linux/rhashtable-types.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -77,6 +78,7 @@ struct rhashtable_params { * @p: Configuration parameters * @rhlist: True if this is an rhltable * @run_work: Deferred worker to expand/shrink asynchronously + * @run_irq_work: Bounces the @run_work kick through hard IRQ context. * @mutex: Mutex to protect current/future table swapping * @lock: Spin lock to protect walker list * @nelems: Number of elements in table @@ -88,6 +90,7 @@ struct rhashtable { struct rhashtable_params p; bool rhlist; struct work_struct run_work; + struct irq_work run_irq_work; struct mutex mutex; spinlock_t lock; atomic_t nelems; diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 7def3f0f556b..ef5230cece36 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -847,7 +848,7 @@ slow_path: rht_assign_unlock(tbl, bkt, obj, flags); if (rht_grow_above_75(ht, tbl)) - schedule_work(&ht->run_work); + irq_work_queue(&ht->run_irq_work); data = NULL; out: diff --git a/lib/rhashtable.c b/lib/rhashtable.c index fb2b7bc137ba..7a67ef5b67b6 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -441,10 +441,33 @@ static void rht_deferred_worker(struct work_struct *work) mutex_unlock(&ht->mutex); + /* + * Re-arm via @run_work, not @run_irq_work. + * rhashtable_free_and_destroy() drains async work as irq_work_sync() + * followed by cancel_work_sync(). If this site queued irq_work while + * cancel_work_sync() was waiting for us, irq_work_sync() would already + * have returned and the stale irq_work could fire post-teardown. + * cancel_work_sync() natively handles self-requeue on @run_work. + */ if (err) schedule_work(&ht->run_work); } +/* + * Insert-path callers can run under a raw spinlock (e.g. an insecure_elasticity + * user). Calling schedule_work() under that lock records caller_lock -> + * pool->lock -> pi_lock -> rq->__lock, closing a locking cycle if any of + * these is acquired in the reverse direction elsewhere. Bounce through + * irq_work so the schedule_work() runs with the caller's lock no longer held. + */ +static void rht_deferred_irq_work(struct irq_work *irq_work) +{ + struct rhashtable *ht = container_of(irq_work, struct rhashtable, + run_irq_work); + + schedule_work(&ht->run_work); +} + static int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl) { @@ -477,7 +500,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht, if (err == -EEXIST) err = 0; } else - schedule_work(&ht->run_work); + irq_work_queue(&ht->run_irq_work); return err; @@ -488,7 +511,7 @@ fail: /* Schedule async rehash to retry allocation in process context. */ if (err == -ENOMEM) - schedule_work(&ht->run_work); + irq_work_queue(&ht->run_irq_work); return err; } @@ -630,7 +653,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, rht_unlock(tbl, bkt, flags); if (inserted && rht_grow_above_75(ht, tbl)) - schedule_work(&ht->run_work); + irq_work_queue(&ht->run_irq_work); } } while (!IS_ERR_OR_NULL(new_tbl)); @@ -1085,6 +1108,7 @@ int rhashtable_init_noprof(struct rhashtable *ht, RCU_INIT_POINTER(ht->tbl, tbl); INIT_WORK(&ht->run_work, rht_deferred_worker); + init_irq_work(&ht->run_irq_work, rht_deferred_irq_work); return 0; } @@ -1150,6 +1174,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, struct bucket_table *tbl, *next_tbl; unsigned int i; + irq_work_sync(&ht->run_irq_work); cancel_work_sync(&ht->run_work); mutex_lock(&ht->mutex); -- cgit v1.2.3 From 1e8e3f449b1e73b73a843257635b9c50f0cc0f0a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 20 Apr 2026 23:15:32 +0200 Subject: netfilter: arp_tables: fix IEEE1394 ARP payload parsing Weiming Shi says: "arp_packet_match() unconditionally parses the ARP payload assuming two hardware addresses are present (source and target). However, IPv4-over-IEEE1394 ARP (RFC 2734) omits the target hardware address field, and arp_hdr_len() already accounts for this by returning a shorter length for ARPHRD_IEEE1394 devices. As a result, on IEEE1394 interfaces arp_packet_match() advances past a nonexistent target hardware address and reads the wrong bytes for both the target device address comparison and the target IP address. This causes arptables rules to match against garbage data, leading to incorrect filtering decisions: packets that should be accepted may be dropped and vice versa. The ARP stack in net/ipv4/arp.c (arp_create and arp_process) already handles this correctly by skipping the target hardware address for ARPHRD_IEEE1394. Apply the same pattern to arp_packet_match()." Mangle the original patch to always return 0 (no match) in case user matches on the target hardware address which is never present in IEEE1394. Note that this returns 0 (no match) for either normal and inverse match because matching in the target hardware address in ARPHRD_IEEE1394 has never been supported by arptables. This is intentional, matching on the target hardware address should never evaluate true for ARPHRD_IEEE1394. Moreover, adjust arpt_mangle to drop the packet too as AI suggests: In arpt_mangle, the logic assumes a standard ARP layout. Because IEEE1394 (FireWire) omits the target hardware address, the linear pointer arithmetic miscalculates the offset for the target IP address. This causes mangling operations to write to the wrong location, leading to packet corruption. To ensure safety, this patch drops packets (NF_DROP) when mangling is requested for these fields on IEEE1394 devices, as the current implementation cannot correctly map the FireWire ARP payload. This omits both mangling target hardware and IP address. Even if IP address mangling should be possible in IEEE1394, this would require to adjust arpt_mangle offset calculation, which has never been supported. Based on patch from Weiming Shi . Fixes: 6752c8db8e0c ("firewire net, ipv4 arp: Extend hardware address and remove driver-level packet inspection.") Reported-by: Xiang Mei Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 18 +++++++++++++++--- net/ipv4/netfilter/arpt_mangle.c | 8 ++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 1cdd9c28ab2d..97ead883e4a1 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -110,13 +110,25 @@ static inline int arp_packet_match(const struct arphdr *arphdr, arpptr += dev->addr_len; memcpy(&src_ipaddr, arpptr, sizeof(u32)); arpptr += sizeof(u32); - tgt_devaddr = arpptr; - arpptr += dev->addr_len; + + if (IS_ENABLED(CONFIG_FIREWIRE_NET) && dev->type == ARPHRD_IEEE1394) { + if (unlikely(memchr_inv(arpinfo->tgt_devaddr.mask, 0, + sizeof(arpinfo->tgt_devaddr.mask)))) + return 0; + + tgt_devaddr = NULL; + } else { + tgt_devaddr = arpptr; + arpptr += dev->addr_len; + } memcpy(&tgt_ipaddr, arpptr, sizeof(u32)); if (NF_INVF(arpinfo, ARPT_INV_SRCDEVADDR, arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, - dev->addr_len)) || + dev->addr_len))) + return 0; + + if (tgt_devaddr && NF_INVF(arpinfo, ARPT_INV_TGTDEVADDR, arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len))) diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index a4e07e5e9c11..f65dd339208e 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -40,6 +40,10 @@ target(struct sk_buff *skb, const struct xt_action_param *par) } arpptr += pln; if (mangle->flags & ARPT_MANGLE_TDEV) { + if (unlikely(IS_ENABLED(CONFIG_FIREWIRE_NET) && + skb->dev->type == ARPHRD_IEEE1394)) + return NF_DROP; + if (ARPT_DEV_ADDR_LEN_MAX < hln || (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; @@ -47,6 +51,10 @@ target(struct sk_buff *skb, const struct xt_action_param *par) } arpptr += hln; if (mangle->flags & ARPT_MANGLE_TIP) { + if (unlikely(IS_ENABLED(CONFIG_FIREWIRE_NET) && + skb->dev->type == ARPHRD_IEEE1394)) + return NF_DROP; + if (ARPT_MANGLE_ADDR_LEN_MAX < pln || (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; -- cgit v1.2.3 From f3224ee463f8f6f6ced7dcdf6081add4f8128527 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 16 Apr 2026 15:14:51 +0200 Subject: netfilter: nf_tables: use list_del_rcu for netlink hooks nft_netdev_unregister_hooks and __nft_unregister_flowtable_net_hooks need to use list_del_rcu(), this list can be walked by concurrent dumpers. Add a new helper and use it consistently. Fixes: f9a43007d3f7 ("netfilter: nf_tables: double hook unregistration in netns path") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 44 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8537b94653d3..07e151245765 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -374,6 +374,12 @@ static void nft_netdev_hook_free_rcu(struct nft_hook *hook) call_rcu(&hook->rcu, __nft_netdev_hook_free_rcu); } +static void nft_netdev_hook_unlink_free_rcu(struct nft_hook *hook) +{ + list_del_rcu(&hook->list); + nft_netdev_hook_free_rcu(hook); +} + static void nft_netdev_unregister_hooks(struct net *net, struct list_head *hook_list, bool release_netdev) @@ -384,10 +390,8 @@ static void nft_netdev_unregister_hooks(struct net *net, list_for_each_entry_safe(hook, next, hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) nf_unregister_net_hook(net, ops); - if (release_netdev) { - list_del(&hook->list); - nft_netdev_hook_free_rcu(hook); - } + if (release_netdev) + nft_netdev_hook_unlink_free_rcu(hook); } } @@ -2271,10 +2275,8 @@ void nf_tables_chain_destroy(struct nft_chain *chain) if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { list_for_each_entry_safe(hook, next, - &basechain->hook_list, list) { - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); - } + &basechain->hook_list, list) + nft_netdev_hook_unlink_free_rcu(hook); } module_put(basechain->type->owner); if (rcu_access_pointer(basechain->stats)) { @@ -2974,6 +2976,7 @@ err_hooks: list_for_each_entry(ops, &h->ops_list, list) nf_unregister_net_hook(ctx->net, ops); } + /* hook.list is on stack, no need for list_del_rcu() */ list_del(&h->list); nft_netdev_hook_free_rcu(h); } @@ -8852,10 +8855,8 @@ static void __nft_unregister_flowtable_net_hooks(struct net *net, list_for_each_entry_safe(hook, next, hook_list, list) { list_for_each_entry(ops, &hook->ops_list, list) nft_unregister_flowtable_ops(net, flowtable, ops); - if (release_netdev) { - list_del(&hook->list); - nft_netdev_hook_free_rcu(hook); - } + if (release_netdev) + nft_netdev_hook_unlink_free_rcu(hook); } } @@ -8926,8 +8927,7 @@ err_unregister_net_hooks: nft_unregister_flowtable_ops(net, flowtable, ops); } - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); + nft_netdev_hook_unlink_free_rcu(hook); } return err; @@ -8937,10 +8937,8 @@ static void nft_hooks_destroy(struct list_head *hook_list) { struct nft_hook *hook, *next; - list_for_each_entry_safe(hook, next, hook_list, list) { - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); - } + list_for_each_entry_safe(hook, next, hook_list, list) + nft_netdev_hook_unlink_free_rcu(hook); } static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, @@ -9028,8 +9026,7 @@ err_flowtable_update_hook: nft_unregister_flowtable_ops(ctx->net, flowtable, ops); } - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); + nft_netdev_hook_unlink_free_rcu(hook); } return err; @@ -9535,13 +9532,8 @@ err: static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) { - struct nft_hook *hook, *next; - flowtable->data.type->free(&flowtable->data); - list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { - list_del_rcu(&hook->list); - nft_netdev_hook_free_rcu(hook); - } + nft_hooks_destroy(&flowtable->hook_list); kfree(flowtable->name); module_put(flowtable->data.type->owner); kfree(flowtable); -- cgit v1.2.3 From f902877b635551513729bdf9a8d1422c4aab7741 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 15 Apr 2026 17:56:02 +0200 Subject: rculist: add list_splice_rcu() for private lists This patch adds a helper function, list_splice_rcu(), to safely splice a private (non-RCU-protected) list into an RCU-protected list. The function ensures that only the pointer visible to RCU readers (prev->next) is updated using rcu_assign_pointer(), while the rest of the list manipulations are performed with regular assignments, as the source list is private and not visible to concurrent RCU readers. This is useful for moving elements from a private list into a global RCU-protected list, ensuring safe publication for RCU readers. Subsystems with some sort of batching mechanism from userspace can benefit from this new function. The function __list_splice_rcu() has been added for clarity and to follow the same pattern as in the existing list_splice*() interfaces, where there is a check to ensure that the list to splice is not empty. Note that __list_splice_rcu() has no documentation for this reason. Reviewed-by: Paul E. McKenney Signed-off-by: Pablo Neira Ayuso --- include/linux/rculist.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 2abba7552605..e3bc44225692 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -261,6 +261,35 @@ static inline void list_replace_rcu(struct list_head *old, old->prev = LIST_POISON2; } +static inline void __list_splice_rcu(struct list_head *list, + struct list_head *prev, + struct list_head *next) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + + last->next = next; + first->prev = prev; + next->prev = last; + rcu_assign_pointer(list_next_rcu(prev), first); +} + +/** + * list_splice_rcu - splice a non-RCU list into an RCU-protected list, + * designed for stacks. + * @list: the non RCU-protected list to splice + * @head: the place in the existing RCU-protected list to splice + * + * The list pointed to by @head can be RCU-read traversed concurrently with + * this function. + */ +static inline void list_splice_rcu(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) + __list_splice_rcu(list, head, head->next); +} + /** * __list_splice_init_rcu - join an RCU-protected list into an existing list. * @list: the RCU-protected list to splice -- cgit v1.2.3 From a6134e62dba2ea4f760b29d5226907f447c92400 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 15 Apr 2026 17:56:14 +0200 Subject: netfilter: nf_tables: join hook list via splice_list_rcu() in commit phase Publish new hooks in the list into the basechain/flowtable using splice_list_rcu() to ensure netlink dump list traversal via rcu is safe while concurrent ruleset update is going on. Fixes: 78d9f48f7f44 ("netfilter: nf_tables: add devices to existing flowtable") Fixes: b9703ed44ffb ("netfilter: nf_tables: support for adding new devices to an existing netdev chain") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 07e151245765..ae10116af923 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10838,8 +10838,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_chain_commit_update(nft_trans_container_chain(trans)); nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, &nft_trans_chain_hooks(trans)); - list_splice(&nft_trans_chain_hooks(trans), - &nft_trans_basechain(trans)->hook_list); + list_splice_rcu(&nft_trans_chain_hooks(trans), + &nft_trans_basechain(trans)->hook_list); /* trans destroyed after rcu grace period */ } else { nft_chain_commit_drop_policy(nft_trans_container_chain(trans)); @@ -10968,8 +10968,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), NFT_MSG_NEWFLOWTABLE); - list_splice(&nft_trans_flowtable_hooks(trans), - &nft_trans_flowtable(trans)->hook_list); + list_splice_rcu(&nft_trans_flowtable_hooks(trans), + &nft_trans_flowtable(trans)->hook_list); } else { nft_clear(net, nft_trans_flowtable(trans)); nf_tables_flowtable_notify(&ctx, -- cgit v1.2.3 From 10f79dbd7719d1da9f5884d13060322d8729f091 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 15 Apr 2026 22:58:23 +0200 Subject: netfilter: nf_tables: add hook transactions for device deletions Restore the flag that indicates that the hook is going away, ie. NFT_HOOK_REMOVE, but add a new transaction object to track deletion of hooks without altering the basechain/flowtable hook_list during the preparation phase. The existing approach that moves the hook from the basechain/flowtable hook_list to transaction hook_list breaks netlink dump path readers of this RCU-protected list. It should be possible use an array for nft_trans_hook to store the deleted hooks to compact the representation but I am not expecting many hook object, specially now that wildcard support for devices is in place. Note that the nft_trans_chain_hooks() list contains a list of struct nft_trans_hook objects for DELCHAIN and DELFLOWTABLE commands, while this list stores struct nft_hook objects for NEWCHAIN and NEWFLOWTABLE. Note that new commands can be updated to use nft_trans_hook for consistency. This patch also adapts the event notification path to deal with the list of hook transactions. Fixes: 7d937b107108 ("netfilter: nf_tables: support for deleting devices in an existing netdev chain") Fixes: b6d9014a3335 ("netfilter: nf_tables: delete flowtable hooks via transaction list") Reported-by: Xiang Mei Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 13 ++ net/netfilter/nf_tables_api.c | 264 +++++++++++++++++++++++++++++--------- 2 files changed, 217 insertions(+), 60 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2c0173d9309c..cff7b773e972 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1204,12 +1204,15 @@ struct nft_stats { struct u64_stats_sync syncp; }; +#define NFT_HOOK_REMOVE (1 << 0) + struct nft_hook { struct list_head list; struct list_head ops_list; struct rcu_head rcu; char ifname[IFNAMSIZ]; u8 ifnamelen; + u8 flags; }; struct nf_hook_ops *nft_hook_find_ops(const struct nft_hook *hook, @@ -1664,6 +1667,16 @@ struct nft_trans { u8 put_net:1; }; +/** + * struct nft_trans_hook - nf_tables hook update in transaction + * @list: used internally + * @hook: struct nft_hook with the device hook + */ +struct nft_trans_hook { + struct list_head list; + struct nft_hook *hook; +}; + /** * struct nft_trans_binding - nf_tables object with binding support in transaction * @nft_trans: base structure, MUST be first member diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ae10116af923..d20ce5c36d31 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -380,6 +380,32 @@ static void nft_netdev_hook_unlink_free_rcu(struct nft_hook *hook) nft_netdev_hook_free_rcu(hook); } +static void nft_trans_hook_destroy(struct nft_trans_hook *trans_hook) +{ + list_del(&trans_hook->list); + kfree(trans_hook); +} + +static void nft_netdev_unregister_trans_hook(struct net *net, + const struct nft_table *table, + struct list_head *hook_list) +{ + struct nft_trans_hook *trans_hook, *next; + struct nf_hook_ops *ops; + struct nft_hook *hook; + + list_for_each_entry_safe(trans_hook, next, hook_list, list) { + hook = trans_hook->hook; + + if (!(table->flags & NFT_TABLE_F_DORMANT)) { + list_for_each_entry(ops, &hook->ops_list, list) + nf_unregister_net_hook(net, ops); + } + nft_netdev_hook_unlink_free_rcu(hook); + nft_trans_hook_destroy(trans_hook); + } +} + static void nft_netdev_unregister_hooks(struct net *net, struct list_head *hook_list, bool release_netdev) @@ -1946,15 +1972,69 @@ static int nft_nla_put_hook_dev(struct sk_buff *skb, struct nft_hook *hook) return nla_put_string(skb, attr, hook->ifname); } +struct nft_hook_dump_ctx { + struct nft_hook *first; + int n; +}; + +static int nft_dump_basechain_hook_one(struct sk_buff *skb, + struct nft_hook *hook, + struct nft_hook_dump_ctx *dump_ctx) +{ + if (!dump_ctx->first) + dump_ctx->first = hook; + + if (nft_nla_put_hook_dev(skb, hook)) + return -1; + + dump_ctx->n++; + + return 0; +} + +static int nft_dump_basechain_hook_list(struct sk_buff *skb, + const struct net *net, + const struct list_head *hook_list, + struct nft_hook_dump_ctx *dump_ctx) +{ + struct nft_hook *hook; + int err; + + list_for_each_entry_rcu(hook, hook_list, list, + lockdep_commit_lock_is_held(net)) { + err = nft_dump_basechain_hook_one(skb, hook, dump_ctx); + if (err < 0) + return err; + } + + return 0; +} + +static int nft_dump_basechain_trans_hook_list(struct sk_buff *skb, + const struct list_head *trans_hook_list, + struct nft_hook_dump_ctx *dump_ctx) +{ + struct nft_trans_hook *trans_hook; + int err; + + list_for_each_entry(trans_hook, trans_hook_list, list) { + err = nft_dump_basechain_hook_one(skb, trans_hook->hook, dump_ctx); + if (err < 0) + return err; + } + + return 0; +} + static int nft_dump_basechain_hook(struct sk_buff *skb, const struct net *net, int family, const struct nft_base_chain *basechain, - const struct list_head *hook_list) + const struct list_head *hook_list, + const struct list_head *trans_hook_list) { const struct nf_hook_ops *ops = &basechain->ops; - struct nft_hook *hook, *first = NULL; + struct nft_hook_dump_ctx dump_hook_ctx = {}; struct nlattr *nest, *nest_devs; - int n = 0; nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK); if (nest == NULL) @@ -1969,23 +2049,23 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, if (!nest_devs) goto nla_put_failure; - if (!hook_list) + if (!hook_list && !trans_hook_list) hook_list = &basechain->hook_list; - list_for_each_entry_rcu(hook, hook_list, list, - lockdep_commit_lock_is_held(net)) { - if (!first) - first = hook; - - if (nft_nla_put_hook_dev(skb, hook)) - goto nla_put_failure; - n++; + if (hook_list && + nft_dump_basechain_hook_list(skb, net, hook_list, &dump_hook_ctx)) { + goto nla_put_failure; + } else if (trans_hook_list && + nft_dump_basechain_trans_hook_list(skb, trans_hook_list, + &dump_hook_ctx)) { + goto nla_put_failure; } + nla_nest_end(skb, nest_devs); - if (n == 1 && - !hook_is_prefix(first) && - nla_put_string(skb, NFTA_HOOK_DEV, first->ifname)) + if (dump_hook_ctx.n == 1 && + !hook_is_prefix(dump_hook_ctx.first) && + nla_put_string(skb, NFTA_HOOK_DEV, dump_hook_ctx.first->ifname)) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -1999,7 +2079,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, int event, u32 flags, int family, const struct nft_table *table, const struct nft_chain *chain, - const struct list_head *hook_list) + const struct list_head *hook_list, + const struct list_head *trans_hook_list) { struct nlmsghdr *nlh; @@ -2015,7 +2096,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, NFTA_CHAIN_PAD)) goto nla_put_failure; - if (!hook_list && + if (!hook_list && !trans_hook_list && (event == NFT_MSG_DELCHAIN || event == NFT_MSG_DESTROYCHAIN)) { nlmsg_end(skb, nlh); @@ -2026,7 +2107,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, const struct nft_base_chain *basechain = nft_base_chain(chain); struct nft_stats __percpu *stats; - if (nft_dump_basechain_hook(skb, net, family, basechain, hook_list)) + if (nft_dump_basechain_hook(skb, net, family, basechain, + hook_list, trans_hook_list)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CHAIN_POLICY, @@ -2062,7 +2144,8 @@ nla_put_failure: } static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event, - const struct list_head *hook_list) + const struct list_head *hook_list, + const struct list_head *trans_hook_list) { struct nftables_pernet *nft_net; struct sk_buff *skb; @@ -2082,7 +2165,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event, err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq, event, flags, ctx->family, ctx->table, - ctx->chain, hook_list); + ctx->chain, hook_list, trans_hook_list); if (err < 0) { kfree_skb(skb); goto err; @@ -2128,7 +2211,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb, NFT_MSG_NEWCHAIN, NLM_F_MULTI, table->family, table, - chain, NULL) < 0) + chain, NULL, NULL) < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -2182,7 +2265,7 @@ static int nf_tables_getchain(struct sk_buff *skb, const struct nfnl_info *info, err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, - 0, family, table, chain, NULL); + 0, family, table, chain, NULL, NULL); if (err < 0) goto err_fill_chain_info; @@ -2345,8 +2428,12 @@ static struct nft_hook *nft_hook_list_find(struct list_head *hook_list, list_for_each_entry(hook, hook_list, list) { if (!strncmp(hook->ifname, this->ifname, - min(hook->ifnamelen, this->ifnamelen))) + min(hook->ifnamelen, this->ifnamelen))) { + if (hook->flags & NFT_HOOK_REMOVE) + continue; + return hook; + } } return NULL; @@ -3105,6 +3192,32 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, return nf_tables_addchain(&ctx, family, policy, flags, extack); } +static int nft_trans_delhook(struct nft_hook *hook, + struct list_head *del_list) +{ + struct nft_trans_hook *trans_hook; + + trans_hook = kmalloc_obj(*trans_hook, GFP_KERNEL); + if (!trans_hook) + return -ENOMEM; + + trans_hook->hook = hook; + list_add_tail(&trans_hook->list, del_list); + hook->flags |= NFT_HOOK_REMOVE; + + return 0; +} + +static void nft_trans_delhook_abort(struct list_head *del_list) +{ + struct nft_trans_hook *trans_hook, *next; + + list_for_each_entry_safe(trans_hook, next, del_list, list) { + trans_hook->hook->flags &= ~NFT_HOOK_REMOVE; + nft_trans_hook_destroy(trans_hook); + } +} + static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_base_chain *basechain, struct netlink_ext_ack *extack) @@ -3131,7 +3244,10 @@ static int nft_delchain_hook(struct nft_ctx *ctx, err = -ENOENT; goto err_chain_del_hook; } - list_move(&hook->list, &chain_del_list); + if (nft_trans_delhook(hook, &chain_del_list) < 0) { + err = -ENOMEM; + goto err_chain_del_hook; + } } trans = nft_trans_alloc_chain(ctx, NFT_MSG_DELCHAIN); @@ -3151,7 +3267,7 @@ static int nft_delchain_hook(struct nft_ctx *ctx, return 0; err_chain_del_hook: - list_splice(&chain_del_list, &basechain->hook_list); + nft_trans_delhook_abort(&chain_del_list); nft_chain_release_hook(&chain_hook); return err; @@ -8941,6 +9057,24 @@ static void nft_hooks_destroy(struct list_head *hook_list) nft_netdev_hook_unlink_free_rcu(hook); } +static void nft_flowtable_unregister_trans_hook(struct net *net, + struct nft_flowtable *flowtable, + struct list_head *hook_list) +{ + struct nft_trans_hook *trans_hook, *next; + struct nf_hook_ops *ops; + struct nft_hook *hook; + + list_for_each_entry_safe(trans_hook, next, hook_list, list) { + hook = trans_hook->hook; + list_for_each_entry(ops, &hook->ops_list, list) + nft_unregister_flowtable_ops(net, flowtable, ops); + + nft_netdev_hook_unlink_free_rcu(hook); + nft_trans_hook_destroy(trans_hook); + } +} + static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, struct nft_flowtable *flowtable, struct netlink_ext_ack *extack) @@ -9199,7 +9333,10 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, err = -ENOENT; goto err_flowtable_del_hook; } - list_move(&hook->list, &flowtable_del_list); + if (nft_trans_delhook(hook, &flowtable_del_list) < 0) { + err = -ENOMEM; + goto err_flowtable_del_hook; + } } trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE, @@ -9220,7 +9357,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, return 0; err_flowtable_del_hook: - list_splice(&flowtable_del_list, &flowtable->hook_list); + nft_trans_delhook_abort(&flowtable_del_list); nft_flowtable_hook_release(&flowtable_hook); return err; @@ -9285,8 +9422,10 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, int event, u32 flags, int family, struct nft_flowtable *flowtable, - struct list_head *hook_list) + struct list_head *hook_list, + struct list_head *trans_hook_list) { + struct nft_trans_hook *trans_hook; struct nlattr *nest, *nest_devs; struct nft_hook *hook; struct nlmsghdr *nlh; @@ -9303,7 +9442,7 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, NFTA_FLOWTABLE_PAD)) goto nla_put_failure; - if (!hook_list && + if (!hook_list && !trans_hook_list && (event == NFT_MSG_DELFLOWTABLE || event == NFT_MSG_DESTROYFLOWTABLE)) { nlmsg_end(skb, nlh); @@ -9325,13 +9464,20 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, if (!nest_devs) goto nla_put_failure; - if (!hook_list) + if (!hook_list && !trans_hook_list) hook_list = &flowtable->hook_list; - list_for_each_entry_rcu(hook, hook_list, list, - lockdep_commit_lock_is_held(net)) { - if (nft_nla_put_hook_dev(skb, hook)) - goto nla_put_failure; + if (hook_list) { + list_for_each_entry_rcu(hook, hook_list, list, + lockdep_commit_lock_is_held(net)) { + if (nft_nla_put_hook_dev(skb, hook)) + goto nla_put_failure; + } + } else if (trans_hook_list) { + list_for_each_entry(trans_hook, trans_hook_list, list) { + if (nft_nla_put_hook_dev(skb, trans_hook->hook)) + goto nla_put_failure; + } } nla_nest_end(skb, nest_devs); nla_nest_end(skb, nest); @@ -9385,7 +9531,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb, NFT_MSG_NEWFLOWTABLE, NLM_F_MULTI | NLM_F_APPEND, table->family, - flowtable, NULL) < 0) + flowtable, NULL, NULL) < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -9485,7 +9631,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb, err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFT_MSG_NEWFLOWTABLE, 0, family, - flowtable, NULL); + flowtable, NULL, NULL); if (err < 0) goto err_fill_flowtable_info; @@ -9498,7 +9644,9 @@ err_fill_flowtable_info: static void nf_tables_flowtable_notify(struct nft_ctx *ctx, struct nft_flowtable *flowtable, - struct list_head *hook_list, int event) + struct list_head *hook_list, + struct list_head *trans_hook_list, + int event) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); struct sk_buff *skb; @@ -9518,7 +9666,8 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, ctx->seq, event, flags, - ctx->family, flowtable, hook_list); + ctx->family, flowtable, + hook_list, trans_hook_list); if (err < 0) { kfree_skb(skb); goto err; @@ -10052,9 +10201,7 @@ static void nft_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: - if (nft_trans_chain_update(trans)) - nft_hooks_destroy(&nft_trans_chain_hooks(trans)); - else + if (!nft_trans_chain_update(trans)) nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_DELRULE: @@ -10075,9 +10222,7 @@ static void nft_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: - if (nft_trans_flowtable_update(trans)) - nft_hooks_destroy(&nft_trans_flowtable_hooks(trans)); - else + if (!nft_trans_flowtable_update(trans)) nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); break; } @@ -10837,31 +10982,28 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) if (nft_trans_chain_update(trans)) { nft_chain_commit_update(nft_trans_container_chain(trans)); nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, - &nft_trans_chain_hooks(trans)); + &nft_trans_chain_hooks(trans), NULL); list_splice_rcu(&nft_trans_chain_hooks(trans), &nft_trans_basechain(trans)->hook_list); /* trans destroyed after rcu grace period */ } else { nft_chain_commit_drop_policy(nft_trans_container_chain(trans)); nft_clear(net, nft_trans_chain(trans)); - nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, NULL); + nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, NULL, NULL); nft_trans_destroy(trans); } break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: if (nft_trans_chain_update(trans)) { - nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, + nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, NULL, &nft_trans_chain_hooks(trans)); - if (!(table->flags & NFT_TABLE_F_DORMANT)) { - nft_netdev_unregister_hooks(net, - &nft_trans_chain_hooks(trans), - true); - } + nft_netdev_unregister_trans_hook(net, table, + &nft_trans_chain_hooks(trans)); } else { nft_chain_del(nft_trans_chain(trans)); nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, - NULL); + NULL, NULL); nf_tables_unregister_hook(ctx.net, ctx.table, nft_trans_chain(trans)); } @@ -10967,6 +11109,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), + NULL, NFT_MSG_NEWFLOWTABLE); list_splice_rcu(&nft_trans_flowtable_hooks(trans), &nft_trans_flowtable(trans)->hook_list); @@ -10975,6 +11118,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, + NULL, NFT_MSG_NEWFLOWTABLE); } nft_trans_destroy(trans); @@ -10984,16 +11128,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) if (nft_trans_flowtable_update(trans)) { nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), + NULL, &nft_trans_flowtable_hooks(trans), trans->msg_type); - nft_unregister_flowtable_net_hooks(net, - nft_trans_flowtable(trans), - &nft_trans_flowtable_hooks(trans)); + nft_flowtable_unregister_trans_hook(net, + nft_trans_flowtable(trans), + &nft_trans_flowtable_hooks(trans)); } else { list_del_rcu(&nft_trans_flowtable(trans)->list); nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, + NULL, trans->msg_type); nft_unregister_flowtable_net_hooks(net, nft_trans_flowtable(trans), @@ -11157,8 +11303,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: if (nft_trans_chain_update(trans)) { - list_splice(&nft_trans_chain_hooks(trans), - &nft_trans_basechain(trans)->hook_list); + nft_trans_delhook_abort(&nft_trans_chain_hooks(trans)); } else { nft_use_inc_restore(&table->use); nft_clear(trans->net, nft_trans_chain(trans)); @@ -11272,8 +11417,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: if (nft_trans_flowtable_update(trans)) { - list_splice(&nft_trans_flowtable_hooks(trans), - &nft_trans_flowtable(trans)->hook_list); + nft_trans_delhook_abort(&nft_trans_flowtable_hooks(trans)); } else { nft_use_inc_restore(&table->use); nft_clear(trans->net, nft_trans_flowtable(trans)); -- cgit v1.2.3 From 36920f30e78e69df01f9691c470b6f3ba8aebf98 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 20 Apr 2026 12:50:09 -0500 Subject: ipmi: Check event message buffer response for bad data The event message buffer response data size got checked later when processing, but check it right after the response comes back. It appears some BMCs may return an empty message instead of an error when fetching events. There are apparently some new BMCs that make this error, so we need to compensate. Reported-by: Matt Fleming Closes: https://lore.kernel.org/lkml/20260415115930.3428942-1-matt@readmodwrite.com/ Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 4a9e9de4d684..08c208cc64c5 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -630,7 +630,13 @@ static void handle_transaction_done(struct smi_info *smi_info) */ msg = smi_info->curr_msg; smi_info->curr_msg = NULL; - if (msg->rsp[2] != 0) { + /* + * It appears some BMCs, with no event data, return no + * data in the message and not a 0x80 error as the + * spec says they should. Shut down processing if + * the data is not the right length. + */ + if (msg->rsp[2] != 0 || msg->rsp_size != 19) { /* Error getting event, probably done. */ msg->done(msg); -- cgit v1.2.3 From 05909810a946222aca5d0611d37be82d18f95228 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 20 Apr 2026 21:17:11 -1000 Subject: tools/sched_ext: scx_qmap: Silence task_ctx lookup miss scx_fork() dispatches ops.init_task to exactly one scheduler - the one owning the forking task's cgroup. A task forked inside a sub-scheduler's cgroup is init'd into the sub only; the root scheduler has no task_ctx entry for it. When that task later appears as @prev in the root's qmap_dispatch() (or flows through core-sched comparison via task_qdist), the bpf_task_storage_get() legitimately misses. qmap treated those misses as fatal via scx_bpf_error("task_ctx lookup failed") and aborted the scheduler as soon as the first cross-sched task hit the root. Drop the error in the sites where the miss is legitimate: lookup_task_ctx() (helper; callers already check for NULL), qmap_dispatch()'s @prev branch (bookkeeping-only), task_qdist() (returns 0 which makes the comparison a no-op), and qmap_select_cpu() (returns prev_cpu as a no-op fallback instead of -ESRCH). The existing scx_error was a paranoid guard from the pre-sub-sched world where every task was owned by the one and only scheduler. v2: qmap_select_cpu() returns prev_cpu on NULL instead of -ESRCH, so the root scheduler doesn't error on cross-sched tasks that pass through it (Andrea Righi). Fixes: 4f8b122848db ("sched_ext: Add basic building blocks for nested sub-scheduler dispatching") Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi Reviewed-by: Zhao Mengmeng --- tools/sched_ext/scx_qmap.bpf.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index b68abb9e760b..aad698fe294b 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -159,13 +159,7 @@ static s32 pick_direct_dispatch_cpu(struct task_struct *p, s32 prev_cpu) static struct task_ctx *lookup_task_ctx(struct task_struct *p) { - struct task_ctx *tctx; - - if (!(tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0))) { - scx_bpf_error("task_ctx lookup failed"); - return NULL; - } - return tctx; + return bpf_task_storage_get(&task_ctx_stor, p, 0, 0); } s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p, @@ -175,7 +169,7 @@ s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p, s32 cpu; if (!(tctx = lookup_task_ctx(p))) - return -ESRCH; + return prev_cpu; if (p->scx.weight < 2 && !(p->flags & PF_KTHREAD)) return prev_cpu; @@ -540,13 +534,9 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) */ if (prev) { tctx = bpf_task_storage_get(&task_ctx_stor, prev, 0, 0); - if (!tctx) { - scx_bpf_error("task_ctx lookup failed"); - return; - } - - tctx->core_sched_seq = - core_sched_tail_seqs[weight_to_idx(prev->scx.weight)]++; + if (tctx) + tctx->core_sched_seq = + core_sched_tail_seqs[weight_to_idx(prev->scx.weight)]++; } } @@ -584,10 +574,8 @@ static s64 task_qdist(struct task_struct *p) s64 qdist; tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0); - if (!tctx) { - scx_bpf_error("task_ctx lookup failed"); + if (!tctx) return 0; - } qdist = tctx->core_sched_seq - core_sched_head_seqs[idx]; -- cgit v1.2.3 From b06cf63d83d3b3744d3aefdd2f3ced25e99d7ec1 Mon Sep 17 00:00:00 2001 From: Wang Shuaiwei Date: Tue, 14 Apr 2026 11:37:18 +0800 Subject: scsi: ufs: core: Fix bRefClkFreq write failure in HS-LSS mode According to the UFS spec, the bRefClkFreq attribute can only be written when both sub-links are in LS-MODE. However, in HS LSS mode with resetmode = HS_MODE, if the UFS device's default bRefClkFreq value differs from the host controller's dev_ref_clk_freq setting, the write operation will fail. To fix this issue, introduce ufshcd_get_op_mode() function to detect the current link operational mode. Call ufshcd_set_dev_ref_clk() only when both sub-links are in LS-MODE to ensure the attribute can be written successfully. Signed-off-by: Wang Shuaiwei Link: https://patch.msgid.link/20260414033718.1459540-1-wangshuaiwei1@xiaomi.com Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 30 ++++++++++++++++++++++++++++-- include/ufs/unipro.h | 5 +++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 4805e40ed4d7..c3f08957d179 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -9259,6 +9259,30 @@ static void ufshcd_config_mcq(struct ufs_hba *hba) hba->nutrs); } +/** + * ufshcd_get_op_mode - get UFS operating mode. + * @hba: per-adapter instance + * + * Use the PA_PWRMODE value to represent the operating mode of UFS. + * + */ +static enum ufs_op_mode ufshcd_get_op_mode(struct ufs_hba *hba) +{ + u32 mode; + u8 rx_mode; + u8 tx_mode; + + ufshcd_dme_get(hba, UIC_ARG_MIB(PA_PWRMODE), &mode); + rx_mode = (mode >> PWRMODE_RX_OFFSET) & PWRMODE_MASK; + tx_mode = mode & PWRMODE_MASK; + + if ((rx_mode == SLOW_MODE || rx_mode == SLOWAUTO_MODE) && + (tx_mode == SLOW_MODE || tx_mode == SLOWAUTO_MODE)) + return LS_MODE; + + return HS_MODE; +} + static int ufshcd_post_device_init(struct ufs_hba *hba) { int ret; @@ -9281,11 +9305,13 @@ static int ufshcd_post_device_init(struct ufs_hba *hba) return 0; /* - * Set the right value to bRefClkFreq before attempting to + * Set the right value to bRefClkFreq in LS_MODE before attempting to * switch to HS gears. */ - if (hba->dev_ref_clk_freq != REF_CLK_FREQ_INVAL) + if (ufshcd_get_op_mode(hba) == LS_MODE && + hba->dev_ref_clk_freq != REF_CLK_FREQ_INVAL) ufshcd_set_dev_ref_clk(hba); + /* Gear up to HS gear. */ ret = ufshcd_config_pwr_mode(hba, &hba->max_pwr_info.info, UFSHCD_PMC_POLICY_DONT_FORCE); diff --git a/include/ufs/unipro.h b/include/ufs/unipro.h index f849a2a101ae..9c168703b104 100644 --- a/include/ufs/unipro.h +++ b/include/ufs/unipro.h @@ -333,6 +333,11 @@ enum ufs_eom_eye_mask { #define DME_LocalTC0ReplayTimeOutVal 0xD042 #define DME_LocalAFC0ReqTimeOutVal 0xD043 +enum ufs_op_mode { + LS_MODE = 1, + HS_MODE = 2, +}; + /* PA power modes */ enum ufs_pa_pwr_mode { FAST_MODE = 1, -- cgit v1.2.3 From 2f3835771dff512750205aa5f5f61aec0f2b8cb7 Mon Sep 17 00:00:00 2001 From: Carlos Bilbao Date: Tue, 14 Apr 2026 21:07:28 -0700 Subject: scsi: target: iscsi: reject invalid size Extended CDB AHS If ecdb_ahdr->ahslength is zero, two bugs follow: kmalloc(be16_to_cpu(ecdb_ahdr->ahslength) + 15, ...) allocates 15 bytes, but the immediately following memcpy writes ISCSI_CDB_SIZE (16) bytes into it, a one-byte heap overflow. Also: memcpy(cdb + ISCSI_CDB_SIZE, ecdb_ahdr->ecdb, be16_to_cpu(ecdb_ahdr->ahslength) - 1); (u16)0 - 1 promotes to (int)-1 which converts to SIZE_MAX as size_t, causing a massive out-of-bounds write. Reject ahslength == 0 with ISCSI_REASON_PROTOCOL_ERROR before the kmalloc. Also reject ahslength values that exceed the actual AHS buffer advertised. Fixes: 8f1f7d297bce ("scsi: target: iscsi: Add support for extended CDB AHS") Signed-off-by: Carlos Bilbao Reviewed-by: Dmitry Bogdanov Link: https://patch.msgid.link/20260415040728.187680-1-carlos.bilbao@kernel.org Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index e80449f6ce15..cb832fd523af 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -995,6 +995,7 @@ int iscsit_setup_scsi_cmd(struct iscsit_conn *conn, struct iscsit_cmd *cmd, int data_direction, payload_length; struct iscsi_ecdb_ahdr *ecdb_ahdr; struct iscsi_scsi_req *hdr; + u16 ahslength, cdb_length; int iscsi_task_attr; unsigned char *cdb; int sam_task_attr; @@ -1108,14 +1109,27 @@ int iscsit_setup_scsi_cmd(struct iscsit_conn *conn, struct iscsit_cmd *cmd, ISCSI_REASON_CMD_NOT_SUPPORTED, buf); } - cdb = kmalloc(be16_to_cpu(ecdb_ahdr->ahslength) + 15, - GFP_KERNEL); + ahslength = be16_to_cpu(ecdb_ahdr->ahslength); + if (!ahslength) { + pr_err("Extended CDB AHS with zero length, protocol error.\n"); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); + } + if (ahslength > (hdr->hlength * 4) - 3) { + pr_err("Extended CDB AHS length %u exceeds available PDU buffer.\n", + ahslength); + return iscsit_add_reject_cmd(cmd, + ISCSI_REASON_PROTOCOL_ERROR, buf); + } + + cdb_length = ahslength - 1 + ISCSI_CDB_SIZE; + + cdb = kmalloc(cdb_length, GFP_KERNEL); if (cdb == NULL) return iscsit_add_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); memcpy(cdb, hdr->cdb, ISCSI_CDB_SIZE); - memcpy(cdb + ISCSI_CDB_SIZE, ecdb_ahdr->ecdb, - be16_to_cpu(ecdb_ahdr->ahslength) - 1); + memcpy(cdb + ISCSI_CDB_SIZE, ecdb_ahdr->ecdb, cdb_length - ISCSI_CDB_SIZE); } data_direction = (hdr->flags & ISCSI_FLAG_CMD_WRITE) ? DMA_TO_DEVICE : -- cgit v1.2.3 From 55d41b0a20128e86b9e960dd2e3f0a2d69a18df7 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Mon, 13 Apr 2026 17:12:40 -0400 Subject: udf: reject descriptors with oversized CRC length udf_read_tagged() skips CRC verification when descCRCLength + sizeof(struct tag) exceeds the block size. A crafted UDF image can set descCRCLength to an oversized value to bypass CRC validation entirely; the descriptor is then accepted based solely on the 8-bit tag checksum, which is trivially recomputable. Reject such descriptors instead of silently accepting them. A legitimate single-block descriptor should never have a CRC length that exceeds the block. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-6 Assisted-by: Codex:gpt-5-4 Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260413211240.853662-1-michael.bommarito@gmail.com Signed-off-by: Jan Kara --- fs/udf/misc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/udf/misc.c b/fs/udf/misc.c index 0788593b6a1d..6928e378fbbd 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -230,8 +230,12 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, } /* Verify the descriptor CRC */ - if (le16_to_cpu(tag_p->descCRCLength) + sizeof(struct tag) > sb->s_blocksize || - le16_to_cpu(tag_p->descCRC) == crc_itu_t(0, + if (le16_to_cpu(tag_p->descCRCLength) + sizeof(struct tag) > sb->s_blocksize) { + udf_err(sb, "block %u: CRC length %u exceeds block size\n", + block, le16_to_cpu(tag_p->descCRCLength)); + goto error_out; + } + if (le16_to_cpu(tag_p->descCRC) == crc_itu_t(0, bh->b_data + sizeof(struct tag), le16_to_cpu(tag_p->descCRCLength))) return bh; -- cgit v1.2.3 From bddb911d28d4412a9462e73766a706ff0d74fa77 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 20 Apr 2026 09:02:28 -0700 Subject: nvme: skip trace completion for host path errors The command was never dispatched for the driver's "host path error", so the command was never actually initialized and there's no corresponding submit trace for the completion. Reported-by: Minsik Jeon Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fead3b7cd4be..d62adebfdc68 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -454,11 +454,10 @@ void nvme_end_req(struct request *req) blk_mq_end_request(req, status); } -void nvme_complete_rq(struct request *req) +static void __nvme_complete_rq(struct request *req) { struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; - trace_nvme_complete_rq(req); nvme_cleanup_cmd(req); /* @@ -493,6 +492,12 @@ void nvme_complete_rq(struct request *req) return; } } + +void nvme_complete_rq(struct request *req) +{ + trace_nvme_complete_rq(req); + __nvme_complete_rq(req); +} EXPORT_SYMBOL_GPL(nvme_complete_rq); void nvme_complete_batch_req(struct request *req) @@ -513,7 +518,7 @@ blk_status_t nvme_host_path_error(struct request *req) { nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR; blk_mq_set_request_complete(req); - nvme_complete_rq(req); + __nvme_complete_rq(req); return BLK_STS_OK; } EXPORT_SYMBOL_GPL(nvme_host_path_error); -- cgit v1.2.3 From f920ebd03cd13eb0976d18de77adf325b5461361 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Fri, 17 Apr 2026 10:48:08 +1000 Subject: Revert "nvmet-tcp: Don't free SQ on authentication success" In an attempt to fix REPLACETLSPSK we stopped freeing the secrets on successful connections. This resulted in memory leaks in the kernel, so let's revert the commit. A improved fix is being developed to just avoid clearing the tls_key variable. This reverts commit 2e6eb6b277f593b98f151ea8eff1beb558bbea3b. Closes: https://lore.kernel.org/linux-nvme/CAHj4cs-u3MWQR4idywptMfjEYi4YwObWFx4KVib35dZ5HMBDdw@mail.gmail.com Reviewed-by: Chris Leech Reviewed-by: Hannes Reinecke Signed-off-by: Alistair Francis Signed-off-by: Keith Busch --- drivers/nvme/target/fabrics-cmd-auth.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index b9ab80c7a694..f1e613e7c63e 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -395,10 +395,9 @@ done: goto complete; } /* Final states, clear up variables */ - if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE2) { - nvmet_auth_sq_free(req->sq); + nvmet_auth_sq_free(req->sq); + if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE2) nvmet_ctrl_fatal_error(ctrl); - } complete: nvmet_req_complete(req, status); @@ -574,7 +573,9 @@ void nvmet_execute_auth_receive(struct nvmet_req *req) status = nvmet_copy_to_sgl(req, 0, d, al); kfree(d); done: - if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE1) { + if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2) + nvmet_auth_sq_free(req->sq); + else if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE1) { nvmet_auth_sq_free(req->sq); nvmet_ctrl_fatal_error(ctrl); } -- cgit v1.2.3 From 5fc422951c962cc01e654950fc043ebd8fadd865 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Fri, 17 Apr 2026 10:48:09 +1000 Subject: nvmet-tcp: Don't clear tls_key when freeing sq Curently after the host sends a REPLACETLSPSK we free the TLS keys as part of calling nvmet_auth_sq_free() on success. This means when the host sends a follow up REPLACETLSPSK we return CONCAT_MISMATCH as the check for !nvmet_queue_tls_keyid(req->sq) fails. A previous attempt to fix this involed not calling nvmet_auth_sq_free() on successful connections, but that results in memory leaks. Instead we should not clear `tls_key` in nvmet_auth_sq_free(), as that was incorrectly wiping the tls keys which are used for the session. This patch ensures we correctly free the ephemeral session key on connection, yet we don't free the TLS key unless closing the connection. Reviewed-by: Chris Leech Reviewed-by: Hannes Reinecke Signed-off-by: Alistair Francis Signed-off-by: Keith Busch --- drivers/nvme/target/auth.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index b34610e2f19d..723b6d5604c1 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -229,9 +229,6 @@ out_unlock: void nvmet_auth_sq_free(struct nvmet_sq *sq) { cancel_delayed_work(&sq->auth_expired_work); -#ifdef CONFIG_NVME_TARGET_TCP_TLS - sq->tls_key = NULL; -#endif kfree(sq->dhchap_c1); sq->dhchap_c1 = NULL; kfree(sq->dhchap_c2); -- cgit v1.2.3 From 26bb12b9caafa2e62d638104bf2732f610cdbb0b Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 13 Apr 2026 10:16:28 -0700 Subject: nvme-tcp: teardown circular locking fixes When a controller reset is triggered via sysfs (by writing to /sys/class/nvme//reset_controller), the reset work tears down and re-establishes all queues. The socket release using fput() defers the actual cleanup to task_work delayed_fput workqueue. This deferred cleanup can race with the subsequent queue re-allocation during reset, potentially leading to use-after-free or resource conflicts. Replace fput() with __fput_sync() to ensure synchronous socket release, guaranteeing that all socket resources are fully cleaned up before the function returns. This prevents races during controller reset where new queue setup may begin before the old socket is fully released. * Call chain during reset: nvme_reset_ctrl_work() -> nvme_tcp_teardown_ctrl() -> nvme_tcp_teardown_io_queues() -> nvme_tcp_free_io_queues() -> nvme_tcp_free_queue() <-- fput() -> __fput_sync() -> nvme_tcp_teardown_admin_queue() -> nvme_tcp_free_admin_queue() -> nvme_tcp_free_queue() <-- fput() -> __fput_sync() -> nvme_tcp_setup_ctrl() <-- race with deferred fput memalloc_noreclaim_save() sets PF_MEMALLOC which is intended for tasks performing memory reclaim work that need reserve access. While PF_MEMALLOC prevents the task from entering direct reclaim (causing __need_reclaim() to return false), it does not strip __GFP_IO from gfp flags. The allocator can therefore still trigger writeback I/O when __GFP_IO remains set, which is unsafe when the caller holds block layer locks. Switch to memalloc_noio_save() which sets PF_MEMALLOC_NOIO. This causes current_gfp_context() to strip __GFP_IO|__GFP_FS from every allocation in the scope, making it safe to allocate memory while holding elevator_lock and set->srcu. * The issue can be reproduced using blktests: nvme_trtype=tcp ./check nvme/005 blktests (master) # nvme_trtype=tcp ./check nvme/005 nvme/005 (tr=tcp) (reset local loopback target) [failed] runtime 0.725s ... 0.798s something found in dmesg: [ 108.473940] run blktests nvme/005 at 2025-11-22 16:12:20 [...] ... (See '/root/blktests/results/nodev_tr_tcp/nvme/005.dmesg' for the entire message) blktests (master) # cat /root/blktests/results/nodev_tr_tcp/nvme/005.dmesg [ 108.473940] run blktests nvme/005 at 2025-11-22 16:12:20 [ 108.526983] loop0: detected capacity change from 0 to 2097152 [ 108.555606] nvmet: adding nsid 1 to subsystem blktests-subsystem-1 [ 108.572531] nvmet_tcp: enabling port 0 (127.0.0.1:4420) [ 108.613061] nvmet: Created nvm controller 1 for subsystem blktests-subsystem-1 for NQN nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349. [ 108.616832] nvme nvme0: creating 48 I/O queues. [ 108.630791] nvme nvme0: mapped 48/0/0 default/read/poll queues. [ 108.661892] nvme nvme0: new ctrl: NQN "blktests-subsystem-1", addr 127.0.0.1:4420, hostnqn: nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349 [ 108.746639] nvmet: Created nvm controller 2 for subsystem blktests-subsystem-1 for NQN nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349. [ 108.748466] nvme nvme0: creating 48 I/O queues. [ 108.802984] nvme nvme0: mapped 48/0/0 default/read/poll queues. [ 108.829983] nvme nvme0: Removing ctrl: NQN "blktests-subsystem-1" [ 108.854288] block nvme0n1: no available path - failing I/O [ 108.854344] block nvme0n1: no available path - failing I/O [ 108.854373] Buffer I/O error on dev nvme0n1, logical block 1, async page read [ 108.891693] ====================================================== [ 108.895912] WARNING: possible circular locking dependency detected [ 108.900184] 6.17.0nvme+ #3 Tainted: G N [ 108.903913] ------------------------------------------------------ [ 108.908171] nvme/2734 is trying to acquire lock: [ 108.911957] ffff88810210e610 (set->srcu){.+.+}-{0:0}, at: __synchronize_srcu+0x17/0x170 [ 108.917587] but task is already holding lock: [ 108.921570] ffff88813abea198 (&q->elevator_lock){+.+.}-{4:4}, at: elevator_change+0xa8/0x1c0 [ 108.927361] which lock already depends on the new lock. [ 108.933018] the existing dependency chain (in reverse order) is: [ 108.938223] -> #4 (&q->elevator_lock){+.+.}-{4:4}: [ 108.942988] __mutex_lock+0xa2/0x1150 [ 108.945873] elevator_change+0xa8/0x1c0 [ 108.948925] elv_iosched_store+0xdf/0x140 [ 108.952043] kernfs_fop_write_iter+0x16a/0x220 [ 108.955367] vfs_write+0x378/0x520 [ 108.957598] ksys_write+0x67/0xe0 [ 108.959721] do_syscall_64+0x76/0xbb0 [ 108.962052] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 108.965145] -> #3 (&q->q_usage_counter(io)){++++}-{0:0}: [ 108.968923] blk_alloc_queue+0x30e/0x350 [ 108.972117] blk_mq_alloc_queue+0x61/0xd0 [ 108.974677] scsi_alloc_sdev+0x2a0/0x3e0 [ 108.977092] scsi_probe_and_add_lun+0x1bd/0x430 [ 108.979921] __scsi_add_device+0x109/0x120 [ 108.982504] ata_scsi_scan_host+0x97/0x1c0 [ 108.984365] async_run_entry_fn+0x2d/0x130 [ 108.986109] process_one_work+0x20e/0x630 [ 108.987830] worker_thread+0x184/0x330 [ 108.989473] kthread+0x10a/0x250 [ 108.990852] ret_from_fork+0x297/0x300 [ 108.992491] ret_from_fork_asm+0x1a/0x30 [ 108.994159] -> #2 (fs_reclaim){+.+.}-{0:0}: [ 108.996320] fs_reclaim_acquire+0x99/0xd0 [ 108.998058] kmem_cache_alloc_node_noprof+0x4e/0x3c0 [ 109.000123] __alloc_skb+0x15f/0x190 [ 109.002195] tcp_send_active_reset+0x3f/0x1e0 [ 109.004038] tcp_disconnect+0x50b/0x720 [ 109.005695] __tcp_close+0x2b8/0x4b0 [ 109.007227] tcp_close+0x20/0x80 [ 109.008663] inet_release+0x31/0x60 [ 109.010175] __sock_release+0x3a/0xc0 [ 109.011778] sock_close+0x14/0x20 [ 109.013263] __fput+0xee/0x2c0 [ 109.014673] delayed_fput+0x31/0x50 [ 109.016183] process_one_work+0x20e/0x630 [ 109.017897] worker_thread+0x184/0x330 [ 109.019543] kthread+0x10a/0x250 [ 109.020929] ret_from_fork+0x297/0x300 [ 109.022565] ret_from_fork_asm+0x1a/0x30 [ 109.024194] -> #1 (sk_lock-AF_INET-NVME){+.+.}-{0:0}: [ 109.026634] lock_sock_nested+0x2e/0x70 [ 109.028251] tcp_sendmsg+0x1a/0x40 [ 109.029783] sock_sendmsg+0xed/0x110 [ 109.031321] nvme_tcp_try_send_cmd_pdu+0x13e/0x260 [nvme_tcp] [ 109.034263] nvme_tcp_try_send+0xb3/0x330 [nvme_tcp] [ 109.036375] nvme_tcp_queue_rq+0x342/0x3d0 [nvme_tcp] [ 109.038528] blk_mq_dispatch_rq_list+0x297/0x800 [ 109.040448] __blk_mq_sched_dispatch_requests+0x3db/0x5f0 [ 109.042677] blk_mq_sched_dispatch_requests+0x29/0x70 [ 109.044787] blk_mq_run_work_fn+0x76/0x1b0 [ 109.046535] process_one_work+0x20e/0x630 [ 109.048245] worker_thread+0x184/0x330 [ 109.049890] kthread+0x10a/0x250 [ 109.051331] ret_from_fork+0x297/0x300 [ 109.053024] ret_from_fork_asm+0x1a/0x30 [ 109.054740] -> #0 (set->srcu){.+.+}-{0:0}: [ 109.056850] __lock_acquire+0x1468/0x2210 [ 109.058614] lock_sync+0xa5/0x110 [ 109.060048] __synchronize_srcu+0x49/0x170 [ 109.061802] elevator_switch+0xc9/0x330 [ 109.063950] elevator_change+0x128/0x1c0 [ 109.065675] elevator_set_none+0x4c/0x90 [ 109.067316] blk_unregister_queue+0xa8/0x110 [ 109.069165] __del_gendisk+0x14e/0x3c0 [ 109.070824] del_gendisk+0x75/0xa0 [ 109.072328] nvme_ns_remove+0xf2/0x230 [nvme_core] [ 109.074365] nvme_remove_namespaces+0xf2/0x150 [nvme_core] [ 109.076652] nvme_do_delete_ctrl+0x71/0x90 [nvme_core] [ 109.078775] nvme_delete_ctrl_sync+0x3b/0x50 [nvme_core] [ 109.081009] nvme_sysfs_delete+0x34/0x40 [nvme_core] [ 109.083082] kernfs_fop_write_iter+0x16a/0x220 [ 109.085009] vfs_write+0x378/0x520 [ 109.086539] ksys_write+0x67/0xe0 [ 109.087982] do_syscall_64+0x76/0xbb0 [ 109.089577] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 109.091665] other info that might help us debug this: [ 109.095478] Chain exists of: set->srcu --> &q->q_usage_counter(io) --> &q->elevator_lock [ 109.099544] Possible unsafe locking scenario: [ 109.101708] CPU0 CPU1 [ 109.103402] ---- ---- [ 109.105103] lock(&q->elevator_lock); [ 109.106530] lock(&q->q_usage_counter(io)); [ 109.109022] lock(&q->elevator_lock); [ 109.111391] sync(set->srcu); [ 109.112586] *** DEADLOCK *** [ 109.114772] 5 locks held by nvme/2734: [ 109.116189] #0: ffff888101925410 (sb_writers#4){.+.+}-{0:0}, at: ksys_write+0x67/0xe0 [ 109.119143] #1: ffff88817a914e88 (&of->mutex#2){+.+.}-{4:4}, at: kernfs_fop_write_iter+0x10f/0x220 [ 109.123141] #2: ffff8881046313f8 (kn->active#185){++++}-{0:0}, at: sysfs_remove_file_self+0x26/0x50 [ 109.126543] #3: ffff88810470e1d0 (&set->update_nr_hwq_lock){++++}-{4:4}, at: del_gendisk+0x6d/0xa0 [ 109.129891] #4: ffff88813abea198 (&q->elevator_lock){+.+.}-{4:4}, at: elevator_change+0xa8/0x1c0 [ 109.133149] stack backtrace: [ 109.134817] CPU: 6 UID: 0 PID: 2734 Comm: nvme Tainted: G N 6.17.0nvme+ #3 PREEMPT(voluntary) [ 109.134819] Tainted: [N]=TEST [ 109.134820] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [ 109.134821] Call Trace: [ 109.134823] [ 109.134824] dump_stack_lvl+0x75/0xb0 [ 109.134828] print_circular_bug+0x26a/0x330 [ 109.134831] check_noncircular+0x12f/0x150 [ 109.134834] __lock_acquire+0x1468/0x2210 [ 109.134837] ? __synchronize_srcu+0x17/0x170 [ 109.134838] lock_sync+0xa5/0x110 [ 109.134840] ? __synchronize_srcu+0x17/0x170 [ 109.134842] __synchronize_srcu+0x49/0x170 [ 109.134843] ? mark_held_locks+0x49/0x80 [ 109.134845] ? _raw_spin_unlock_irqrestore+0x2d/0x60 [ 109.134847] ? kvm_clock_get_cycles+0x14/0x30 [ 109.134853] ? ktime_get_mono_fast_ns+0x36/0xb0 [ 109.134858] elevator_switch+0xc9/0x330 [ 109.134860] elevator_change+0x128/0x1c0 [ 109.134862] ? kernfs_put.part.0+0x86/0x290 [ 109.134864] elevator_set_none+0x4c/0x90 [ 109.134866] blk_unregister_queue+0xa8/0x110 [ 109.134868] __del_gendisk+0x14e/0x3c0 [ 109.134870] del_gendisk+0x75/0xa0 [ 109.134872] nvme_ns_remove+0xf2/0x230 [nvme_core] [ 109.134879] nvme_remove_namespaces+0xf2/0x150 [nvme_core] [ 109.134887] nvme_do_delete_ctrl+0x71/0x90 [nvme_core] [ 109.134893] nvme_delete_ctrl_sync+0x3b/0x50 [nvme_core] [ 109.134899] nvme_sysfs_delete+0x34/0x40 [nvme_core] [ 109.134905] kernfs_fop_write_iter+0x16a/0x220 [ 109.134908] vfs_write+0x378/0x520 [ 109.134911] ksys_write+0x67/0xe0 [ 109.134913] do_syscall_64+0x76/0xbb0 [ 109.134915] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 109.134916] RIP: 0033:0x7fd68a737317 [ 109.134917] Code: 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 [ 109.134919] RSP: 002b:00007ffded1546d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 109.134920] RAX: ffffffffffffffda RBX: 000000000054f7e0 RCX: 00007fd68a737317 [ 109.134921] RDX: 0000000000000001 RSI: 00007fd68a855719 RDI: 0000000000000003 [ 109.134921] RBP: 0000000000000003 R08: 0000000030407850 R09: 00007fd68a7cd4e0 [ 109.134922] R10: 00007fd68a65b130 R11: 0000000000000246 R12: 00007fd68a855719 [ 109.134923] R13: 00000000304074c0 R14: 00000000304074c0 R15: 0000000030408660 [ 109.134926] [ 109.962756] Key type psk unregistered Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 02c95c32b07e..15d36d6a728e 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1438,18 +1438,32 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; - unsigned int noreclaim_flag; + unsigned int noio_flag; if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) return; page_frag_cache_drain(&queue->pf_cache); - noreclaim_flag = memalloc_noreclaim_save(); - /* ->sock will be released by fput() */ - fput(queue->sock->file); + /** + * Prevent memory reclaim from triggering block I/O during socket + * teardown. The socket release path fput -> tcp_close -> + * tcp_disconnect -> tcp_send_active_reset may allocate memory, and + * allowing reclaim to issue I/O could deadlock if we're being called + * from block device teardown (e.g., del_gendisk -> elevator cleanup) + * which holds locks that the I/O completion path needs. + */ + noio_flag = memalloc_noio_save(); + + /** + * Release the socket synchronously. During reset in + * nvme_reset_ctrl_work(), queue teardown is immediately followed by + * re-allocation. fput() defers socket cleanup to delayed_fput_work + * in workqueue context, which can race with new queue setup. + */ + __fput_sync(queue->sock->file); queue->sock = NULL; - memalloc_noreclaim_restore(noreclaim_flag); + memalloc_noio_restore(noio_flag); kfree(queue->pdu); mutex_destroy(&queue->send_mutex); @@ -1901,8 +1915,8 @@ err_init_connect: err_rcv_pdu: kfree(queue->pdu); err_sock: - /* ->sock will be released by fput() */ - fput(queue->sock->file); + /* Use sync variant - see nvme_tcp_free_queue() for explanation */ + __fput_sync(queue->sock->file); queue->sock = NULL; err_destroy_mutex: mutex_destroy(&queue->send_mutex); -- cgit v1.2.3 From 5d10069e1a1691a0d8642e1fa65f4c1869210299 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Fri, 17 Apr 2026 10:50:48 +1000 Subject: nvme-auth: Include SC_C in RVAL controller hash Section 8.3.4.5.5 of the NVMe Base Specification 2.1 describes what is included in the Response Value (RVAL) hash and SC_C should be included. Currently we are hardcoding 0 instead of using the correct SC_C value. Update the host and target code to use the SC_C when calculating the RVAL instead of using 0. Fixes: e88a7595b57f2 ("nvme-tcp: request secure channel concatenation") Reviewed-by: Chris Leech Reviewed-by: Hannes Reinecke Signed-off-by: Alistair Francis Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 3 ++- drivers/nvme/target/auth.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index bbedbe181c8a..63f543e80998 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -535,11 +535,12 @@ static int nvme_auth_dhchap_setup_ctrl_response(struct nvme_ctrl *ctrl, put_unaligned_le16(chap->transaction, buf); nvme_auth_hmac_update(&hmac, buf, 2); - memset(buf, 0, 4); + *buf = chap->sc_c; nvme_auth_hmac_update(&hmac, buf, 1); nvme_auth_hmac_update(&hmac, "Controller", 10); nvme_auth_hmac_update(&hmac, ctrl->opts->subsysnqn, strlen(ctrl->opts->subsysnqn)); + memset(buf, 0, 4); nvme_auth_hmac_update(&hmac, buf, 1); nvme_auth_hmac_update(&hmac, ctrl->opts->host->nqn, strlen(ctrl->opts->host->nqn)); diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index 723b6d5604c1..c35c427ca2ac 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -399,11 +399,12 @@ int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response, put_unaligned_le16(req->sq->dhchap_tid, buf); nvme_auth_hmac_update(&hmac, buf, 2); - memset(buf, 0, 4); + *buf = req->sq->sc_c; nvme_auth_hmac_update(&hmac, buf, 1); nvme_auth_hmac_update(&hmac, "Controller", 10); nvme_auth_hmac_update(&hmac, ctrl->subsys->subsysnqn, strlen(ctrl->subsys->subsysnqn)); + memset(buf, 0, 4); nvme_auth_hmac_update(&hmac, buf, 1); nvme_auth_hmac_update(&hmac, ctrl->hostnqn, strlen(ctrl->hostnqn)); nvme_auth_hmac_final(&hmac, response); -- cgit v1.2.3 From 1cc4cdae2a3b7730d462d69e30f213fd2efe7807 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 21 Apr 2026 09:14:02 -0700 Subject: nvme-pci: fix missed admin queue sq doorbell write We can batch admin commands submitted through io_uring_cmd passthrough, which means bd->last may be false and skips the doorbell write to aggregate multiple commands per write. If a subsequent command can't be dispatched for whatever reason, we have to provide the blk-mq ops' commit_rqs callback in order to ensure we properly update the doorbell. Fixes: 58e5bdeb9c2b ("nvme: enable uring-passthrough for admin commands") Reviewed-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index bcf68c198f74..f953237922da 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2239,6 +2239,7 @@ release_cq: static const struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .complete = nvme_pci_complete_rq, + .commit_rqs = nvme_commit_rqs, .init_hctx = nvme_admin_init_hctx, .init_request = nvme_pci_init_request, .timeout = nvme_timeout, -- cgit v1.2.3 From 846c76ecc02973b05ae909dd4248c11bfa277fc1 Mon Sep 17 00:00:00 2001 From: KaFai Wan Date: Tue, 21 Apr 2026 23:58:01 +0800 Subject: bpf: Reject TCP_NODELAY in TCP header option callbacks A BPF_SOCK_OPS program can enable BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG and then call bpf_setsockopt(TCP_NODELAY) from BPF_SOCK_OPS_HDR_OPT_LEN_CB or BPF_SOCK_OPS_WRITE_HDR_OPT_CB. In these callbacks, bpf_setsockopt(TCP_NODELAY) can reach __tcp_sock_set_nodelay(), which can call tcp_push_pending_frames(). >From BPF_SOCK_OPS_HDR_OPT_LEN_CB, tcp_push_pending_frames() can call tcp_current_mss(), which calls tcp_established_options() and re-enters bpf_skops_hdr_opt_len(). BPF_SOCK_OPS_HDR_OPT_LEN_CB -> bpf_setsockopt(TCP_NODELAY) -> tcp_push_pending_frames() -> tcp_current_mss() -> tcp_established_options() -> bpf_skops_hdr_opt_len() -> BPF_SOCK_OPS_HDR_OPT_LEN_CB >From BPF_SOCK_OPS_WRITE_HDR_OPT_CB, tcp_push_pending_frames() can call tcp_write_xmit(), which calls tcp_transmit_skb(). That path recomputes header option length through tcp_established_options() and bpf_skops_hdr_opt_len() before re-entering bpf_skops_write_hdr_opt(). BPF_SOCK_OPS_WRITE_HDR_OPT_CB -> bpf_setsockopt(TCP_NODELAY) -> tcp_push_pending_frames() -> tcp_write_xmit() -> tcp_transmit_skb() -> tcp_established_options() -> bpf_skops_hdr_opt_len() -> bpf_skops_write_hdr_opt() -> BPF_SOCK_OPS_WRITE_HDR_OPT_CB This leads to unbounded recursion and can overflow the kernel stack. Reject TCP_NODELAY with -EOPNOTSUPP in bpf_sock_ops_setsockopt() when bpf_setsockopt() is called from BPF_SOCK_OPS_HDR_OPT_LEN_CB or BPF_SOCK_OPS_WRITE_HDR_OPT_CB. Fixes: 7e41df5dbba2 ("bpf: Add a few optnames to bpf_setsockopt") Closes: https://lore.kernel.org/bpf/d1d523c9-6901-4454-a183-94462b8f3e4e@std.uestc.edu.cn/ Reported-by: Quan Sun <2022090917019@std.uestc.edu.cn> Reported-by: Yinhao Hu Reported-by: Kaiyan Mei Signed-off-by: KaFai Wan Signed-off-by: Martin KaFai Lau Reviewed-by: Jiayuan Chen Link: https://patch.msgid.link/20260421155804.135786-2-kafai.wan@linux.dev --- net/core/filter.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index 5fa9189eb772..96849f4c1fbc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5833,6 +5833,12 @@ BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, if (!is_locked_tcp_sock_ops(bpf_sock)) return -EOPNOTSUPP; + /* TCP_NODELAY triggers tcp_push_pending_frames() and re-enters these callbacks. */ + if ((bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB || + bpf_sock->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB) && + level == SOL_TCP && optname == TCP_NODELAY) + return -EOPNOTSUPP; + return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen); } -- cgit v1.2.3 From 54377fcab51f6f1f8807827d3751be42279e1a6a Mon Sep 17 00:00:00 2001 From: KaFai Wan Date: Tue, 21 Apr 2026 23:58:02 +0800 Subject: bpf: Reject TCP_NODELAY in bpf-tcp-cc A BPF TCP congestion control program can call bpf_setsockopt() from its callbacks. In current kernels, if it calls bpf_setsockopt(TCP_NODELAY) from cwnd_event_tx_start(), the call can re-enter the TCP transmit path before the outer tcp_transmit_skb() has completed and advanced the send head. This can re-trigger CA_EVENT_TX_START and lead to unbounded recursion: tcp_transmit_skb() -> tcp_event_data_sent() -> tcp_ca_event(sk, CA_EVENT_TX_START) -> cwnd_event_tx_start() -> bpf_setsockopt(TCP_NODELAY) -> tcp_push_pending_frames() -> tcp_write_xmit() -> tcp_transmit_skb() This leads to unbounded recursion and can overflow the kernel stack. Reject TCP_NODELAY with -EOPNOTSUPP for bpf-tcp-cc by introducing a dedicated setsockopt proto for BPF_PROG_TYPE_STRUCT_OPS TCP congestion control programs. To keep it simple, all tcp-cc ops is rejected for TCP_NODELAY. Fixes: 7e41df5dbba2 ("bpf: Add a few optnames to bpf_setsockopt") Suggested-by: Martin KaFai Lau Signed-off-by: KaFai Wan Signed-off-by: Martin KaFai Lau Reviewed-by: Jiayuan Chen Link: https://patch.msgid.link/20260421155804.135786-3-kafai.wan@linux.dev --- include/linux/bpf.h | 1 + net/core/filter.c | 24 ++++++++++++++++++++++++ net/ipv4/bpf_tcp_ca.c | 2 +- 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b4b703c90ca9..01e203964892 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3725,6 +3725,7 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; +extern const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto; extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto; extern const struct bpf_func_proto bpf_find_vma_proto; diff --git a/net/core/filter.c b/net/core/filter.c index 96849f4c1fbc..2914f5330310 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5688,6 +5688,30 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_sk_setsockopt_nodelay, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + /* + * TCP_NODELAY triggers tcp_push_pending_frames() and re-enters + * CA_EVENT_TX_START in bpf_tcp_cc. + */ + if (level == SOL_TCP && optname == TCP_NODELAY) + return -EOPNOTSUPP; + + return _bpf_setsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto = { + .func = bpf_sk_setsockopt_nodelay, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE, +}; + BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level, int, optname, char *, optval, int, optlen) { diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 008edc7f6688..791e15063237 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -168,7 +168,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, */ if (prog_ops_moff(prog) != offsetof(struct tcp_congestion_ops, release)) - return &bpf_sk_setsockopt_proto; + return &bpf_sk_setsockopt_nodelay_proto; return NULL; case BPF_FUNC_getsockopt: /* Since get/setsockopt is usually expected to -- cgit v1.2.3 From 52b6b5334924d8f083a2abe8edeface9206e13ee Mon Sep 17 00:00:00 2001 From: KaFai Wan Date: Tue, 21 Apr 2026 23:58:03 +0800 Subject: selftests/bpf: Test TCP_NODELAY in TCP hdr opt callbacks Add a sockops selftest for the TCP_NODELAY restriction in BPF_SOCK_OPS_HDR_OPT_LEN_CB and BPF_SOCK_OPS_WRITE_HDR_OPT_CB. With BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG enabled, bpf_setsockopt(TCP_NODELAY) returns -EOPNOTSUPP from BPF_SOCK_OPS_HDR_OPT_LEN_CB and BPF_SOCK_OPS_WRITE_HDR_OPT_CB, avoiding unbounded recursion and kernel stack overflow. Other cases continue to work as before, including BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB. Signed-off-by: KaFai Wan Signed-off-by: Martin KaFai Lau Reviewed-by: Jiayuan Chen Link: https://patch.msgid.link/20260421155804.135786-4-kafai.wan@linux.dev --- tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c | 4 ++++ .../selftests/bpf/progs/test_misc_tcp_hdr_options.c | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 56685fc03c7e..80e6315da2a5 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -507,6 +507,10 @@ static void misc(void) ASSERT_EQ(misc_skel->bss->nr_hwtstamp, 0, "nr_hwtstamp"); + ASSERT_TRUE(misc_skel->bss->nodelay_est_ok, "nodelay_est_ok"); + ASSERT_TRUE(misc_skel->bss->nodelay_hdr_len_reject, "nodelay_hdr_len_reject"); + ASSERT_TRUE(misc_skel->bss->nodelay_write_hdr_reject, "nodelay_write_hdr_reject"); + check_linum: ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum"); sk_fds_close(&sk_fds); diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c index d487153a839d..ed5a0011b863 100644 --- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c @@ -29,6 +29,10 @@ unsigned int nr_syn = 0; unsigned int nr_fin = 0; unsigned int nr_hwtstamp = 0; +bool nodelay_est_ok = false; +bool nodelay_hdr_len_reject = false; +bool nodelay_write_hdr_reject = false; + /* Check the header received from the active side */ static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn) { @@ -300,7 +304,7 @@ static int handle_passive_estab(struct bpf_sock_ops *skops) SEC("sockops") int misc_estab(struct bpf_sock_ops *skops) { - int true_val = 1; + int true_val = 1, false_val = 0, ret; switch (skops->op) { case BPF_SOCK_OPS_TCP_LISTEN_CB: @@ -316,10 +320,19 @@ int misc_estab(struct bpf_sock_ops *skops) case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: return handle_parse_hdr(skops); case BPF_SOCK_OPS_HDR_OPT_LEN_CB: + ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); + if (ret == -EOPNOTSUPP) + nodelay_hdr_len_reject = true; return handle_hdr_opt_len(skops); case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: + ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); + if (ret == -EOPNOTSUPP) + nodelay_write_hdr_reject = true; return handle_write_hdr_opt(skops); case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + ret = bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &false_val, sizeof(false_val)); + if (!ret) + nodelay_est_ok = true; return handle_passive_estab(skops); } -- cgit v1.2.3 From 2c7e33f1fc2e75fcfb4aa5d840bcd2e8b53c1847 Mon Sep 17 00:00:00 2001 From: KaFai Wan Date: Tue, 21 Apr 2026 23:58:04 +0800 Subject: selftests/bpf: Verify bpf-tcp-cc rejects TCP_NODELAY Add a bpf_tcp_ca selftest for the TCP_NODELAY restriction in bpf-tcp-cc. Update bpf_cubic to exercise init() and cwnd_event_tx_start(), and check that both callbacks reject bpf_setsockopt(TCP_NODELAY) with -EOPNOTSUPP. Signed-off-by: KaFai Wan Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20260421155804.135786-5-kafai.wan@linux.dev --- tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 4 ++++ tools/testing/selftests/bpf/progs/bpf_cubic.c | 14 ++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index f829b6f09bc9..fe30181e6336 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -112,6 +112,10 @@ static void test_cubic(void) ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called"); + ASSERT_TRUE(cubic_skel->bss->nodelay_init_reject, "init reject nodelay option"); + ASSERT_TRUE(cubic_skel->bss->nodelay_cwnd_event_tx_start_reject, + "cwnd_event_tx_start reject nodelay option"); + bpf_link__destroy(link); bpf_cubic__destroy(cubic_skel); } diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index ce18a4db813f..ebd5a1e69f56 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -16,6 +16,7 @@ #include "bpf_tracing_net.h" #include +#include char _license[] SEC("license") = "GPL"; @@ -170,10 +171,18 @@ static void bictcp_hystart_reset(struct sock *sk) ca->sample_cnt = 0; } +bool nodelay_init_reject = false; +bool nodelay_cwnd_event_tx_start_reject = false; + SEC("struct_ops") void BPF_PROG(bpf_cubic_init, struct sock *sk) { struct bpf_bictcp *ca = inet_csk_ca(sk); + int true_val = 1, ret; + + ret = bpf_setsockopt(sk, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); + if (ret == -EOPNOTSUPP) + nodelay_init_reject = true; bictcp_reset(ca); @@ -189,8 +198,13 @@ void BPF_PROG(bpf_cubic_cwnd_event_tx_start, struct sock *sk) { struct bpf_bictcp *ca = inet_csk_ca(sk); __u32 now = tcp_jiffies32; + int true_val = 1, ret; __s32 delta; + ret = bpf_setsockopt(sk, SOL_TCP, TCP_NODELAY, &true_val, sizeof(true_val)); + if (ret == -EOPNOTSUPP) + nodelay_cwnd_event_tx_start_reject = true; + delta = now - tcp_sk(sk)->lsndtime; /* We were application limited (idle) for a while. -- cgit v1.2.3 From bd7b7ce96db4487bb77692a85ee4489fd2c395df Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Wed, 22 Apr 2026 12:06:36 -0700 Subject: nvme-auth: Hash DH shared secret to create session key The NVMe Base Specification 8.3.5.5.9 states that the session key Ks shall be computed from the ephemeral DH key by applying the hash function selected by the HashID parameter. The current implementation stores the raw DH shared secret as the session key without hashing it. This causes redundant hash operations: 1. Augmented challenge computation (section 8.3.5.5.4) requires Ca = HMAC(H(g^xy mod p), C). The code compensates by hashing the unhashed session key in nvme_auth_augmented_challenge() to produce the correct result. 2. PSK generation (section 8.3.5.5.9) requires PSK = HMAC(Ks, C1 || C2) where Ks should already be H(g^xy mod p). As the DH shared secret is always larger than the HMAC block size, HMAC internally hashes it before use, accidentally producing the correct result. When using secure channel concatenation with bidirectional authentication, this results in hashing the DH value three times: twice for augmented challenge calculations and once during PSK generation. Fix this by: - Modifying nvme_auth_gen_shared_secret() to hash the DH shared secret once after computation: Ks = H(g^xy mod p) - Removing the hash operation from nvme_auth_augmented_challenge() as the session key is now already hashed - Updating session key buffer size from DH key size to hash output size - Adding specification references in comments This avoid storing the raw DH shared secret and reduces the number of hash operations from three to one when using secure channel concatenation. Reviewed-by: Hannes Reinecke Reviewed-by: Eric Biggers Signed-off-by: Chris Leech Signed-off-by: Keith Busch --- drivers/nvme/common/auth.c | 94 ++++++++++++++++++++++++++++++++++++---------- drivers/nvme/host/auth.c | 13 ++++--- drivers/nvme/target/auth.c | 15 ++++---- include/linux/nvme-auth.h | 6 +-- 4 files changed, 92 insertions(+), 36 deletions(-) diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c index 2d325fb93083..77f1d22512f8 100644 --- a/drivers/nvme/common/auth.c +++ b/drivers/nvme/common/auth.c @@ -351,18 +351,29 @@ struct nvme_dhchap_key *nvme_auth_transform_key( } EXPORT_SYMBOL_GPL(nvme_auth_transform_key); +/** + * nvme_auth_augmented_challenge() - Compute the augmented DH-HMAC-CHAP challenge + * @hmac_id: Hash algorithm identifier + * @skey: Session key + * @skey_len: Length of @skey + * @challenge: Challenge value + * @aug: Output buffer for the augmented challenge + * @hlen: Hash output length (length of @challenge and @aug) + * + * NVMe base specification 8.3.5.5.4: The augmented challenge is computed + * applying the HMAC function using the hash function H() selected by the + * HashID parameter ... with the hash of the ephemeral DH key ... as HMAC key + * to the challenge C (i.e., Ca = HMAC(H(g^xy mod p), C)). + * + * As the session key skey is already H(g^xy mod p) per section 8.3.5.5.9, use + * it directly as the HMAC key without additional hashing. + * + * Return: 0 on success, negative errno on failure. + */ int nvme_auth_augmented_challenge(u8 hmac_id, const u8 *skey, size_t skey_len, const u8 *challenge, u8 *aug, size_t hlen) { - u8 hashed_key[NVME_AUTH_MAX_DIGEST_SIZE]; - int ret; - - ret = nvme_auth_hash(hmac_id, skey, skey_len, hashed_key); - if (ret) - return ret; - ret = nvme_auth_hmac(hmac_id, hashed_key, hlen, challenge, hlen, aug); - memzero_explicit(hashed_key, sizeof(hashed_key)); - return ret; + return nvme_auth_hmac(hmac_id, skey, skey_len, challenge, hlen, aug); } EXPORT_SYMBOL_GPL(nvme_auth_augmented_challenge); @@ -403,33 +414,76 @@ int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm, } EXPORT_SYMBOL_GPL(nvme_auth_gen_pubkey); -int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm, - const u8 *ctrl_key, size_t ctrl_key_len, - u8 *sess_key, size_t sess_key_len) +/** + * nvme_auth_gen_session_key() - Generate an ephemeral session key + * @dh_tfm: Diffie-Hellman transform with local private key already set + * @public_key: Peer's public key + * @public_key_len: Length of @public_key + * @sess_key: Output buffer for the session key + * @sess_key_len: Size of @sess_key buffer + * @hash_id: Hash algorithm identifier + * + * NVMe base specification 8.3.5.5.9: The session key Ks shall be computed from + * the ephemeral DH key (i.e., g^xy mod p) ... by applying the hash function + * H() selected by the HashID parameter ... (i.e., Ks = H(g^xy mod p)). + * + * Return: 0 on success, negative errno on failure. + */ +int nvme_auth_gen_session_key(struct crypto_kpp *dh_tfm, + const u8 *public_key, size_t public_key_len, + u8 *sess_key, size_t sess_key_len, u8 hash_id) { struct kpp_request *req; struct crypto_wait wait; struct scatterlist src, dst; + u8 *dh_secret; + size_t dh_secret_len, hash_len; int ret; - req = kpp_request_alloc(dh_tfm, GFP_KERNEL); - if (!req) + hash_len = nvme_auth_hmac_hash_len(hash_id); + if (!hash_len) { + pr_warn("%s: invalid hash algorithm %d\n", __func__, hash_id); + return -EINVAL; + } + + if (sess_key_len != hash_len) { + pr_warn("%s: sess_key buffer missized (%zu != %zu)\n", + __func__, sess_key_len, hash_len); + return -EINVAL; + } + + dh_secret_len = crypto_kpp_maxsize(dh_tfm); + dh_secret = kzalloc(dh_secret_len, GFP_KERNEL); + if (!dh_secret) return -ENOMEM; + req = kpp_request_alloc(dh_tfm, GFP_KERNEL); + if (!req) { + ret = -ENOMEM; + goto out_free_secret; + } + crypto_init_wait(&wait); - sg_init_one(&src, ctrl_key, ctrl_key_len); - kpp_request_set_input(req, &src, ctrl_key_len); - sg_init_one(&dst, sess_key, sess_key_len); - kpp_request_set_output(req, &dst, sess_key_len); + sg_init_one(&src, public_key, public_key_len); + kpp_request_set_input(req, &src, public_key_len); + sg_init_one(&dst, dh_secret, dh_secret_len); + kpp_request_set_output(req, &dst, dh_secret_len); kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &wait); ret = crypto_wait_req(crypto_kpp_compute_shared_secret(req), &wait); - kpp_request_free(req); + + if (ret) + goto out_free_secret; + + ret = nvme_auth_hash(hash_id, dh_secret, dh_secret_len, sess_key); + +out_free_secret: + kfree_sensitive(dh_secret); return ret; } -EXPORT_SYMBOL_GPL(nvme_auth_gen_shared_secret); +EXPORT_SYMBOL_GPL(nvme_auth_gen_session_key); int nvme_auth_parse_key(const char *secret, struct nvme_dhchap_key **ret_key) { diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 63f543e80998..16de4499a8e7 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -588,7 +588,7 @@ static int nvme_auth_dhchap_exponential(struct nvme_ctrl *ctrl, } gen_sesskey: - chap->sess_key_len = chap->host_key_len; + chap->sess_key_len = chap->hash_len; chap->sess_key = kmalloc(chap->sess_key_len, GFP_KERNEL); if (!chap->sess_key) { chap->sess_key_len = 0; @@ -596,16 +596,17 @@ gen_sesskey: return -ENOMEM; } - ret = nvme_auth_gen_shared_secret(chap->dh_tfm, - chap->ctrl_key, chap->ctrl_key_len, - chap->sess_key, chap->sess_key_len); + ret = nvme_auth_gen_session_key(chap->dh_tfm, + chap->ctrl_key, chap->ctrl_key_len, + chap->sess_key, chap->sess_key_len, + chap->hash_id); if (ret) { dev_dbg(ctrl->device, - "failed to generate shared secret, error %d\n", ret); + "failed to generate session key, error %d\n", ret); chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; return ret; } - dev_dbg(ctrl->device, "shared secret %*ph\n", + dev_dbg(ctrl->device, "session key %*ph\n", (int)chap->sess_key_len, chap->sess_key); return 0; } diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index c35c427ca2ac..9a2eccdc8b13 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -447,18 +447,19 @@ int nvmet_auth_ctrl_sesskey(struct nvmet_req *req, struct nvmet_ctrl *ctrl = req->sq->ctrl; int ret; - req->sq->dhchap_skey_len = ctrl->dh_keysize; + req->sq->dhchap_skey_len = nvme_auth_hmac_hash_len(ctrl->shash_id); req->sq->dhchap_skey = kzalloc(req->sq->dhchap_skey_len, GFP_KERNEL); if (!req->sq->dhchap_skey) return -ENOMEM; - ret = nvme_auth_gen_shared_secret(ctrl->dh_tfm, - pkey, pkey_size, - req->sq->dhchap_skey, - req->sq->dhchap_skey_len); + ret = nvme_auth_gen_session_key(ctrl->dh_tfm, + pkey, pkey_size, + req->sq->dhchap_skey, + req->sq->dhchap_skey_len, + ctrl->shash_id); if (ret) - pr_debug("failed to compute shared secret, err %d\n", ret); + pr_debug("failed to compute session key, err %d\n", ret); else - pr_debug("%s: shared secret %*ph\n", __func__, + pr_debug("%s: session key %*ph\n", __func__, (int)req->sq->dhchap_skey_len, req->sq->dhchap_skey); diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h index 184a1f9510fa..89902ae8b929 100644 --- a/include/linux/nvme-auth.h +++ b/include/linux/nvme-auth.h @@ -49,9 +49,9 @@ int nvme_auth_augmented_challenge(u8 hmac_id, const u8 *skey, size_t skey_len, int nvme_auth_gen_privkey(struct crypto_kpp *dh_tfm, u8 dh_gid); int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm, u8 *host_key, size_t host_key_len); -int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm, - const u8 *ctrl_key, size_t ctrl_key_len, - u8 *sess_key, size_t sess_key_len); +int nvme_auth_gen_session_key(struct crypto_kpp *dh_tfm, + const u8 *public_key, size_t public_key_len, + u8 *sess_key, size_t sess_key_len, u8 hash_id); int nvme_auth_generate_psk(u8 hmac_id, const u8 *skey, size_t skey_len, const u8 *c1, const u8 *c2, size_t hash_len, u8 **ret_psk, size_t *ret_len); -- cgit v1.2.3 From 5db6ef9847717329f12c5ea8aba7e9f588a980c0 Mon Sep 17 00:00:00 2001 From: Yucheng Lu Date: Wed, 22 Apr 2026 21:45:04 +0800 Subject: crypto: authencesn - reject short ahash digests during instance creation authencesn requires either a zero authsize or an authsize of at least 4 bytes because the ESN encrypt/decrypt paths always move 4 bytes of high-order sequence number data at the end of the authenticated data. While crypto_authenc_esn_setauthsize() already rejects explicit non-zero authsizes in the range 1..3, crypto_authenc_esn_create() still copied auth->digestsize into inst->alg.maxauthsize without validating it. The AEAD core then initialized the tfm's default authsize from that value. As a result, selecting an ahash with digest size 1..3, such as cbcmac(cipher_null), exposed authencesn instances whose default authsize was invalid even though setauthsize() would have rejected the same value. AF_ALG could then trigger the ESN tail handling with a too-short tag and hit an out-of-bounds access. Reject authencesn instances whose ahash digest size is in the invalid non-zero range 1..3 so that no tfm can inherit an unsupported default authsize. Fixes: f15f05b0a5de ("crypto: ccm - switch to separate cbcmac driver") Cc: stable@kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Yuhang Zheng Reviewed-by: Eric Biggers Signed-off-by: Yucheng Lu Signed-off-by: Ren Wei Signed-off-by: Herbert Xu --- crypto/authencesn.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crypto/authencesn.c b/crypto/authencesn.c index af3d584e584f..522df41365d8 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -390,6 +390,11 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl, auth = crypto_spawn_ahash_alg(&ctx->auth); auth_base = &auth->base; + if (auth->digestsize > 0 && auth->digestsize < 4) { + err = -EINVAL; + goto err_free_inst; + } + err = crypto_grab_skcipher(&ctx->enc, aead_crypto_instance(inst), crypto_attr_alg_name(tb[2]), 0, mask); if (err) -- cgit v1.2.3 From 0b13173d27fa15679463b62a10cfa8b3d6c3a71c Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Wed, 15 Apr 2026 13:41:01 +0800 Subject: dma-buf: fix stale @lock references in struct dma_buf documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel-doc comments for vmapping_counter and vmap_ptr in struct dma_buf reference "@lock" as the protecting lock, but struct dma_buf no longer has a "lock" member. The mutex was removed in favor of using the dma_resv lock exclusively. The implementation correctly uses dma_resv_assert_held(dmabuf->resv) in dma_buf_vmap() and dma_buf_vunmap(), so update the documentation to reference @resv instead. Signed-off-by: gaoxiang17 Reviewed-by: Christian König Signed-off-by: Christian König Link: https://lore.kernel.org/r/20260415054101.535520-1-gxxa03070307@gmail.com --- include/linux/dma-buf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 133b9e637b55..ef6d93fd7a2c 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -322,13 +322,13 @@ struct dma_buf { * @vmapping_counter: * * Used internally to refcnt the vmaps returned by dma_buf_vmap(). - * Protected by @lock. + * Protected by @resv. */ unsigned vmapping_counter; /** * @vmap_ptr: - * The current vmap ptr if @vmapping_counter > 0. Protected by @lock. + * The current vmap ptr if @vmapping_counter > 0. Protected by @resv. */ struct iosys_map vmap_ptr; -- cgit v1.2.3 From 27fdbab4221b375de54bf91919798d88520c6e28 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 27 Mar 2026 14:13:38 +0100 Subject: Buffer overflow in drivers/xen/sys-hypervisor.c The build id returned by HYPERVISOR_xen_version(XENVER_build_id) is neither NUL terminated nor a string. The first causes a buffer overflow as sprintf in buildid_show will read and copy till it finds a NUL. 00000000 f4 91 51 f4 dd 38 9e 9d 65 47 52 eb 10 71 db 50 |..Q..8..eGR..q.P| 00000010 b9 a8 01 42 6f 2e 32 |...Bo.2| 00000017 So use a memcpy instead of sprintf to have the correct value: 00000000 f4 91 51 f4 dd 00 9e 9d 65 47 52 eb 10 71 db 50 |..Q.....eGR..q.P| 00000010 b9 a8 01 42 |...B| 00000014 (the above have a hack to embed a zero inside and check it's returned correctly). This is XSA-485 / CVE-2026-31786 Fixes: 84b7625728ea ("xen: add sysfs node for hypervisor build id") Signed-off-by: Frediano Ziglio Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/xen/sys-hypervisor.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index b1bb01ba82f8..91923242a5ae 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -366,6 +366,8 @@ static ssize_t buildid_show(struct hyp_sysfs_attr *attr, char *buffer) ret = sprintf(buffer, ""); return ret; } + if (ret > PAGE_SIZE) + return -ENOSPC; buildid = kmalloc(sizeof(*buildid) + ret, GFP_KERNEL); if (!buildid) @@ -373,8 +375,10 @@ static ssize_t buildid_show(struct hyp_sysfs_attr *attr, char *buffer) buildid->len = ret; ret = HYPERVISOR_xen_version(XENVER_build_id, buildid); - if (ret > 0) - ret = sprintf(buffer, "%s", buildid->buf); + if (ret > 0) { + /* Build id is binary, not a string. */ + memcpy(buffer, buildid->buf, ret); + } kfree(buildid); return ret; -- cgit v1.2.3 From 24daca4fc07f3ff8cd0e3f629cd982187f48436a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 10 Apr 2026 09:20:04 +0200 Subject: xen/privcmd: fix double free via VMA splitting privcmd_vm_ops defines .close (privcmd_close), but neither .may_split nor .open. When userspace does a partial munmap() on a privcmd mapping, the kernel splits the VMA via __split_vma(). Since may_split is NULL, the split is allowed. vm_area_dup() copies vm_private_data (a pages array allocated in alloc_empty_pages()) into the new VMA without any fixup, because there is no .open callback. Both VMAs now point to the same pages array. When the unmapped portion is closed, privcmd_close() calls: - xen_unmap_domain_gfn_range() - xen_free_unpopulated_pages() - kvfree(pages) The surviving VMA still holds the dangling pointer. When it is later destroyed, the same sequence runs again, which leads to a double free. Fix this issue by adding a .may_split callback denying the VMA split. This is XSA-487 / CVE-2026-31787 Fixes: d71f513985c2 ("xen: privcmd: support autotranslated physmap guests.") Reported-by: Atharva Vartak Suggested-by: Atharva Vartak Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- drivers/xen/privcmd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 15ba592236e8..725a49a0eee7 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -1620,6 +1620,12 @@ static void privcmd_close(struct vm_area_struct *vma) kvfree(pages); } +static int privcmd_may_split(struct vm_area_struct *area, unsigned long addr) +{ + /* Forbid splitting, avoids double free via privcmd_close(). */ + return -EINVAL; +} + static vm_fault_t privcmd_fault(struct vm_fault *vmf) { printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", @@ -1631,6 +1637,7 @@ static vm_fault_t privcmd_fault(struct vm_fault *vmf) static const struct vm_operations_struct privcmd_vm_ops = { .close = privcmd_close, + .may_split = privcmd_may_split, .fault = privcmd_fault }; -- cgit v1.2.3 From 4e3d7c89e15ac5dbf45b7d7a49bb374650c03339 Mon Sep 17 00:00:00 2001 From: zhidao su Date: Thu, 23 Apr 2026 10:58:32 +0800 Subject: sched_ext: Fix local_dsq_post_enq() to use task's scheduler in sub-sched local_dsq_post_enq() calls call_task_dequeue() with scx_root instead of the scheduler instance actually managing the task. When CONFIG_EXT_SUB_SCHED is enabled, tasks may be managed by a sub-scheduler whose ops.dequeue() callback differs from root's. Using scx_root causes the wrong scheduler's ops.dequeue() to be consulted: sub-sched tasks dispatched to a local DSQ via scx_bpf_dsq_move_to_local() will have SCX_TASK_IN_CUSTODY cleared but the sub-scheduler's ops.dequeue() is never invoked, violating the custody exit semantics. Fix by adding a 'struct scx_sched *sch' parameter to local_dsq_post_enq() and move_local_task_to_local_dsq(), and propagating the correct scheduler from their callers dispatch_enqueue(), move_task_between_dsqs(), and consume_dispatch_q(). This is consistent with dispatch_enqueue()'s non-local path which already passes 'sch' directly to call_task_dequeue() for global/bypass DSQs. Fixes: ebf1ccff79c4 ("sched_ext: Fix ops.dequeue() semantics") Signed-off-by: zhidao su Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index d66fea57ee69..1f670028bf19 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1389,13 +1389,13 @@ static void call_task_dequeue(struct scx_sched *sch, struct rq *rq, p->scx.flags &= ~SCX_TASK_IN_CUSTODY; } -static void local_dsq_post_enq(struct scx_dispatch_q *dsq, struct task_struct *p, - u64 enq_flags) +static void local_dsq_post_enq(struct scx_sched *sch, struct scx_dispatch_q *dsq, + struct task_struct *p, u64 enq_flags) { struct rq *rq = container_of(dsq, struct rq, scx.local_dsq); bool preempt = false; - call_task_dequeue(scx_root, rq, p, 0); + call_task_dequeue(sch, rq, p, 0); /* * If @rq is in balance, the CPU is already vacant and looking for the @@ -1519,7 +1519,7 @@ static void dispatch_enqueue(struct scx_sched *sch, struct rq *rq, * concurrently in a non-atomic way. */ if (is_local) { - local_dsq_post_enq(dsq, p, enq_flags); + local_dsq_post_enq(sch, dsq, p, enq_flags); } else { /* * Task on global/bypass DSQ: leave custody, task on @@ -2130,7 +2130,8 @@ static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p, int wake_fl schedule_reenq_local(rq, 0); } -static void move_local_task_to_local_dsq(struct task_struct *p, u64 enq_flags, +static void move_local_task_to_local_dsq(struct scx_sched *sch, + struct task_struct *p, u64 enq_flags, struct scx_dispatch_q *src_dsq, struct rq *dst_rq) { @@ -2150,7 +2151,7 @@ static void move_local_task_to_local_dsq(struct task_struct *p, u64 enq_flags, dsq_inc_nr(dst_dsq, p, enq_flags); p->scx.dsq = dst_dsq; - local_dsq_post_enq(dst_dsq, p, enq_flags); + local_dsq_post_enq(sch, dst_dsq, p, enq_flags); } /** @@ -2371,7 +2372,7 @@ static struct rq *move_task_between_dsqs(struct scx_sched *sch, /* @p is going from a non-local DSQ to a local DSQ */ if (src_rq == dst_rq) { task_unlink_from_dsq(p, src_dsq); - move_local_task_to_local_dsq(p, enq_flags, + move_local_task_to_local_dsq(sch, p, enq_flags, src_dsq, dst_rq); raw_spin_unlock(&src_dsq->lock); } else { @@ -2424,7 +2425,7 @@ retry: if (rq == task_rq) { task_unlink_from_dsq(p, dsq); - move_local_task_to_local_dsq(p, enq_flags, dsq, rq); + move_local_task_to_local_dsq(sch, p, enq_flags, dsq, rq); raw_spin_unlock(&dsq->lock); return true; } -- cgit v1.2.3 From 96fe420bebc159599fb8da1080e9ff207bdb650a Mon Sep 17 00:00:00 2001 From: Jingle Wu 吳金國 Date: Tue, 21 Apr 2026 07:00:10 +0000 Subject: Input: elan_i2c - add ic type 0x19 The 0x19 is valid 3000 serial ic type too. Signed-off-by: Jingle Wu Link: https://patch.msgid.link/KL1PR01MB511699853D1B66D137C06806DC2C2@KL1PR01MB5116.apcprd01.prod.exchangelabs.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index fee1796da3d0..7475803c6ce4 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -162,6 +162,9 @@ static int elan_get_fwinfo(u16 ic_type, u8 iap_version, u16 *validpage_count, case 0x15: *validpage_count = 1024; break; + case 0x19: + *validpage_count = 2032; + break; default: /* unknown ic type clear value */ *validpage_count = 0; -- cgit v1.2.3 From 8f9d6cd6d3916add4c47a9dd1622e4fc057f877b Mon Sep 17 00:00:00 2001 From: Jingle Wu 吳金國 Date: Tue, 21 Apr 2026 07:02:33 +0000 Subject: Input: elan_i2c - increase device reset wait timeout after update FW Extend wait_for_completion_timeout from 300ms to 700ms to ensure sufficient time for device reset after firmware update. Signed-off-by: Jingle Wu Link: https://patch.msgid.link/KL1PR01MB5116031986614B3214EF2F30DC2C2@KL1PR01MB5116.apcprd01.prod.exchangelabs.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c index a9057d124a88..88d4070d4b44 100644 --- a/drivers/input/mouse/elan_i2c_i2c.c +++ b/drivers/input/mouse/elan_i2c_i2c.c @@ -690,7 +690,7 @@ static int elan_i2c_finish_fw_update(struct i2c_client *client, if (error) { dev_err(dev, "device reset failed: %d\n", error); } else if (!wait_for_completion_timeout(completion, - msecs_to_jiffies(300))) { + msecs_to_jiffies(700))) { dev_err(dev, "timeout waiting for device reset\n"); error = -ETIMEDOUT; } -- cgit v1.2.3 From 13e786b64bd3fd81c7eb22aa32bf8305c32f2ccf Mon Sep 17 00:00:00 2001 From: Petr Malat Date: Thu, 23 Apr 2026 04:48:26 -0500 Subject: cgroup: Increment nr_dying_subsys_* from rmdir context Incrementing nr_dying_subsys_* in offline_css(), which is executed by cgroup_offline_wq worker, leads to a race where user can see the value to be 0 if he reads cgroup.stat after calling rmdir and before the worker executes. This makes the user wrongly expect resources released by the removed cgroup to be available for a new assignment. Increment nr_dying_subsys_* from kill_css(), which is called from the cgroup_rmdir() context. Fixes: ab0312526867 ("cgroup: Show # of subsystem CSSes in cgroup.stat") Signed-off-by: Petr Malat Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 3243c2087ee3..c928dea9dea6 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5724,16 +5724,6 @@ static void offline_css(struct cgroup_subsys_state *css) RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL); wake_up_all(&css->cgroup->offline_waitq); - - css->cgroup->nr_dying_subsys[ss->id]++; - /* - * Parent css and cgroup cannot be freed until after the freeing - * of child css, see css_free_rwork_fn(). - */ - while ((css = css->parent)) { - css->nr_descendants--; - css->cgroup->nr_dying_subsys[ss->id]++; - } } /** @@ -6045,6 +6035,8 @@ static void css_killed_ref_fn(struct percpu_ref *ref) */ static void kill_css(struct cgroup_subsys_state *css) { + struct cgroup_subsys *ss = css->ss; + lockdep_assert_held(&cgroup_mutex); if (css->flags & CSS_DYING) @@ -6081,6 +6073,16 @@ static void kill_css(struct cgroup_subsys_state *css) * css is confirmed to be seen as killed on all CPUs. */ percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn); + + css->cgroup->nr_dying_subsys[ss->id]++; + /* + * Parent css and cgroup cannot be freed until after the freeing + * of child css, see css_free_rwork_fn(). + */ + while ((css = css->parent)) { + css->nr_descendants--; + css->cgroup->nr_dying_subsys[ss->id]++; + } } /** -- cgit v1.2.3 From 4a1b534177395627579c1fb9e7f9100ee88955dd Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Tue, 10 Feb 2026 11:07:31 +0800 Subject: wifi: ath12k: prepare REO update element only for primary link Commit [1] introduces dp->reo_cmd_update_rx_queue_list for the purpose of tracking all pending REO queue flush commands. The helper ath12k_dp_prepare_reo_update_elem() allocates an element and populates it with REO queue information, then add it to the list. The element would be helpful during clean up stage to finally unmap/free the corresponding REO queue buffer. In MLO scenarios with more than one links, for non dp_primary_link_only chips like WCN7850, that helper is called for each link peer. This results in multiple elements added to the list but all of them pointing to the same REO queue buffer. Consequently the same buffer gets unmap/freed multiple times: BUG kmalloc-2k (Tainted: G B W O ): Object already free ----------------------------------------------------------------------------- Allocated in ath12k_wifi7_dp_rx_assign_reoq+0xce/0x280 [ath12k_wifi7] age=7436 cpu=10 pid=16130 __kmalloc_noprof ath12k_wifi7_dp_rx_assign_reoq ath12k_dp_rx_peer_tid_setup ath12k_dp_peer_setup ath12k_mac_station_add ath12k_mac_op_sta_state [...] Freed in ath12k_dp_rx_tid_cleanup.part.0+0x25/0x40 [ath12k] age=1 cpu=27 pid=16137 kfree ath12k_dp_rx_tid_cleanup.part.0 ath12k_dp_rx_reo_cmd_list_cleanup ath12k_dp_cmn_device_deinit ath12k_core_stop ath12k_core_hw_group_cleanup ath12k_pci_remove Fix this by allowing list addition for primary link only. Note dp_primary_link_only chips like QCN9274 are not affected by this change, because that's what they were doing in the first place. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00302-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.115823.3 Fixes: 3bf2e57e7d6c ("wifi: ath12k: Add Retry Mechanism for REO RX Queue Update Failures") # [1] Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221011 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260210-ath12k-rxtid-double-free-v1-1-8b523fb2886d@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_rx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 250459facff3..25557dea5826 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -565,6 +565,9 @@ static int ath12k_dp_prepare_reo_update_elem(struct ath12k_dp *dp, lockdep_assert_held(&dp->dp_lock); + if (!peer->primary_link) + return 0; + elem = kzalloc_obj(*elem, GFP_ATOMIC); if (!elem) return -ENOMEM; -- cgit v1.2.3 From f3ba9e05cc7b65f41f58bb4808f6c3a8f7894bb1 Mon Sep 17 00:00:00 2001 From: Aaradhana Sahu Date: Fri, 10 Apr 2026 12:43:00 +0530 Subject: wifi: ath12k: fix OF node refcount imbalance in WSI graph traversal ath12k_core_get_wsi_info() traverses the WSI (Wired Serial Interface) device graph starting from dev->of_node. The current code uses dev->of_node directly as the local traversal pointer and calls of_node_put() on error. Since the driver does not own a reference to dev->of_node, dropping it during traversal results in the following OF refcount underflow: OF: ERROR: of_node_release() detected bad of_node_put() on /soc@0/wifi@c000000 CPU: 1 UID: 0 PID: 210 Comm: insmod Not tainted 6.19.0-rc4-next-20260109-00023-g797dd36dc178 #26 PREEMPT Hardware name: Qualcomm Technologies, Inc. IPQ5332 MI01.2 (DT) Call trace: show_stack+0x18/0x24 (C) dump_stack_lvl+0x60/0x80 dump_stack+0x18/0x24 of_node_release+0x164/0x1a0 kobject_put+0xb4/0x278 of_node_put+0x18/0x28 ath12k_core_init+0x29c/0x5d4 [ath12k] ath12k_ahb_probe+0x950/0xc14 [ath12k] platform_probe+0x5c/0xa4 really_probe+0xc0/0x3ec __driver_probe_device+0x80/0x170 driver_probe_device+0x3c/0x120 __driver_attach+0xc4/0x218 OF: ERROR: next of_node_put() on this node will result in a kobject warning 'refcount_t: underflow; use-after-free.' Fix this by explicitly acquiring a reference to the starting node using of_node_get() and attaching automatic cleanup via __free(device_node). Each discovered WSI node is stored in ag->wsi_node[] with its own of_node_get() reference. These references are later released in ath12k_core_free_wsi_info() during driver teardown. Also remove unnecessary memset() of wsi_node array since cleanup now explicitly sets pointers to NULL. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.6-01243-QCAHKSWPL_SILICONZ-1 Tested-on: IPQ5332 hw1.0 AHB WLAN.WBE.1.6-01275-QCAHKSWPL_SILICONZ-1 Fixes: 908c10c860e0 ("wifi: ath12k: parse multiple device information from Device Tree") Signed-off-by: Aaradhana Sahu Reviewed-by: Rameshkumar Sundaram Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20260410071300.2323603-1-aaradhana.sahu@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 77 +++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 29 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 2519e2400d58..980a12fb2c6e 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -1838,10 +1838,22 @@ static struct ath12k_hw_group *ath12k_core_hw_group_alloc(struct ath12k_base *ab return ag; } +static void ath12k_core_free_wsi_info(struct ath12k_hw_group *ag) +{ + int i; + + for (i = 0; i < ag->num_devices; i++) { + of_node_put(ag->wsi_node[i]); + ag->wsi_node[i] = NULL; + } + ag->num_devices = 0; +} + static void ath12k_core_hw_group_free(struct ath12k_hw_group *ag) { mutex_lock(&ath12k_hw_group_mutex); + ath12k_core_free_wsi_info(ag); list_del(&ag->list); kfree(ag); @@ -1867,52 +1879,59 @@ static struct ath12k_hw_group *ath12k_core_hw_group_find_by_dt(struct ath12k_bas static int ath12k_core_get_wsi_info(struct ath12k_hw_group *ag, struct ath12k_base *ab) { - struct device_node *wsi_dev = ab->dev->of_node, *next_wsi_dev; - struct device_node *tx_endpoint, *next_rx_endpoint; - int device_count = 0; - - next_wsi_dev = wsi_dev; + struct device_node *next_wsi_dev; + int device_count = 0, ret = 0; + struct device_node *wsi_dev; - if (!next_wsi_dev) + wsi_dev = of_node_get(ab->dev->of_node); + if (!wsi_dev) return -ENODEV; do { - ag->wsi_node[device_count] = next_wsi_dev; + if (device_count >= ATH12K_MAX_DEVICES) { + ath12k_warn(ab, "device count in DT %d is more than limit %d\n", + device_count, ATH12K_MAX_DEVICES); + ret = -EINVAL; + break; + } + + ag->wsi_node[device_count++] = of_node_get(wsi_dev); - tx_endpoint = of_graph_get_endpoint_by_regs(next_wsi_dev, 0, -1); + struct device_node *tx_endpoint __free(device_node) = + of_graph_get_endpoint_by_regs(wsi_dev, 0, -1); if (!tx_endpoint) { - of_node_put(next_wsi_dev); - return -ENODEV; + ret = -ENODEV; + break; } - next_rx_endpoint = of_graph_get_remote_endpoint(tx_endpoint); + struct device_node *next_rx_endpoint __free(device_node) = + of_graph_get_remote_endpoint(tx_endpoint); if (!next_rx_endpoint) { - of_node_put(next_wsi_dev); - of_node_put(tx_endpoint); - return -ENODEV; + ret = -ENODEV; + break; } - of_node_put(tx_endpoint); - of_node_put(next_wsi_dev); - next_wsi_dev = of_graph_get_port_parent(next_rx_endpoint); if (!next_wsi_dev) { - of_node_put(next_rx_endpoint); - return -ENODEV; + ret = -ENODEV; + break; } - of_node_put(next_rx_endpoint); + of_node_put(wsi_dev); + wsi_dev = next_wsi_dev; + } while (ab->dev->of_node != wsi_dev); - device_count++; - if (device_count > ATH12K_MAX_DEVICES) { - ath12k_warn(ab, "device count in DT %d is more than limit %d\n", - device_count, ATH12K_MAX_DEVICES); - of_node_put(next_wsi_dev); - return -EINVAL; + if (ret) { + while (--device_count >= 0) { + of_node_put(ag->wsi_node[device_count]); + ag->wsi_node[device_count] = NULL; } - } while (wsi_dev != next_wsi_dev); - of_node_put(next_wsi_dev); + of_node_put(wsi_dev); + return ret; + } + + of_node_put(wsi_dev); ag->num_devices = device_count; return 0; @@ -1983,9 +2002,9 @@ static struct ath12k_hw_group *ath12k_core_hw_group_assign(struct ath12k_base *a ath12k_core_get_wsi_index(ag, ab)) { ath12k_dbg(ab, ATH12K_DBG_BOOT, "unable to get wsi info from dt, grouping single device"); + ath12k_core_free_wsi_info(ag); ag->id = ATH12K_INVALID_GROUP_ID; ag->num_devices = 1; - memset(ag->wsi_node, 0, sizeof(ag->wsi_node)); wsi->index = 0; } -- cgit v1.2.3 From c4b6ad0e14f5df942eed5ebadaff84b468bd2496 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 18 Apr 2026 22:37:00 +0300 Subject: wifi: ath10k: snoc: select POWER_SEQUENCING The commit afcf3ec615c9 ("wifi: ath10k: snoc: support powering on the device via pwrseq") made ath10k SNOC driver use devm_pwrseq_get(). Select the corresponding Kconfig symbol to make sure that API call is always available and doesn't return an error per se. Fixes: afcf3ec615c9 ("wifi: ath10k: snoc: support powering on the device via pwrseq") Reported-by: Luca Weiss Closes: https://lore.kernel.org/r/DHUHU7UIT487.139L3KIVRVREU@fairphone.com Signed-off-by: Dmitry Baryshkov Reviewed-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20260418-ath10k-snoc-pwrseq-v1-1-832594ba3294@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath10k/Kconfig b/drivers/net/wireless/ath/ath10k/Kconfig index 876aed765833..efb9f022d8c6 100644 --- a/drivers/net/wireless/ath/ath10k/Kconfig +++ b/drivers/net/wireless/ath/ath10k/Kconfig @@ -46,6 +46,7 @@ config ATH10K_SNOC depends on ARCH_QCOM || COMPILE_TEST depends on QCOM_SMEM depends on QCOM_RPROC_COMMON || QCOM_RPROC_COMMON=n + select POWER_SEQUENCING select QCOM_SCM select QCOM_QMI_HELPERS help -- cgit v1.2.3 From 4498664e2d5888efabb96428196a926acdaa25ed Mon Sep 17 00:00:00 2001 From: Yu-Hsiang Tseng Date: Thu, 23 Apr 2026 02:08:14 +0800 Subject: wifi: ath12k: use lockdep_assert_in_rcu_read_lock() for RCU assertions Two functions in ath12k assert that the caller holds an RCU read lock: ath12k_mac_get_arvif() and ath12k_p2p_noa_update_vdev_iter(). Both use: WARN_ON(!rcu_read_lock_any_held()); On kernels using preemptible RCU (CONFIG_PREEMPT=y or CONFIG_PREEMPT_RT=y) without CONFIG_DEBUG_LOCK_ALLOC, this produces a false positive splat whenever these functions are invoked from paths that do hold the RCU read lock (e.g. firmware stats processing or mac80211 interface iteration). Root cause: - Without CONFIG_DEBUG_LOCK_ALLOC, rcu_read_lock_any_held() is a static inline that returns !preemptible() as a proxy for "in an RCU read section". - With preemptible RCU, rcu_read_lock() does not disable preemption. A task can therefore be preemptible while legitimately holding an RCU read lock, making the proxy unreliable. - Callers such as ath12k_wmi_tlv_rssi_chain_parse() (via guard(rcu)()) and ieee80211_iterate_active_interfaces_atomic() do hold the RCU read lock, so these warnings are incorrect. Typical splat seen on a WCN7850 station with periodic fw stats processing: WARNING: drivers/net/wireless/ath/ath12k/mac.c:791 at ath12k_mac_get_arvif+0x9e/0xd0 [ath12k] Tainted: G W O 6.19.13-rt #1 PREEMPT_RT Call Trace: ath12k_wmi_tlv_rssi_chain_parse+0x69/0x170 [ath12k] ath12k_wmi_tlv_iter+0x7f/0x120 [ath12k] ath12k_wmi_tlv_fw_stats_parse+0x342/0x6b0 [ath12k] ath12k_wmi_op_rx+0xe9e/0x3150 [ath12k] ath12k_htc_rx_completion_handler+0x3df/0x5b0 [ath12k] ath12k_ce_per_engine_service+0x325/0x3e0 [ath12k] ath12k_pci_ce_workqueue+0x20/0x40 [ath12k] Replace WARN_ON(!rcu_read_lock_any_held()) with lockdep_assert_in_rcu_read_lock(), which is gated on CONFIG_PROVE_RCU and therefore compiles out entirely when PROVE_RCU is disabled. PROVE_RCU kernels continue to get the full lockdep-based check, and the new helper precisely checks for rcu_read_lock() rather than any RCU variant, which better matches the callers' expectations. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00302-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.115823.3 Fixes: 3dd2c68f206e ("wifi: ath12k: prepare vif data structure for MLO handling") Suggested-by: Baochen Qiang Suggested-by: Sebastian Andrzej Siewior Reviewed-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Signed-off-by: Yu-Hsiang Tseng Reviewed-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20260422180814.1938317-1-asas1asas200@gmail.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 2 +- drivers/net/wireless/ath/ath12k/p2p.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index fbdfe6424fd7..df2334f3bad6 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -788,7 +788,7 @@ struct ath12k_link_vif *ath12k_mac_get_arvif(struct ath12k *ar, u32 vdev_id) /* To use the arvif returned, caller must have held rcu read lock. */ - WARN_ON(!rcu_read_lock_any_held()); + lockdep_assert_in_rcu_read_lock(); arvif_iter.vdev_id = vdev_id; arvif_iter.ar = ar; diff --git a/drivers/net/wireless/ath/ath12k/p2p.c b/drivers/net/wireless/ath/ath12k/p2p.c index 59589748f1a8..19ebcd1d8eb2 100644 --- a/drivers/net/wireless/ath/ath12k/p2p.c +++ b/drivers/net/wireless/ath/ath12k/p2p.c @@ -123,7 +123,7 @@ static void ath12k_p2p_noa_update_vdev_iter(void *data, u8 *mac, struct ath12k_p2p_noa_arg *arg = data; struct ath12k_link_vif *arvif; - WARN_ON(!rcu_read_lock_any_held()); + lockdep_assert_in_rcu_read_lock(); arvif = &ahvif->deflink; if (!arvif->is_created || arvif->ar != arg->ar || arvif->vdev_id != arg->vdev_id) return; -- cgit v1.2.3 From 375e4e33c18dfa05c5dfd5f3dfffeb29343dd4c7 Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Tue, 21 Apr 2026 23:54:12 -0700 Subject: bpf: Fix NULL pointer dereference in bpf_sk_storage_clone and diag paths bpf_selem_unlink_nofail() sets SDATA(selem)->smap to NULL before removing the selem from the storage hlist. A concurrent RCU reader in bpf_sk_storage_clone() can observe the selem still on the list with smap already NULL, causing a NULL pointer dereference. general protection fault, probably for non-canonical address 0xdffffc000000000a: KASAN: null-ptr-deref in range [0x0000000000000050-0x0000000000000057] RIP: 0010:bpf_sk_storage_clone+0x1cd/0xaa0 net/core/bpf_sk_storage.c:174 Call Trace: sk_clone+0xfed/0x1980 net/core/sock.c:2591 inet_csk_clone_lock+0x30/0x760 net/ipv4/inet_connection_sock.c:1222 tcp_create_openreq_child+0x35/0x2680 net/ipv4/tcp_minisocks.c:571 tcp_v4_syn_recv_sock+0x123/0xf90 net/ipv4/tcp_ipv4.c:1729 tcp_check_req+0x8e1/0x2580 include/net/tcp.h:855 tcp_v4_rcv+0x1845/0x3b80 net/ipv4/tcp_ipv4.c:2347 Add a NULL check for smap in bpf_sk_storage_clone(). bpf_sk_storage_diag_put_all() has the same issue. Add a NULL check and pass the validated smap directly to diag_get(), which is refactored to take smap as a parameter instead of reading it internally. bpf_sk_storage_diag_put() uses diag->maps[i] which is always valid under its refcount, so diag->maps[i] is passed directly to diag_get(). Fixes: 5d800f87d0a5 ("bpf: Support lockless unlink when freeing map or local storage") Reported-by: Xiang Mei Acked-by: Amery Hung Signed-off-by: Weiming Shi Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20260422065411.1007737-2-bestswngs@gmail.com --- net/core/bpf_sk_storage.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 14eb7812bda4..dc3e8fce8809 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -172,7 +172,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) struct bpf_map *map; smap = rcu_dereference(SDATA(selem)->smap); - if (!(smap->map.map_flags & BPF_F_CLONE)) + if (!smap || !(smap->map.map_flags & BPF_F_CLONE)) continue; /* Note that for lockless listeners adding new element @@ -531,10 +531,10 @@ err_free: } EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc); -static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) +static int diag_get(struct bpf_local_storage_map *smap, + struct bpf_local_storage_data *sdata, struct sk_buff *skb) { struct nlattr *nla_stg, *nla_value; - struct bpf_local_storage_map *smap; /* It cannot exceed max nlattr's payload */ BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE); @@ -543,7 +543,6 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) if (!nla_stg) return -EMSGSIZE; - smap = rcu_dereference(sdata->smap); if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id)) goto errout; @@ -596,9 +595,11 @@ static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb, saved_len = skb->len; hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { smap = rcu_dereference(SDATA(selem)->smap); + if (!smap) + continue; diag_size += nla_value_size(smap->map.value_size); - if (nla_stgs && diag_get(SDATA(selem), skb)) + if (nla_stgs && diag_get(smap, SDATA(selem), skb)) /* Continue to learn diag_size */ err = -EMSGSIZE; } @@ -665,7 +666,7 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, diag_size += nla_value_size(diag->maps[i]->value_size); - if (nla_stgs && diag_get(sdata, skb)) + if (nla_stgs && diag_get((struct bpf_local_storage_map *)diag->maps[i], sdata, skb)) /* Continue to learn diag_size */ err = -EMSGSIZE; } -- cgit v1.2.3 From 6451d58a355642b612f2bf948ad39108c998ac2a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 20 Apr 2026 19:48:41 +0000 Subject: sockmap: Fix sk_psock_drop() race vs sock_map_{unhash,close,destroy}(). syzbot reported a splat in sock_map_destroy() [0], where psock was NULL even though sk->sk_prot still pointed to tcp_bpf_prots[][]. The stack trace shows how badly the path was excercised, see inet_release() calls tcp_close(), not sock_map_close() yet, but finally reaching sock_map_destroy(). The root cause is a lack of synchronisation. Even if sk_psock_get() fails to bump psock->refcnt, it does not guarantee that sk_psock_drop() has finished, and thus sk->sk_prot might not have been restored to the original one. Commit 4b4647add7d3 ("sock_map: avoid race between sock_map_close and sk_psock_put") attempted to address this, but it was insufficient for two reasons. It did not cover sock_map_unhash() and sock_map_destroy(), and it missed the corner case where sk_psock() is NULL. On non-x86 platforms, sk_psock_restore_proto(sk, psock) and rcu_assign_sk_user_data(sk, NULL) can be reordered because there is no address dependency between sk->sk_prot and sk->sk_user_data. sk_psock_get() returning NULL implies nothing about sk->sk_prot. Let's simply retry sk_psock_get() in the unlikely case. Note that we cannot avoid loop even if we added memory barrier in sk_psock_drop() and sock_map_psock_get_checked(). Also note that sock_map_destroy() cannot be called from softirq while sock_map_close() has also been running. It is because sock_map_destroy() requires SOCK_DEAD, so sock_map_destroy() cannot happen until sock_map_close() has finished the saved_close() (which is tcp_close()). [0]: WARNING: CPU: 1 PID: 8459 at net/core/sock_map.c:1667 sock_map_destroy+0x28b/0x2b0 net/core/sock_map.c:1667 Modules linked in: CPU: 1 UID: 0 PID: 8459 Comm: syz.0.1109 Not tainted syzkaller #0 PREEMPT_{RT,(full)} Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:sock_map_destroy+0x28b/0x2b0 net/core/sock_map.c:1667 Code: 8b 36 49 83 c6 38 4c 89 f0 48 c1 e8 03 42 80 3c 38 00 74 08 4c 89 f7 e8 93 62 22 f9 4d 8b 3e e9 79 ff ff ff e8 a6 2b c3 f8 90 <0f> 0b 90 eb 9c e8 9b 2b c3 f8 4c 89 e7 be 03 00 00 00 e8 0e 4e bc RSP: 0018:ffffc9000d067be8 EFLAGS: 00010293 RAX: ffffffff88fb30aa RBX: ffff888024832000 RCX: ffff888024283b80 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000 R10: dffffc0000000000 R11: ffffed100862e946 R12: dffffc0000000000 R13: ffff888024832000 R14: ffffffff995b2208 R15: ffffffff88fb2e20 FS: 0000555579a7d500(0000) GS:ffff8881269c2000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00002000000048c0 CR3: 000000003713a000 CR4: 00000000003526f0 Call Trace: inet_csk_destroy_sock+0x166/0x3a0 net/ipv4/inet_connection_sock.c:1294 __tcp_close+0xcc1/0xfd0 net/ipv4/tcp.c:3262 tcp_close+0x28/0x110 net/ipv4/tcp.c:3274 inet_release+0x144/0x190 net/ipv4/af_inet.c:435 __sock_release net/socket.c:649 [inline] sock_close+0xc0/0x240 net/socket.c:1439 __fput+0x45b/0xa80 fs/file_table.c:468 task_work_run+0x1d4/0x260 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xec/0x110 kernel/entry/common.c:43 exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline] do_syscall_64+0x2bd/0x3b0 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f265847ebe9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffd158dfbd8 EFLAGS: 00000246 ORIG_RAX: 00000000000001b4 RAX: 0000000000000000 RBX: 000000000002ddb0 RCX: 00007f265847ebe9 RDX: 0000000000000000 RSI: 000000000000001e RDI: 0000000000000003 RBP: 00007f26586a7da0 R08: 0000000000000001 R09: 0000000e158dfecf R10: 0000001b30a20000 R11: 0000000000000246 R12: 00007f26586a5fac R13: 00007f26586a5fa0 R14: ffffffffffffffff R15: 00007ffd158dfcf0 Fixes: 1aa12bdf1bfb ("bpf: sockmap, add sock close() hook to remove socks") Fixes: b05545e15e1f ("bpf: sockmap, fix transition through disconnect without close") Fixes: d8616ee2affc ("bpf, sockmap: Fix sk->sk_forward_alloc warn_on in sk_stream_kill_queues") Reported-by: syzbot+b0842d38af58376d1fdc@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/69cec5ef.050a0220.2dbe29.0009.GAE@google.com/ Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Reviewed-by: Jiayuan Chen Link: https://patch.msgid.link/20260420194846.1089595-1-kuniyu@google.com --- net/core/sock_map.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 02a68be3002a..99e3789492a0 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1630,18 +1630,23 @@ void sock_map_unhash(struct sock *sk) void (*saved_unhash)(struct sock *sk); struct sk_psock *psock; +retry: rcu_read_lock(); psock = sk_psock(sk); if (unlikely(!psock)) { rcu_read_unlock(); saved_unhash = READ_ONCE(sk->sk_prot)->unhash; + if (unlikely(saved_unhash == sock_map_unhash)) + goto retry; } else { saved_unhash = psock->saved_unhash; sock_map_remove_links(sk, psock); rcu_read_unlock(); + + if (WARN_ON_ONCE(saved_unhash == sock_map_unhash)) + return; } - if (WARN_ON_ONCE(saved_unhash == sock_map_unhash)) - return; + if (saved_unhash) saved_unhash(sk); } @@ -1652,20 +1657,25 @@ void sock_map_destroy(struct sock *sk) void (*saved_destroy)(struct sock *sk); struct sk_psock *psock; +retry: rcu_read_lock(); psock = sk_psock_get(sk); if (unlikely(!psock)) { rcu_read_unlock(); saved_destroy = READ_ONCE(sk->sk_prot)->destroy; + if (unlikely(saved_destroy == sock_map_destroy)) + goto retry; } else { saved_destroy = psock->saved_destroy; sock_map_remove_links(sk, psock); rcu_read_unlock(); sk_psock_stop(psock); sk_psock_put(sk, psock); + + if (WARN_ON_ONCE(saved_destroy == sock_map_destroy)) + return; } - if (WARN_ON_ONCE(saved_destroy == sock_map_destroy)) - return; + if (saved_destroy) saved_destroy(sk); } @@ -1676,32 +1686,33 @@ void sock_map_close(struct sock *sk, long timeout) void (*saved_close)(struct sock *sk, long timeout); struct sk_psock *psock; +retry: lock_sock(sk); rcu_read_lock(); - psock = sk_psock(sk); + psock = sk_psock_get(sk); if (likely(psock)) { saved_close = psock->saved_close; sock_map_remove_links(sk, psock); - psock = sk_psock_get(sk); - if (unlikely(!psock)) - goto no_psock; rcu_read_unlock(); sk_psock_stop(psock); release_sock(sk); cancel_delayed_work_sync(&psock->work); sk_psock_put(sk, psock); + + /* Make sure we do not recurse. This is a bug. + * Leak the socket instead of crashing on a stack overflow. + */ + if (WARN_ON_ONCE(saved_close == sock_map_close)) + return; } else { saved_close = READ_ONCE(sk->sk_prot)->close; -no_psock: rcu_read_unlock(); release_sock(sk); + + if (unlikely(saved_close == sock_map_close)) + goto retry; } - /* Make sure we do not recurse. This is a bug. - * Leak the socket instead of crashing on a stack overflow. - */ - if (WARN_ON_ONCE(saved_close == sock_map_close)) - return; saved_close(sk, timeout); } EXPORT_SYMBOL_GPL(sock_map_close); -- cgit v1.2.3 From 1081de1accb2b224516cca7071122c59532d0b22 Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Thu, 23 Apr 2026 11:38:32 -0700 Subject: bpf: Fix NULL pointer dereference in bpf_skb_fib_lookup() When tot_len is not provided by the user, bpf_skb_fib_lookup() resolves the FIB result's output device via dev_get_by_index_rcu() to check skb forwardability and fill in mtu_result. The returned pointer is dereferenced without a NULL check. If the device is concurrently unregistered, dev_get_by_index_rcu() returns NULL and is_skb_forwardable() crashes at dev->flags: KASAN: null-ptr-deref in range [0x00000000000000b0-0x00000000000000b7] Call Trace: is_skb_forwardable (include/linux/netdevice.h:4365) bpf_skb_fib_lookup (net/core/filter.c:6446) bpf_prog_test_run_skb (net/bpf/test_run.c) __sys_bpf (kernel/bpf/syscall.c) Add the missing NULL check, returning -ENODEV to be consistent with how bpf_ipv4_fib_lookup() and bpf_ipv6_fib_lookup() handle the same condition. Fixes: 4f74fede40df ("bpf: Add mtu checking to FIB forwarding helper") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Signed-off-by: Martin KaFai Lau Acked-by: Paul Chaignon Link: https://patch.msgid.link/20260423183831.1325480-2-bestswngs@gmail.com --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index 2914f5330310..bc96c18df4e0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6473,6 +6473,8 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, * against MTU of FIB lookup resulting net_device */ dev = dev_get_by_index_rcu(net, params->ifindex); + if (unlikely(!dev)) + return -ENODEV; if (!is_skb_forwardable(dev, skb)) rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; -- cgit v1.2.3 From a0e6ae45af17e8b27958830595799c702ffbab8d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 7 Apr 2026 21:27:02 +0100 Subject: KVM: arm64: vgic: Fix IIDR revision field extracted from wrong value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The uaccess write handlers for GICD_IIDR in both GICv2 and GICv3 extract the revision field from 'reg' (the current IIDR value read back from the emulated distributor) instead of 'val' (the value userspace is trying to write). This means userspace can never actually change the implementation revision — the extracted value is always the current one. Fix the FIELD_GET to use 'val' so that userspace can select a different revision for migration compatibility. Fixes: 49a1a2c70a7f ("KVM: arm64: vgic-v3: Advertise GICR_CTLR.{IR, CES} as a new GICD_IIDR revision") Signed-off-by: David Woodhouse Link: https://patch.msgid.link/20260407210949.2076251-2-dwmw2@infradead.org Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/kvm/vgic/vgic-mmio-v2.c | 2 +- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c index 406845b3117c..0643e333db35 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -91,7 +91,7 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu, * migration from old kernels to new kernels with legacy * userspace. */ - reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg); + reg = FIELD_GET(GICD_IIDR_REVISION_MASK, val); switch (reg) { case KVM_VGIC_IMP_REV_2: case KVM_VGIC_IMP_REV_3: diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 89edb84d1ac6..5913a20d8301 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -194,7 +194,7 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu, if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK) return -EINVAL; - reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg); + reg = FIELD_GET(GICD_IIDR_REVISION_MASK, val); switch (reg) { case KVM_VGIC_IMP_REV_2: case KVM_VGIC_IMP_REV_3: -- cgit v1.2.3 From 480ea48cad873b49a1fabd07c0847c3cf1c32286 Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Wed, 8 Apr 2026 11:41:18 +0000 Subject: KVM: arm64: Reject non compliant SMCCC function calls in pKVM Prevent the propagation of a function-id that has the top bits set since this is not compliant with the SMCCC spec and can overlap with the already known function-id decoders. (eg. if we invoke an smc with 0xffffffffc4000012 it will be decoded as a PSCI reset call). Instead, make it clear that we don't support it and return an error. Signed-off-by: Sebastian Ene Link: https://patch.msgid.link/20260408114118.422604-1-sebastianene@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 1de9c70599c6..06db299c37a8 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -805,6 +805,10 @@ static void handle_host_smc(struct kvm_cpu_context *host_ctxt) } func_id &= ~ARM_SMCCC_CALL_HINTS; + if (upper_32_bits(func_id)) { + cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED; + goto exit_skip_instr; + } handled = kvm_host_psci_handler(host_ctxt, func_id); if (!handled) -- cgit v1.2.3 From 7fe2cd4e1a3ad230d8fcc00cc99c4bcce4412a75 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 24 Apr 2026 09:49:03 +0100 Subject: KVM: arm64: Fix FEAT_Debugv8p9 to check DebugVer, not PMUVer FEAT_Debugv8p9 is incorrectly defined against ID_AA64DFR0_EL1.PMUVer instead of ID_AA64DFR0_EL1.DebugVer. All three consumers of the macro gate features that are architecturally tied to FEAT_Debugv8p9 (DebugVer = 0b1011, DDI0487 M.b A2.2.10): - HDFGRTR2_EL2.nMDSELR_EL1, HDFGWTR2_EL2.nMDSELR_EL1: MDSELR_EL1 is present only when FEAT_Debugv8p9 is implemented (D24.3.21). - MDCR_EL2.EBWE: the Extended Breakpoint and Watchpoint Enable bit is RES0 unless FEAT_Debugv8p9 is implemented (D24.3.17). Neither register has any dependency on PMUVer. FEAT_Debugv8p9 and FEAT_PMUv3p9 are independent. Per DDI0487 M.b A2.2.10, FEAT_Debugv8p9 is unconditionally mandatory from Armv8.9, whereas FEAT_PMUv3p9 is mandatory only when FEAT_PMUv3 is implemented. An Armv8.9 CPU without a PMU has DebugVer = 0b1011 but PMUVer = 0b0000, so the wrong field check would cause KVM to incorrectly treat EBWE and MDSELR_EL1 as RES0 on such hardware. Fixes: 4bc0fe089840 ("KVM: arm64: Add sanitisation for FEAT_FGT2 registers") Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-2-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/kvm/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index f35b8dddd7c1..093290b366e6 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -192,7 +192,7 @@ struct reg_feat_map_desc { #define FEAT_SRMASK ID_AA64MMFR4_EL1, SRMASK, IMP #define FEAT_PoPS ID_AA64MMFR4_EL1, PoPS, IMP #define FEAT_PFAR ID_AA64PFR1_EL1, PFAR, IMP -#define FEAT_Debugv8p9 ID_AA64DFR0_EL1, PMUVer, V3P9 +#define FEAT_Debugv8p9 ID_AA64DFR0_EL1, DebugVer, V8P9 #define FEAT_PMUv3_SS ID_AA64DFR0_EL1, PMSS, IMP #define FEAT_SEBEP ID_AA64DFR0_EL1, SEBEP, IMP #define FEAT_EBEP ID_AA64DFR1_EL1, EBEP, IMP -- cgit v1.2.3 From 2a623408112626d2625a6f00aed665861d59665c Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 24 Apr 2026 09:49:04 +0100 Subject: KVM: arm64: Fix typo in feature check comments Revists -> Revisit. The following patch will add another similar line. No functional change intended. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-3-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/config.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index 093290b366e6..a722ea178f68 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -283,7 +283,7 @@ static bool feat_anerr(struct kvm *kvm) static bool feat_sme_smps(struct kvm *kvm) { /* - * Revists this if KVM ever supports SME -- this really should + * Revisit this if KVM ever supports SME -- this really should * look at the guest's view of SMIDR_EL1. Funnily enough, this * is not captured in the JSON file, but only as a note in the * ARM ARM. @@ -295,7 +295,7 @@ static bool feat_sme_smps(struct kvm *kvm) static bool feat_spe_fds(struct kvm *kvm) { /* - * Revists this if KVM ever supports SPE -- this really should + * Revisit this if KVM ever supports SPE -- this really should * look at the guest's view of PMSIDR_EL1. */ return (kvm_has_feat(kvm, FEAT_SPEv1p4) && @@ -305,7 +305,7 @@ static bool feat_spe_fds(struct kvm *kvm) static bool feat_trbe_mpam(struct kvm *kvm) { /* - * Revists this if KVM ever supports both MPAM and TRBE -- + * Revisit this if KVM ever supports both MPAM and TRBE -- * this really should look at the guest's view of TRBIDR_EL1. */ return (kvm_has_feat(kvm, FEAT_TRBE) && -- cgit v1.2.3 From 08d715338287a1affb4c7ad5733decef4558a5c8 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 24 Apr 2026 09:49:05 +0100 Subject: KVM: arm64: Fix FEAT_SPE_FnE to use PMSIDR_EL1.FnE, not PMSVer FEAT_SPE_FnE is architecturally detected via PMSIDR_EL1.FnE [6], not ID_AA64DFR0_EL1.PMSVer. The FEAT_X macro form (register, field, value) cannot encode a PMSIDR_EL1-based feature, so FEAT_SPE_FnE was defined identically to FEAT_SPEv1p2 (ID_AA64DFR0_EL1, PMSVer, V1P2), producing a duplicate that used PMSVer >= V1P2 as a proxy. Replace the macro with feat_spe_fne(), following the same pattern as the sibling feat_spe_fds(): guard on FEAT_SPEv1p2 and read PMSIDR_EL1.FnE [6] directly. Wire the two NEEDS_FEAT consumers to use the new function. Remove the now-unused FEAT_SPE_FnE macro. Fixes: 63d423a7635b ("KVM: arm64: Switch to table-driven FGU configuration") Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-4-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/kvm/config.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index a722ea178f68..0622162b089e 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -131,7 +131,6 @@ struct reg_feat_map_desc { } #define FEAT_SPE ID_AA64DFR0_EL1, PMSVer, IMP -#define FEAT_SPE_FnE ID_AA64DFR0_EL1, PMSVer, V1P2 #define FEAT_BRBE ID_AA64DFR0_EL1, BRBE, IMP #define FEAT_TRC_SR ID_AA64DFR0_EL1, TraceVer, IMP #define FEAT_PMUv3 ID_AA64DFR0_EL1, PMUVer, IMP @@ -302,6 +301,16 @@ static bool feat_spe_fds(struct kvm *kvm) (read_sysreg_s(SYS_PMSIDR_EL1) & PMSIDR_EL1_FDS)); } +static bool feat_spe_fne(struct kvm *kvm) +{ + /* + * Revisit this if KVM ever supports SPE -- this really should + * look at the guest's view of PMSIDR_EL1. + */ + return (kvm_has_feat(kvm, FEAT_SPEv1p2) && + (read_sysreg_s(SYS_PMSIDR_EL1) & PMSIDR_EL1_FnE)); +} + static bool feat_trbe_mpam(struct kvm *kvm) { /* @@ -537,7 +546,7 @@ static const struct reg_bits_to_feat_map hdfgrtr_feat_map[] = { HDFGRTR_EL2_PMBPTR_EL1 | HDFGRTR_EL2_PMBLIMITR_EL1, FEAT_SPE), - NEEDS_FEAT(HDFGRTR_EL2_nPMSNEVFR_EL1, FEAT_SPE_FnE), + NEEDS_FEAT(HDFGRTR_EL2_nPMSNEVFR_EL1, feat_spe_fne), NEEDS_FEAT(HDFGRTR_EL2_nBRBDATA | HDFGRTR_EL2_nBRBCTL | HDFGRTR_EL2_nBRBIDR, @@ -605,7 +614,7 @@ static const struct reg_bits_to_feat_map hdfgwtr_feat_map[] = { HDFGWTR_EL2_PMBPTR_EL1 | HDFGWTR_EL2_PMBLIMITR_EL1, FEAT_SPE), - NEEDS_FEAT(HDFGWTR_EL2_nPMSNEVFR_EL1, FEAT_SPE_FnE), + NEEDS_FEAT(HDFGWTR_EL2_nPMSNEVFR_EL1, feat_spe_fne), NEEDS_FEAT(HDFGWTR_EL2_nBRBDATA | HDFGWTR_EL2_nBRBCTL, FEAT_BRBE), -- cgit v1.2.3 From d89fdda7dd8a488f922e1175e6782f781ba8a23b Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 24 Apr 2026 09:49:06 +0100 Subject: KVM: arm64: Fix kvm_vcpu_initialized() macro parameter The macro is defined with parameter 'v' but the body references the literal token 'vcpu' instead, causing it to silently operate on whatever 'vcpu' resolves to in the caller's scope rather than the value passed by the caller. All current call sites happen to use a variable named 'vcpu', so the bug is latent. Fixes: e016333745c7 ("KVM: arm64: Only reset vCPU-scoped feature ID regs once") Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-5-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 44211e86f5eb..65eead8362e0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1545,7 +1545,7 @@ static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature) #define kvm_vcpu_has_feature(k, f) __vcpu_has_feature(&(k)->arch, (f)) #define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f)) -#define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED) +#define kvm_vcpu_initialized(v) vcpu_get_flag(v, VCPU_INITIALIZED) int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM -- cgit v1.2.3 From 73b9c1e5da84cd69b1a86e374e450817cd051371 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 24 Apr 2026 09:49:07 +0100 Subject: KVM: arm64: Fix pin leak and publication ordering in __pkvm_init_vcpu() Two bugs exist in the vCPU initialisation path: 1. If a check fails after hyp_pin_shared_mem() succeeds, the cleanup path jumps to 'unlock' without calling unpin_host_vcpu() or unpin_host_sve_state(), permanently leaking pin references on the host vCPU and SVE state pages. Extract a register_hyp_vcpu() helper that performs the checks and the store. When register_hyp_vcpu() returns an error, call unpin_host_vcpu() and unpin_host_sve_state() inline before falling through to the existing 'unlock' label. 2. register_hyp_vcpu() publishes the new vCPU pointer into 'hyp_vm->vcpus[]' with a bare store, allowing a concurrent caller of pkvm_load_hyp_vcpu() to observe a partially initialised vCPU object. Ensure the store uses smp_store_release() and the load uses smp_load_acquire(). While 'vm_table_lock' currently serialises the store and the load, these barriers ensure the reader sees the fully initialised 'hyp_vcpu' object even if there were a lockless path or if the lock's own ordering guarantees were insufficient for nested object initialization. Fixes: 49af6ddb8e5c ("KVM: arm64: Add infrastructure to create and track pKVM instances at EL2") Reported-by: Ben Simner Co-developed-by: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-6-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 7ed96d64d611..e7496eb85628 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -266,7 +266,8 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle, if (hyp_vm->kvm.created_vcpus <= vcpu_idx) goto unlock; - hyp_vcpu = hyp_vm->vcpus[vcpu_idx]; + /* Pairs with smp_store_release() in register_hyp_vcpu(). */ + hyp_vcpu = smp_load_acquire(&hyp_vm->vcpus[vcpu_idx]); if (!hyp_vcpu) goto unlock; @@ -860,12 +861,30 @@ err_unpin_kvm: * the page-aligned size of 'struct pkvm_hyp_vcpu'. * Return 0 on success, negative error code on failure. */ +static int register_hyp_vcpu(struct pkvm_hyp_vm *hyp_vm, + struct pkvm_hyp_vcpu *hyp_vcpu) +{ + unsigned int idx = hyp_vcpu->vcpu.vcpu_idx; + + if (idx >= hyp_vm->kvm.created_vcpus) + return -EINVAL; + + if (hyp_vm->vcpus[idx]) + return -EINVAL; + + /* + * Ensure the hyp_vcpu is initialised before publishing it to + * the vCPU-load path via 'hyp_vm->vcpus[]'. + */ + smp_store_release(&hyp_vm->vcpus[idx], hyp_vcpu); + return 0; +} + int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, unsigned long vcpu_hva) { struct pkvm_hyp_vcpu *hyp_vcpu; struct pkvm_hyp_vm *hyp_vm; - unsigned int idx; int ret; hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu)); @@ -884,18 +903,11 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, if (ret) goto unlock; - idx = hyp_vcpu->vcpu.vcpu_idx; - if (idx >= hyp_vm->kvm.created_vcpus) { - ret = -EINVAL; - goto unlock; - } - - if (hyp_vm->vcpus[idx]) { - ret = -EINVAL; - goto unlock; + ret = register_hyp_vcpu(hyp_vm, hyp_vcpu); + if (ret) { + unpin_host_vcpu(host_vcpu); + unpin_host_sve_state(hyp_vcpu); } - - hyp_vm->vcpus[idx] = hyp_vcpu; unlock: hyp_spin_unlock(&vm_table_lock); -- cgit v1.2.3 From 5bb0aed57ba944f8c201e4e82ec066e0187e0f85 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 24 Apr 2026 09:49:08 +0100 Subject: KVM: arm64: Fix initialisation order in __pkvm_init_finalise() fix_host_ownership() walks the hypervisor's stage-1 page-table to adjust the host's stage-2 accordingly. Any such adjustment that requires cache maintenance operations depends on the per-CPU hyp fixmap being present. However, fix_host_ownership() is currently called before fix_hyp_pgtable_refcnt() and hyp_create_fixmap(), so the fixmap does not yet exist when it runs. This is benign today because the host stage-2 starts empty and no CMOs are needed, but it becomes a latent crash as soon as fix_host_ownership() is extended to operate on a non-empty page-table. Reorder the calls so that fix_hyp_pgtable_refcnt() and hyp_create_fixmap() complete before fix_host_ownership() is invoked. Fixes: 0d16d12eb26e ("KVM: arm64: Fix-up hyp stage-1 refcounts for all pages mapped at EL2") Signed-off-by: Quentin Perret Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-7-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/kvm/hyp/nvhe/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index d8e5b563fd3d..d461981616d9 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -312,15 +312,15 @@ void __noreturn __pkvm_init_finalise(void) }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; - ret = fix_host_ownership(); + ret = fix_hyp_pgtable_refcnt(); if (ret) goto out; - ret = fix_hyp_pgtable_refcnt(); + ret = hyp_create_fixmap(); if (ret) goto out; - ret = hyp_create_fixmap(); + ret = fix_host_ownership(); if (ret) goto out; -- cgit v1.2.3 From 4ce98bf0865c349e7026ad9c14f48da264920953 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 23 Apr 2026 17:36:07 +0100 Subject: KVM: arm64: Wake-up from WFI when iqrchip is in userspace It appears that there is nothing in the wake-up path that evaluates whether the in-kernel interrupts are pending unless we have a vgic. This means that the userspace irqchip support has been broken for about four years, and nobody noticed. It was also broken before as we wouldn't wake-up on a PMU interrupt, but hey, who cares... It is probably time to remove the feature altogether, because it was a terrible idea 10 years ago, and it still is. Fixes: b57de4ffd7c6d ("KVM: arm64: Simplify kvm_cpu_has_pending_timer()") Link: https://patch.msgid.link/20260423163607.486345-1-maz@kernel.org Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/kvm/arm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 176cbe8baad3..8bb2c7422cc8 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -824,6 +824,10 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF | HCR_VSE); + irq_lines |= (!irqchip_in_kernel(v->kvm) && + (kvm_timer_should_notify_user(v) || + kvm_pmu_should_notify_user(v))); + return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) && !kvm_arm_vcpu_stopped(v) && !v->arch.pause); } -- cgit v1.2.3 From 095a8b0ad3c3b5cdc3850d961adb8a8f735220bb Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 20 Apr 2026 14:57:15 -0700 Subject: drm/amdgpu: fix zero-size GDS range init on RDNA4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RDNA4 (GFX 12) hardware removes the GDS, GWS, and OA on-chip memory resources. The gfx_v12_0 initialisation code correctly leaves adev->gds.gds_size, adev->gds.gws_size, and adev->gds.oa_size at zero to reflect this. amdgpu_ttm_init() unconditionally calls amdgpu_ttm_init_on_chip() for each of these resources regardless of size. When the size is zero, amdgpu_ttm_init_on_chip() forwards the call to ttm_range_man_init(), which calls drm_mm_init(mm, 0, 0). drm_mm_init() immediately fires DRM_MM_BUG_ON(start + size <= start) -- trivially true when size is zero -- crashing the kernel during modprobe of amdgpu on an RX 9070 XT. Guard against this by returning 0 early from amdgpu_ttm_init_on_chip() when size_in_page is zero. This skips TTM resource manager registration for hardware resources that are absent, without affecting any other GPU type. DRM_MM_BUG_ON() only asserts if CONFIG_DRM_DEBUG_MM is enabled in the kernel config. This is apparently rarely enabled as these chips have been in the market for over a year and this issue was only reported now. Link: https://lore.kernel.org/all/bug-221376-2300@https.bugzilla.kernel.org%2F/ Link: https://bugzilla.kernel.org/show_bug.cgi?id=221376 Oops-Analysis: http://oops.fenrus.org/reports/bugzilla.korg/221376/report.html Assisted-by: GitHub Copilot:Claude Sonnet 4.6 linux-kernel-oops-x86. Signed-off-by: Arjan van de Ven Cc: Alex Deucher Cc: "Christian König" Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Alex Deucher (cherry picked from commit 5719ce5865279cad4fd5f01011fe037168503f2d) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0dc68fb9d88e..3d2e00efc741 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -75,6 +75,9 @@ static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev, unsigned int type, uint64_t size_in_page) { + if (!size_in_page) + return 0; + return ttm_range_man_init(&adev->mman.bdev, type, false, size_in_page); } -- cgit v1.2.3 From 4867cef03b58ca53651593842efcfd0587a707f2 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Wed, 15 Apr 2026 17:45:10 -0400 Subject: drm/amd/display: Restore analog connector support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] The analog connector support was accidentally removed, causing a crash when connecting an analog monitor. [How] This patch restores the functions and pointers required for proper analog and DP bridge encoder support on legacy GPUs. V2: Restore the external encoder control functions. V3: - Restore BIOS parser external encoder DAC load detection - Restore stream initialization and source selection changes Fixes: e56e3cff2a1b ("drm/amd/display: Sync dcn42 with DC 3.2.373") Cc: Timur Kristóf Signed-off-by: Roman Li Reviewed-by: Alex Hung Reviewed-by: Timur Kristóf Tested-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit cea8349e4494d2892ea57eef3fe4a8987464a876) --- drivers/gpu/drm/amd/display/dc/bios/bios_parser.c | 11 ++- drivers/gpu/drm/amd/display/dc/dc_bios_types.h | 3 +- .../drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 94 ++++++++++++++-------- 3 files changed, 71 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index dd362071a6c9..e270b1d2457c 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -794,11 +794,13 @@ static enum bp_result bios_parser_external_encoder_control( static enum bp_result bios_parser_dac_load_detection( struct dc_bios *dcb, - enum engine_id engine_id) + enum engine_id engine_id, + struct graphics_object_id ext_enc_id) { struct bios_parser *bp = BP_FROM_DCB(dcb); struct dc_context *ctx = dcb->ctx; struct bp_load_detection_parameters bp_params = {0}; + struct bp_external_encoder_control ext_cntl = {0}; enum bp_result bp_result = BP_RESULT_UNSUPPORTED; uint32_t bios_0_scratch; uint32_t device_id_mask = 0; @@ -824,6 +826,13 @@ static enum bp_result bios_parser_dac_load_detection( bp_params.engine_id = engine_id; bp_result = bp->cmd_tbl.dac_load_detection(bp, &bp_params); + } else if (ext_enc_id.id) { + if (!bp->cmd_tbl.external_encoder_control) + return BP_RESULT_UNSUPPORTED; + + ext_cntl.action = EXTERNAL_ENCODER_CONTROL_DAC_LOAD_DETECT; + ext_cntl.encoder_id = ext_enc_id; + bp_result = bp->cmd_tbl.external_encoder_control(bp, &ext_cntl); } if (bp_result != BP_RESULT_OK) diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h index 6f96c5cf39fe..526f71616f94 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h @@ -102,7 +102,8 @@ struct dc_vbios_funcs { struct bp_external_encoder_control *cntl); enum bp_result (*dac_load_detection)( struct dc_bios *bios, - enum engine_id engine_id); + enum engine_id engine_id, + struct graphics_object_id ext_enc_id); enum bp_result (*transmitter_control)( struct dc_bios *bios, struct bp_transmitter_control *cntl); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 5273ca09fe12..f0abbb7c2cb2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -665,16 +665,45 @@ void dce110_update_info_frame(struct pipe_ctx *pipe_ctx) } static void -dce110_dac_encoder_control(struct pipe_ctx *pipe_ctx, bool enable) +dce110_external_encoder_control(enum bp_external_encoder_control_action action, + struct dc_link *link, + struct dc_crtc_timing *timing) { - struct dc_link *link = pipe_ctx->stream->link; + struct dc *dc = link->ctx->dc; struct dc_bios *bios = link->ctx->dc_bios; - struct bp_encoder_control encoder_control = {0}; + const struct dc_link_settings *link_settings = &link->cur_link_settings; + enum bp_result bp_result = BP_RESULT_OK; + struct bp_external_encoder_control ext_cntl = { + .action = action, + .connector_obj_id = link->link_enc->connector, + .encoder_id = link->ext_enc_id, + .lanes_number = link_settings->lane_count, + .link_rate = link_settings->link_rate, + + /* Use signal type of the real link encoder, ie. DP */ + .signal = link->connector_signal, + + /* We don't know the timing yet when executing the SETUP action, + * so use a reasonably high default value. It seems that ENABLE + * can change the actual pixel clock but doesn't work with higher + * pixel clocks than what SETUP was called with. + */ + .pixel_clock = timing ? timing->pix_clk_100hz / 10 : 300000, + .color_depth = timing ? timing->display_color_depth : COLOR_DEPTH_888, + }; + DC_LOGGER_INIT(dc->ctx); - encoder_control.action = enable ? ENCODER_CONTROL_ENABLE : ENCODER_CONTROL_DISABLE; - encoder_control.engine_id = link->link_enc->analog_engine; - encoder_control.pixel_clock = pipe_ctx->stream->timing.pix_clk_100hz / 10; - bios->funcs->encoder_control(bios, &encoder_control); + bp_result = bios->funcs->external_encoder_control(bios, &ext_cntl); + + if (bp_result != BP_RESULT_OK) + DC_LOG_ERROR("Failed to execute external encoder action: 0x%x\n", action); +} + +static void +dce110_prepare_ddc(struct dc_link *link) +{ + if (link->ext_enc_id.id) + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_DDC_SETUP, link, NULL); } static bool @@ -684,7 +713,8 @@ dce110_dac_load_detect(struct dc_link *link) struct link_encoder *link_enc = link->link_enc; enum bp_result bp_result; - bp_result = bios->funcs->dac_load_detection(bios, link_enc->analog_engine); + bp_result = bios->funcs->dac_load_detection( + bios, link_enc->analog_engine, link->ext_enc_id); return bp_result == BP_RESULT_OK; } @@ -700,7 +730,6 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx) uint32_t early_control = 0; struct timing_generator *tg = pipe_ctx->stream_res.tg; - link_hwss->setup_stream_attribute(pipe_ctx); link_hwss->setup_stream_encoder(pipe_ctx); dc->hwss.update_info_frame(pipe_ctx); @@ -719,8 +748,8 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx) tg->funcs->set_early_control(tg, early_control); - if (dc_is_rgb_signal(pipe_ctx->stream->signal)) - dce110_dac_encoder_control(pipe_ctx, true); + if (link->ext_enc_id.id) + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_ENABLE, link, timing); } static enum bp_result link_transmitter_control( @@ -1219,8 +1248,8 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) link_enc->transmitter - TRANSMITTER_UNIPHY_A); } - if (dc_is_rgb_signal(pipe_ctx->stream->signal)) - dce110_dac_encoder_control(pipe_ctx, false); + if (link->ext_enc_id.id) + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_DISABLE, link, NULL); } void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, @@ -1603,22 +1632,6 @@ static enum dc_status dce110_enable_stream_timing( return DC_OK; } -static void -dce110_select_crtc_source(struct pipe_ctx *pipe_ctx) -{ - struct dc_link *link = pipe_ctx->stream->link; - struct dc_bios *bios = link->ctx->dc_bios; - struct bp_crtc_source_select crtc_source_select = {0}; - enum engine_id engine_id = link->link_enc->preferred_engine; - - if (dc_is_rgb_signal(pipe_ctx->stream->signal)) - engine_id = link->link_enc->analog_engine; - crtc_source_select.controller_id = CONTROLLER_ID_D0 + pipe_ctx->stream_res.tg->inst; - crtc_source_select.color_depth = pipe_ctx->stream->timing.display_color_depth; - crtc_source_select.engine_id = engine_id; - crtc_source_select.sink_signal = pipe_ctx->stream->signal; - bios->funcs->select_crtc_source(bios, &crtc_source_select); -} enum dc_status dce110_apply_single_controller_ctx_to_hw( struct pipe_ctx *pipe_ctx, @@ -1639,10 +1652,6 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( hws->funcs.disable_stream_gating(dc, pipe_ctx); } - if (pipe_ctx->stream->signal == SIGNAL_TYPE_RGB) { - dce110_select_crtc_source(pipe_ctx); - } - if (pipe_ctx->stream_res.audio != NULL) { struct audio_output audio_output = {0}; @@ -1722,8 +1731,7 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( pipe_ctx->stream_res.tg->funcs->set_static_screen_control( pipe_ctx->stream_res.tg, event_triggers, 2); - if (!dc_is_virtual_signal(pipe_ctx->stream->signal) && - !dc_is_rgb_signal(pipe_ctx->stream->signal)) + if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg( pipe_ctx->stream_res.stream_enc, pipe_ctx->stream_res.tg->inst); @@ -3376,6 +3384,15 @@ void dce110_enable_tmds_link_output(struct dc_link *link, link->phy_state.symclk_state = SYMCLK_ON_TX_ON; } +static void dce110_enable_analog_link_output( + struct dc_link *link, + uint32_t pix_clk_100hz) +{ + link->link_enc->funcs->enable_analog_output( + link->link_enc, + pix_clk_100hz); +} + void dce110_enable_dp_link_output( struct dc_link *link, const struct link_resource *link_res, @@ -3423,6 +3440,11 @@ void dce110_enable_dp_link_output( } } + if (link->ext_enc_id.id) { + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_INIT, link, NULL); + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_SETUP, link, NULL); + } + if (dc->link_srv->dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) { if (dc->clk_mgr->funcs->notify_link_rate_change) dc->clk_mgr->funcs->notify_link_rate_change(dc->clk_mgr, link); @@ -3513,8 +3535,10 @@ static const struct hw_sequencer_funcs dce110_funcs = { .enable_lvds_link_output = dce110_enable_lvds_link_output, .enable_tmds_link_output = dce110_enable_tmds_link_output, .enable_dp_link_output = dce110_enable_dp_link_output, + .enable_analog_link_output = dce110_enable_analog_link_output, .disable_link_output = dce110_disable_link_output, .dac_load_detect = dce110_dac_load_detect, + .prepare_ddc = dce110_prepare_ddc, }; static const struct hwseq_private_funcs dce110_private_funcs = { -- cgit v1.2.3 From 508babf310365f1107a2e8831c267c292a286818 Mon Sep 17 00:00:00 2001 From: Hongyan Xu Date: Wed, 22 Apr 2026 20:38:17 +0800 Subject: drm/amdgpu: avoid double drm_exec_fini() in userq validate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When new_addition is true, amdgpu_userq_vm_validate() calls drm_exec_fini(&exec) before iterating over the collected HMM ranges and calling amdgpu_ttm_tt_get_user_pages(). If amdgpu_ttm_tt_get_user_pages() fails in that path, the code jumps to unlock_all and calls drm_exec_fini(&exec) a second time on the same exec object. drm_exec_fini() is not idempotent: it frees exec->objects and may also drop exec->contended and finalize the ww acquire context. Route that error path directly to the range cleanup once exec has already been finalized. Fixes: 42f148788469 ("drm/amdgpu/userqueue: validate userptrs for userqueues") Issue found using a prototype static analysis tool and confirmed by code review. Reviewed-by: Christian König Signed-off-by: Hongyan Xu Signed-off-by: Slavin Liu <220245772@seu.edu.cn> Signed-off-by: Alex Deucher (cherry picked from commit 2802952e4a07306da6ebe813ff1acacc5691851a) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index d5abf785ca17..a15ca3e35344 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -1187,7 +1187,7 @@ retry_lock: bo = range->bo; ret = amdgpu_ttm_tt_get_user_pages(bo, range); if (ret) - goto unlock_all; + goto free_ranges; } invalidated = true; @@ -1214,6 +1214,7 @@ retry_lock: unlock_all: drm_exec_fini(&exec); +free_ranges: xa_for_each(&xa, tmp_key, range) { if (!range) continue; -- cgit v1.2.3 From 87612bab9656a63affa0e2788e0d7a4a1dffa89e Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Wed, 10 Dec 2025 14:15:08 -0600 Subject: amdkfd: Only ignore -ENOENT for KFD init failuires When compiled without CONFIG_HSA_AMD KFD will return -ENOENT. As other errors will cause KFD functionality issues this is the only error code that should be ignored at init. Reviewed-by: Kent Russell Signed-off-by: Mario Limonciello (AMD) Signed-off-by: Alex Deucher (cherry picked from commit 4259a25341abf77939767215706f4e3cfd4b73b8) --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e47921e2a9af..46aae3fad4bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -3158,8 +3158,10 @@ static int __init amdgpu_init(void) amdgpu_register_atpx_handler(); amdgpu_acpi_detect(); - /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */ - amdgpu_amdkfd_init(); + /* Ignore KFD init failures when CONFIG_HSA_AMD is not set. */ + r = amdgpu_amdkfd_init(); + if (r && r != -ENOENT) + goto error_fence; if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) { add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); -- cgit v1.2.3 From 36d65da7570bf72ce28504fa9a81abfc728e6d96 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Sat, 18 Apr 2026 23:49:30 +0200 Subject: drm/amdgpu/gmc: Fix AMDGPU_GART_PLACEMENT_LOW to not overlap with VRAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the GART placement is set to AMDGPU_GART_PLACEMENT_LOW: Make sure that GART does not overlap with VRAM when VRAM is configured to be in the low address space. Solve this according to the following logic: - When GART fits before VRAM, use zero address for GART - Otherwise, put GART after the end of VRAM, aligned to 4 GiB Previously, I had assumed this was not possible so it was OK to not handle it, but now we got a report from a user who has a board that is configured this way. Fixes: 917f91d8d8e8 ("drm/amdgpu/gmc: add a way to force a particular placement for GART") Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 3d9de5d86a1658cadb311461b001eb1df67263ad) --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 285e217fba04..3d9497d121ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -314,7 +314,10 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, mc->gart_start = max_mc_address - mc->gart_size + 1; break; case AMDGPU_GART_PLACEMENT_LOW: - mc->gart_start = 0; + if (size_bf >= mc->gart_size) + mc->gart_start = 0; + else + mc->gart_start = ALIGN(mc->fb_end, four_gb); break; case AMDGPU_GART_PLACEMENT_BEST_FIT: default: -- cgit v1.2.3 From ccf8932ed8cf4fbfdcd4df2c6b524913691ee700 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 22 Apr 2026 18:41:42 +0800 Subject: drm/amd/pm: fix missing fine-grained dpm table flag on aldebaran Add the missing SMU_DPM_TABLE_FINE_GRAINED flag to aldebaran DPM table. This fixes the pp_dpm_sclk node issue caused by missing flag configuration. Fixes: 7ea1c722fe1d ("drm/amd/pm: Use common helper for aldebaran dpm table") Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit 3427dea3a48ebddb491a26093f3627384b3cb2c2) --- drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 7f386ff0c872..9d8b1227388f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -425,6 +425,7 @@ static int aldebaran_set_default_dpm_table(struct smu_context *smu) dpm_table->dpm_levels[0].enabled = true; dpm_table->dpm_levels[1].value = pptable->GfxclkFmax; dpm_table->dpm_levels[1].enabled = true; + dpm_table->flags |= SMU_DPM_TABLE_FINE_GRAINED; } else { dpm_table->count = 1; dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; -- cgit v1.2.3 From 0ef196a208385b7d7da79f411c161b04e97283e2 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 17 Apr 2026 15:52:45 +0200 Subject: drm/amdgpu: fix AMDGPU_INFO_READ_MMR_REG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There were multiple issues in that code. First of all the order between the reset semaphore and the mm_lock was wrong (e.g. copy_to_user) was called while holding the lock. Then we allocated memory while holding the reset semaphore which is also a pretty big bug and can deadlock. Then we used down_read_trylock() instead of waiting for the reset to finish. Signed-off-by: Christian König Fixes: 9e823f307074 ("drm/amdgpu: Block MMR_READ IOCTL in reset") Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 361b6e6b303d4b691f6c5974d3eaab67ca6dd90e) --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 57 ++++++++++++++------------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 06efce38f323..71272f40feef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -873,68 +873,59 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ? -EFAULT : 0; } case AMDGPU_INFO_READ_MMR_REG: { - int ret = 0; - unsigned int n, alloc_size; - uint32_t *regs; unsigned int se_num = (info->read_mmr_reg.instance >> AMDGPU_INFO_MMR_SE_INDEX_SHIFT) & AMDGPU_INFO_MMR_SE_INDEX_MASK; unsigned int sh_num = (info->read_mmr_reg.instance >> AMDGPU_INFO_MMR_SH_INDEX_SHIFT) & AMDGPU_INFO_MMR_SH_INDEX_MASK; - - if (!down_read_trylock(&adev->reset_domain->sem)) - return -ENOENT; + unsigned int alloc_size; + uint32_t *regs; + int ret; /* set full masks if the userspace set all bits * in the bitfields */ - if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) { + if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) se_num = 0xffffffff; - } else if (se_num >= AMDGPU_GFX_MAX_SE) { - ret = -EINVAL; - goto out; - } + else if (se_num >= AMDGPU_GFX_MAX_SE) + return -EINVAL; - if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) { + if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) sh_num = 0xffffffff; - } else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) { - ret = -EINVAL; - goto out; - } + else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) + return -EINVAL; - if (info->read_mmr_reg.count > 128) { - ret = -EINVAL; - goto out; - } + if (info->read_mmr_reg.count > 128) + return -EINVAL; - regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); - if (!regs) { - ret = -ENOMEM; - goto out; - } + regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), + GFP_KERNEL); + if (!regs) + return -ENOMEM; + down_read(&adev->reset_domain->sem); alloc_size = info->read_mmr_reg.count * sizeof(*regs); - amdgpu_gfx_off_ctrl(adev, false); + ret = 0; for (i = 0; i < info->read_mmr_reg.count; i++) { if (amdgpu_asic_read_register(adev, se_num, sh_num, info->read_mmr_reg.dword_offset + i, ®s[i])) { DRM_DEBUG_KMS("unallowed offset %#x\n", info->read_mmr_reg.dword_offset + i); - kfree(regs); - amdgpu_gfx_off_ctrl(adev, true); ret = -EFAULT; - goto out; + break; } } amdgpu_gfx_off_ctrl(adev, true); - n = copy_to_user(out, regs, min(size, alloc_size)); - kfree(regs); - ret = (n ? -EFAULT : 0); -out: up_read(&adev->reset_domain->sem); + + if (!ret) { + ret = copy_to_user(out, regs, min(size, alloc_size)) + ? -EFAULT : 0; + } + kfree(regs); return ret; } case AMDGPU_INFO_DEV_INFO: { -- cgit v1.2.3 From 13e4cf116dbf7a1fb8123a59bea2c098f30d3736 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Sat, 18 Apr 2026 23:49:31 +0200 Subject: drm/amdgpu/uvd3.1: Don't validate the firmware when already validated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit UVD 3.1 firmware validation seems to always fail after attempting it when it had already been validated. (This works similarly with the VCE 1.0 as well.) Don't attempt repeating the validation when it's already done. This caused issues in situations when the system isn't able to suspend the GPU properly and so the GPU isn't actually powered down. Then amdgpu would fail when calling the IP block resume function. Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/2887 Fixes: bb7978111dd3 ("drm/amdgpu: fix SI UVD firmware validate resume fail") Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 889a2cfd889c4a4dd9d0c89ce9a8e60b78be71dd) --- drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index fea576a7f397..efb3fde919ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -242,6 +242,10 @@ static void uvd_v3_1_mc_resume(struct amdgpu_device *adev) uint64_t addr; uint32_t size; + /* When the keyselect is already set, don't perturb it. */ + if (RREG32(mmUVD_FW_START)) + return; + /* program the VCPU memory controller bits 0-27 */ addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3; @@ -284,6 +288,12 @@ static int uvd_v3_1_fw_validate(struct amdgpu_device *adev) int i; uint32_t keysel = adev->uvd.keyselect; + if (RREG32(mmUVD_FW_START) & UVD_FW_STATUS__PASS_MASK) { + dev_dbg(adev->dev, "UVD keyselect already set: 0x%x (on CPU: 0x%x)\n", + RREG32(mmUVD_FW_START), adev->uvd.keyselect); + return 0; + } + WREG32(mmUVD_FW_START, keysel); for (i = 0; i < 10; ++i) { -- cgit v1.2.3 From fe2b84f9228e2a0903221a4d0d8c350b018e9c0c Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Sat, 18 Apr 2026 23:49:33 +0200 Subject: drm/amdgpu/gfx6: Support harvested SI chips with disabled TCCs (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit fixes amdgpu to work on the Radeon HD 7870 XT which has never worked with the Linux open source drivers before. Some boards have "harvested" chips, meaning that some parts of the chip are disabled and fused, and it's sold for cheaper and under a different marketing name. On a harvested chip, any of the following can be disabled: - CUs (Compute Units) - RBs (Render Backend, aka. ROP) - Memory channels (ie. the chip has a lower bandwidth) - TCCs (ie. less L2 cache) Handle chips with harvested TCCs by patching the registers that configure how TCCs are mapped. If some TCCs are disabled, we need to make sure that the disabled TCCs are not used, and the remaining TCCs are used optimally. TCP_CHAN_STEER_LO/HI control which TCC is used by TCP channels. TCP_ADDR_CONFIG.NUM_TCC_BANKS controls how many channels are used. Note that the TCC configuration is highly relevant to performance. Suboptimal configuration (eg. CHAN_STEER=0) can significantly reduce gaming performance. For optimal performance: - Rely on the CHAN_STEER from the golden registers table, only skip disabled TCCs but keep the mapping order. - Limit NUM_TCC_BANKS to number of active TCCs to avoid thrashing, which performs better than using the same TCC twice. v2: - Also consider CGTS_USER_TCC_DISABLE for disabled TCCs. Link: https://bugs.freedesktop.org/show_bug.cgi?id=60879 Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/2664 Fixes: 2cd46ad22383 ("drm/amdgpu: add graphic pipeline implementation for si v8") Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 00218d15528fab9f6b31241fe5904eea4fcaa30d) --- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 66 +++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 73223d97a87f..ac90d8e9d86a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1571,6 +1571,71 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +/** + * gfx_v6_0_setup_tcc() - setup which TCCs are used + * + * @adev: amdgpu_device pointer + * + * Verify whether the current GPU has any TCCs disabled, + * which can happen when the GPU is harvested and some + * memory channels are disabled, reducing the memory bus width. + * For example, on the Radeon HD 7870 XT (Tahiti LE). + * + * If some TCCs are disabled, we need to make sure that + * the disabled TCCs are not used, and the remaining TCCs + * are used optimally. + * + * TCP_CHAN_STEER_LO/HI control which TCC is used by TCP channels. + * TCP_ADDR_CONFIG.NUM_TCC_BANKS controls how many channels are used. + * + * For optimal performance: + * - Rely on the CHAN_STEER from the golden registers table, + * only skip disabled TCCs but keep the mapping order. + * - Limit NUM_TCC_BANKS to number of active TCCs to avoid thrashing, + * which performs better than using the same TCC twice. + */ +static void gfx_v6_0_setup_tcc(struct amdgpu_device *adev) +{ + u32 i, tcc, tcp_addr_config, num_active_tcc = 0; + u64 chan_steer, patched_chan_steer = 0; + const u32 num_max_tcc = adev->gfx.config.max_texture_channel_caches; + const u32 dis_tcc_mask = + amdgpu_gfx_create_bitmask(num_max_tcc) & + (REG_GET_FIELD(RREG32(mmCGTS_TCC_DISABLE), + CGTS_TCC_DISABLE, TCC_DISABLE) | + REG_GET_FIELD(RREG32(mmCGTS_USER_TCC_DISABLE), + CGTS_USER_TCC_DISABLE, TCC_DISABLE)); + + /* When no TCC is disabled, the golden registers table already has optimal TCC setup */ + if (!dis_tcc_mask) + return; + + /* Each 4-bit nibble contains the index of a TCC used by all TCPs */ + chan_steer = RREG32(mmTCP_CHAN_STEER_LO) | ((u64)RREG32(mmTCP_CHAN_STEER_HI) << 32ull); + + /* Patch the TCP to TCC mapping to skip disabled TCCs */ + for (i = 0; i < num_max_tcc; ++i) { + tcc = (chan_steer >> (u64)(4 * i)) & 0xf; + + if (!((1 << tcc) & dis_tcc_mask)) { + /* Copy enabled TCC indices to the patched register value. */ + patched_chan_steer |= (u64)tcc << (u64)(4 * num_active_tcc); + ++num_active_tcc; + } + } + + WARN_ON(num_active_tcc != num_max_tcc - hweight32(dis_tcc_mask)); + + /* Patch number of TCCs used by TCPs */ + tcp_addr_config = REG_SET_FIELD(RREG32(mmTCP_ADDR_CONFIG), + TCP_ADDR_CONFIG, NUM_TCC_BANKS, + num_active_tcc - 1); + + WREG32(mmTCP_ADDR_CONFIG, tcp_addr_config); + WREG32(mmTCP_CHAN_STEER_HI, upper_32_bits(patched_chan_steer)); + WREG32(mmTCP_CHAN_STEER_LO, lower_32_bits(patched_chan_steer)); +} + static void gfx_v6_0_config_init(struct amdgpu_device *adev) { adev->gfx.config.double_offchip_lds_buf = 0; @@ -1729,6 +1794,7 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev) gfx_v6_0_tiling_mode_table_init(adev); gfx_v6_0_setup_rb(adev); + gfx_v6_0_setup_tcc(adev); gfx_v6_0_setup_spi(adev); -- cgit v1.2.3 From 686e5985d9f5ba29e2fd43d618548039727adee2 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Mon, 20 Apr 2026 10:23:39 +0200 Subject: drm/amdgpu: fix root reservation in amdgpu_vm_handle_fault MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit svm_range_restore_pages might reserve the root bo so it must be called after unreserving it. Fixes: 1b135c6da061 ("drm/amdgpu: extract amdgpu_vm_lock_by_pasid from amdgpu_vm_handle_fault") Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 5cdc219fe86a1720aa4b5b4f42f11913146e6a93) --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 115a7b269af3..9ba9de16a27a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3023,11 +3023,22 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, is_compute_context = vm->is_compute_context; - if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr >> PAGE_SHIFT, ts, write_fault)) { + if (is_compute_context) { + /* Unreserve root since svm_range_restore_pages might try to reserve it. */ + /* TODO: rework svm_range_restore_pages so that this isn't necessary. */ amdgpu_bo_unreserve(root); + + if (!svm_range_restore_pages(adev, pasid, vmid, + node_id, addr >> PAGE_SHIFT, ts, write_fault)) { + amdgpu_bo_unref(&root); + return true; + } amdgpu_bo_unref(&root); - return true; + + /* Re-acquire the VM lock, could be that the VM was freed in between. */ + vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid); + if (!vm) + return false; } addr /= AMDGPU_GPU_PAGE_SIZE; -- cgit v1.2.3 From b56922fc37454633b831a2a04a1537616742977d Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Wed, 22 Apr 2026 09:34:04 -0400 Subject: drm/amdgpu: Only send RMA CPER when threshold is exceeded According to our documentation, the RMA should only occur when the threshold has been exceeded, not met. Fixes: 5028a24aa89a ("drm/amdgpu: Send applicable RMA CPERs at end of RAS init") Signed-off-by: Kent Russell Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher (cherry picked from commit 8bc09a7d0e90ec45a0b4865661cf45cbbce1c3d7) --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index cdf4909592d2..0c57fe259894 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -1950,7 +1950,7 @@ void amdgpu_ras_check_bad_page_status(struct amdgpu_device *adev) if (!control || amdgpu_bad_page_threshold == 0) return; - if (control->ras_num_bad_pages >= ras->bad_page_cnt_threshold) { + if (control->ras_num_bad_pages > ras->bad_page_cnt_threshold) { if (amdgpu_dpm_send_rma_reason(adev)) dev_warn(adev->dev, "Unable to send out-of-band RMA CPER"); else -- cgit v1.2.3 From 47776ac1e3f4a2aefcf7fe7c7e4a11151b676222 Mon Sep 17 00:00:00 2001 From: Shubhankar Milind Sardeshpande Date: Tue, 21 Apr 2026 17:01:21 +0530 Subject: drm/amdgpu: Avoid reset in AMDGPU unload path for APUs with GFX V11 and higher. GFX V11 has GC block as default off IP. Every time AMDGPU driver sends a request to PMFW to unload MP1, PMFW will put GC in reset and power down the voltage.Hence, skipping reset for APUs with GFX V11 or later to avoid reset related failures. Fixes: 34355e61835e ("drm/amdgpu: Fix GFX hang on SteamDeck when amdgpu is reloaded") Reviewed-by: Alex Deucher Signed-off-by: Shubhankar Milind Sardeshpande Signed-off-by: Alex Deucher (cherry picked from commit d0a8cadffc818f51d05bc234d8da1af228bc59a3) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 737ef1ef96a5..66ca043658ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2839,8 +2839,12 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) * that checks whether the PSP is running. A solution for those issues * in the APU is to trigger a GPU reset, but this should be done during * the unload phase to avoid adding boot latency and screen flicker. + * GFX V11 has GC block as default off IP. Every time AMDGPU driver sends + * a request to PMFW to unload MP1, PMFW will put GC in reset and power down + * the voltage. Hence, skipping reset for APUs with GFX V11 or later. */ - if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) { + if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu && + amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 0, 0)) { r = amdgpu_asic_reset(adev); if (r) dev_err(adev->dev, "asic reset on %s failed\n", __func__); -- cgit v1.2.3 From 045e0ff208f0838a246c10204105126611b267a1 Mon Sep 17 00:00:00 2001 From: Alysa Liu Date: Tue, 21 Apr 2026 10:18:28 -0400 Subject: drm/amdkfd: validate SVM ioctl nattr against buffer size Validate nattr field against the buffer size, preventing out-of-bounds buffer access via user-controlled attribute count. Reviewed-by: Amir Shetaia Signed-off-by: Alysa Liu Signed-off-by: Alex Deucher (cherry picked from commit 5eca8bfdfa456c3304ca77523718fe24254c172f) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 26 ++++++++++++++++++++++++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 55ea5145a28a..f829d65a79b4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -1695,6 +1696,16 @@ static int kfd_ioctl_smi_events(struct file *filep, return kfd_smi_event_open(pdd->dev, &args->anon_fd); } +static int kfd_ioctl_svm_validate(void *kdata, unsigned int usize) +{ + struct kfd_ioctl_svm_args *args = kdata; + size_t expected = struct_size(args, attrs, args->nattr); + + if (expected == SIZE_MAX || usize < expected) + return -EINVAL; + return 0; +} + #if IS_ENABLED(CONFIG_HSA_AMD_SVM) static int kfd_ioctl_set_xnack_mode(struct file *filep, @@ -3209,7 +3220,11 @@ static int kfd_ioctl_create_process(struct file *filep, struct kfd_process *p, v #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ - .cmd_drv = 0, .name = #ioctl} + .validate = NULL, .cmd_drv = 0, .name = #ioctl} + +#define AMDKFD_IOCTL_DEF_V(ioctl, _func, _validate, _flags) \ + [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ + .validate = _validate, .cmd_drv = 0, .name = #ioctl} /** Ioctl table */ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { @@ -3306,7 +3321,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, kfd_ioctl_smi_events, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0), + AMDKFD_IOCTL_DEF_V(AMDKFD_IOC_SVM, kfd_ioctl_svm, + kfd_ioctl_svm_validate, 0), AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, kfd_ioctl_set_xnack_mode, 0), @@ -3431,6 +3447,12 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) memset(kdata, 0, usize); } + if (ioctl->validate) { + retcode = ioctl->validate(kdata, usize); + if (retcode) + goto err_i1; + } + retcode = func(filep, process, kdata); if (cmd & IOC_OUT) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 6e333bfa17d6..163d665a6074 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1047,10 +1047,13 @@ extern struct srcu_struct kfd_processes_srcu; typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p, void *data); +typedef int amdkfd_ioctl_validate_t(void *kdata, unsigned int usize); + struct amdkfd_ioctl_desc { unsigned int cmd; int flags; amdkfd_ioctl_t *func; + amdkfd_ioctl_validate_t *validate; unsigned int cmd_drv; const char *name; }; -- cgit v1.2.3 From d0f5711fa14a09c010537375cf34893cd33bc2ee Mon Sep 17 00:00:00 2001 From: YuanShang Date: Thu, 26 Mar 2026 18:27:30 +0800 Subject: drm/amdkfd: check if vm ready in svm map and unmap to gpu Don't map or unmap svm range to gpu if vm is not ready for updates. Why: DRM entity may already be killed when the svm worker try to update gpu vm. Signed-off-by: YuanShang Reviewed-by: Philip Yang Signed-off-by: Alex Deucher (cherry picked from commit 55f8e366c326980174a4f2b9501b524d8eb25135) --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index b120fdb0ef77..38085a0a0f58 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1366,6 +1366,12 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, pr_debug("CPU[0x%llx 0x%llx] -> GPU[0x%llx 0x%llx]\n", start, last, gpu_start, gpu_end); + + if (!amdgpu_vm_ready(vm)) { + pr_debug("VM not ready, canceling unmap\n"); + return -EINVAL; + } + return amdgpu_vm_update_range(adev, vm, false, true, true, false, NULL, gpu_start, gpu_end, init_pte_value, 0, 0, NULL, NULL, fence); @@ -1443,6 +1449,11 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms, last_start, last_start + npages - 1, readonly); + if (!amdgpu_vm_ready(vm)) { + pr_debug("VM not ready, canceling map\n"); + return -EINVAL; + } + for (i = offset; i < offset + npages; i++) { uint64_t gpu_start; uint64_t gpu_end; -- cgit v1.2.3 From 510a27055446b8f0d29487ca8b8d2033dc2b6ca6 Mon Sep 17 00:00:00 2001 From: Richard Cheng Date: Fri, 24 Apr 2026 18:02:21 +0800 Subject: sched_ext: sync disable_irq_work in bpf_scx_unreg() When unregistered my self-written scx scheduler, the following panic occurs. [ 229.923133] Kernel text patching generated an invalid instruction at 0xffff80009bc2c1f8! [ 229.923146] Internal error: Oops - BRK: 00000000f2000100 [#1] SMP [ 230.077871] CPU: 48 UID: 0 PID: 1760 Comm: kworker/u583:7 Not tainted 7.0.0+ #3 PREEMPT(full) [ 230.086677] Hardware name: NVIDIA GB200 NVL/P3809-BMC, BIOS 02.05.12 20251107 [ 230.093972] Workqueue: events_unbound bpf_map_free_deferred [ 230.099675] Sched_ext: invariant_0.1.0_aarch64_unknown_linux_gnu_debug (disabling), task: runnable_at=-174ms [ 230.116843] pc : 0xffff80009bc2c1f8 [ 230.120406] lr : dequeue_task_scx+0x270/0x2d0 [ 230.217749] Call trace: [ 230.228515] 0xffff80009bc2c1f8 (P) [ 230.232077] dequeue_task+0x84/0x188 [ 230.235728] sched_change_begin+0x1dc/0x250 [ 230.240000] __set_cpus_allowed_ptr_locked+0x17c/0x240 [ 230.245250] __set_cpus_allowed_ptr+0x74/0xf0 [ 230.249701] ___migrate_enable+0x4c/0xa0 [ 230.253707] bpf_map_free_deferred+0x1a4/0x1b0 [ 230.258246] process_one_work+0x184/0x540 [ 230.262342] worker_thread+0x19c/0x348 [ 230.266170] kthread+0x13c/0x150 [ 230.269465] ret_from_fork+0x10/0x20 [ 230.281393] Code: d4202000 d4202000 d4202000 d4202000 (d4202000) [ 230.287621] ---[ end trace 0000000000000000 ]--- [ 231.160046] Kernel panic - not syncing: Oops - BRK: Fatal exception in interrupt The root cause is that the JIT page backing ops->quiescent() is freed before all callers of that function have stopped. The expected ordering during teardown is: bitmap_zero(sch->has_op) + synchronize_rcu() -> guarantees no CPU will ever call sch->ops.* again -> only THEN free the BPF struct_ops JIT page bpf_scx_unreg() is supposed to enforce the order, but after commit f4a6c506d118 ("sched_ext: Always bounce scx_disable() through irq_work"), disable_work is no longer queued directly, causing kthread_flush_work() to be a noop. Thus, the caller drops the struct_ops map too early and poisoned with AARCH64_BREAK_FAULT before disable_workfn ever execute. So the subsequent dequeue_task() still sees SCX_HAS_OP(sch, quiescent) as true and calls ops.quiescent, which hit on the poisoned page and BRK panic. Add a helper scx_flush_disable_work() so the future use cases that want to flush disable_work can use it. Also amend the call for scx_root_enable_workfn() and scx_sub_enable_workfn() which have similar pattern in the error path. Fixes: f4a6c506d118 ("sched_ext: Always bounce scx_disable() through irq_work") Signed-off-by: Richard Cheng Reviewed-by: Andrea Righi Reviewed-by: Cheng-Yang Chou Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 1f670028bf19..a018034dd81c 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5923,6 +5923,20 @@ static void scx_disable(struct scx_sched *sch, enum scx_exit_kind kind) irq_work_queue(&sch->disable_irq_work); } +/** + * scx_flush_disable_work - flush the disable work and wait for it to finish + * @sch: the scheduler + * + * sch->disable_work might still not queued, causing kthread_flush_work() + * as a noop. Syncing the irq_work first is required to guarantee the + * kthread work has been queued before waiting for it. + */ +static void scx_flush_disable_work(struct scx_sched *sch) +{ + irq_work_sync(&sch->disable_irq_work); + kthread_flush_work(&sch->disable_work); +} + static void dump_newline(struct seq_buf *s) { trace_sched_ext_dump(""); @@ -6823,7 +6837,7 @@ err_disable: * completion. sch's base reference will be put by bpf_scx_unreg(). */ scx_error(sch, "scx_root_enable() failed (%d)", ret); - kthread_flush_work(&sch->disable_work); + scx_flush_disable_work(sch); cmd->ret = 0; } @@ -7090,7 +7104,7 @@ err_unlock_and_disable: percpu_up_write(&scx_fork_rwsem); err_disable: mutex_unlock(&scx_enable_mutex); - kthread_flush_work(&sch->disable_work); + scx_flush_disable_work(sch); cmd->ret = 0; } @@ -7351,7 +7365,7 @@ static void bpf_scx_unreg(void *kdata, struct bpf_link *link) struct scx_sched *sch = rcu_dereference_protected(ops->priv, true); scx_disable(sch, SCX_EXIT_UNREG); - kthread_flush_work(&sch->disable_work); + scx_flush_disable_work(sch); RCU_INIT_POINTER(ops->priv, NULL); kobject_put(&sch->kobj); } -- cgit v1.2.3 From 4b2b4d7d4e203c92db8966b163edfacb1f0e1e29 Mon Sep 17 00:00:00 2001 From: Jiexun Wang Date: Fri, 17 Apr 2026 20:25:06 +0800 Subject: netfilter: xt_policy: fix strict mode inbound policy matching match_policy_in() walks sec_path entries from the last transform to the first one, but strict policy matching needs to consume info->pol[] in the same forward order as the rule layout. Derive the strict-match policy position from the number of transforms already consumed so that multi-element inbound rules are matched consistently. Fixes: c4b885139203 ("[NETFILTER]: x_tables: replace IPv4/IPv6 policy match by address family independant version") Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Jiexun Wang Signed-off-by: Ren Wei Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index cb6e8279010a..b5fa65558318 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -63,7 +63,7 @@ match_policy_in(const struct sk_buff *skb, const struct xt_policy_info *info, return 0; for (i = sp->len - 1; i >= 0; i--) { - pos = strict ? i - sp->len + 1 : 0; + pos = strict ? sp->len - i - 1 : 0; if (pos >= info->len) return 0; e = &info->pol[pos]; -- cgit v1.2.3 From fe11e5c40817b84abaa5d83bfb6586d8412bfd07 Mon Sep 17 00:00:00 2001 From: Kai Ma Date: Wed, 22 Apr 2026 22:54:18 +0800 Subject: netfilter: reject zero shift in nft_bitwise Reject zero shift operands for nft_bitwise left and right shift expressions during initialization. The carry propagation logic computes the carry from the adjacent 32-bit word using BITS_PER_TYPE(u32) - shift. A zero shift operand turns this into a 32-bit shift, which is undefined behaviour. Reject zero shift operands in the control plane, alongside the existing check for values greater than or equal to 32, so malformed rules never reach the packet path. Fixes: 567d746b55bc ("netfilter: bitwise: add support for shifts.") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Kai Ma Signed-off-by: Ren Wei Reviewed-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_bitwise.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 13808e9cd999..94dccdcfa06b 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -196,7 +196,8 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv, if (err < 0) return err; - if (priv->data.data[0] >= BITS_PER_TYPE(u32)) { + if (!priv->data.data[0] || + priv->data.data[0] >= BITS_PER_TYPE(u32)) { nft_data_release(&priv->data, desc.type); return -EINVAL; } -- cgit v1.2.3 From 8cf6809cddcbe301aedfc6b51bcd4944d45795f6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Apr 2026 02:19:11 +0200 Subject: netfilter: nf_conntrack_sip: don't use simple_strtoul MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace unsafe port parsing in epaddr_len(), ct_sip_parse_header_uri(), and ct_sip_parse_request() with a new sip_parse_port() helper that validates each digit against the buffer limit, eliminating the use of simple_strtoul() which assumes NUL-terminated strings. The previous code dereferenced pointers without bounds checks after sip_parse_addr() and relied on simple_strtoul() on non-NUL-terminated skb data. A port that reaches the buffer limit without a trailing character is also rejected as malformed. Also get rid of all simple_strtoul() usage in conntrack, prefer a stricter version instead. There are intentional changes: - Bail out if number is > UINT_MAX and indicate a failure, same for too long sequences. While we do accept 05535 as port 5535, we will not accept e.g. 'sip:10.0.0.1:005060'. While its syntactically valid under RFC 3261, we should restrict this to not waste cycles when presented with malformed packets with 64k '0' characters. - Force base 10 in ct_sip_parse_numerical_param(). This is used to fetch 'expire=' and 'rports='; both are expected to use base-10. - In nf_nat_sip.c, only accept the parsed value if its within the 1k-64k range. - epaddr_len now returns 0 if the port is invalid, as it already does for invalid ip addresses. This is intentional. nf_conntrack_sip performs lots of guesswork to find the right parts of the message to parse. Being stricter could break existing setups. Connection tracking helpers are designed to allow traffic to pass, not to block it. Based on an earlier patch from Jenny Guanni Qu . Fixes: 05e3ced297fe ("[NETFILTER]: nf_conntrack_sip: introduce SIP-URI parsing helper") Reported-by: Klaudia Kloc Reported-by: Dawid Moczadło Reported-by: Jenny Guanni Qu . Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 152 ++++++++++++++++++++++++++++++--------- net/netfilter/nf_nat_sip.c | 1 + 2 files changed, 119 insertions(+), 34 deletions(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 182cfb119448..1eb55907d470 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -181,6 +181,57 @@ static int sip_parse_addr(const struct nf_conn *ct, const char *cp, return 1; } +/* Parse optional port number after IP address. + * Returns false on malformed input, true otherwise. + * If port is non-NULL, stores parsed port in network byte order. + * If no port is present, sets *port to default SIP port. + */ +static bool sip_parse_port(const char *dptr, const char **endp, + const char *limit, __be16 *port) +{ + unsigned int p = 0; + int len = 0; + + if (dptr >= limit) + return false; + + if (*dptr != ':') { + if (port) + *port = htons(SIP_PORT); + if (endp) + *endp = dptr; + return true; + } + + dptr++; /* skip ':' */ + + while (dptr < limit && isdigit(*dptr)) { + p = p * 10 + (*dptr - '0'); + dptr++; + len++; + if (len > 5) /* max "65535" */ + return false; + } + + if (len == 0) + return false; + + /* reached limit while parsing port */ + if (dptr >= limit) + return false; + + if (p < 1024 || p > 65535) + return false; + + if (port) + *port = htons(p); + + if (endp) + *endp = dptr; + + return true; +} + /* skip ip address. returns its length. */ static int epaddr_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) @@ -193,11 +244,8 @@ static int epaddr_len(const struct nf_conn *ct, const char *dptr, return 0; } - /* Port number */ - if (*dptr == ':') { - dptr++; - dptr += digits_len(ct, dptr, limit, shift); - } + if (!sip_parse_port(dptr, &dptr, limit, NULL)) + return 0; return dptr - aux; } @@ -228,6 +276,51 @@ static int skp_epaddr_len(const struct nf_conn *ct, const char *dptr, return epaddr_len(ct, dptr, limit, shift); } +/* simple_strtoul stops after first non-number character. + * But as we're not dealing with c-strings, we can't rely on + * hitting \r,\n,\0 etc. before moving past end of buffer. + * + * This is a variant of simple_strtoul, but doesn't require + * a c-string. + * + * If value exceeds UINT_MAX, 0 is returned. + */ +static unsigned int sip_strtouint(const char *cp, unsigned int len, char **endp) +{ + const unsigned int max = sizeof("4294967295"); + unsigned int olen = len; + const char *s = cp; + u64 result = 0; + + if (len > max) + len = max; + + while (olen > 0 && isdigit(*s)) { + unsigned int value; + + if (len == 0) + goto err; + + value = *s - '0'; + result = result * 10 + value; + + if (result > UINT_MAX) + goto err; + s++; + len--; + olen--; + } + + if (endp) + *endp = (char *)s; + + return result; +err: + if (endp) + *endp = (char *)cp; + return 0; +} + /* Parse a SIP request line of the form: * * Request-Line = Method SP Request-URI SP SIP-Version CRLF @@ -241,7 +334,6 @@ int ct_sip_parse_request(const struct nf_conn *ct, { const char *start = dptr, *limit = dptr + datalen, *end; unsigned int mlen; - unsigned int p; int shift = 0; /* Skip method and following whitespace */ @@ -267,14 +359,8 @@ int ct_sip_parse_request(const struct nf_conn *ct, if (!sip_parse_addr(ct, dptr, &end, addr, limit, true)) return -1; - if (end < limit && *end == ':') { - end++; - p = simple_strtoul(end, (char **)&end, 10); - if (p < 1024 || p > 65535) - return -1; - *port = htons(p); - } else - *port = htons(SIP_PORT); + if (!sip_parse_port(end, &end, limit, port)) + return -1; if (end == dptr) return 0; @@ -509,7 +595,6 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, union nf_inet_addr *addr, __be16 *port) { const char *c, *limit = dptr + datalen; - unsigned int p; int ret; ret = ct_sip_walk_headers(ct, dptr, dataoff ? *dataoff : 0, datalen, @@ -520,14 +605,8 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, if (!sip_parse_addr(ct, dptr + *matchoff, &c, addr, limit, true)) return -1; - if (*c == ':') { - c++; - p = simple_strtoul(c, (char **)&c, 10); - if (p < 1024 || p > 65535) - return -1; - *port = htons(p); - } else - *port = htons(SIP_PORT); + if (!sip_parse_port(c, &c, limit, port)) + return -1; if (dataoff) *dataoff = c - dptr; @@ -609,7 +688,7 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr, return 0; start += strlen(name); - *val = simple_strtoul(start, &end, 0); + *val = sip_strtouint(start, limit - start, (char **)&end); if (start == end) return -1; if (matchoff && matchlen) { @@ -1064,6 +1143,8 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, mediaoff = sdpoff; for (i = 0; i < ARRAY_SIZE(sdp_media_types); ) { + char *end; + if (ct_sip_get_sdp_header(ct, *dptr, mediaoff, *datalen, SDP_HDR_MEDIA, SDP_HDR_UNSPEC, &mediaoff, &medialen) <= 0) @@ -1079,8 +1160,8 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, mediaoff += t->len; medialen -= t->len; - port = simple_strtoul(*dptr + mediaoff, NULL, 10); - if (port == 0) + port = sip_strtouint(*dptr + mediaoff, *datalen - mediaoff, (char **)&end); + if (port == 0 || *dptr + mediaoff == end) continue; if (port < 1024 || port > 65535) { nf_ct_helper_log(skb, ct, "wrong port %u", port); @@ -1254,7 +1335,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, */ if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_EXPIRES, &matchoff, &matchlen) > 0) - expires = simple_strtoul(*dptr + matchoff, NULL, 10); + expires = sip_strtouint(*dptr + matchoff, *datalen - matchoff, NULL); ret = ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, SIP_HDR_CONTACT, NULL, @@ -1358,7 +1439,7 @@ static int process_register_response(struct sk_buff *skb, unsigned int protoff, if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_EXPIRES, &matchoff, &matchlen) > 0) - expires = simple_strtoul(*dptr + matchoff, NULL, 10); + expires = sip_strtouint(*dptr + matchoff, *datalen - matchoff, NULL); while (1) { unsigned int c_expires = expires; @@ -1418,10 +1499,12 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchoff, matchlen, matchend; unsigned int code, cseq, i; + char *end; if (*datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; - code = simple_strtoul(*dptr + strlen("SIP/2.0 "), NULL, 10); + code = sip_strtouint(*dptr + strlen("SIP/2.0 "), + *datalen - strlen("SIP/2.0 "), NULL); if (!code) { nf_ct_helper_log(skb, ct, "cannot get code"); return NF_DROP; @@ -1432,8 +1515,8 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff, nf_ct_helper_log(skb, ct, "cannot parse cseq"); return NF_DROP; } - cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq && *(*dptr + matchoff) != '0') { + cseq = sip_strtouint(*dptr + matchoff, *datalen - matchoff, (char **)&end); + if (*dptr + matchoff == end) { nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; } @@ -1482,6 +1565,7 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { const struct sip_handler *handler; + char *end; handler = &sip_handlers[i]; if (handler->request == NULL) @@ -1498,8 +1582,8 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, nf_ct_helper_log(skb, ct, "cannot parse cseq"); return NF_DROP; } - cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq && *(*dptr + matchoff) != '0') { + cseq = sip_strtouint(*dptr + matchoff, *datalen - matchoff, (char **)&end); + if (*dptr + matchoff == end) { nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; } @@ -1575,7 +1659,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, &matchoff, &matchlen) <= 0) break; - clen = simple_strtoul(dptr + matchoff, (char **)&end, 10); + clen = sip_strtouint(dptr + matchoff, datalen - matchoff, (char **)&end); if (dptr + matchoff == end) break; diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index c845b6d1a2bd..9fbfc6bff0c2 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -246,6 +246,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen, "rport=", &poff, &plen, &n) > 0 && + n >= 1024 && n <= 65535 && htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port && htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; -- cgit v1.2.3 From b5c111f4967ba4fdecdd318923ec7b081e9ef95f Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Thu, 23 Apr 2026 15:23:55 -0700 Subject: bpf: Fix sk_local_storage diag dumping uninitialized special fields Call check_and_init_map_value() after the copy_map_value() to zero out special field regions. diag_get() copies sk_local_storage map values into a netlink message using copy_map_value{_locked}(), which intentionally skip special fields. However, the destination buffer from nla_reserve_64bit() is not zeroed and the skipped regions contain uninitialized skb data can be sent to userspace. Fixes: 1ed4d92458a9 ("bpf: INET_DIAG support in bpf_sk_storage") Signed-off-by: Amery Hung Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20260423222356.155387-1-ameryhung@gmail.com --- net/core/bpf_sk_storage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index dc3e8fce8809..ecd659f79fd4 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -557,6 +557,7 @@ static int diag_get(struct bpf_local_storage_map *smap, sdata->data, true); else copy_map_value(&smap->map, nla_data(nla_value), sdata->data); + check_and_init_map_value(&smap->map, nla_data(nla_value)); nla_nest_end(skb, nla_stg); return 0; -- cgit v1.2.3 From bd2d76455b65aab77652823919db128a8e585825 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 10:14:32 -1000 Subject: sched_ext: Defer scx_hardlockup() out of NMI scx_hardlockup() runs from NMI and eventually calls scx_claim_exit(), which takes scx_sched_lock. scx_sched_lock isn't NMI-safe and grabbing it from NMI context can lead to deadlocks. The hardlockup handler is best-effort recovery and the disable path it triggers runs off of irq_work anyway. Move the handle_lockup() call into an irq_work so it runs in IRQ context. Fixes: ebeca1f930ea ("sched_ext: Introduce cgroup sub-sched support") Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index a018034dd81c..34de1c9b7a7c 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4940,6 +4940,25 @@ void scx_softlockup(u32 dur_s) smp_processor_id(), dur_s); } +/* + * scx_hardlockup() runs from NMI and eventually calls scx_claim_exit(), + * which takes scx_sched_lock. scx_sched_lock isn't NMI-safe and grabbing + * it from NMI context can lead to deadlocks. Defer via irq_work; the + * disable path runs off irq_work anyway. + */ +static atomic_t scx_hardlockup_cpu = ATOMIC_INIT(-1); + +static void scx_hardlockup_irq_workfn(struct irq_work *work) +{ + int cpu = atomic_xchg(&scx_hardlockup_cpu, -1); + + if (cpu >= 0 && handle_lockup("hard lockup - CPU %d", cpu)) + printk_deferred(KERN_ERR "sched_ext: Hard lockup - CPU %d, disabling BPF scheduler\n", + cpu); +} + +static DEFINE_IRQ_WORK(scx_hardlockup_irq_work, scx_hardlockup_irq_workfn); + /** * scx_hardlockup - sched_ext hardlockup handler * @@ -4948,17 +4967,19 @@ void scx_softlockup(u32 dur_s) * Try kicking out the current scheduler in an attempt to recover the system to * a good state before taking more drastic actions. * - * Returns %true if sched_ext is enabled and abort was initiated, which may - * resolve the reported hardlockup. %false if sched_ext is not enabled or - * someone else already initiated abort. + * Queues an irq_work; the handle_lockup() call happens in IRQ context (see + * scx_hardlockup_irq_workfn). + * + * Returns %true if sched_ext is enabled and the work was queued, %false + * otherwise. */ bool scx_hardlockup(int cpu) { - if (!handle_lockup("hard lockup - CPU %d", cpu)) + if (!rcu_access_pointer(scx_root)) return false; - printk_deferred(KERN_ERR "sched_ext: Hard lockup - CPU %d, disabling BPF scheduler\n", - cpu); + atomic_cmpxchg(&scx_hardlockup_cpu, -1, cpu); + irq_work_queue(&scx_hardlockup_irq_work); return true; } -- cgit v1.2.3 From 411d3ef1a70589755e3beed2f5bf1f8aa0c27d1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:35 -1000 Subject: sched_ext: Unregister sub_kset on scheduler disable When ops.sub_attach is set, scx_alloc_and_add_sched() creates sub_kset as a child of &sch->kobj, which pins the parent with its own reference. The disable paths never call kset_unregister(), so the final kobject_put() in bpf_scx_unreg() leaves a stale reference and scx_kobj_release() never runs, leaking the whole struct scx_sched on every load/unload cycle. Unregister sub_kset in scx_root_disable() and scx_sub_disable() before kobject_del(&sch->kobj). Fixes: ebeca1f930ea ("sched_ext: Introduce cgroup sub-sched support") Reported-by: Chris Mason Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 34de1c9b7a7c..7f991ecb1398 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5721,6 +5721,8 @@ static void scx_sub_disable(struct scx_sched *sch) if (sch->ops.exit) SCX_CALL_OP(sch, exit, NULL, sch->exit_info); + if (sch->sub_kset) + kset_unregister(sch->sub_kset); kobject_del(&sch->kobj); } #else /* CONFIG_EXT_SUB_SCHED */ @@ -5852,6 +5854,10 @@ static void scx_root_disable(struct scx_sched *sch) * could observe an object of the same name still in the hierarchy when * the next scheduler is loaded. */ +#ifdef CONFIG_EXT_SUB_SCHED + if (sch->sub_kset) + kset_unregister(sch->sub_kset); +#endif kobject_del(&sch->kobj); free_kick_syncs(); -- cgit v1.2.3 From 4fda9f0e7c950da4fe03cedeb2ac818edf5d03e9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:35 -1000 Subject: sched_ext: Guard scx_dsq_move() against NULL kit->dsq after failed iter_new bpf_iter_scx_dsq_new() clears kit->dsq on failure and bpf_iter_scx_dsq_{next,destroy}() guard against that. scx_dsq_move() doesn't - it dereferences kit->dsq immediately, so a BPF program that calls scx_bpf_dsq_move[_vtime]() after a failed iter_new oopses the kernel. Return false if kit->dsq is NULL. Fixes: 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()") Cc: stable@vger.kernel.org # v6.12+ Reported-by: Chris Mason Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 7f991ecb1398..68c67113204f 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -8076,12 +8076,22 @@ static bool scx_dsq_move(struct bpf_iter_scx_dsq_kern *kit, struct task_struct *p, u64 dsq_id, u64 enq_flags) { struct scx_dispatch_q *src_dsq = kit->dsq, *dst_dsq; - struct scx_sched *sch = src_dsq->sched; + struct scx_sched *sch; struct rq *this_rq, *src_rq, *locked_rq; bool dispatched = false; bool in_balance; unsigned long flags; + /* + * The verifier considers an iterator slot initialized on any + * KF_ITER_NEW return, so a BPF program may legally reach here after + * bpf_iter_scx_dsq_new() failed and left @kit->dsq NULL. + */ + if (unlikely(!src_dsq)) + return false; + + sch = src_dsq->sched; + if (!scx_vet_enq_flags(sch, dsq_id, &enq_flags)) return false; -- cgit v1.2.3 From da2d81b4118a74e65d2335e221a38d665902a98c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:35 -1000 Subject: sched_ext: Skip tasks with stale task_rq in bypass_lb_cpu() bypass_lb_cpu() transfers tasks between per-CPU bypass DSQs without migrating them - task_cpu() only updates when the donee later consumes the task via move_remote_task_to_local_dsq(). If the LB timer fires again before consumption and the new DSQ becomes a donor, @p is still on the previous CPU and task_rq(@p) != donor_rq. @p can't be moved without its own rq locked. Skip such tasks. Fixes: 95d1df610cdc ("sched_ext: Implement load balancer for bypass mode") Cc: stable@vger.kernel.org # v6.19+ Reported-by: Chris Mason Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 68c67113204f..f8500ce37b22 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5023,6 +5023,15 @@ resume: if (cpumask_empty(donee_mask)) break; + /* + * If an earlier pass placed @p on @donor_dsq from a different + * CPU and the donee hasn't consumed it yet, @p is still on the + * previous CPU and task_rq(@p) != @donor_rq. @p can't be moved + * without its rq locked. Skip. + */ + if (task_rq(p) != donor_rq) + continue; + donee = cpumask_any_and_distribute(donee_mask, p->cpus_ptr); if (donee >= nr_cpu_ids) continue; -- cgit v1.2.3 From 21a5a97ba47842ef0c52d6c89e501dce27806550 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:35 -1000 Subject: sched_ext: Don't disable tasks in scx_sub_enable_workfn() abort path scx_sub_enable_workfn()'s prep loop calls __scx_init_task(sch, p, false) without transitioning task state, then sets SCX_TASK_SUB_INIT. If prep fails partway, the abort path runs __scx_disable_and_exit_task(sch, p) on the marked tasks. Task state is still the parent's ENABLED, so that dispatches to the SCX_TASK_ENABLED arm and calls scx_disable_task(sch, p) - i.e. child->ops.disable() - for tasks on which child->ops.enable() never ran. A BPF sub-scheduler allocating per-task state in enable/freeing in disable would operate on uninitialized state. The dying-task branch in scx_disable_and_exit_task() has the same problem, and scx_enabling_sub_sched was cleared before the abort cleanup loop - a task exiting during cleanup tripped the WARN and skipped both ops.exit_task and the SCX_TASK_SUB_INIT clear, leaking per-task resources and leaving the task stuck. Introduce scx_sub_init_cancel_task() that calls ops.exit_task with cancelled=true - matching what the top-level init path does when init_task itself returns -errno. Use it in the abort loop and in the dying-task branch. scx_enabling_sub_sched now stays set until the abort loop finishes clearing SUB_INIT, so concurrent exits hitting the dying-task branch can still find @sch. That branch also clears SCX_TASK_SUB_INIT unconditionally when seen, leaving the task unmarked even if the WARN fires. Fixes: 337ec00b1d9c ("sched_ext: Implement cgroup sub-sched enabling and disabling") Reported-by: Chris Mason Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index f8500ce37b22..dd0539ab9ba8 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3633,6 +3633,22 @@ static void __scx_disable_and_exit_task(struct scx_sched *sch, SCX_CALL_OP_TASK(sch, exit_task, task_rq(p), p, &args); } +/* + * Undo a completed __scx_init_task(sch, p, false) when scx_enable_task() never + * ran. The task state has not been transitioned, so this mirrors the + * SCX_TASK_INIT branch in __scx_disable_and_exit_task(). + */ +static void scx_sub_init_cancel_task(struct scx_sched *sch, struct task_struct *p) +{ + struct scx_exit_task_args args = { .cancelled = true }; + + lockdep_assert_held(&p->pi_lock); + lockdep_assert_rq_held(task_rq(p)); + + if (SCX_HAS_OP(sch, exit_task)) + SCX_CALL_OP_TASK(sch, exit_task, task_rq(p), p, &args); +} + static void scx_disable_and_exit_task(struct scx_sched *sch, struct task_struct *p) { @@ -3641,11 +3657,12 @@ static void scx_disable_and_exit_task(struct scx_sched *sch, /* * If set, @p exited between __scx_init_task() and scx_enable_task() in * scx_sub_enable() and is initialized for both the associated sched and - * its parent. Disable and exit for the child too. + * its parent. Exit for the child too - scx_enable_task() never ran for + * it, so undo only init_task. */ - if ((p->scx.flags & SCX_TASK_SUB_INIT) && - !WARN_ON_ONCE(!scx_enabling_sub_sched)) { - __scx_disable_and_exit_task(scx_enabling_sub_sched, p); + if (p->scx.flags & SCX_TASK_SUB_INIT) { + if (!WARN_ON_ONCE(!scx_enabling_sub_sched)) + scx_sub_init_cancel_task(scx_enabling_sub_sched, p); p->scx.flags &= ~SCX_TASK_SUB_INIT; } @@ -7124,16 +7141,23 @@ out_unlock: abort: put_task_struct(p); scx_task_iter_stop(&sti); - scx_enabling_sub_sched = NULL; + /* + * Undo __scx_init_task() for tasks we marked. scx_enable_task() never + * ran for @sch on them, so calling scx_disable_task() here would invoke + * ops.disable() without a matching ops.enable(). scx_enabling_sub_sched + * must stay set until SUB_INIT is cleared from every marked task - + * scx_disable_and_exit_task() reads it when a task exits concurrently. + */ scx_task_iter_start(&sti, sch->cgrp); while ((p = scx_task_iter_next_locked(&sti))) { if (p->scx.flags & SCX_TASK_SUB_INIT) { - __scx_disable_and_exit_task(sch, p); + scx_sub_init_cancel_task(sch, p); p->scx.flags &= ~SCX_TASK_SUB_INIT; } } scx_task_iter_stop(&sti); + scx_enabling_sub_sched = NULL; err_unlock_and_disable: /* we'll soon enter disable path, keep bypass on */ scx_cgroup_unlock(); -- cgit v1.2.3 From 80afd4c84bc8f5e80145ce35279f5ce53f6043db Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:35 -1000 Subject: sched_ext: Read scx_root under scx_cgroup_ops_rwsem in cgroup setters scx_group_set_{weight,idle,bandwidth}() cache scx_root before acquiring scx_cgroup_ops_rwsem, so the pointer can be stale by the time the op runs. If the loaded scheduler is disabled and freed (via RCU work) and another is enabled between the naked load and the rwsem acquire, the reader sees scx_cgroup_enabled=true (the new scheduler's) but dereferences the freed one - UAF on SCX_HAS_OP(sch, ...) / SCX_CALL_OP(sch, ...). scx_cgroup_enabled is toggled only under scx_cgroup_ops_rwsem write (scx_cgroup_{init,exit}), so reading scx_root inside the rwsem read section correlates @sch with the enabled snapshot. Fixes: a5bd6ba30b33 ("sched_ext: Use cgroup_lock/unlock() to synchronize against cgroup operations") Cc: stable@vger.kernel.org # v6.18+ Reported-by: Chris Mason Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index dd0539ab9ba8..f6d22636a4de 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4343,9 +4343,10 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) void scx_group_set_weight(struct task_group *tg, unsigned long weight) { - struct scx_sched *sch = scx_root; + struct scx_sched *sch; percpu_down_read(&scx_cgroup_ops_rwsem); + sch = scx_root; if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_weight) && tg->scx.weight != weight) @@ -4358,9 +4359,10 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight) void scx_group_set_idle(struct task_group *tg, bool idle) { - struct scx_sched *sch = scx_root; + struct scx_sched *sch; percpu_down_read(&scx_cgroup_ops_rwsem); + sch = scx_root; if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_idle)) SCX_CALL_OP(sch, cgroup_set_idle, NULL, tg_cgrp(tg), idle); @@ -4374,9 +4376,10 @@ void scx_group_set_idle(struct task_group *tg, bool idle) void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us) { - struct scx_sched *sch = scx_root; + struct scx_sched *sch; percpu_down_read(&scx_cgroup_ops_rwsem); + sch = scx_root; if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_bandwidth) && (tg->scx.bw_period_us != period_us || -- cgit v1.2.3 From cc2a387d330d1fc51a9b7f211a7e5d39c9f0ab94 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:35 -1000 Subject: sched_ext: Resolve caller's scheduler in scx_bpf_destroy_dsq() / scx_bpf_dsq_nr_queued() scx_bpf_create_dsq() resolves the calling scheduler via scx_prog_sched(aux) and inserts the new DSQ into that scheduler's dsq_hash. Its inverse scx_bpf_destroy_dsq() and the query helper scx_bpf_dsq_nr_queued() were hard-coded to rcu_dereference(scx_root), so a sub-scheduler could only destroy or query DSQs in the root scheduler's hash - never its own. If the root had a DSQ with the same id, the sub-sched silently destroyed it and the root aborted on the next dispatch ("invalid DSQ ID 0x0.."). Take a const struct bpf_prog_aux *aux via KF_IMPLICIT_ARGS and resolve the scheduler with scx_prog_sched(aux), matching scx_bpf_create_dsq(). Fixes: ebeca1f930ea ("sched_ext: Introduce cgroup sub-sched support") Reported-by: Chris Mason Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index f6d22636a4de..cc5df32db8ff 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -8722,11 +8722,12 @@ __bpf_kfunc void scx_bpf_kick_cpu(s32 cpu, u64 flags, const struct bpf_prog_aux /** * scx_bpf_dsq_nr_queued - Return the number of queued tasks * @dsq_id: id of the DSQ + * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs * * Return the number of tasks in the DSQ matching @dsq_id. If not found, * -%ENOENT is returned. */ -__bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id) +__bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id, const struct bpf_prog_aux *aux) { struct scx_sched *sch; struct scx_dispatch_q *dsq; @@ -8734,7 +8735,7 @@ __bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id) preempt_disable(); - sch = rcu_dereference_sched(scx_root); + sch = scx_prog_sched(aux); if (unlikely(!sch)) { ret = -ENODEV; goto out; @@ -8766,21 +8767,21 @@ out: /** * scx_bpf_destroy_dsq - Destroy a custom DSQ * @dsq_id: DSQ to destroy + * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs * * Destroy the custom DSQ identified by @dsq_id. Only DSQs created with * scx_bpf_create_dsq() can be destroyed. The caller must ensure that the DSQ is * empty and no further tasks are dispatched to it. Ignored if called on a DSQ * which doesn't exist. Can be called from any online scx_ops operations. */ -__bpf_kfunc void scx_bpf_destroy_dsq(u64 dsq_id) +__bpf_kfunc void scx_bpf_destroy_dsq(u64 dsq_id, const struct bpf_prog_aux *aux) { struct scx_sched *sch; - rcu_read_lock(); - sch = rcu_dereference(scx_root); + guard(rcu)(); + sch = scx_prog_sched(aux); if (sch) destroy_dsq(sch, dsq_id); - rcu_read_unlock(); } /** @@ -9534,8 +9535,8 @@ BTF_KFUNCS_START(scx_kfunc_ids_any) BTF_ID_FLAGS(func, scx_bpf_task_set_slice, KF_IMPLICIT_ARGS | KF_RCU); BTF_ID_FLAGS(func, scx_bpf_task_set_dsq_vtime, KF_IMPLICIT_ARGS | KF_RCU); BTF_ID_FLAGS(func, scx_bpf_kick_cpu, KF_IMPLICIT_ARGS) -BTF_ID_FLAGS(func, scx_bpf_dsq_nr_queued) -BTF_ID_FLAGS(func, scx_bpf_destroy_dsq) +BTF_ID_FLAGS(func, scx_bpf_dsq_nr_queued, KF_IMPLICIT_ARGS) +BTF_ID_FLAGS(func, scx_bpf_destroy_dsq, KF_IMPLICIT_ARGS) BTF_ID_FLAGS(func, scx_bpf_dsq_peek, KF_IMPLICIT_ARGS | KF_RCU_PROTECTED | KF_RET_NULL) BTF_ID_FLAGS(func, scx_bpf_dsq_reenq, KF_IMPLICIT_ARGS) BTF_ID_FLAGS(func, scx_bpf_reenqueue_local___v2, KF_IMPLICIT_ARGS) -- cgit v1.2.3 From 2f2ea77092660b53bfcbc4acc590b57ce9ab5dce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:35 -1000 Subject: sched_ext: Use dsq->first_task instead of list_empty() in dispatch_enqueue() FIFO-tail dispatch_enqueue()'s FIFO-tail path used list_empty(&dsq->list) to decide whether to set dsq->first_task on enqueue. dsq->list can contain parked BPF iterator cursors (SCX_DSQ_LNODE_ITER_CURSOR), so list_empty() is not a reliable "no real task" check. If the last real task is unlinked while a cursor is parked, first_task becomes NULL; the next FIFO-tail enqueue then sees list_empty() == false and skips the first_task update, leaving scx_bpf_dsq_peek() returning NULL for a non-empty DSQ. Test dsq->first_task directly, which already tracks only real tasks and is maintained under dsq->lock. Fixes: 44f5c8ec5b9a ("sched_ext: Add lockless peek operation for DSQs") Cc: stable@vger.kernel.org # v6.19+ Reported-by: Chris Mason Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi Cc: Ryan Newton --- kernel/sched/ext.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index cc5df32db8ff..8a2a90659c65 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1495,11 +1495,13 @@ static void dispatch_enqueue(struct scx_sched *sch, struct rq *rq, if (!(dsq->id & SCX_DSQ_FLAG_BUILTIN)) rcu_assign_pointer(dsq->first_task, p); } else { - bool was_empty; - - was_empty = list_empty(&dsq->list); + /* + * dsq->list can contain parked BPF iterator cursors, so + * list_empty() here isn't a reliable proxy for "no real + * task in the DSQ". Test dsq->first_task directly. + */ list_add_tail(&p->scx.dsq_list.node, &dsq->list); - if (was_empty && !(dsq->id & SCX_DSQ_FLAG_BUILTIN)) + if (!dsq->first_task && !(dsq->id & SCX_DSQ_FLAG_BUILTIN)) rcu_assign_pointer(dsq->first_task, p); } } -- cgit v1.2.3 From 7fb39e4eb4c3db52e4707a6a1cd45362f7e803f5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:36 -1000 Subject: sched_ext: Save and restore scx_locked_rq across SCX_CALL_OP SCX_CALL_OP{,_RET}() unconditionally clears scx_locked_rq_state to NULL on exit. Correct at the top level, but ops can recurse via scx_bpf_sub_dispatch(): a parent's ops.dispatch calls the helper, which invokes the child's ops.dispatch under another SCX_CALL_OP. When the inner call returns, the NULL clobbers the outer's state. The parent's BPF then calls kfuncs like scx_bpf_cpuperf_set() which read scx_locked_rq()==NULL and re-acquire the already-held rq. Snapshot scx_locked_rq_state on entry and restore on exit. Rename the rq parameter to locked_rq across all SCX_CALL_OP* macros so the snapshot local can be typed as 'struct rq *' without colliding with the parameter token in the expansion. SCX_CALL_OP_TASK{,_RET}() and SCX_CALL_OP_2TASKS_RET() funnel through the two base macros and inherit the fix. Fixes: 4f8b122848db ("sched_ext: Add basic building blocks for nested sub-scheduler dispatching") Reported-by: Chris Mason Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 49 ++++++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 8a2a90659c65..26968d0a6752 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -470,24 +470,35 @@ static inline void update_locked_rq(struct rq *rq) __this_cpu_write(scx_locked_rq_state, rq); } -#define SCX_CALL_OP(sch, op, rq, args...) \ +/* + * SCX ops can recurse via scx_bpf_sub_dispatch() - the inner call must not + * clobber the outer's scx_locked_rq_state. Save it on entry, restore on exit. + */ +#define SCX_CALL_OP(sch, op, locked_rq, args...) \ do { \ - if (rq) \ - update_locked_rq(rq); \ + struct rq *__prev_locked_rq; \ + \ + if (locked_rq) { \ + __prev_locked_rq = scx_locked_rq(); \ + update_locked_rq(locked_rq); \ + } \ (sch)->ops.op(args); \ - if (rq) \ - update_locked_rq(NULL); \ + if (locked_rq) \ + update_locked_rq(__prev_locked_rq); \ } while (0) -#define SCX_CALL_OP_RET(sch, op, rq, args...) \ +#define SCX_CALL_OP_RET(sch, op, locked_rq, args...) \ ({ \ + struct rq *__prev_locked_rq; \ __typeof__((sch)->ops.op(args)) __ret; \ \ - if (rq) \ - update_locked_rq(rq); \ + if (locked_rq) { \ + __prev_locked_rq = scx_locked_rq(); \ + update_locked_rq(locked_rq); \ + } \ __ret = (sch)->ops.op(args); \ - if (rq) \ - update_locked_rq(NULL); \ + if (locked_rq) \ + update_locked_rq(__prev_locked_rq); \ __ret; \ }) @@ -499,39 +510,39 @@ do { \ * those subject tasks. * * Every SCX_CALL_OP_TASK*() call site invokes its op with @p's rq lock held - - * either via the @rq argument here, or (for ops.select_cpu()) via @p's pi_lock - * held by try_to_wake_up() with rq tracking via scx_rq.in_select_cpu. So if - * kf_tasks[] is set, @p's scheduler-protected fields are stable. + * either via the @locked_rq argument here, or (for ops.select_cpu()) via @p's + * pi_lock held by try_to_wake_up() with rq tracking via scx_rq.in_select_cpu. + * So if kf_tasks[] is set, @p's scheduler-protected fields are stable. * * kf_tasks[] can not stack, so task-based SCX ops must not nest. The * WARN_ON_ONCE() in each macro catches a re-entry of any of the three variants * while a previous one is still in progress. */ -#define SCX_CALL_OP_TASK(sch, op, rq, task, args...) \ +#define SCX_CALL_OP_TASK(sch, op, locked_rq, task, args...) \ do { \ WARN_ON_ONCE(current->scx.kf_tasks[0]); \ current->scx.kf_tasks[0] = task; \ - SCX_CALL_OP((sch), op, rq, task, ##args); \ + SCX_CALL_OP((sch), op, locked_rq, task, ##args); \ current->scx.kf_tasks[0] = NULL; \ } while (0) -#define SCX_CALL_OP_TASK_RET(sch, op, rq, task, args...) \ +#define SCX_CALL_OP_TASK_RET(sch, op, locked_rq, task, args...) \ ({ \ __typeof__((sch)->ops.op(task, ##args)) __ret; \ WARN_ON_ONCE(current->scx.kf_tasks[0]); \ current->scx.kf_tasks[0] = task; \ - __ret = SCX_CALL_OP_RET((sch), op, rq, task, ##args); \ + __ret = SCX_CALL_OP_RET((sch), op, locked_rq, task, ##args); \ current->scx.kf_tasks[0] = NULL; \ __ret; \ }) -#define SCX_CALL_OP_2TASKS_RET(sch, op, rq, task0, task1, args...) \ +#define SCX_CALL_OP_2TASKS_RET(sch, op, locked_rq, task0, task1, args...) \ ({ \ __typeof__((sch)->ops.op(task0, task1, ##args)) __ret; \ WARN_ON_ONCE(current->scx.kf_tasks[0]); \ current->scx.kf_tasks[0] = task0; \ current->scx.kf_tasks[1] = task1; \ - __ret = SCX_CALL_OP_RET((sch), op, rq, task0, task1, ##args); \ + __ret = SCX_CALL_OP_RET((sch), op, locked_rq, task0, task1, ##args); \ current->scx.kf_tasks[0] = NULL; \ current->scx.kf_tasks[1] = NULL; \ __ret; \ -- cgit v1.2.3 From 207d76a372fb1bb324eadc8cb5bcaa0a8da7cefd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:36 -1000 Subject: sched_ext: Pass held rq to SCX_CALL_OP() for dump_cpu/dump_task scx_dump_state() walks CPUs with rq_lock_irqsave() held and invokes ops.dump_cpu / ops.dump_task with NULL locked_rq, leaving scx_locked_rq_state NULL. If the BPF callback calls a kfunc that re-acquires rq based on scx_locked_rq() - e.g. scx_bpf_cpuperf_set(cpu) - it re-acquires the already-held rq. Pass the held rq to SCX_CALL_OP(). Thread it into scx_dump_task() too. The pre-loop ops.dump call runs before rq_lock_irqsave() so keeps rq=NULL. Fixes: 07814a9439a3 ("sched_ext: Print debug dump after an error exit") Cc: stable@vger.kernel.org # v6.12+ Reported-by: Chris Mason Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 26968d0a6752..73d629559d6d 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6117,9 +6117,8 @@ static void ops_dump_exit(void) scx_dump_data.cpu = -1; } -static void scx_dump_task(struct scx_sched *sch, - struct seq_buf *s, struct scx_dump_ctx *dctx, - struct task_struct *p, char marker) +static void scx_dump_task(struct scx_sched *sch, struct seq_buf *s, struct scx_dump_ctx *dctx, + struct rq *rq, struct task_struct *p, char marker) { static unsigned long bt[SCX_EXIT_BT_LEN]; struct scx_sched *task_sch = scx_task_sched(p); @@ -6160,7 +6159,7 @@ static void scx_dump_task(struct scx_sched *sch, if (SCX_HAS_OP(sch, dump_task)) { ops_dump_init(s, " "); - SCX_CALL_OP(sch, dump_task, NULL, dctx, p); + SCX_CALL_OP(sch, dump_task, rq, dctx, p); ops_dump_exit(); } @@ -6284,8 +6283,7 @@ static void scx_dump_state(struct scx_sched *sch, struct scx_exit_info *ei, used = seq_buf_used(&ns); if (SCX_HAS_OP(sch, dump_cpu)) { ops_dump_init(&ns, " "); - SCX_CALL_OP(sch, dump_cpu, NULL, - &dctx, cpu, idle); + SCX_CALL_OP(sch, dump_cpu, rq, &dctx, cpu, idle); ops_dump_exit(); } @@ -6308,11 +6306,11 @@ static void scx_dump_state(struct scx_sched *sch, struct scx_exit_info *ei, if (rq->curr->sched_class == &ext_sched_class && (dump_all_tasks || scx_task_on_sched(sch, rq->curr))) - scx_dump_task(sch, &s, &dctx, rq->curr, '*'); + scx_dump_task(sch, &s, &dctx, rq, rq->curr, '*'); list_for_each_entry(p, &rq->scx.runnable_list, scx.runnable_node) if (dump_all_tasks || scx_task_on_sched(sch, p)) - scx_dump_task(sch, &s, &dctx, p, ' '); + scx_dump_task(sch, &s, &dctx, rq, p, ' '); next: rq_unlock_irqrestore(rq, &rf); } -- cgit v1.2.3 From 4155fb489fa175ec74eedde7d02219cf2fe74303 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:36 -1000 Subject: sched_ext: Pass held rq to SCX_CALL_OP() for core_sched_before scx_prio_less() runs from core-sched's pick_next_task() path with rq locked but invokes ops.core_sched_before() with NULL locked_rq, leaving scx_locked_rq_state NULL. If the BPF callback calls a kfunc that re-acquires rq based on scx_locked_rq() - e.g. scx_bpf_cpuperf_set(cpu) - it re-acquires the already-held rq. Pass task_rq(a). Fixes: 7b0888b7cc19 ("sched_ext: Implement core-sched support") Cc: stable@vger.kernel.org # v6.12+ Reported-by: Chris Mason Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 73d629559d6d..ba977154273c 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3198,7 +3198,7 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b, if (sch_a == sch_b && SCX_HAS_OP(sch_a, core_sched_before) && !scx_bypassing(sch_a, task_cpu(a))) return SCX_CALL_OP_2TASKS_RET(sch_a, core_sched_before, - NULL, + task_rq(a), (struct task_struct *)a, (struct task_struct *)b); else -- cgit v1.2.3 From d292aa00de1aea72961f94c0db43f6b5c72684c9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:36 -1000 Subject: sched_ext: Make bypass LB cpumasks per-scheduler scx_bypass_lb_{donee,resched}_cpumask were file-scope statics shared by all scheduler instances. With CONFIG_EXT_SUB_SCHED, multiple sched instances each arm their own bypass_lb_timer; concurrent bypass_lb_node() calls RMW the global cpumasks with no lock, corrupting donee/resched decisions. Move the cpumasks into struct scx_sched, allocate them alongside the timer in scx_alloc_and_add_sched(), free them in scx_sched_free_rcu_work(). Fixes: 95d1df610cdc ("sched_ext: Implement load balancer for bypass mode") Cc: stable@vger.kernel.org # v6.19+ Reported-by: Chris Mason Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 33 +++++++++++++++++++-------------- kernel/sched/ext_internal.h | 2 ++ 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index ba977154273c..e07f8c46e399 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -53,8 +53,6 @@ DEFINE_STATIC_KEY_FALSE(__scx_enabled); DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem); static atomic_t scx_enable_state_var = ATOMIC_INIT(SCX_DISABLED); static DEFINE_RAW_SPINLOCK(scx_bypass_lock); -static cpumask_var_t scx_bypass_lb_donee_cpumask; -static cpumask_var_t scx_bypass_lb_resched_cpumask; static bool scx_init_task_enabled; static bool scx_switching_all; DEFINE_STATIC_KEY_FALSE(__scx_switched_all); @@ -4747,6 +4745,8 @@ static void scx_sched_free_rcu_work(struct work_struct *work) irq_work_sync(&sch->disable_irq_work); kthread_destroy_worker(sch->helper); timer_shutdown_sync(&sch->bypass_lb_timer); + free_cpumask_var(sch->bypass_lb_donee_cpumask); + free_cpumask_var(sch->bypass_lb_resched_cpumask); #ifdef CONFIG_EXT_SUB_SCHED kfree(sch->cgrp_path); @@ -5123,8 +5123,8 @@ resume: static void bypass_lb_node(struct scx_sched *sch, int node) { const struct cpumask *node_mask = cpumask_of_node(node); - struct cpumask *donee_mask = scx_bypass_lb_donee_cpumask; - struct cpumask *resched_mask = scx_bypass_lb_resched_cpumask; + struct cpumask *donee_mask = sch->bypass_lb_donee_cpumask; + struct cpumask *resched_mask = sch->bypass_lb_resched_cpumask; u32 nr_tasks = 0, nr_cpus = 0, nr_balanced = 0; u32 nr_target, nr_donor_target; u32 before_min = U32_MAX, before_max = 0; @@ -6520,6 +6520,15 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops, init_irq_work(&sch->disable_irq_work, scx_disable_irq_workfn); kthread_init_work(&sch->disable_work, scx_disable_workfn); timer_setup(&sch->bypass_lb_timer, scx_bypass_lb_timerfn, 0); + + if (!alloc_cpumask_var(&sch->bypass_lb_donee_cpumask, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_stop_helper; + } + if (!alloc_cpumask_var(&sch->bypass_lb_resched_cpumask, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_free_lb_cpumask; + } sch->ops = *ops; rcu_assign_pointer(ops->priv, sch); @@ -6529,14 +6538,14 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops, char *buf = kzalloc(PATH_MAX, GFP_KERNEL); if (!buf) { ret = -ENOMEM; - goto err_stop_helper; + goto err_free_lb_resched; } cgroup_path(cgrp, buf, PATH_MAX); sch->cgrp_path = kstrdup(buf, GFP_KERNEL); kfree(buf); if (!sch->cgrp_path) { ret = -ENOMEM; - goto err_stop_helper; + goto err_free_lb_resched; } sch->cgrp = cgrp; @@ -6571,10 +6580,12 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops, #endif /* CONFIG_EXT_SUB_SCHED */ return sch; -#ifdef CONFIG_EXT_SUB_SCHED +err_free_lb_resched: + free_cpumask_var(sch->bypass_lb_resched_cpumask); +err_free_lb_cpumask: + free_cpumask_var(sch->bypass_lb_donee_cpumask); err_stop_helper: kthread_destroy_worker(sch->helper); -#endif err_free_pcpu: for_each_possible_cpu(cpu) { if (cpu == bypass_fail_cpu) @@ -9761,12 +9772,6 @@ static int __init scx_init(void) return ret; } - if (!alloc_cpumask_var(&scx_bypass_lb_donee_cpumask, GFP_KERNEL) || - !alloc_cpumask_var(&scx_bypass_lb_resched_cpumask, GFP_KERNEL)) { - pr_err("sched_ext: Failed to allocate cpumasks\n"); - return -ENOMEM; - } - return 0; } __initcall(scx_init); diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h index 62ce4eaf6a3f..a075732d4430 100644 --- a/kernel/sched/ext_internal.h +++ b/kernel/sched/ext_internal.h @@ -1075,6 +1075,8 @@ struct scx_sched { struct irq_work disable_irq_work; struct kthread_work disable_work; struct timer_list bypass_lb_timer; + cpumask_var_t bypass_lb_donee_cpumask; + cpumask_var_t bypass_lb_resched_cpumask; struct rcu_work rcu_work; /* all ancestors including self */ -- cgit v1.2.3 From c0e8ddc76d54402171787414b1b8eb387812f1f6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:36 -1000 Subject: sched_ext: Align cgroup #ifdef guards with SUB_SCHED vs GROUP_SCHED Two EXT_GROUP_SCHED/SUB_SCHED guards are misclassified: - scx_root_enable_workfn()'s cgroup_get(cgrp) and the err_put_cgrp unwind in scx_alloc_and_add_sched() are under `#if GROUP || SUB`, but the matching cgroup_put() in scx_sched_free_rcu_work() is inside `#ifdef SUB` only (via sch->cgrp, stored only under SUB). GROUP-only would leak a reference on every root-sched enable. - sch_cgroup() / set_cgroup_sched() live under `#if GROUP || SUB` but touch SUB-only fields (sch->cgrp, cgroup->scx_sched). GROUP-only wouldn't compile. GROUP needs CGROUP_SCHED; SUB needs only CGROUPS. CGROUPS=y/CGROUP_SCHED=n gives the reachable GROUP=n, SUB=y combination; GROUP=y, SUB=n isn't reachable today (SUB is def_bool y under CGROUPS). Neither miscategorization triggers a real bug in any reachable config, but keep the guards honest: - Narrow cgroup_get and err_put_cgrp to `#ifdef SUB` (matches the free-side put). - Move sch_cgroup() and set_cgroup_sched() to a separate `#ifdef SUB` block with no-op stubs for the !SUB case; keep root_cgroup() and scx_cgroup_{ lock,unlock}() under `#if GROUP || SUB` since those only need cgroup core. Fixes: ebeca1f930ea ("sched_ext: Introduce cgroup sub-sched support") Reported-by: Chris Mason Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index e07f8c46e399..e2898d60315b 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4413,21 +4413,6 @@ static struct cgroup *root_cgroup(void) return &cgrp_dfl_root.cgrp; } -static struct cgroup *sch_cgroup(struct scx_sched *sch) -{ - return sch->cgrp; -} - -/* for each descendant of @cgrp including self, set ->scx_sched to @sch */ -static void set_cgroup_sched(struct cgroup *cgrp, struct scx_sched *sch) -{ - struct cgroup *pos; - struct cgroup_subsys_state *css; - - cgroup_for_each_live_descendant_pre(pos, css, cgrp) - rcu_assign_pointer(pos->scx_sched, sch); -} - static void scx_cgroup_lock(void) { #ifdef CONFIG_EXT_GROUP_SCHED @@ -4445,12 +4430,30 @@ static void scx_cgroup_unlock(void) } #else /* CONFIG_EXT_GROUP_SCHED || CONFIG_EXT_SUB_SCHED */ static struct cgroup *root_cgroup(void) { return NULL; } -static struct cgroup *sch_cgroup(struct scx_sched *sch) { return NULL; } -static void set_cgroup_sched(struct cgroup *cgrp, struct scx_sched *sch) {} static void scx_cgroup_lock(void) {} static void scx_cgroup_unlock(void) {} #endif /* CONFIG_EXT_GROUP_SCHED || CONFIG_EXT_SUB_SCHED */ +#ifdef CONFIG_EXT_SUB_SCHED +static struct cgroup *sch_cgroup(struct scx_sched *sch) +{ + return sch->cgrp; +} + +/* for each descendant of @cgrp including self, set ->scx_sched to @sch */ +static void set_cgroup_sched(struct cgroup *cgrp, struct scx_sched *sch) +{ + struct cgroup *pos; + struct cgroup_subsys_state *css; + + cgroup_for_each_live_descendant_pre(pos, css, cgrp) + rcu_assign_pointer(pos->scx_sched, sch); +} +#else /* CONFIG_EXT_SUB_SCHED */ +static struct cgroup *sch_cgroup(struct scx_sched *sch) { return NULL; } +static void set_cgroup_sched(struct cgroup *cgrp, struct scx_sched *sch) {} +#endif /* CONFIG_EXT_SUB_SCHED */ + /* * Omitted operations: * @@ -6604,7 +6607,7 @@ err_free_ei: err_free_sch: kfree(sch); err_put_cgrp: -#if defined(CONFIG_EXT_GROUP_SCHED) || defined(CONFIG_EXT_SUB_SCHED) +#ifdef CONFIG_EXT_SUB_SCHED cgroup_put(cgrp); #endif return ERR_PTR(ret); @@ -6695,7 +6698,7 @@ static void scx_root_enable_workfn(struct kthread_work *work) if (ret) goto err_unlock; -#if defined(CONFIG_EXT_GROUP_SCHED) || defined(CONFIG_EXT_SUB_SCHED) +#ifdef CONFIG_EXT_SUB_SCHED cgroup_get(cgrp); #endif sch = scx_alloc_and_add_sched(ops, cgrp, NULL); -- cgit v1.2.3 From ea7c716a24aebe887e0990649ab697bd698cc325 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:36 -1000 Subject: sched_ext: Refuse cross-task select_cpu_from_kfunc calls select_cpu_from_kfunc() skipped pi_lock for @p when called from ops.select_cpu() or another rq-locked SCX op, assuming the held lock protects @p. scx_bpf_select_cpu_dfl() / __scx_bpf_select_cpu_and() accept an arbitrary KF_RCU task_struct, so a caller in e.g. ops.select_cpu(p1) or ops.enqueue(p1) can pass some other p2 - the held pi_lock / rq lock is p1's, not p2's - and reading p2->cpus_ptr / nr_cpus_allowed races with set_cpus_allowed_ptr() and migrate_disable_switch() on another CPU. Abort the scheduler on cross-task calls in both branches: for ops.select_cpu() use scx_kf_arg_task_ok() to verify @p is the wake-up task recorded in current->scx.kf_tasks[] by SCX_CALL_OP_TASK_RET(); for other rq-locked SCX ops compare task_rq(p) against scx_locked_rq(). v2: Switch the in_select_cpu cross-task check from direct_dispatch_task comparison to scx_kf_arg_task_ok(). The former spuriously rejects when ops.select_cpu() calls scx_bpf_dsq_insert() first, then calls scx_bpf_select_cpu_*() on the same task. (Andrea Righi) Fixes: 0022b328504d ("sched_ext: Decouple kfunc unlocked-context check from kf_mask") Reported-by: Chris Mason Signed-off-by: Tejun Heo Cc: Andrea Righi --- kernel/sched/ext_idle.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index c43d62d90e40..7468560a6d80 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -927,14 +927,24 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p, * Accessing p->cpus_ptr / p->nr_cpus_allowed needs either @p's rq * lock or @p's pi_lock. Three cases: * - * - inside ops.select_cpu(): try_to_wake_up() holds @p's pi_lock. + * - inside ops.select_cpu(): try_to_wake_up() holds the wake-up + * task's pi_lock; the wake-up task is recorded in kf_tasks[0] + * by SCX_CALL_OP_TASK_RET(). * - other rq-locked SCX op: scx_locked_rq() points at the held rq. * - truly unlocked (UNLOCKED ops, SYSCALL, non-SCX struct_ops): * nothing held, take pi_lock ourselves. + * + * In the first two cases, BPF schedulers may pass an arbitrary task + * that the held lock doesn't cover. Refuse those. */ if (this_rq()->scx.in_select_cpu) { + if (!scx_kf_arg_task_ok(sch, p)) + return -EINVAL; lockdep_assert_held(&p->pi_lock); - } else if (!scx_locked_rq()) { + } else if (scx_locked_rq()) { + if (task_rq(p) != scx_locked_rq()) + goto cross_task; + } else { raw_spin_lock_irqsave(&p->pi_lock, irq_flags); we_locked = true; } @@ -960,6 +970,11 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p, raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); return cpu; + +cross_task: + scx_error(sch, "select_cpu kfunc called cross-task on %s[%d]", + p->comm, p->pid); + return -EINVAL; } /** -- cgit v1.2.3 From 05b4a9a9bc37f1fa289a8f07b4fbfc3ae681b650 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:36 -1000 Subject: sched_ext: Reject NULL-sch callers in scx_bpf_task_set_slice/dsq_vtime scx_prog_sched(aux) returns NULL for TRACING / SYSCALL BPF progs that have no struct_ops association when the root scheduler has sub_attach set. scx_bpf_task_set_slice() and scx_bpf_task_set_dsq_vtime() pass that NULL into scx_task_on_sched(sch, p), which under CONFIG_EXT_SUB_SCHED is rcu_access_pointer(p->scx.sched) == sch. For any non-scx task p->scx.sched is NULL, so NULL == NULL returns true and the authority gate is bypassed - a privileged but non-struct_ops-associated prog can poke p->scx.slice / p->scx.dsq_vtime on arbitrary tasks. Reject !sch up front so the gate only admits callers with a resolved scheduler. Fixes: 245d09c594ea ("sched_ext: Enforce scheduler ownership when updating slice and dsq_vtime") Reported-by: Chris Mason Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index e2898d60315b..f333fd0cb83f 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -8640,7 +8640,7 @@ __bpf_kfunc bool scx_bpf_task_set_slice(struct task_struct *p, u64 slice, guard(rcu)(); sch = scx_prog_sched(aux); - if (unlikely(!scx_task_on_sched(sch, p))) + if (unlikely(!sch || !scx_task_on_sched(sch, p))) return false; p->scx.slice = slice; @@ -8663,7 +8663,7 @@ __bpf_kfunc bool scx_bpf_task_set_dsq_vtime(struct task_struct *p, u64 vtime, guard(rcu)(); sch = scx_prog_sched(aux); - if (unlikely(!scx_task_on_sched(sch, p))) + if (unlikely(!sch || !scx_task_on_sched(sch, p))) return false; p->scx.dsq_vtime = vtime; -- cgit v1.2.3 From deb7b2f93d0129b79425f830a1e5e7e1bb2c4973 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Apr 2026 14:31:36 -1000 Subject: sched_ext: Release cpus_read_lock on scx_link_sched() failure in root enable scx_root_enable_workfn() takes cpus_read_lock() before scx_link_sched(sch), but the `if (ret) goto err_disable` on failure skips the matching cpus_read_unlock() - all other err_disable gotos along this path drop the lock first. scx_link_sched() only returns non-zero on the sub-sched path (parent != NULL), so the leak path is unreachable via the root caller today. Still, the unwind is out of line with the surrounding paths. Drop cpus_read_lock() before goto err_disable. v2: Correct Fixes: tag (Andrea Righi). Fixes: 25037af712eb ("sched_ext: Add rhashtable lookup for sub-schedulers") Reported-by: Chris Mason Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index f333fd0cb83f..9eda20e5fdb8 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6736,8 +6736,10 @@ static void scx_root_enable_workfn(struct kthread_work *work) rcu_assign_pointer(scx_root, sch); ret = scx_link_sched(sch); - if (ret) + if (ret) { + cpus_read_unlock(); goto err_disable; + } scx_idle_enable(ops); -- cgit v1.2.3 From 54900126ae0a2671f8790a7f95706b9ea95fac4e Mon Sep 17 00:00:00 2001 From: John Madieu Date: Sat, 25 Apr 2026 02:47:25 +0000 Subject: spi: rzv2h-rspi: Fix silent failure in clock setup error path rzv2h_rspi_setup_clock() is declared to return u32 but returns -EINVAL when no valid clock parameters are found. Cast to u32, -EINVAL becomes 0xffffffea, which is a non-zero value. The caller in rzv2h_rspi_prepare_message() guards against failure with: rspi->freq = rzv2h_rspi_setup_clock(rspi, speed_hz); if (!rspi->freq) return -EINVAL; Because 0xffffffea is non-zero, the check is bypassed and the controller proceeds to program SPBR/SPCMD with stale values, leading to an unknown bit rate. Return 0 on the failed-search path, consistent with the existing clk_set_rate() failure path which already returns 0. Fixes: 77d931584dd3 ("spi: rzv2h-rspi: make transfer clock rate finding chip-specific") Signed-off-by: John Madieu Reviewed-by: Biju Das Reviewed-by: Cosmin Tanislav Link: https://patch.msgid.link/20260425024725.2393632-1-john.madieu.xa@bp.renesas.com Signed-off-by: Mark Brown --- drivers/spi/spi-rzv2h-rspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-rzv2h-rspi.c b/drivers/spi/spi-rzv2h-rspi.c index f45af5884638..1655efda7d20 100644 --- a/drivers/spi/spi-rzv2h-rspi.c +++ b/drivers/spi/spi-rzv2h-rspi.c @@ -579,7 +579,7 @@ static u32 rzv2h_rspi_setup_clock(struct rzv2h_rspi_priv *rspi, u32 hz) rspi->info->find_pclk_rate(rspi->pclk, hz, &best_clock); if (!best_clock.clk_rate) - return -EINVAL; + return 0; ret = clk_set_rate(best_clock.clk, best_clock.clk_rate); if (ret) -- cgit v1.2.3 From 4b7774eeab8d66c22f02f5c120602df154a87e12 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 24 Apr 2026 12:24:25 +0100 Subject: regmap: sdw-mbq: Fix spelling mistake "undeferable" -> "undeferrable" There is a spelling mistake in a dev_warn message. Fix it. Signed-off-by: Colin Ian King Link: https://patch.msgid.link/20260424112425.32129-1-colin.i.king@gmail.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-sdw-mbq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap-sdw-mbq.c b/drivers/base/regmap/regmap-sdw-mbq.c index 4533fe793c5f..2585933d4946 100644 --- a/drivers/base/regmap/regmap-sdw-mbq.c +++ b/drivers/base/regmap/regmap-sdw-mbq.c @@ -172,7 +172,7 @@ static int regmap_sdw_mbq_read(void *context, unsigned int reg, unsigned int *va ret = regmap_sdw_mbq_read_impl(slave, reg, val, mbq_size); if (ret == -ENODATA) { if (!deferrable) - dev_warn(dev, "Defer on undeferable control: %x\n", reg); + dev_warn(dev, "Defer on undeferrable control: %x\n", reg); ret = regmap_sdw_mbq_poll_busy(slave, reg, ctx); if (ret) -- cgit v1.2.3 From b0f6f4ac7d5d04fe2adcdd63ed1cd1ad505b8958 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Thu, 23 Apr 2026 15:30:58 -0300 Subject: ASoC: amd: acp: Add DMI quirk for Valve Steam Deck OLED Commit 671dd2ffbd8b ("ASoC: amd: acp: Add new cpu dai and dailink creation for I2S BT instance") introduced a change that "broke" Steam Deck's audio probe, in the OLED model, as observed in the following dmesg snippet: [...] snd_sof_amd_vangogh 0000:04:00.5: Topology: ABI 3:26:0 Kernel ABI 3:23:1 sof_mach nau8821-max: ASoC: physical link acp-bt-codec (id 2) not exist sof_mach nau8821-max: ASoC: topology: could not load header: -22 snd_sof_amd_vangogh 0000:04:00.5: tplg amd/sof-tplg/sof-vangogh-nau8821-max.tplg component load failed -22 snd_sof_amd_vangogh 0000:04:00.5: error: failed to load DSP topology -22 snd_sof_amd_vangogh 0000:04:00.5: ASoC error (-22): at snd_soc_component_probe() on 0000:04:00.5 sof_mach nau8821-max: ASoC: failed to instantiate card -22 sof_mach nau8821-max: error -EINVAL: Failed to register card(sof-nau8821-max) sof_mach nau8821-max: probe with driver sof_mach failed with error -22 [...] Notice the quotes in "broke": it's not really a bug in such commit, but instead a problem with a topology file from Steam Deck OLED. This was discussed to great extent in [1], and Cristian proposed a pretty simple and functional change that resolved the issue for the Deck's issue. That change, though, would break other devices, so it wasn't accepted upstream. And the proper suggested solution (fix the topology) was never implemented, so Valve's kernel (and anyone that wants to boot the mainline on Steam Deck OLED) is carrying that fix downstream. So, we propose hereby a different approach: a DMI quirk, as many already present in the sound drivers, to address this issue solely on Steam Deck OLED, not breaking other devices and as a bonus, allowing simple patch up in case eventually the topology file gets fixed (we'd just need to check against any DMI info reflecting that or the topology/FW versions). The motivation of such upstream quirk is related to users that want to test latest kernel trees on their devices and get no only non-working sound device, but seems some games (like Ori and the Blind Forest) can't properly work without a proper functional audio device. Example of such report can be seen at [2]. Cc: Mark Brown Cc: Robert Beckett Cc: Umang Jain Fixes: 671dd2ffbd8b ("ASoC: amd: acp: Add new cpu dai and dailink creation for I2S BT instance") Link: https://lore.kernel.org/r/20231209205351.880797-11-cristian.ciocaltea@collabora.com/ [1] Link: https://bugzilla.kernel.org/show_bug.cgi?id=218677 [2] Reviewed-by: Cristian Ciocaltea Reviewed-by: Mario Limonciello Tested-by: Melissa Wen Signed-off-by: Guilherme G. Piccoli Link: https://patch.msgid.link/20260423183505.116445-1-gpiccoli@igalia.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-legacy-mach.c | 2 +- sound/soc/amd/acp/acp-mach-common.c | 22 +++++++++++++++++++--- sound/soc/amd/acp/acp-mach.h | 4 ++++ sound/soc/amd/acp/acp-sof-mach.c | 2 +- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/sound/soc/amd/acp/acp-legacy-mach.c b/sound/soc/amd/acp/acp-legacy-mach.c index a7a551366a40..235d6cc83fa9 100644 --- a/sound/soc/amd/acp/acp-legacy-mach.c +++ b/sound/soc/amd/acp/acp-legacy-mach.c @@ -174,7 +174,7 @@ static int acp_asoc_probe(struct platform_device *pdev) acp_card_drvdata->acp_rev = mach->mach_params.subsystem_rev; dmi_id = dmi_first_match(acp_quirk_table); - if (dmi_id && dmi_id->driver_data) + if (dmi_id && dmi_id->driver_data == (void *)QUIRK_TDM_MODE_ENABLE) acp_card_drvdata->tdm_mode = dmi_id->driver_data; ret = acp_legacy_dai_links_create(card); diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c index 09f6c9a2c041..ef784cca13f2 100644 --- a/sound/soc/amd/acp/acp-mach-common.c +++ b/sound/soc/amd/acp/acp-mach-common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "../../codecs/rt5682.h" #include "../../codecs/rt1019.h" @@ -37,15 +38,21 @@ #define NAU8821_FREQ_OUT 12288000 #define MAX98388_CODEC_DAI "max98388-aif1" -#define TDM_MODE_ENABLE 1 - const struct dmi_system_id acp_quirk_table[] = { { /* Google skyrim proto-0 */ .matches = { DMI_EXACT_MATCH(DMI_PRODUCT_FAMILY, "Google_Skyrim"), }, - .driver_data = (void *)TDM_MODE_ENABLE, + .driver_data = (void *)QUIRK_TDM_MODE_ENABLE, + }, + { + /* Valve Steam Deck OLED */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Valve"), + DMI_MATCH(DMI_PRODUCT_NAME, "Galileo"), + }, + .driver_data = (void *)QUIRK_REMAP_DMIC_BT, }, {} }; @@ -1401,6 +1408,7 @@ int acp_sofdsp_dai_links_create(struct snd_soc_card *card) struct snd_soc_dai_link *links; struct device *dev = card->dev; struct acp_card_drvdata *drv_data = card->drvdata; + const struct dmi_system_id *dmi_id = dmi_first_match(acp_quirk_table); int i = 0, num_links = 0; if (drv_data->hs_cpu_id) @@ -1572,6 +1580,9 @@ int acp_sofdsp_dai_links_create(struct snd_soc_card *card) links[i].codecs = &snd_soc_dummy_dlc; links[i].num_codecs = 1; } + + if (dmi_id && dmi_id->driver_data == (void *)QUIRK_REMAP_DMIC_BT) + links[i].id = DMIC_BE_ID; i++; } @@ -1587,6 +1598,11 @@ int acp_sofdsp_dai_links_create(struct snd_soc_card *card) links[i].capture_only = 1; links[i].nonatomic = true; links[i].no_pcm = 1; + + if (dmi_id && dmi_id->driver_data == (void *)QUIRK_REMAP_DMIC_BT) { + links[i].id = BT_BE_ID; + dev_dbg(dev, "quirk REMAP_DMIC_BT enabled\n"); + } } card->dai_link = links; diff --git a/sound/soc/amd/acp/acp-mach.h b/sound/soc/amd/acp/acp-mach.h index f94c30c20f20..7177d3fd9619 100644 --- a/sound/soc/amd/acp/acp-mach.h +++ b/sound/soc/amd/acp/acp-mach.h @@ -26,6 +26,10 @@ #define acp_get_drvdata(card) ((struct acp_card_drvdata *)(card)->drvdata) +/* List of DMI quirks - check acp-mach-common.c for usage. */ +#define QUIRK_TDM_MODE_ENABLE 1 +#define QUIRK_REMAP_DMIC_BT 2 + enum be_id { HEADSET_BE_ID = 0, AMP_BE_ID, diff --git a/sound/soc/amd/acp/acp-sof-mach.c b/sound/soc/amd/acp/acp-sof-mach.c index 6215e31ecedd..36ecef7013b9 100644 --- a/sound/soc/amd/acp/acp-sof-mach.c +++ b/sound/soc/amd/acp/acp-sof-mach.c @@ -110,7 +110,7 @@ static int acp_sof_probe(struct platform_device *pdev) acp_card_drvdata = card->drvdata; dmi_id = dmi_first_match(acp_quirk_table); - if (dmi_id && dmi_id->driver_data) + if (dmi_id && dmi_id->driver_data == (void *)QUIRK_TDM_MODE_ENABLE) acp_card_drvdata->tdm_mode = dmi_id->driver_data; acp_card_drvdata->acp_rev = mach->mach_params.subsystem_rev; -- cgit v1.2.3 From 3c6f06a200796ae7b2b1065e8a6499b138e27a50 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Fri, 24 Apr 2026 18:50:31 +0800 Subject: ASoC: SOF: Intel: add an empty adr_link An empty adr_link is expected to terminate the for (adr_link = mach_params->links; adr_link->num_adr; adr_link++) loop. Allocate link_num + 1 links to add an empty adr_link. Fixes: 5226d19d4cae5 ("ASoC: SOF: Intel: use sof_sdw as default SDW machine driver") Signed-off-by: Bard Liao Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20260424105031.114053-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index b3d61d973ce4..8662b422eb80 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -1412,7 +1412,8 @@ static struct snd_soc_acpi_mach *hda_sdw_machine_select(struct snd_sof_dev *sdev link_mask |= BIT(peripherals->array[i]->bus->link_id); link_num = hweight32(link_mask); - links = devm_kcalloc(sdev->dev, link_num, sizeof(*links), GFP_KERNEL); + /* An empty adr_link is needed to terminate the adr_link loop */ + links = devm_kcalloc(sdev->dev, link_num + 1, sizeof(*links), GFP_KERNEL); if (!links) return NULL; -- cgit v1.2.3 From b4683a239a409d65f88052f5630c748a8ba070cd Mon Sep 17 00:00:00 2001 From: John Madieu Date: Sat, 25 Apr 2026 09:29:34 +0000 Subject: spi: rockchip: Read ISR, not IMR, to detect cs-inactive IRQ rockchip_spi_isr() decides whether the current interrupt was the cs-inactive event by reading IMR: if (rs->cs_inactive && readl_relaxed(rs->regs + ROCKCHIP_SPI_IMR) & INT_CS_INACTIVE) ctlr->target_abort(ctlr); IMR is the interrupt mask register: it tells which sources are enabled, not which one fired. In the PIO path, rockchip_spi_prepare_irq() enables both INT_RF_FULL and INT_CS_INACTIVE in IMR when rs->cs_inactive is true: if (rs->cs_inactive) writel_relaxed(INT_RF_FULL | INT_CS_INACTIVE, rs->regs + ROCKCHIP_SPI_IMR); so the IMR check is always true once cs_inactive is enabled, and every PIO interrupt - including normal RF_FULL completions - is dispatched to ctlr->target_abort(), aborting the transfer. The bug is reachable on ROCKCHIP_SPI_VER2_TYPE2 in target mode with a DMA-capable controller when the transfer is short enough to fall back to PIO (rockchip_spi_can_dma() returns false below fifo_len). Read ISR (which is RISR masked by IMR) so the check actually reflects which interrupt fired, and parenthesise the expression for clarity while at it. Fixes: 869f2c94db92 ("spi: rockchip: Stop spi slave dma receiver when cs inactive") Signed-off-by: John Madieu Link: https://patch.msgid.link/20260425092936.2590132-2-john.madieu@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-rockchip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 14cd1b9d9793..de39f5da62cb 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -357,7 +357,8 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id) struct rockchip_spi *rs = spi_controller_get_devdata(ctlr); /* When int_cs_inactive comes, spi target abort */ - if (rs->cs_inactive && readl_relaxed(rs->regs + ROCKCHIP_SPI_IMR) & INT_CS_INACTIVE) { + if (rs->cs_inactive && + (readl_relaxed(rs->regs + ROCKCHIP_SPI_ISR) & INT_CS_INACTIVE)) { ctlr->target_abort(ctlr); writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR); writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR); -- cgit v1.2.3 From 7643978722aac3a012783ce12dc534eba7c51698 Mon Sep 17 00:00:00 2001 From: John Madieu Date: Sat, 25 Apr 2026 09:29:35 +0000 Subject: spi: rockchip: Drop unused and broken CR0 macros Two CTRLR0 macros are defined but never referenced, and both are wrong: - CR0_XFM_MASK shifts by SPI_XFM_OFFSET, which does not exist anywhere in the tree. The intended symbol is CR0_XFM_OFFSET. - CR0_MTM_OFFSET is defined as 0x21, i.e. bit 33 of a 32-bit register. The value is meaningless and the macro is unused. Drop both. They can be re-introduced correctly when an actual user appears. Signed-off-by: John Madieu Link: https://patch.msgid.link/20260425092936.2590132-3-john.madieu@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-rockchip.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index de39f5da62cb..231fbcf0e7aa 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -98,7 +98,6 @@ #define CR0_FRF_MICROWIRE 0x2 #define CR0_XFM_OFFSET 18 -#define CR0_XFM_MASK (0x03 << SPI_XFM_OFFSET) #define CR0_XFM_TR 0x0 #define CR0_XFM_TO 0x1 #define CR0_XFM_RO 0x2 @@ -109,8 +108,6 @@ #define CR0_SOI_OFFSET 23 -#define CR0_MTM_OFFSET 0x21 - /* Bit fields in SER, 2bit */ #define SER_MASK 0x3 -- cgit v1.2.3 From 4cfb5971c2fbfac061c23fb4224a3a008199de81 Mon Sep 17 00:00:00 2001 From: James Calligeros Date: Sat, 25 Apr 2026 10:44:03 +1000 Subject: ASoC: tas2764: Mark die temp register as volatile Reading the temperature register always returns the first value read from the chip due to regcache. Mark TAS2764_TEMP as volatile to prevent returning stale, cached values when reading the die temp. Fixes: 186dfc85f9a8 ("ASoC: tas2764: expose die temp to hwmon") Signed-off-by: James Calligeros Link: https://patch.msgid.link/20260425-tas27xx-hwmon-fixes-v1-1-83c13b8e8f54@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2764.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 423b7073b302..6aab6d2b7419 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -904,6 +904,7 @@ static bool tas2764_volatile_register(struct device *dev, unsigned int reg) { switch (reg) { case TAS2764_SW_RST: + case TAS2764_TEMP: case TAS2764_INT_LTCH0 ... TAS2764_INT_LTCH4: case TAS2764_INT_CLK_CFG: return true; -- cgit v1.2.3 From c7ecb6a61908c2604dda6e42da66724d256de7b9 Mon Sep 17 00:00:00 2001 From: James Calligeros Date: Sat, 25 Apr 2026 10:44:05 +1000 Subject: ASoC: tas2770: Fix order of operations for temperature calculation The order of operations to derive the temperature from the temp register values was wrong, since 1000 / 16 is not an integer. This resulted in the calculated temperature value deviating from the value represented by the registers slightly, which was most obvious when the registers were zeroed (-92.265 *C vs the expected -93.000 *C). Scale the reading before dividing the whole thing by 16 to correct this. Fixes: ff73e2780169 ("ASoC: tas2770: expose die temp to hwmon") Signed-off-by: James Calligeros Link: https://patch.msgid.link/20260425-tas27xx-hwmon-fixes-v1-3-83c13b8e8f54@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2770.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index d4d7d056141b..50501bcbe916 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -624,7 +624,7 @@ static int tas2770_read_die_temp(struct tas2770_priv *tas2770, long *result) /* * As per datasheet: divide register by 16 and subtract 93 to get * degrees Celsius. hwmon requires millidegrees. Let's avoid rounding - * errors by subtracting 93 * 16 then multiplying by 1000 / 16. + * errors by subtracting 93 * 16 and scaling before dividing. * * NOTE: The ADC registers are initialised to 0 on reset. This means * that the temperature will read -93 *C until the chip is brought out @@ -633,7 +633,7 @@ static int tas2770_read_die_temp(struct tas2770_priv *tas2770, long *result) * value read back from its registers will be the last value sampled * before entering software shutdown. */ - *result = (reading - (93 * 16)) * (1000 / 16); + *result = (reading - (93 * 16)) * 1000 / 16; return 0; } -- cgit v1.2.3 From dad701bdb74368e6da30177b1d9e5529e3381591 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 25 Apr 2026 20:02:49 -0400 Subject: ASoC: tegra: Remove stale snd-soc-tegra-utils composite module definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kconfiglint reports two warnings for sound/soc/tegra/Makefile: M002: composite module 'snd-soc-tegra-utils' defined but not in any obj-* M008: composite module 'snd-soc-tegra-utils': tegra_asoc_utils.o has no source file The composite module definition `snd-soc-tegra-utils-y += tegra_asoc_utils.o` references a source file that no longer exists and defines a module that is never included in any obj-* target. The tegra_asoc_utils module was originally introduced in commit a3cd50deef7b ("ASoC: Tegra: Move utilities to separate module") by Stephen Warren in 2011 to provide shared clock/rate utility functions for Tegra machine drivers. At that time, the Makefile had both the composite definition (`snd-soc-tegra-utils-objs`) and the build target (`obj-$(CONFIG_SND_TEGRA_SOC) += snd-soc-tegra-utils.o`). In 2021, commit 8c1b3b159300 ("ASoC: tegra: Squash utils into common machine driver") by Dmitry Osipenko merged tegra_asoc_utils.c into tegra_asoc_machine.c, deleting both the .c and .h files. That commit correctly removed the obj-* build target line but overlooked the composite module definition line (`snd-soc-tegra-utils-objs += tegra_asoc_utils.o`). The orphaned line persisted unnoticed and was even mechanically updated in 2024 by commit 51a50d6ad727 ("ASoC: tegra: Use *-y instead of *-objs in Makefile") by Takashi Iwai, which converted it from `-objs` to `-y` syntax as part of a treewide cleanup — inadvertently refreshing a stale definition. Remove the orphaned composite module definition since it serves no purpose: the source file was deleted, the obj-* target was already removed, and the functionality now lives in tegra_asoc_machine.c. Assisted-by: Claude:claude-opus-4-6 kconfiglint Signed-off-by: Sasha Levin Link: https://patch.msgid.link/20260426000249.54799-1-sashal@kernel.org Signed-off-by: Mark Brown --- sound/soc/tegra/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/tegra/Makefile b/sound/soc/tegra/Makefile index 3f396c87802e..1c18ef6971c0 100644 --- a/sound/soc/tegra/Makefile +++ b/sound/soc/tegra/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Tegra platform Support snd-soc-tegra-pcm-y := tegra_pcm.o -snd-soc-tegra-utils-y += tegra_asoc_utils.o snd-soc-tegra20-ac97-y := tegra20_ac97.o snd-soc-tegra20-das-y := tegra20_das.o snd-soc-tegra20-i2s-y := tegra20_i2s.o -- cgit v1.2.3 From 74c876bfd71b1023029a483d7213015201f62b53 Mon Sep 17 00:00:00 2001 From: Ajay Kumar Nandam Date: Mon, 20 Apr 2026 23:32:21 +0530 Subject: ASoC: codecs: wcd937x: fix AUX PA sequencing and mixer controls Enable AUX PA sequencing during AUX DAC DAPM events and keep the AUX-specific RX supplies enabled while the path is active. Add the missing AUX-related mixer controls, including CLSH PA and DSD left/right switches, so AUX playback can be routed from userspace. Signed-off-by: Ajay Kumar Nandam Link: https://patch.msgid.link/20260420180221.785113-1-ajay.nandam@oss.qualcomm.com Signed-off-by: Mark Brown --- sound/soc/codecs/wcd937x.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd937x.c b/sound/soc/codecs/wcd937x.c index 10a2d598caa7..72a53f95d688 100644 --- a/sound/soc/codecs/wcd937x.c +++ b/sound/soc/codecs/wcd937x.c @@ -546,6 +546,9 @@ static int wcd937x_codec_aux_dac_event(struct snd_soc_dapm_widget *w, snd_soc_component_update_bits(component, WCD937X_DIGITAL_CDC_ANA_CLK_CTL, BIT(2), BIT(2)); + snd_soc_component_update_bits(component, + WCD937X_AUX_AUXPA, + BIT(4), BIT(4)); snd_soc_component_update_bits(component, WCD937X_DIGITAL_CDC_DIG_CLK_CTL, BIT(2), BIT(2)); @@ -562,6 +565,9 @@ static int wcd937x_codec_aux_dac_event(struct snd_soc_dapm_widget *w, snd_soc_component_update_bits(component, WCD937X_DIGITAL_CDC_ANA_CLK_CTL, BIT(2), 0x00); + snd_soc_component_update_bits(component, + WCD937X_AUX_AUXPA, + BIT(4), 0x00); break; } @@ -730,10 +736,23 @@ static int wcd937x_codec_enable_aux_pa(struct snd_soc_dapm_widget *w, snd_soc_component_update_bits(component, WCD937X_ANA_RX_SUPPLIES, BIT(1), BIT(1)); + /* Enable AUX PA related RX supplies */ + snd_soc_component_update_bits(component, + WCD937X_ANA_RX_SUPPLIES, + BIT(6), BIT(6)); + snd_soc_component_update_bits(component, + WCD937X_ANA_RX_SUPPLIES, + BIT(7), BIT(7)); enable_irq(wcd937x->aux_pdm_wd_int); break; case SND_SOC_DAPM_PRE_PMD: disable_irq_nosync(wcd937x->aux_pdm_wd_int); + snd_soc_component_update_bits(component, + WCD937X_ANA_RX_SUPPLIES, + BIT(6), 0x00); + snd_soc_component_update_bits(component, + WCD937X_ANA_RX_SUPPLIES, + BIT(7), 0x00); break; case SND_SOC_DAPM_POST_PMD: usleep_range(2000, 2010); @@ -2051,7 +2070,12 @@ static const struct snd_kcontrol_new wcd937x_snd_controls[] = { wcd937x_get_swr_port, wcd937x_set_swr_port), SOC_SINGLE_EXT("LO Switch", WCD937X_LO, 0, 1, 0, wcd937x_get_swr_port, wcd937x_set_swr_port), - + SOC_SINGLE_EXT("CLSH PA Switch", WCD937X_CLSH, 0, 1, 0, + wcd937x_get_swr_port, wcd937x_set_swr_port), + SOC_SINGLE_EXT("DSD_L Switch", WCD937X_DSD_L, 0, 1, 0, + wcd937x_get_swr_port, wcd937x_set_swr_port), + SOC_SINGLE_EXT("DSD_R Switch", WCD937X_DSD_R, 0, 1, 0, + wcd937x_get_swr_port, wcd937x_set_swr_port), SOC_SINGLE_EXT("ADC1 Switch", WCD937X_ADC1, 1, 1, 0, wcd937x_get_swr_port, wcd937x_set_swr_port), SOC_SINGLE_EXT("ADC2 Switch", WCD937X_ADC2, 1, 1, 0, -- cgit v1.2.3 From 5b1689a41f02955c5361944f748a4812a6ff9307 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 21 Apr 2026 14:36:12 +0200 Subject: spi: cadence: fix unclocked access on unbind Make sure that the controller is runtime resumed before disabling it during driver unbind to avoid unclocked register access and unbalanced clock disable. Also restore the autosuspend setting. This issue was flagged by Sashiko when reviewing a controller deregistration fix. Fixes: d36ccd9f7ea4 ("spi: cadence: Runtime pm adaptation") Cc: stable@vger.kernel.org # 4.7 Cc: Shubhrajyoti Datta Link: https://sashiko.dev/#/patchset/20260414134319.978196-1-johan%40kernel.org?part=1 Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260421123615.1533617-2-johan@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-cadence.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index 08d7dabe818d..bf4a7cf6b142 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -776,16 +776,23 @@ static void cdns_spi_remove(struct platform_device *pdev) { struct spi_controller *ctlr = platform_get_drvdata(pdev); struct cdns_spi *xspi = spi_controller_get_devdata(ctlr); + int ret = 0; + + if (!spi_controller_is_target(ctlr)) + ret = pm_runtime_get_sync(&pdev->dev); spi_controller_get(ctlr); spi_unregister_controller(ctlr); - cdns_spi_write(xspi, CDNS_SPI_ER, CDNS_SPI_ER_DISABLE); + if (ret >= 0) + cdns_spi_write(xspi, CDNS_SPI_ER, CDNS_SPI_ER_DISABLE); if (!spi_controller_is_target(ctlr)) { pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); } spi_controller_put(ctlr); -- cgit v1.2.3 From ecea4f0e9db2fb6ab4a68a59c5aba0d8f59a9566 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 21 Apr 2026 14:36:13 +0200 Subject: spi: cadence: fix clock imbalance on probe failure Make sure that the controller is active before disabling clocks on probe failure to avoid unbalanced clock disable. Also drop the usage count before returning (so that the controller can be suspended after a probe deferral) and restore the autosuspend setting. Fixes: d36ccd9f7ea4 ("spi: cadence: Runtime pm adaptation") Cc: stable@vger.kernel.org # 4.7 Cc: Shubhrajyoti Datta Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260421123615.1533617-3-johan@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-cadence.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index bf4a7cf6b142..891e2ba36958 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -741,7 +741,6 @@ static int cdns_spi_probe(struct platform_device *pdev) /* Set to default valid value */ ctlr->max_speed_hz = xspi->clk_rate / 4; xspi->speed_hz = ctlr->max_speed_hz; - pm_runtime_put_autosuspend(&pdev->dev); } else { ctlr->mode_bits |= SPI_NO_CS; ctlr->target_abort = cdns_target_abort; @@ -752,12 +751,17 @@ static int cdns_spi_probe(struct platform_device *pdev) goto clk_dis_all; } + if (!spi_controller_is_target(ctlr)) + pm_runtime_put_autosuspend(&pdev->dev); + return ret; clk_dis_all: if (!spi_controller_is_target(ctlr)) { pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); } remove_ctlr: spi_controller_put(ctlr); -- cgit v1.2.3 From 5ff4d5d1af0c7517bd8db83c95c4247a9729a548 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 21 Apr 2026 14:53:49 +0200 Subject: spi: cadence-quadspi: fix runtime pm disable imbalance on probe failure A recent attempt to fix the probe error handling introduced a runtime PM disable depth imbalance by incorrectly disabling runtime PM on early failures (e.g. probe deferral). Fixes: f18c8cfa4f1a ("spi: cadence-qspi: Fix probe error path and remove") Cc: stable@vger.kernel.org # 7.0 Cc: Miquel Raynal (Schneider Electric) Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260421125354.1534871-2-johan@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 65aff2e70265..2ab6d2a81865 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1867,7 +1867,7 @@ static int cqspi_probe(struct platform_device *pdev) ret = clk_bulk_prepare_enable(CLK_QSPI_NUM, cqspi->clks); if (ret) { dev_err(dev, "Cannot enable QSPI clocks.\n"); - goto disable_rpm; + return ret; } /* Obtain QSPI reset control */ @@ -1977,7 +1977,7 @@ static int cqspi_probe(struct platform_device *pdev) ret = cqspi_request_mmap_dma(cqspi); if (ret == -EPROBE_DEFER) { dev_err_probe(&pdev->dev, ret, "Failed to request mmap DMA\n"); - goto disable_controller; + goto disable_rpm; } } @@ -1995,14 +1995,13 @@ static int cqspi_probe(struct platform_device *pdev) release_dma_chan: if (cqspi->rx_chan) dma_release_channel(cqspi->rx_chan); -disable_controller: +disable_rpm: + if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) + pm_runtime_disable(dev); cqspi_controller_enable(cqspi, 0); disable_clks: if (pm_runtime_get_sync(&pdev->dev) >= 0) clk_bulk_disable_unprepare(CLK_QSPI_NUM, cqspi->clks); -disable_rpm: - if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) - pm_runtime_disable(dev); return ret; } -- cgit v1.2.3 From cba53fe20c18688c17ca668ad0e4ec05e31c70d3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 21 Apr 2026 14:53:50 +0200 Subject: spi: cadence-quadspi: fix clock imbalance on probe failure Drop the bogus runtime PM get on probe failures that was never needed and that leaks a usage count reference while preventing the clocks from being disabled (as runtime PM has not yet been enabled). Fixes: 1889dd208197 ("spi: cadence-quadspi: Fix clock disable on probe failure path") Cc: stable@vger.kernel.org # 6.19 Cc: Anurag Dutta Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260421125354.1534871-3-johan@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 2ab6d2a81865..87e2bb66ad6c 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -2000,8 +2000,7 @@ disable_rpm: pm_runtime_disable(dev); cqspi_controller_enable(cqspi, 0); disable_clks: - if (pm_runtime_get_sync(&pdev->dev) >= 0) - clk_bulk_disable_unprepare(CLK_QSPI_NUM, cqspi->clks); + clk_bulk_disable_unprepare(CLK_QSPI_NUM, cqspi->clks); return ret; } -- cgit v1.2.3 From 233db2cb14db8b1935dda52a6affd97276462b82 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 21 Apr 2026 14:53:51 +0200 Subject: spi: cadence-quadspi: fix unclocked access on unbind Make sure that the controller is runtime resumed before disabling it during driver unbind to avoid an unclocked register access. This issue was flagged by Sashiko when reviewing a controller deregistration fix. Fixes: 0578a6dbfe75 ("spi: spi-cadence-quadspi: add runtime pm support") Cc: stable@vger.kernel.org # 6.7 Cc: Dhruva Gole Link: https://sashiko.dev/#/patchset/20260414134319.978196-1-johan%40kernel.org?part=2 Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260421125354.1534871-4-johan@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 87e2bb66ad6c..9ccfdc8c36fe 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -2024,14 +2024,13 @@ static void cqspi_remove(struct platform_device *pdev) if (cqspi->rx_chan) dma_release_channel(cqspi->rx_chan); - cqspi_controller_enable(cqspi, 0); - - if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) ret = pm_runtime_get_sync(&pdev->dev); - if (ret >= 0) + if (ret >= 0) { + cqspi_controller_enable(cqspi, 0); clk_bulk_disable_unprepare(CLK_QSPI_NUM, cqspi->clks); + } if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) { pm_runtime_put_sync(&pdev->dev); -- cgit v1.2.3 From 5e8bb0cc72f1d52d8ac2a88f4c952e2e98056aed Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 21 Apr 2026 14:53:52 +0200 Subject: spi: cadence-quadspi: fix runtime pm and clock imbalance on unbind Make sure to balance the runtime PM usage count before returning on probe failure (to allow the controller to suspend after a probe deferral) and to only drop the usage count on driver unbind to avoid a clock disable imbalance. Also restore the autosuspend setting. Fixes: 0578a6dbfe75 ("spi: spi-cadence-quadspi: add runtime pm support") Cc: stable@vger.kernel.org # 6.7 Cc: Dhruva Gole Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260421125354.1534871-5-johan@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 9ccfdc8c36fe..057381e56a7f 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1860,10 +1860,6 @@ static int cqspi_probe(struct platform_device *pdev) if (irq < 0) return -ENXIO; - ret = pm_runtime_set_active(dev); - if (ret) - return ret; - ret = clk_bulk_prepare_enable(CLK_QSPI_NUM, cqspi->clks); if (ret) { dev_err(dev, "Cannot enable QSPI clocks.\n"); @@ -1962,10 +1958,11 @@ static int cqspi_probe(struct platform_device *pdev) cqspi->sclk = 0; if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) { - pm_runtime_enable(dev); pm_runtime_set_autosuspend_delay(dev, CQSPI_AUTOSUSPEND_TIMEOUT); pm_runtime_use_autosuspend(dev); pm_runtime_get_noresume(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); } host->num_chipselect = cqspi->num_chipselect; @@ -1996,8 +1993,12 @@ release_dma_chan: if (cqspi->rx_chan) dma_release_channel(cqspi->rx_chan); disable_rpm: - if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) + if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) { pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_put_noidle(dev); + pm_runtime_dont_use_autosuspend(dev); + } cqspi_controller_enable(cqspi, 0); disable_clks: clk_bulk_disable_unprepare(CLK_QSPI_NUM, cqspi->clks); @@ -2033,8 +2034,10 @@ static void cqspi_remove(struct platform_device *pdev) } if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) { - pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); } } -- cgit v1.2.3 From 8ed3311131077712cdd0b3afec6909b9388ad3e4 Mon Sep 17 00:00:00 2001 From: Li Jian Date: Fri, 17 Apr 2026 18:53:14 +0800 Subject: ASoC: ES8389: convert to devm_clk_get_optional() to get clock When enabling ES8390 via ACPI description, es8389 would fail to obtain a clock source, causing the driver to fail to initialize. This was not an issue with older kernels, but since commit abae8e57e49a ("clk: generalize devm_clk_get() a bit"), devm_clk_get() would return an error pointer when a clock source was not detected (instead of falling back to a static clock), causing the driver to fail early. Use devm_clk_get_optional() instead to return to the previous behaviour, allowing the use of a static clock source. Cc: stable@vger.kernel.org Signed-off-by: Li Jian Link: https://patch.msgid.link/tencent_7C78374FB9F4B3A37101E5C719715D8BC40A@qq.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8389.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/es8389.c b/sound/soc/codecs/es8389.c index 8d418cae371a..449d9574b03a 100644 --- a/sound/soc/codecs/es8389.c +++ b/sound/soc/codecs/es8389.c @@ -892,7 +892,7 @@ static int es8389_probe(struct snd_soc_component *component) return ret; } - es8389->mclk = devm_clk_get(component->dev, "mclk"); + es8389->mclk = devm_clk_get_optional(component->dev, "mclk"); if (IS_ERR(es8389->mclk)) return dev_err_probe(component->dev, PTR_ERR(es8389->mclk), "ES8389 is unable to get mclk\n"); -- cgit v1.2.3 From 5f69165b7e4215f02247b0c64052c71b2f66d73a Mon Sep 17 00:00:00 2001 From: "Mukesh Kumar Chaurasiya (IBM)" Date: Sun, 26 Apr 2026 15:17:25 +0530 Subject: rust/drm: import ARef from sync crate ARef is defined in sync and is getting used from types causing the build to fail. Fix this by using ARef from sync module. Fixes: 80df573af9ef ("rust: drm: gem: shmem: Add DRM shmem helper abstraction") Signed-off-by: Mukesh Kumar Chaurasiya (IBM) Link: https://patch.msgid.link/20260426094725.2188668-2-mkchauras@gmail.com [ Add missing Fixes: tag. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/drm/gem/shmem.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/rust/kernel/drm/gem/shmem.rs b/rust/kernel/drm/gem/shmem.rs index d025fb035195..e1b648920d2f 100644 --- a/rust/kernel/drm/gem/shmem.rs +++ b/rust/kernel/drm/gem/shmem.rs @@ -19,10 +19,8 @@ use crate::{ }, error::to_result, prelude::*, - types::{ - ARef, - Opaque, // - }, // + sync::aref::ARef, + types::Opaque, // }; use core::{ ops::{ -- cgit v1.2.3 From 15e8bae5d930c91b8739a87d75db0a6efca3cb32 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 23 Apr 2026 14:46:35 +0200 Subject: MAINTAINERS: nova: update mailing list The nouveau mailing list has some issues (e.g. with stripping Cc entries from replies when using notmuch + b4 based workflows). Besides that, having a separate mailing list for nova also helps to better distinguish nova from nouveau and makes it easier to track nova-specific discussions. Replace the nouveau mailing list with the new nova-gpu@lists.linux.dev mailing list for both nova-core and nova-drm, and remove the patchwork entries, since those are bound to the nouveau mailing list and not used by nova anyway. Link: https://lore.kernel.org/all/bc2517c2-6772-4cbd-8fd7-6dbdcdd13eab@nvidia.com/ Reviewed-by: Joel Fernandes Reviewed-by: John Hubbard Acked-by: Alexandre Courbot Link: https://patch.msgid.link/20260423124649.38793-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- MAINTAINERS | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2fb1c75afd16..5c9272622033 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8193,10 +8193,9 @@ F: include/uapi/drm/nouveau_drm.h CORE DRIVER FOR NVIDIA GPUS [RUST] M: Danilo Krummrich M: Alexandre Courbot -L: nouveau@lists.freedesktop.org +L: nova-gpu@lists.linux.dev S: Supported W: https://rust-for-linux.com/nova-gpu-driver -Q: https://patchwork.freedesktop.org/project/nouveau/ B: https://gitlab.freedesktop.org/drm/nova/-/issues C: irc://irc.oftc.net/nouveau T: git https://gitlab.freedesktop.org/drm/rust/kernel.git drm-rust-next @@ -8205,10 +8204,9 @@ F: drivers/gpu/nova-core/ DRM DRIVER FOR NVIDIA GPUS [RUST] M: Danilo Krummrich -L: nouveau@lists.freedesktop.org +L: nova-gpu@lists.linux.dev S: Supported W: https://rust-for-linux.com/nova-gpu-driver -Q: https://patchwork.freedesktop.org/project/nouveau/ B: https://gitlab.freedesktop.org/drm/nova/-/issues C: irc://irc.oftc.net/nouveau T: git https://gitlab.freedesktop.org/drm/rust/kernel.git drm-rust-next -- cgit v1.2.3 From b0bf14546bcefa4ea49f5efcd7db2a99f0cabde9 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 3 Apr 2026 11:20:55 -0400 Subject: nfsd: fix GET_DIR_DELEGATION when VFS leases are disabled When leases are disabled on the server, running xfstest generic/309 leads to an error because GET_DIR_DELEGATION returns EINVAL. nfsd_get_dir_deleg() can fail in several ways: like memory allocation and unable to get a lease because either leases are disable or it's already held. Currently only the condition "already held" is translated to returning directory-delegation-is-unavailable error. However, other failure conditions are likely temporary and thus should result in the same kind of error. Fixes: 8b99f6a8c116 ("nfsd: wire up GET_DIR_DELEGATION handling") Cc: stable@vger.kernel.org Signed-off-by: Olga Kornievskaia Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 85e94c30285a..2797da8cc950 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2535,10 +2535,6 @@ nfsd4_get_dir_delegation(struct svc_rqst *rqstp, dd = nfsd_get_dir_deleg(cstate, gdd, nf); nfsd_file_put(nf); if (IS_ERR(dd)) { - int err = PTR_ERR(dd); - - if (err != -EAGAIN) - return nfserrno(err); gdd->gddrnf_status = GDD4_UNAVAIL; return nfs_ok; } -- cgit v1.2.3 From 0cbc300257d9b399491909806777f504ec687c1d Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Sun, 26 Apr 2026 21:39:37 +1000 Subject: smb/client: remove unused smb3_parse_opt() Commit abdb1742a3123 ("cifs: get rid of mount options string parsing") removed the last caller. Signed-off-by: David Disseldorp Signed-off-by: Steve French --- fs/smb/client/cifsproto.h | 1 - fs/smb/client/fs_context.c | 31 ------------------------------- 2 files changed, 32 deletions(-) diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 4a25afda9448..79d891f7df1a 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -89,7 +89,6 @@ int cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid); char *smb3_fs_context_fullpath(const struct smb3_fs_context *ctx, char dirsep); int smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx); -int smb3_parse_opt(const char *options, const char *key, char **val); int cifs_ipaddr_cmp(struct sockaddr *srcaddr, struct sockaddr *rhs); bool cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs); int cifs_discard_remaining_data(struct TCP_Server_Info *server); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index b9544eb0381b..b63ec7ab6e51 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -536,37 +536,6 @@ cifs_parse_smb_version(struct fs_context *fc, char *value, struct smb3_fs_contex return 0; } -int smb3_parse_opt(const char *options, const char *key, char **val) -{ - int rc = -ENOENT; - char *opts, *orig, *p; - - orig = opts = kstrdup(options, GFP_KERNEL); - if (!opts) - return -ENOMEM; - - while ((p = strsep(&opts, ","))) { - char *nval; - - if (!*p) - continue; - if (strncasecmp(p, key, strlen(key))) - continue; - nval = strchr(p, '='); - if (nval) { - if (nval == p) - continue; - *nval++ = 0; - *val = kstrdup(nval, GFP_KERNEL); - rc = !*val ? -ENOMEM : 0; - goto out; - } - } -out: - kfree(orig); - return rc; -} - /* * Remove duplicate path delimiters. Windows is supposed to do that * but there are some bugs that prevent rename from working if there are -- cgit v1.2.3 From 2905281cbda52ec9df540113b35b835feb5fafd3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 20 Apr 2026 18:00:27 +0200 Subject: Input: usbtouchscreen - clamp NEXIO data_len/x_len to URB buffer size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nexio_read_data() pulls data_len and x_len from a packed __be16 header in the device's interrupt packet and then walks packet->data[0..x_len) and packet->data[x_len..data_len) comparing each byte against a threshold. Both fields are 16-bit on the wire (max 65535). The existing adjustments shave at most 0x100 / 0x80 off, so the loop bound can still reach roughly 0xfeff. The URB transfer buffer for NEXIO is rept_size (1024) bytes from usb_alloc_coherent(), with the first 7 occupied by the packed header — so packet->data[] has 1017 valid bytes. read_data() callbacks are not given urb->actual_length, and nothing else bounds the walk. A device that lies about its length can get a ~64 KiB out-of-bounds read past the coherent DMA allocation. The first index whose byte exceeds NEXIO_THRESHOLD lands in begin_x / begin_y and from there into the reported touch coordinates, so adjacent kernel memory contents leak to userspace as ABS_X / ABS_Y events. Far enough out, the read can also hit an unmapped page and fault. Fix this all by clamping data_len to the buffer's data[] capacity and x_len to data_len. Cc: Dmitry Torokhov Fixes: 5197424cdccc ("Input: usbtouchscreen - add NEXIO (or iNexio) support") Cc: stable Assisted-by: gkh_clanker_t1000 Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026042026-chlorine-epidermis-fd6d@gregkh Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/usbtouchscreen.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c index daa28135f887..0bbacb517c28 100644 --- a/drivers/input/touchscreen/usbtouchscreen.c +++ b/drivers/input/touchscreen/usbtouchscreen.c @@ -1067,6 +1067,11 @@ static int nexio_read_data(struct usbtouch_usb *usbtouch, unsigned char *pkt) if (x_len > 0xff) x_len -= 0x80; + if (data_len > usbtouch->data_size - sizeof(*packet)) + data_len = usbtouch->data_size - sizeof(*packet); + if (x_len > data_len) + x_len = data_len; + /* send ACK */ ret = usb_submit_urb(priv->ack, GFP_ATOMIC); if (ret) -- cgit v1.2.3 From 6cdc46b38cf146ce81d4831b6472dbf7731849a2 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 26 Apr 2026 21:09:33 -0700 Subject: Input: xpad - fix out-of-bounds access for Share button xpadone_process_packet() receives len directly from urb->actual_length and uses it to index the share-button byte at data[len - 18] or data[len - 26]. Since both len and data[0] are under the device's control, a broken controller can send a GIP_CMD_INPUT packet with actual_length < 18 (e.g. 5 bytes) and reach this code path, causing accesses beyond the actual array. Fix this by calculating the offset and checking bounds against the packet length. Reported-by: Greg Kroah-Hartman Fixes: 4ef46367073b ("Input: xpad - fix Share button on Xbox One controllers") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 0549fdc5a985..19ce90da89e9 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -1077,10 +1077,10 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char input_report_key(dev, BTN_START, data[4] & BIT(2)); input_report_key(dev, BTN_SELECT, data[4] & BIT(3)); if (xpad->mapping & MAP_SHARE_BUTTON) { - if (xpad->mapping & MAP_SHARE_OFFSET) - input_report_key(dev, KEY_RECORD, data[len - 26] & BIT(0)); - else - input_report_key(dev, KEY_RECORD, data[len - 18] & BIT(0)); + u32 offset = (xpad->mapping & MAP_SHARE_OFFSET) ? 26 : 18; + + if (len >= offset) + input_report_key(dev, KEY_RECORD, data[len - offset] & BIT(0)); } /* buttons A,B,X,Y */ -- cgit v1.2.3 From aa23c94cc433b145d1ce93820ecdfe16d8940e28 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 30 Mar 2026 12:08:21 +0100 Subject: media: venus: fix QCOM_MDT_LOADER dependency When build-testined with CONFIG_QCOM_MDT_LOADER=m and VIDEO_QCOM_VENUS=y, the kernel fails to link: x86_64-linux-ld: drivers/media/platform/qcom/venus/firmware.o: in function `venus_boot': firmware.c:(.text+0x1e3): undefined reference to `qcom_mdt_get_size' firmware.c:(.text+0x25a): undefined reference to `qcom_mdt_load' firmware.c:(.text+0x272): undefined reference to `qcom_mdt_load_no_init' The problem is the conditional 'select' statement. Change this to make the driver built-in here regardless of CONFIG_ARCH_QCOM, same as for the similar IRIS driver. Signed-off-by: Arnd Bergmann Reviewed-by: Konrad Dybcio Reviewed-by: Dikshita Agarwal Fixes: 0399b696f7f4 ("media: venus: fix compile-test build on non-qcom ARM platform") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Signed-off-by: Hans Verkuil --- drivers/media/platform/qcom/venus/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/qcom/venus/Kconfig b/drivers/media/platform/qcom/venus/Kconfig index ffb731ecd48c..63ee8c78dc6d 100644 --- a/drivers/media/platform/qcom/venus/Kconfig +++ b/drivers/media/platform/qcom/venus/Kconfig @@ -4,7 +4,7 @@ config VIDEO_QCOM_VENUS depends on VIDEO_DEV && QCOM_SMEM depends on (ARCH_QCOM && ARM64 && IOMMU_API) || COMPILE_TEST select OF_DYNAMIC if ARCH_QCOM - select QCOM_MDT_LOADER if ARCH_QCOM + select QCOM_MDT_LOADER select QCOM_SCM select VIDEOBUF2_DMA_CONTIG select V4L2_MEM2MEM_DEV -- cgit v1.2.3 From a297c5165f91366cbc3490e630aabd1c0f70efb8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 Feb 2026 15:56:19 +0100 Subject: media: iris: fix QCOM_MDT_LOADER dependency When build-testined with CONFIG_QCOM_MDT_LOADER=m and VIDEO_QCOM_IRIS=y, the kernel fails to link: x86_64-linux-ld: drivers/media/platform/qcom/iris/iris_firmware.o: in function `iris_fw_load': iris_firmware.c:(.text+0xb0): undefined reference to `qcom_mdt_get_size' iris_firmware.c:(.text+0xfd): undefined reference to `qcom_mdt_load' The problem is the conditional 'select' statement. Change this to make the driver built-in here regardless of CONFIG_ARCH_QCOM. Signed-off-by: Arnd Bergmann Reviewed-by: Konrad Dybcio Reviewed-by: Dikshita Agarwal Reviewed-by: Bryan O'Donoghue Fixes: d19b163356b8 ("media: iris: implement video firmware load/unload") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Signed-off-by: Hans Verkuil --- drivers/media/platform/qcom/iris/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/qcom/iris/Kconfig b/drivers/media/platform/qcom/iris/Kconfig index 3c803a05305a..5498f48362d1 100644 --- a/drivers/media/platform/qcom/iris/Kconfig +++ b/drivers/media/platform/qcom/iris/Kconfig @@ -3,7 +3,7 @@ config VIDEO_QCOM_IRIS depends on VIDEO_DEV depends on ARCH_QCOM || COMPILE_TEST select V4L2_MEM2MEM_DEV - select QCOM_MDT_LOADER if ARCH_QCOM + select QCOM_MDT_LOADER select QCOM_SCM select VIDEOBUF2_DMA_CONTIG help -- cgit v1.2.3 From f27cfdcfc916bb59297825805f4c3499f89f9e76 Mon Sep 17 00:00:00 2001 From: Dikshita Agarwal Date: Mon, 16 Feb 2026 12:37:42 +0530 Subject: media: iris: Fix use-after-free in iris_release_internal_buffers() The recent change in commit 1dabf00ee206 ("media: iris: gen1: Destroy internal buffers after FW releases") introduced a regression where session_release_buf() may free the buffer. The caller, iris_release_internal_buffers(), continued to access `buffer` after the call, leading to a potential use-after-free. Fix this by setting BUF_ATTR_PENDING_RELEASE before calling session_release_buf(), and reverting the flag if the call fails. This ensures no dereference occurs after potential freeing. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/lkml/aYXvKAX3Pg3sL37P@stanley.mountain/#r Signed-off-by: Dikshita Agarwal Reviewed-by: Vikash Garodia Fixes: 1dabf00ee206 ("media: iris: gen1: Destroy internal buffers after FW releases") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Signed-off-by: Hans Verkuil --- drivers/media/platform/qcom/iris/iris_buffer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/qcom/iris/iris_buffer.c b/drivers/media/platform/qcom/iris/iris_buffer.c index 9151f43bc6b9..1d53c7414b75 100644 --- a/drivers/media/platform/qcom/iris/iris_buffer.c +++ b/drivers/media/platform/qcom/iris/iris_buffer.c @@ -582,10 +582,12 @@ static int iris_release_internal_buffers(struct iris_inst *inst, continue; if (!(buffer->attr & BUF_ATTR_QUEUED)) continue; + buffer->attr |= BUF_ATTR_PENDING_RELEASE; ret = hfi_ops->session_release_buf(inst, buffer); - if (ret) + if (ret) { + buffer->attr &= ~BUF_ATTR_PENDING_RELEASE; return ret; - buffer->attr |= BUF_ATTR_PENDING_RELEASE; + } } return 0; -- cgit v1.2.3 From 4a49ae56b0e4268d48fd96babe0cc68596bc301a Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Fri, 13 Feb 2026 10:13:27 +0100 Subject: media: iris: Fix dma_free_attrs() size in iris_hfi_queues_init() The core->iface_q_table_vaddr buffer is alloc'd with size queue_size but freed with sizeof(*q_tbl_hdr) which is different. Change the dma_free_attrs() size. Signed-off-by: Thomas Fourier Reviewed-by: Dikshita Agarwal Fixes: d7378f84e94e ("media: iris: introduce iris core state management with shared queues") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Signed-off-by: Hans Verkuil --- drivers/media/platform/qcom/iris/iris_hfi_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/qcom/iris/iris_hfi_queue.c b/drivers/media/platform/qcom/iris/iris_hfi_queue.c index b3ed06297953..bf6db23b53e2 100644 --- a/drivers/media/platform/qcom/iris/iris_hfi_queue.c +++ b/drivers/media/platform/qcom/iris/iris_hfi_queue.c @@ -263,7 +263,7 @@ int iris_hfi_queues_init(struct iris_core *core) GFP_KERNEL, DMA_ATTR_WRITE_COMBINE); if (!core->sfr_vaddr) { dev_err(core->dev, "sfr alloc and map failed\n"); - dma_free_attrs(core->dev, sizeof(*q_tbl_hdr), core->iface_q_table_vaddr, + dma_free_attrs(core->dev, queue_size, core->iface_q_table_vaddr, core->iface_q_table_daddr, DMA_ATTR_WRITE_COMBINE); return -ENOMEM; } -- cgit v1.2.3 From 95a337f92f0a602d4f935315bfbc8bf07f475e65 Mon Sep 17 00:00:00 2001 From: Vikash Garodia Date: Fri, 13 Mar 2026 18:49:36 +0530 Subject: media: iris: switch to hardware mode after firmware boot Currently the driver switches the vcodec GDSC to hardware (HW) mode before firmware load and boot sequence. GDSC can be powered off, keeping in hw mode, thereby the vcodec registers programmed in TrustZone (TZ) carry default (reset) values. Move the transition to HW mode after firmware load and boot sequence. The bug was exposed with driver configuring different stream ids to different devices via iommu-map. With registers carrying reset values, VPU would not generate desired stream-id, thereby leading to SMMU fault. For vpu4, when GDSC is switched to HW mode, there is a need to perform the reset operation. Without reset, there are occasional issues of register corruption observed. Hence the vpu GDSC switch also involves the reset. Co-developed-by: Vishnu Reddy Signed-off-by: Vishnu Reddy Signed-off-by: Vikash Garodia Reviewed-by: Dikshita Agarwal Reviewed-by: Dmitry Baryshkov [bod: occassional => occasional] Fixes: dde659d37036 ("media: iris: Introduce vpu ops for vpu4 with necessary hooks") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Signed-off-by: Hans Verkuil --- drivers/media/platform/qcom/iris/iris_core.c | 4 ++++ drivers/media/platform/qcom/iris/iris_hfi_common.c | 4 ++++ drivers/media/platform/qcom/iris/iris_vpu2.c | 1 + drivers/media/platform/qcom/iris/iris_vpu3x.c | 9 +++----- drivers/media/platform/qcom/iris/iris_vpu4x.c | 24 ++++++++++++---------- drivers/media/platform/qcom/iris/iris_vpu_common.c | 16 +++++++++------ drivers/media/platform/qcom/iris/iris_vpu_common.h | 3 +++ 7 files changed, 38 insertions(+), 23 deletions(-) diff --git a/drivers/media/platform/qcom/iris/iris_core.c b/drivers/media/platform/qcom/iris/iris_core.c index 8406c48d635b..dbaac01eb15a 100644 --- a/drivers/media/platform/qcom/iris/iris_core.c +++ b/drivers/media/platform/qcom/iris/iris_core.c @@ -75,6 +75,10 @@ int iris_core_init(struct iris_core *core) if (ret) goto error_unload_fw; + ret = iris_vpu_switch_to_hwmode(core); + if (ret) + goto error_unload_fw; + ret = iris_hfi_core_init(core); if (ret) goto error_unload_fw; diff --git a/drivers/media/platform/qcom/iris/iris_hfi_common.c b/drivers/media/platform/qcom/iris/iris_hfi_common.c index 92112eb16c11..621c66593d88 100644 --- a/drivers/media/platform/qcom/iris/iris_hfi_common.c +++ b/drivers/media/platform/qcom/iris/iris_hfi_common.c @@ -159,6 +159,10 @@ int iris_hfi_pm_resume(struct iris_core *core) if (ret) goto err_suspend_hw; + ret = iris_vpu_switch_to_hwmode(core); + if (ret) + goto err_suspend_hw; + ret = ops->sys_interframe_powercollapse(core); if (ret) goto err_suspend_hw; diff --git a/drivers/media/platform/qcom/iris/iris_vpu2.c b/drivers/media/platform/qcom/iris/iris_vpu2.c index 9c103a2e4e4e..01ef40f38957 100644 --- a/drivers/media/platform/qcom/iris/iris_vpu2.c +++ b/drivers/media/platform/qcom/iris/iris_vpu2.c @@ -44,4 +44,5 @@ const struct vpu_ops iris_vpu2_ops = { .power_off_controller = iris_vpu_power_off_controller, .power_on_controller = iris_vpu_power_on_controller, .calc_freq = iris_vpu2_calc_freq, + .set_hwmode = iris_vpu_set_hwmode, }; diff --git a/drivers/media/platform/qcom/iris/iris_vpu3x.c b/drivers/media/platform/qcom/iris/iris_vpu3x.c index fe4423b951b1..3dad47be78b5 100644 --- a/drivers/media/platform/qcom/iris/iris_vpu3x.c +++ b/drivers/media/platform/qcom/iris/iris_vpu3x.c @@ -234,14 +234,8 @@ static int iris_vpu35_power_on_hw(struct iris_core *core) if (ret) goto err_disable_hw_free_clk; - ret = dev_pm_genpd_set_hwmode(core->pmdomain_tbl->pd_devs[IRIS_HW_POWER_DOMAIN], true); - if (ret) - goto err_disable_hw_clk; - return 0; -err_disable_hw_clk: - iris_disable_unprepare_clock(core, IRIS_HW_CLK); err_disable_hw_free_clk: iris_disable_unprepare_clock(core, IRIS_HW_FREERUN_CLK); err_disable_axi_clk: @@ -266,6 +260,7 @@ const struct vpu_ops iris_vpu3_ops = { .power_off_controller = iris_vpu_power_off_controller, .power_on_controller = iris_vpu_power_on_controller, .calc_freq = iris_vpu3x_vpu4x_calculate_frequency, + .set_hwmode = iris_vpu_set_hwmode, }; const struct vpu_ops iris_vpu33_ops = { @@ -274,6 +269,7 @@ const struct vpu_ops iris_vpu33_ops = { .power_off_controller = iris_vpu33_power_off_controller, .power_on_controller = iris_vpu_power_on_controller, .calc_freq = iris_vpu3x_vpu4x_calculate_frequency, + .set_hwmode = iris_vpu_set_hwmode, }; const struct vpu_ops iris_vpu35_ops = { @@ -283,4 +279,5 @@ const struct vpu_ops iris_vpu35_ops = { .power_on_controller = iris_vpu35_vpu4x_power_on_controller, .program_bootup_registers = iris_vpu35_vpu4x_program_bootup_registers, .calc_freq = iris_vpu3x_vpu4x_calculate_frequency, + .set_hwmode = iris_vpu_set_hwmode, }; diff --git a/drivers/media/platform/qcom/iris/iris_vpu4x.c b/drivers/media/platform/qcom/iris/iris_vpu4x.c index a8db02ce5c5e..02e100a4045f 100644 --- a/drivers/media/platform/qcom/iris/iris_vpu4x.c +++ b/drivers/media/platform/qcom/iris/iris_vpu4x.c @@ -252,21 +252,10 @@ static int iris_vpu4x_power_on_hardware(struct iris_core *core) ret = iris_vpu4x_power_on_apv(core); if (ret) goto disable_hw_clocks; - - iris_vpu4x_ahb_sync_reset_apv(core); } - iris_vpu4x_ahb_sync_reset_hardware(core); - - ret = iris_vpu4x_genpd_set_hwmode(core, true, efuse_value); - if (ret) - goto disable_apv_power_domain; - return 0; -disable_apv_power_domain: - if (!(efuse_value & DISABLE_VIDEO_APV_BIT)) - iris_vpu4x_power_off_apv(core); disable_hw_clocks: iris_vpu4x_disable_hardware_clocks(core, efuse_value); disable_vpp1_power_domain: @@ -359,6 +348,18 @@ disable_clocks_and_power: iris_disable_power_domains(core, core->pmdomain_tbl->pd_devs[IRIS_HW_POWER_DOMAIN]); } +static int iris_vpu4x_set_hwmode(struct iris_core *core) +{ + u32 efuse_value = readl(core->reg_base + WRAPPER_EFUSE_MONITOR); + + if (!(efuse_value & DISABLE_VIDEO_APV_BIT)) + iris_vpu4x_ahb_sync_reset_apv(core); + + iris_vpu4x_ahb_sync_reset_hardware(core); + + return iris_vpu4x_genpd_set_hwmode(core, true, efuse_value); +} + const struct vpu_ops iris_vpu4x_ops = { .power_off_hw = iris_vpu4x_power_off_hardware, .power_on_hw = iris_vpu4x_power_on_hardware, @@ -366,4 +367,5 @@ const struct vpu_ops iris_vpu4x_ops = { .power_on_controller = iris_vpu35_vpu4x_power_on_controller, .program_bootup_registers = iris_vpu35_vpu4x_program_bootup_registers, .calc_freq = iris_vpu3x_vpu4x_calculate_frequency, + .set_hwmode = iris_vpu4x_set_hwmode, }; diff --git a/drivers/media/platform/qcom/iris/iris_vpu_common.c b/drivers/media/platform/qcom/iris/iris_vpu_common.c index 548e5f1727fd..69e6126dc4d9 100644 --- a/drivers/media/platform/qcom/iris/iris_vpu_common.c +++ b/drivers/media/platform/qcom/iris/iris_vpu_common.c @@ -292,14 +292,8 @@ int iris_vpu_power_on_hw(struct iris_core *core) if (ret && ret != -ENOENT) goto err_disable_hw_clock; - ret = dev_pm_genpd_set_hwmode(core->pmdomain_tbl->pd_devs[IRIS_HW_POWER_DOMAIN], true); - if (ret) - goto err_disable_hw_ahb_clock; - return 0; -err_disable_hw_ahb_clock: - iris_disable_unprepare_clock(core, IRIS_HW_AHB_CLK); err_disable_hw_clock: iris_disable_unprepare_clock(core, IRIS_HW_CLK); err_disable_power: @@ -308,6 +302,16 @@ err_disable_power: return ret; } +int iris_vpu_set_hwmode(struct iris_core *core) +{ + return dev_pm_genpd_set_hwmode(core->pmdomain_tbl->pd_devs[IRIS_HW_POWER_DOMAIN], true); +} + +int iris_vpu_switch_to_hwmode(struct iris_core *core) +{ + return core->iris_platform_data->vpu_ops->set_hwmode(core); +} + int iris_vpu35_vpu4x_power_off_controller(struct iris_core *core) { u32 clk_rst_tbl_size = core->iris_platform_data->clk_rst_tbl_size; diff --git a/drivers/media/platform/qcom/iris/iris_vpu_common.h b/drivers/media/platform/qcom/iris/iris_vpu_common.h index f6dffc613b82..dee3b1349c5e 100644 --- a/drivers/media/platform/qcom/iris/iris_vpu_common.h +++ b/drivers/media/platform/qcom/iris/iris_vpu_common.h @@ -21,6 +21,7 @@ struct vpu_ops { int (*power_on_controller)(struct iris_core *core); void (*program_bootup_registers)(struct iris_core *core); u64 (*calc_freq)(struct iris_inst *inst, size_t data_size); + int (*set_hwmode)(struct iris_core *core); }; int iris_vpu_boot_firmware(struct iris_core *core); @@ -30,6 +31,8 @@ int iris_vpu_watchdog(struct iris_core *core, u32 intr_status); int iris_vpu_prepare_pc(struct iris_core *core); int iris_vpu_power_on_controller(struct iris_core *core); int iris_vpu_power_on_hw(struct iris_core *core); +int iris_vpu_set_hwmode(struct iris_core *core); +int iris_vpu_switch_to_hwmode(struct iris_core *core); int iris_vpu_power_on(struct iris_core *core); int iris_vpu_power_off_controller(struct iris_core *core); void iris_vpu_power_off_hw(struct iris_core *core); -- cgit v1.2.3 From 3d9593ad1a58c5acc3e5fa2a48222bb7632e6812 Mon Sep 17 00:00:00 2001 From: Vishnu Reddy Date: Thu, 5 Mar 2026 18:58:31 +0530 Subject: media: iris: fix use-after-free of fmt_src during MBPF check During concurrency testing, multiple instances can run in parallel, and each instance uses its own inst->lock while the core->lock protects the list of active instances. The race happens because these locks cover different scopes, inst->lock protects only the internals of a single instance, while the Macro Blocks Per Frame (MBPF) checker walks the core list under core->lock and reads fields like fmt_src->width and fmt_src->height. At the same time, iris_close() may free fmt_src and fmt_dst under inst->lock while the instance is still present in the core list. This allows a situation where the MBPF checker, still iterating through the core list, reaches an instance whose fmt_src was already freed by another thread and ends up dereferencing a dangling pointer, resulting in a use-after-free. This happens because the MBPF checker assumes that any instance in the core list is fully valid, but the freeing of fmt_src and fmt_dst without removing the instance from the core list is not correct. The correct ordering is to defer freeing fmt_src and fmt_dst until after the instance has been removed from the core list and all teardown under the core lock has completed, ensuring that no dangling pointers are ever exposed during MBPF checks. Reviewed-by: Vikash Garodia Signed-off-by: Vishnu Reddy Reviewed-by: Dikshita Agarwal Fixes: 5ad964ad5656 ("media: iris: Initialize and deinitialize encoder instance structure") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Signed-off-by: Hans Verkuil --- drivers/media/platform/qcom/iris/iris_vdec.c | 6 ------ drivers/media/platform/qcom/iris/iris_vdec.h | 1 - drivers/media/platform/qcom/iris/iris_venc.c | 6 ------ drivers/media/platform/qcom/iris/iris_venc.h | 1 - drivers/media/platform/qcom/iris/iris_vidc.c | 6 ++---- 5 files changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/media/platform/qcom/iris/iris_vdec.c b/drivers/media/platform/qcom/iris/iris_vdec.c index 719217399a30..99d544e2af4f 100644 --- a/drivers/media/platform/qcom/iris/iris_vdec.c +++ b/drivers/media/platform/qcom/iris/iris_vdec.c @@ -61,12 +61,6 @@ int iris_vdec_inst_init(struct iris_inst *inst) return iris_ctrls_init(inst); } -void iris_vdec_inst_deinit(struct iris_inst *inst) -{ - kfree(inst->fmt_dst); - kfree(inst->fmt_src); -} - static const struct iris_fmt iris_vdec_formats_cap[] = { [IRIS_FMT_NV12] = { .pixfmt = V4L2_PIX_FMT_NV12, diff --git a/drivers/media/platform/qcom/iris/iris_vdec.h b/drivers/media/platform/qcom/iris/iris_vdec.h index ec1ce55d1375..5123d2a340e1 100644 --- a/drivers/media/platform/qcom/iris/iris_vdec.h +++ b/drivers/media/platform/qcom/iris/iris_vdec.h @@ -9,7 +9,6 @@ struct iris_inst; int iris_vdec_inst_init(struct iris_inst *inst); -void iris_vdec_inst_deinit(struct iris_inst *inst); int iris_vdec_enum_fmt(struct iris_inst *inst, struct v4l2_fmtdesc *f); int iris_vdec_try_fmt(struct iris_inst *inst, struct v4l2_format *f); int iris_vdec_s_fmt(struct iris_inst *inst, struct v4l2_format *f); diff --git a/drivers/media/platform/qcom/iris/iris_venc.c b/drivers/media/platform/qcom/iris/iris_venc.c index aa27b22704eb..4d886769d958 100644 --- a/drivers/media/platform/qcom/iris/iris_venc.c +++ b/drivers/media/platform/qcom/iris/iris_venc.c @@ -79,12 +79,6 @@ int iris_venc_inst_init(struct iris_inst *inst) return iris_ctrls_init(inst); } -void iris_venc_inst_deinit(struct iris_inst *inst) -{ - kfree(inst->fmt_dst); - kfree(inst->fmt_src); -} - static const struct iris_fmt iris_venc_formats_cap[] = { [IRIS_FMT_H264] = { .pixfmt = V4L2_PIX_FMT_H264, diff --git a/drivers/media/platform/qcom/iris/iris_venc.h b/drivers/media/platform/qcom/iris/iris_venc.h index c4db7433da53..00c1716b2747 100644 --- a/drivers/media/platform/qcom/iris/iris_venc.h +++ b/drivers/media/platform/qcom/iris/iris_venc.h @@ -9,7 +9,6 @@ struct iris_inst; int iris_venc_inst_init(struct iris_inst *inst); -void iris_venc_inst_deinit(struct iris_inst *inst); int iris_venc_enum_fmt(struct iris_inst *inst, struct v4l2_fmtdesc *f); int iris_venc_try_fmt(struct iris_inst *inst, struct v4l2_format *f); int iris_venc_s_fmt(struct iris_inst *inst, struct v4l2_format *f); diff --git a/drivers/media/platform/qcom/iris/iris_vidc.c b/drivers/media/platform/qcom/iris/iris_vidc.c index bd38d84c9cc7..5eb1786b0737 100644 --- a/drivers/media/platform/qcom/iris/iris_vidc.c +++ b/drivers/media/platform/qcom/iris/iris_vidc.c @@ -289,10 +289,6 @@ int iris_close(struct file *filp) v4l2_m2m_ctx_release(inst->m2m_ctx); v4l2_m2m_release(inst->m2m_dev); mutex_lock(&inst->lock); - if (inst->domain == DECODER) - iris_vdec_inst_deinit(inst); - else if (inst->domain == ENCODER) - iris_venc_inst_deinit(inst); iris_session_close(inst); iris_inst_change_state(inst, IRIS_INST_DEINIT); iris_v4l2_fh_deinit(inst, filp); @@ -304,6 +300,8 @@ int iris_close(struct file *filp) mutex_unlock(&inst->lock); mutex_destroy(&inst->ctx_q_lock); mutex_destroy(&inst->lock); + kfree(inst->fmt_src); + kfree(inst->fmt_dst); kfree(inst); return 0; -- cgit v1.2.3 From 3e0b2053751657ed2924adfe3ff25b1450231e33 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 27 Mar 2026 22:19:55 +0200 Subject: media: qcom: iris: increase H265D_MAX_SLICE to fix H.265 decoding on SC7280 Follow the commit bfe1326573ff ("venus: Fix for H265 decoding failure.") and increase H265D_MAX_SLICE following firmware requirements on that platform. Otherwise decoding of the H.265 streams fails with the "insufficient scratch_1 buffer size" from the firmware. Signed-off-by: Dmitry Baryshkov Reviewed-by: Dikshita Agarwal Reviewed-by: Vikash Garodia Reviewed-by: Konrad Dybcio [bod: Fixed commit log withthe => with the] Fixes: e1f5d32608ec ("media: iris: Add internal buffer calculation for HEVC and VP9 decoders") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Signed-off-by: Hans Verkuil --- drivers/media/platform/qcom/iris/iris_vpu_buffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/qcom/iris/iris_vpu_buffer.h b/drivers/media/platform/qcom/iris/iris_vpu_buffer.h index 12640eb5ed8c..8c0d6b7b5de8 100644 --- a/drivers/media/platform/qcom/iris/iris_vpu_buffer.h +++ b/drivers/media/platform/qcom/iris/iris_vpu_buffer.h @@ -67,7 +67,7 @@ struct iris_inst; #define SIZE_DOLBY_RPU_METADATA (41 * 1024) #define H264_CABAC_HDR_RATIO_HD_TOT 1 #define H264_CABAC_RES_RATIO_HD_TOT 3 -#define H265D_MAX_SLICE 1200 +#define H265D_MAX_SLICE 3600 #define SIZE_H265D_HW_PIC_T SIZE_H264D_HW_PIC_T #define H265_CABAC_HDR_RATIO_HD_TOT 2 #define H265_CABAC_RES_RATIO_HD_TOT 2 -- cgit v1.2.3 From dd1b373941079cc102cc18bc68884e18245f5912 Mon Sep 17 00:00:00 2001 From: Wenmeng Liu Date: Fri, 13 Mar 2026 18:13:02 +0800 Subject: media: qcom: camss: Fix csid IRQ offset for sa8775p Fix BUF_DONE_IRQ_STATUS_RDI_OFFSET calculation for csid lite on sa8775p platform. The offset should be 0 for csid lite on sa8775p, Signed-off-by: Wenmeng Liu Reviewed-by: Bryan O'Donoghue Fixes: ed03e99de0fa ("media: qcom: camss: Add support for CSID 690") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Signed-off-by: Hans Verkuil --- drivers/media/platform/qcom/camss/camss-csid-gen3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/qcom/camss/camss-csid-gen3.c b/drivers/media/platform/qcom/camss/camss-csid-gen3.c index 664245cf6eb0..bd059243790e 100644 --- a/drivers/media/platform/qcom/camss/camss-csid-gen3.c +++ b/drivers/media/platform/qcom/camss/camss-csid-gen3.c @@ -48,9 +48,9 @@ #define IS_CSID_690(csid) ((csid->camss->res->version == CAMSS_8775P) \ || (csid->camss->res->version == CAMSS_8300)) #define CSID_BUF_DONE_IRQ_STATUS 0x8C -#define BUF_DONE_IRQ_STATUS_RDI_OFFSET (csid_is_lite(csid) ?\ - 1 : (IS_CSID_690(csid) ?\ - 13 : 14)) +#define BUF_DONE_IRQ_STATUS_RDI_OFFSET (csid_is_lite(csid) ? \ + ((IS_CSID_690(csid) ? 0 : 1)) : \ + ((IS_CSID_690(csid) ? 13 : 14))) #define CSID_BUF_DONE_IRQ_MASK 0x90 #define CSID_BUF_DONE_IRQ_CLEAR 0x94 #define CSID_BUF_DONE_IRQ_SET 0x98 -- cgit v1.2.3 From fe56c674118aa46da1a3e65aa22ca709ebd7d812 Mon Sep 17 00:00:00 2001 From: Wenmeng Liu Date: Fri, 13 Mar 2026 18:13:03 +0800 Subject: media: qcom: camss: Fix csid clock configuration for sa8775p Fix the mismatch between clock list and clock rate table for CSID lite instances. The current implementation has 5 clocks defined but only 2 are actually needed (vfe_lite_csid and vfe_lite_cphy_rx), while the clock rate table doesn't match this configuration. Update both clock list and rate table to maintain consistency: - Remove unused clocks: cpas_vfe_lite, vfe_lite_ahb, vfe_lite - Update clock rate table to match the remaining two clocks Signed-off-by: Wenmeng Liu Reviewed-by: Bryan O'Donoghue Fixes: ed03e99de0fa ("media: qcom: camss: Add support for CSID 690") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Signed-off-by: Hans Verkuil --- drivers/media/platform/qcom/camss/camss.c | 40 ++++++++++++------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/drivers/media/platform/qcom/camss/camss.c b/drivers/media/platform/qcom/camss/camss.c index 00b87fd9afbd..cb0134718985 100644 --- a/drivers/media/platform/qcom/camss/camss.c +++ b/drivers/media/platform/qcom/camss/camss.c @@ -3598,12 +3598,10 @@ static const struct camss_subdev_resources csid_res_8775p[] = { /* CSID2 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", - "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + .clock = { "vfe_lite_csid", "vfe_lite_cphy_rx" }, .clock_rate = { - { 0, 0, 400000000, 400000000, 0}, - { 0, 0, 400000000, 480000000, 0} + { 400000000, 480000000 }, + { 400000000, 480000000 } }, .reg = { "csid_lite0" }, .interrupt = { "csid_lite0" }, @@ -3617,12 +3615,10 @@ static const struct camss_subdev_resources csid_res_8775p[] = { /* CSID3 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", - "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + .clock = { "vfe_lite_csid", "vfe_lite_cphy_rx" }, .clock_rate = { - { 0, 0, 400000000, 400000000, 0}, - { 0, 0, 400000000, 480000000, 0} + { 400000000, 480000000 }, + { 400000000, 480000000 } }, .reg = { "csid_lite1" }, .interrupt = { "csid_lite1" }, @@ -3636,12 +3632,10 @@ static const struct camss_subdev_resources csid_res_8775p[] = { /* CSID4 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", - "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + .clock = { "vfe_lite_csid", "vfe_lite_cphy_rx" }, .clock_rate = { - { 0, 0, 400000000, 400000000, 0}, - { 0, 0, 400000000, 480000000, 0} + { 400000000, 480000000 }, + { 400000000, 480000000 } }, .reg = { "csid_lite2" }, .interrupt = { "csid_lite2" }, @@ -3655,12 +3649,10 @@ static const struct camss_subdev_resources csid_res_8775p[] = { /* CSID5 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", - "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + .clock = { "vfe_lite_csid", "vfe_lite_cphy_rx" }, .clock_rate = { - { 0, 0, 400000000, 400000000, 0}, - { 0, 0, 400000000, 480000000, 0} + { 400000000, 480000000 }, + { 400000000, 480000000 } }, .reg = { "csid_lite3" }, .interrupt = { "csid_lite3" }, @@ -3674,12 +3666,10 @@ static const struct camss_subdev_resources csid_res_8775p[] = { /* CSID6 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", - "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + .clock = { "vfe_lite_csid", "vfe_lite_cphy_rx" }, .clock_rate = { - { 0, 0, 400000000, 400000000, 0}, - { 0, 0, 400000000, 480000000, 0} + { 400000000, 480000000 }, + { 400000000, 480000000 } }, .reg = { "csid_lite4" }, .interrupt = { "csid_lite4" }, -- cgit v1.2.3 From d31fac47b39f5e1ed85a587688ca70b793e421b4 Mon Sep 17 00:00:00 2001 From: Wenmeng Liu Date: Fri, 13 Mar 2026 18:13:04 +0800 Subject: media: qcom: camss: Add missing clocks for VFE lite on sa8775p Add missing required clocks (cpas_ahb and camnoc_axi) for VFE lite instances on sa8775p platform. These clocks are necessary for proper VFE lite operation: Reviewed-by: Bryan O'Donoghue Signed-off-by: Wenmeng Liu Fixes: e7b59e1d06fb ("media: qcom: camss: Add support for VFE 690") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Signed-off-by: Hans Verkuil --- drivers/media/platform/qcom/camss/camss.c | 40 +++++++++++++++++++------------ 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/media/platform/qcom/camss/camss.c b/drivers/media/platform/qcom/camss/camss.c index cb0134718985..9335636d7c4d 100644 --- a/drivers/media/platform/qcom/camss/camss.c +++ b/drivers/media/platform/qcom/camss/camss.c @@ -3742,15 +3742,17 @@ static const struct camss_subdev_resources vfe_res_8775p[] = { /* VFE2 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", + .clock = { "cpas_ahb", "cpas_vfe_lite", "vfe_lite_ahb", "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + "vfe_lite", "camnoc_axi"}, .clock_rate = { - { 0, 0, 0, 0 }, + { 0 }, + { 0 }, { 300000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 480000000, 600000000, 600000000, 600000000 }, + { 400000000 }, }, .reg = { "vfe_lite0" }, .interrupt = { "vfe_lite0" }, @@ -3765,15 +3767,17 @@ static const struct camss_subdev_resources vfe_res_8775p[] = { /* VFE3 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", + .clock = { "cpas_ahb", "cpas_vfe_lite", "vfe_lite_ahb", "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + "vfe_lite", "camnoc_axi"}, .clock_rate = { - { 0, 0, 0, 0 }, + { 0 }, + { 0 }, { 300000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 480000000, 600000000, 600000000, 600000000 }, + { 400000000 }, }, .reg = { "vfe_lite1" }, .interrupt = { "vfe_lite1" }, @@ -3788,15 +3792,17 @@ static const struct camss_subdev_resources vfe_res_8775p[] = { /* VFE4 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", + .clock = { "cpas_ahb", "cpas_vfe_lite", "vfe_lite_ahb", "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + "vfe_lite", "camnoc_axi"}, .clock_rate = { - { 0, 0, 0, 0 }, + { 0 }, + { 0 }, { 300000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 480000000, 600000000, 600000000, 600000000 }, + { 400000000 }, }, .reg = { "vfe_lite2" }, .interrupt = { "vfe_lite2" }, @@ -3811,15 +3817,17 @@ static const struct camss_subdev_resources vfe_res_8775p[] = { /* VFE5 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", + .clock = { "cpas_ahb", "cpas_vfe_lite", "vfe_lite_ahb", "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + "vfe_lite", "camnoc_axi"}, .clock_rate = { - { 0, 0, 0, 0 }, + { 0 }, + { 0 }, { 300000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 480000000, 600000000, 600000000, 600000000 }, + { 400000000 }, }, .reg = { "vfe_lite3" }, .interrupt = { "vfe_lite3" }, @@ -3834,15 +3842,17 @@ static const struct camss_subdev_resources vfe_res_8775p[] = { /* VFE6 (lite) */ { .regulators = {}, - .clock = { "cpas_vfe_lite", "vfe_lite_ahb", + .clock = { "cpas_ahb", "cpas_vfe_lite", "vfe_lite_ahb", "vfe_lite_csid", "vfe_lite_cphy_rx", - "vfe_lite"}, + "vfe_lite", "camnoc_axi"}, .clock_rate = { - { 0, 0, 0, 0 }, + { 0 }, + { 0 }, { 300000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 400000000, 400000000, 400000000, 400000000 }, { 480000000, 600000000, 600000000, 600000000 }, + { 400000000 }, }, .reg = { "vfe_lite4" }, .interrupt = { "vfe_lite4" }, -- cgit v1.2.3 From 23c39cb598977f10909a2387c5e5f34afc1d6933 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Mar 2026 16:18:24 +0100 Subject: media: qcom: camss: avoid format string warning clang-22 warns about csiphy_match_clock_name() taking a variable format string that is not checked against the 'int index' argument: drivers/media/platform/qcom/camss/camss-csiphy.c:566:44: error: diagnostic behavior may be improved by adding the 'format(printf, 2, 3)' attribute to the declaration of 'csiphy_match_clock_name' [-Werror,-Wmissing-format-attribute] 561 | static bool csiphy_match_clock_name(const char *clock_name, const char *format, | __attribute__((format(printf, 2, 3))) 562 | int index) 563 | { 564 | char name[16]; /* csiphyXXX_timer\0 */ 565 | 566 | snprintf(name, sizeof(name), format, index); | ^ drivers/media/platform/qcom/camss/camss-csiphy.c:561:13: note: 'csiphy_match_clock_name' declared here 561 | static bool csiphy_match_clock_name(const char *clock_name, const char *format, | ^ Change the function to use a snprintf() style format string that allows this to be checked at the call site. Signed-off-by: Arnd Bergmann Reviewed-by: Bryan O'Donoghue Signed-off-by: Bryan O'Donoghue Signed-off-by: Hans Verkuil --- drivers/media/platform/qcom/camss/camss-csiphy.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/qcom/camss/camss-csiphy.c b/drivers/media/platform/qcom/camss/camss-csiphy.c index 62623393f414..78a1b568dbae 100644 --- a/drivers/media/platform/qcom/camss/camss-csiphy.c +++ b/drivers/media/platform/qcom/camss/camss-csiphy.c @@ -558,12 +558,16 @@ static int csiphy_init_formats(struct v4l2_subdev *sd, return csiphy_set_format(sd, fh ? fh->state : NULL, &format); } -static bool csiphy_match_clock_name(const char *clock_name, const char *format, - int index) +static bool __printf(2, 3) +csiphy_match_clock_name(const char *clock_name, const char *format, ...) { char name[16]; /* csiphyXXX_timer\0 */ + va_list args; + + va_start(args, format); + vsnprintf(name, sizeof(name), format, args); + va_end(args); - snprintf(name, sizeof(name), format, index); return !strcmp(clock_name, name); } -- cgit v1.2.3 From 620b46ed6ae17c8438d889c8c0cfddab36a1476c Mon Sep 17 00:00:00 2001 From: "Harry Yoo (Oracle)" Date: Mon, 27 Apr 2026 16:09:52 +0900 Subject: mm/page_alloc: return NULL early from alloc_frozen_pages_nolock() in NMI on UP On UP kernels (!CONFIG_SMP), spin_trylock() is a no-op that unconditionally succeeds even when the lock is already held. As a result, alloc_frozen_pages_nolock() called from NMI context can re-enter rmqueue() and acquire the zone lock that the interrupted context is already holding, corrupting the freelists. With CONFIG_DEBUG_SPINLOCK on UP, the following BUG is triggered with the slub_kunit test module: BUG: spinlock trylock failure on UP on CPU#0, kunit_try_catch/243 [...] Call Trace: dump_stack_lvl+0x3f/0x60 do_raw_spin_trylock+0x41/0x50 _raw_spin_trylock+0x24/0x50 rmqueue.isra.0+0x2a9/0xa70 get_page_from_freelist+0xeb/0x450 alloc_frozen_pages_nolock_noprof+0x111/0x1e0 allocate_slab+0x42a/0x500 ___slab_alloc+0xa7/0x4c0 kmalloc_nolock_noprof+0x164/0x310 [...] Fix this by returning NULL early when invoked from NMI on a UP kernel. Link: https://lore.kernel.org/linux-mm/ad_cqe51pvr1WaDg@hyeyoo Cc: stable@vger.kernel.org Fixes: d7242af86434 ("mm: Introduce alloc_frozen_pages_nolock()") Signed-off-by: Harry Yoo (Oracle) Link: https://patch.msgid.link/20260427-nolock-api-fix-v2-1-a6b83a92d9a4@kernel.org Signed-off-by: Vlastimil Babka (SUSE) --- mm/page_alloc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 111b54df8a3c..b1b1039287e9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7775,6 +7775,11 @@ struct page *alloc_frozen_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned */ if (IS_ENABLED(CONFIG_PREEMPT_RT) && (in_nmi() || in_hardirq())) return NULL; + + /* On UP, spin_trylock() always succeeds even when it is locked */ + if (!IS_ENABLED(CONFIG_SMP) && in_nmi()) + return NULL; + if (!pcp_allowed_order(order)) return NULL; -- cgit v1.2.3 From 5b31044e649e3e54c2caef135c09b371c2fbcd08 Mon Sep 17 00:00:00 2001 From: "Harry Yoo (Oracle)" Date: Mon, 27 Apr 2026 16:09:53 +0900 Subject: mm/slab: return NULL early from kmalloc_nolock() in NMI on UP On UP kernels (!CONFIG_SMP), spin_trylock() is a no-op that unconditionally succeeds even when the lock is already held. As a result, kmalloc_nolock() called from NMI context can re-enter the slab allocator and acquire n->list_lock that the interrupted context is already holding, corrupting slab state. With CONFIG_DEBUG_SPINLOCK on UP, the following BUG is triggered with the slub_kunit test module: BUG: spinlock trylock failure on UP on CPU#0, kunit_try_catch/243 [...] Call Trace: dump_stack_lvl+0x3f/0x60 do_raw_spin_trylock+0x41/0x50 _raw_spin_trylock+0x24/0x50 get_from_partial_node+0x120/0x4d0 ___slab_alloc+0x8a/0x4c0 kmalloc_nolock_noprof+0x164/0x310 [...] Fix this by returning NULL early when invoked from NMI on a UP kernel. Link: https://lore.kernel.org/linux-mm/ad_cqe51pvr1WaDg@hyeyoo Cc: stable@vger.kernel.org Fixes: af92793e52c3 ("slab: Introduce kmalloc_nolock() and kfree_nolock().") Signed-off-by: Harry Yoo (Oracle) Link: https://patch.msgid.link/20260427-nolock-api-fix-v2-2-a6b83a92d9a4@kernel.org Signed-off-by: Vlastimil Babka (SUSE) --- mm/slub.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index 161079ac5ba1..0baa906f39ab 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5339,6 +5339,10 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node) if (IS_ENABLED(CONFIG_PREEMPT_RT) && (in_nmi() || in_hardirq())) return NULL; + /* On UP, spin_trylock() always succeeds even when it is locked */ + if (!IS_ENABLED(CONFIG_SMP) && in_nmi()) + return NULL; + retry: if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return NULL; -- cgit v1.2.3 From 1101baca98669833fb3ad2dcd24bc06f9e70e66b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 20 Apr 2026 12:26:44 -0700 Subject: KVM: selftests: Add check_steal_time_uapi() implementation for LoongArch Define check_steal_time_uapi() for LoongArch so that the steal_time test builds. Note, while LoongArch's steal_time_init() has some funky asserts, none of the code is uniquely verifying KVM's uAPI. Cc: Jiakai Xu Cc: Jiakai Xu Cc: Andrew Jones Cc: Anup Patel Cc: Tianrui Zhao Cc: Bibo Mao Cc: Huacai Chen Fixes: 40351ed924dd ("KVM: selftests: Refactor UAPI tests into dedicated function") Signed-off-by: Sean Christopherson Message-ID: <20260420192644.3892050-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/steal_time.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index efe56a10d13e..ea18bbbbd9f9 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -461,6 +461,11 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) ksft_print_msg(" version: %d\n", st->version); ksft_print_msg(" preempted: %d\n", st->preempted); } + +static void check_steal_time_uapi(void) +{ + +} #endif static void *do_steal_time(void *arg) -- cgit v1.2.3 From b560d414239232c6ed7205d3795d3f588034d69b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 10 Apr 2026 09:09:35 +0200 Subject: pinctrl: mediatek: moore: implement gpio_chip::get_direction() If the gpio_chip::get_direction() callback is not implemented by the GPIO controller driver, GPIOLIB emits a warning. Implement get_direction() for the GPIO part of pinctrl-moore. Fixes: 471e998c0e31 ("gpiolib: remove redundant callback check") Fixes: e623c4303ed1 ("gpiolib: sanitize the return value of gpio_chip::get_direction()") Reported-by: Frank Wunderlich Closes: https://lore.kernel.org/all/20260409132724.126258-1-linux@fw-web.de/ Signed-off-by: Bartosz Golaszewski Tested-By: Frank Wunderlich Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-moore.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/pinctrl/mediatek/pinctrl-moore.c b/drivers/pinctrl/mediatek/pinctrl-moore.c index 70f608347a5f..071ba849e532 100644 --- a/drivers/pinctrl/mediatek/pinctrl-moore.c +++ b/drivers/pinctrl/mediatek/pinctrl-moore.c @@ -520,6 +520,23 @@ static int mtk_gpio_direction_output(struct gpio_chip *chip, unsigned int gpio, return pinctrl_gpio_direction_output(chip, gpio); } +static int mtk_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) +{ + struct mtk_pinctrl *hw = gpiochip_get_data(chip); + const struct mtk_pin_desc *desc; + int ret, dir; + + desc = (const struct mtk_pin_desc *)&hw->soc->pins[offset]; + if (!desc->name) + return -ENOTSUPP; + + ret = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_DIR, &dir); + if (ret) + return ret; + + return dir ? GPIO_LINE_DIRECTION_OUT : GPIO_LINE_DIRECTION_IN; +} + static int mtk_gpio_to_irq(struct gpio_chip *chip, unsigned int offset) { struct mtk_pinctrl *hw = gpiochip_get_data(chip); @@ -566,6 +583,7 @@ static int mtk_build_gpiochip(struct mtk_pinctrl *hw) chip->parent = hw->dev; chip->request = gpiochip_generic_request; chip->free = gpiochip_generic_free; + chip->get_direction = mtk_gpio_get_direction; chip->direction_input = pinctrl_gpio_direction_input; chip->direction_output = mtk_gpio_direction_output; chip->get = mtk_gpio_get; -- cgit v1.2.3 From b51d33ea8a164bb5f0eec8ad817fa9730ac2b577 Mon Sep 17 00:00:00 2001 From: Til Kaiser Date: Mon, 13 Apr 2026 15:52:34 +0200 Subject: pinctrl: qcom: ipq4019: mark gpio as a GPIO pin function The qcom pinctrl core supports marking functions that represent GPIO mode via PINCTRL_GPIO_PINFUNCTION(), so that strict pinmuxing does not reject GPIO requests for pins that are muxed to the GPIO function. ipq4019 still describes its gpio function with QCA_PIN_FUNCTION(gpio), so it is not treated as a GPIO pin function. As a result, GPIO consumers can still conflict with pinctrl states that select the "gpio" function. Add a QCA_GPIO_PIN_FUNCTION() helper and use it for the ipq4019 gpio function, matching how the msm-based qcom drivers handle this. This allows ipq4019 to keep the GPIO-related pin configuration in DTS without tripping over strict pinmux ownership checks. Fixes: cc85cb96e2e4 ("pinctrl: qcom: make the pinmuxing strict") Signed-off-by: Til Kaiser Reviewed-by: Dmitry Baryshkov Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-ipq4019.c | 2 +- drivers/pinctrl/qcom/pinctrl-msm.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq4019.c b/drivers/pinctrl/qcom/pinctrl-ipq4019.c index c5f0decc3eb3..05fdd73b951e 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq4019.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq4019.c @@ -479,7 +479,7 @@ static const struct pinfunction ipq4019_functions[] = { QCA_PIN_FUNCTION(blsp_uart0), QCA_PIN_FUNCTION(blsp_uart1), QCA_PIN_FUNCTION(chip_rst), - QCA_PIN_FUNCTION(gpio), + QCA_GPIO_PIN_FUNCTION(gpio), QCA_PIN_FUNCTION(i2s_rx), QCA_PIN_FUNCTION(i2s_spdif_in), QCA_PIN_FUNCTION(i2s_spdif_out), diff --git a/drivers/pinctrl/qcom/pinctrl-msm.h b/drivers/pinctrl/qcom/pinctrl-msm.h index a4af279f748a..4fbff61de6bb 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.h +++ b/drivers/pinctrl/qcom/pinctrl-msm.h @@ -39,6 +39,11 @@ struct pinctrl_pin_desc; fname##_groups, \ ARRAY_SIZE(fname##_groups)) +#define QCA_GPIO_PIN_FUNCTION(fname) \ + [qca_mux_##fname] = PINCTRL_GPIO_PINFUNCTION(#fname, \ + fname##_groups, \ + ARRAY_SIZE(fname##_groups)) + /** * struct msm_pingroup - Qualcomm pingroup definition * @grp: Generic data of the pin group (name and pins) -- cgit v1.2.3 From bfb4dc533d0abaca07013dd71e6b5c6f182232b3 Mon Sep 17 00:00:00 2001 From: Jinliang Zheng Date: Fri, 10 Apr 2026 18:11:06 +0800 Subject: xfs: remove the meaningless XFS_ALLOC_FLAG_FREEING In xfs_refcount_finish_one(), there's no need to pass XFS_ALLOC_FLAG_FREEING to xfs_alloc_read_agf(). So remove it. Signed-off-by: Jinliang Zheng Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_refcount.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 40c7f0ff6cf3..0ec6ccd8b4dc 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1414,8 +1414,7 @@ xfs_refcount_finish_one( if (rcur == NULL) { struct xfs_perag *pag = to_perag(ri->ri_group); - error = xfs_alloc_read_agf(pag, tp, - XFS_ALLOC_FLAG_FREEING, &agbp); + error = xfs_alloc_read_agf(pag, tp, 0, &agbp); if (error) return error; -- cgit v1.2.3 From 00dd8d7ec5253c6273023a0fd6dc08683e0bdfef Mon Sep 17 00:00:00 2001 From: Yuto Ohnuki Date: Sat, 11 Apr 2026 15:24:13 +0100 Subject: xfs: zero entire directory data block header region at init xfs_dir3_data_init currently zeroes only the xfs_dir3_blk_hdr portion of the directory data block header, then manually initializes the bestfree entries in a loop. This leaves the pad field in xfs_dir3_data_hdr uninitialized and requires explicit zeroing of each bestfree slot. Zero the entire header region (geo->data_entry_offset bytes) unconditionally before setting individual fields. This covers all current and future header fields, all padding (implicit and explicit), and the bestfree array, so the manual zeroing loop for bestfree can be removed. Suggested-by: Dave Chinner Signed-off-by: Yuto Ohnuki Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_dir2_data.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 80ba94f51e5c..35ff119aa84b 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -728,7 +728,6 @@ xfs_dir3_data_init( struct xfs_dir2_data_unused *dup; struct xfs_dir2_data_free *bf; int error; - int i; /* * Get the buffer set up for the block. @@ -741,13 +740,16 @@ xfs_dir3_data_init( xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF); /* - * Initialize the header. + * Initialize the whole directory header region to zero + * so that all padding, bestfree entries, and any + * future header fields are clean. */ hdr = bp->b_addr; + memset(hdr, 0, geo->data_entry_offset); + if (xfs_has_crc(mp)) { struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; - memset(hdr3, 0, sizeof(*hdr3)); hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC); hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp)); hdr3->owner = cpu_to_be64(args->owner); @@ -759,10 +761,6 @@ xfs_dir3_data_init( bf = xfs_dir2_data_bestfree_p(mp, hdr); bf[0].offset = cpu_to_be16(geo->data_entry_offset); bf[0].length = cpu_to_be16(geo->blksize - geo->data_entry_offset); - for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { - bf[i].length = 0; - bf[i].offset = 0; - } /* * Set up an unused entry for the block's body. -- cgit v1.2.3 From 8fbb1877dfa5e26bda1baf8cc6abd3f805098486 Mon Sep 17 00:00:00 2001 From: Yuto Ohnuki Date: Sat, 11 Apr 2026 15:24:14 +0100 Subject: xfs: zero directory data block padding on write verification Old kernels did not zero the pad field in xfs_dir3_data_hdr when initializing directory data blocks, so existing filesystems may have non-zero padding on disk. Zero the pad field in xfs_dir3_data_write_verify alongside the existing LSN and checksum updates. The pad field is pure alignment padding with no runtime meaning, so zeroing it during write verification is safe and has no additional I/O cost. This lets filesystems gradually self-heal stale non-zero padding as directories are modified, without requiring an explicit repair pass. Suggested-by: Dave Chinner Signed-off-by: Yuto Ohnuki Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_dir2_data.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 35ff119aa84b..aecbab61014c 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -382,6 +382,7 @@ xfs_dir3_data_write_verify( struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + struct xfs_dir3_data_hdr *datahdr3 = bp->b_addr; xfs_failaddr_t fa; fa = xfs_dir3_data_verify(bp); @@ -396,6 +397,11 @@ xfs_dir3_data_write_verify( if (bip) hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); + /* + * Zero padding that may be stale from old kernels. + */ + datahdr3->pad = 0; + xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); } -- cgit v1.2.3 From 939919ccddfcc379bb82b1f90f732d9a5cb32cc8 Mon Sep 17 00:00:00 2001 From: Yuto Ohnuki Date: Sat, 11 Apr 2026 15:24:15 +0100 Subject: xfs: check directory data block header padding in scrub Add the missing scrub check for the pad field in directory data block headers. Old kernels may have written non-zero padding without issue, and the write path now self-heals stale padding on modification. Flag non-zero padding as an optimization opportunity (preen) rather than corruption. Add xchk_fblock_set_preen helper for reporting file fork block issues that could be optimized. The trace event xchk_fblock_preen already exists. Signed-off-by: Yuto Ohnuki Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/scrub/common.c | 11 +++++++++++ fs/xfs/scrub/common.h | 2 ++ fs/xfs/scrub/dir.c | 7 ++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 20e63069088b..3d40cb0b2496 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -251,6 +251,17 @@ xchk_ino_set_preen( trace_xchk_ino_preen(sc, ino, __return_address); } +/* Record a block indexed by a file fork that could be optimized. */ +void +xchk_fblock_set_preen( + struct xfs_scrub *sc, + int whichfork, + xfs_fileoff_t offset) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; + trace_xchk_fblock_preen(sc, whichfork, offset, __return_address); +} + /* Record something being wrong with the filesystem primary superblock. */ void xchk_set_corrupt( diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index f2ecc68538f0..b494d747c008 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -25,6 +25,8 @@ bool xchk_fblock_xref_process_error(struct xfs_scrub *sc, void xchk_block_set_preen(struct xfs_scrub *sc, struct xfs_buf *bp); void xchk_ino_set_preen(struct xfs_scrub *sc, xfs_ino_t ino); +void xchk_fblock_set_preen(struct xfs_scrub *sc, + int whichfork, xfs_fileoff_t offset); void xchk_set_corrupt(struct xfs_scrub *sc); void xchk_block_set_corrupt(struct xfs_scrub *sc, diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index e09724cd3725..09715a4aa154 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -492,7 +492,12 @@ xchk_directory_data_bestfree( goto out; xchk_buffer_recheck(sc, bp); - /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */ + if (xfs_has_crc(sc->mp)) { + struct xfs_dir3_data_hdr *hdr3 = bp->b_addr; + + if (hdr3->pad) + xchk_fblock_set_preen(sc, XFS_DATA_FORK, lblk); + } if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out_buf; -- cgit v1.2.3 From 592975da8c3ca87b043077e6eafa37665eae7936 Mon Sep 17 00:00:00 2001 From: Wilfred Mallawa Date: Wed, 15 Apr 2026 09:45:14 +1000 Subject: xfs: fix memory leak on error in xfs_alloc_zone_info() Currently, the 0th index of the zi_used_bucket_bitmap array is not freed on error due to the pre-decrement then evaluate semantic of the while loop used in xfs_alloc_zone_info(). Fix it by allowing for the i == 0 case to be covered. Fixes: 080d01c41d44 ("xfs: implement zoned garbage collection") Cc: stable@vger.kernel.org # v6.15 Reviewed-by: Damien Le Moal Reviewed-by: Carlos Maiolino Signed-off-by: Wilfred Mallawa Reviewed-by: Hans Holmberg Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index a851b98143c0..c64f9ab743a6 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -1217,7 +1217,7 @@ xfs_alloc_zone_info( return zi; out_free_bitmaps: - while (--i > 0) + while (--i >= 0) kvfree(zi->zi_used_bucket_bitmap[i]); kfree(zi); return NULL; -- cgit v1.2.3 From af47a4be6a90c8bfc874f9994ac9c15813b9718b Mon Sep 17 00:00:00 2001 From: Wilfred Mallawa Date: Fri, 17 Apr 2026 12:16:30 +1000 Subject: xfs: fix memory leak for data allocated by xfs_zone_gc_data_alloc() In xfs_zone_gc_mount(), on error, a struct xfs_zone_gc_data allocated with xfs_zone_gc_data_alloc() is freed with kfree(), however, this doesn't free the underlying folios or the rmap_irecs. Use xfs_zone_gc_data_free() to correctly free this memory. Fixes: 080d01c41d44 ("xfs: implement zoned garbage collection") Cc: stable@vger.kernel.org # v6.15 Signed-off-by: Wilfred Mallawa Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index fedcc47048af..c8a1d5c0332c 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -1221,7 +1221,7 @@ out_put_oz: if (data->oz) xfs_open_zone_put(data->oz); out_free_gc_data: - kfree(data); + xfs_zone_gc_data_free(data); return error; } -- cgit v1.2.3 From fca20fcb76a20655daf18738f4a88c638a6bb64c Mon Sep 17 00:00:00 2001 From: Yuto Ohnuki Date: Fri, 10 Apr 2026 18:06:15 +0100 Subject: xfs: check da node block pad field during scrub The da node block header (xfs_da3_node_hdr) contains a __pad32 field that should always be zero. Add a check for this during directory and attribute btree scrubbing. Since old kernels may have written non-zero padding without issues, flag this as an optimization opportunity (preen) rather than corruption. Signed-off-by: Yuto Ohnuki Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/scrub/dabtree.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 1a71d36898b1..c2d6ad59d03e 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -454,7 +454,12 @@ xchk_da_btree_block( } } - /* XXX: Check hdr3.pad32 once we know how to fix it. */ + if (xfs_has_crc(ip->i_mount)) { + struct xfs_da3_node_hdr *nodehdr3 = blk->bp->b_addr; + + if (nodehdr3->__pad32) + xchk_da_set_preen(ds, level); + } break; default: xchk_da_set_corrupt(ds, level); -- cgit v1.2.3 From 86637727c11a105499e9faa38f3422dfcf4d211d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 6 Jan 2026 18:09:51 +0100 Subject: arm64: dts: renesas: r8a78000: Fix SCIF brg_int clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the documentation, the internal clock input for the BRG is SGASYNCD4_PERW_BUSφ. Fixes: c13a643e2c491f5b ("arm64: dts: renesas: Add R8A78000 SoC support") Signed-off-by: Geert Uytterhoeven Link: https://patch.msgid.link/459d360a8332f92b3766b30814e7e1c76169aaf7.1767719254.git.geert+renesas@glider.be --- arch/arm64/boot/dts/renesas/r8a78000.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r8a78000.dtsi b/arch/arm64/boot/dts/renesas/r8a78000.dtsi index 3e1c98903cea..3ec1b53d2782 100644 --- a/arch/arm64/boot/dts/renesas/r8a78000.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a78000.dtsi @@ -699,7 +699,7 @@ "renesas,rcar-gen5-scif", "renesas,scif"; reg = <0 0xc0700000 0 0x40>; interrupts = ; - clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>; + clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd4>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; }; @@ -709,7 +709,7 @@ "renesas,rcar-gen5-scif", "renesas,scif"; reg = <0 0xc0704000 0 0x40>; interrupts = ; - clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>; + clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd4>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; }; @@ -719,7 +719,7 @@ "renesas,rcar-gen5-scif", "renesas,scif"; reg = <0 0xc0708000 0 0x40>; interrupts = ; - clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>; + clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd4>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; }; @@ -729,7 +729,7 @@ "renesas,rcar-gen5-scif", "renesas,scif"; reg = <0 0xc070c000 0 0x40>; interrupts = ; - clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd16>, <&scif_clk>; + clocks = <&dummy_clk_sgasyncd16>, <&dummy_clk_sgasyncd4>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; status = "disabled"; }; -- cgit v1.2.3 From d289b5f56ab7fe939dc5bfc87c856b46fe5def38 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 26 Mar 2026 05:23:58 +0100 Subject: arm64: dts: renesas: draak/ebisu-panel: Fix missing cells and reg in DTO Add missing cells and reg DT property in the Draak/Ebisu panel DTO to fix the following DTC W=1 warning: arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso:30.10-34.5: Warning (unit_address_vs_reg): /fragment@2/__overlay__/ports/port@1: node has a unit name, but no reg or ranges property Signed-off-by: Marek Vasut Reviewed-by: Laurent Pinchart Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260326042411.215241-2-marek.vasut+renesas@mailbox.org Signed-off-by: Geert Uytterhoeven --- arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso b/arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso index 258f8668ca36..90767d74e21b 100644 --- a/arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso +++ b/arch/arm64/boot/dts/renesas/draak-ebisu-panel-aa104xd12.dtso @@ -27,7 +27,12 @@ status = "okay"; ports { + #address-cells = <1>; + #size-cells = <0>; + port@1 { + reg = <1>; + lvds1_out: endpoint { remote-endpoint = <&panel_in>; }; -- cgit v1.2.3 From 2016dde0685a091002851df8005757150a0e9350 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 26 Mar 2026 05:23:59 +0100 Subject: arm64: dts: renesas: salvator-panel: Fix missing cells and reg in DTO Add missing cells and reg DT property in the Salvator-X panel DTO to fix the following DTC W=1 warning: arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso:30.10-34.5: Warning (unit_address_vs_reg): /fragment@2/__overlay__/ports/port@1: node has a unit name, but no reg or ranges property Signed-off-by: Marek Vasut Reviewed-by: Laurent Pinchart Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260326042411.215241-3-marek.vasut+renesas@mailbox.org Signed-off-by: Geert Uytterhoeven --- arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso b/arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso index c83a30adc6ad..7807c3f80409 100644 --- a/arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso +++ b/arch/arm64/boot/dts/renesas/salvator-panel-aa104xd12.dtso @@ -27,7 +27,12 @@ status = "okay"; ports { + #address-cells = <1>; + #size-cells = <0>; + port@1 { + reg = <1>; + lvds0_out: endpoint { remote-endpoint = <&panel_in>; }; -- cgit v1.2.3 From 25b113f187bf07f8caa3f40a96e7ec6de850767e Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 26 Mar 2026 05:24:00 +0100 Subject: arm64: dts: renesas: rz-smarc-cru-csi-ov5645: Fix missing cells and reg in CSI2 subnode Add missing cells and reg DT property in the CSI2 subnode to fix the following DTC W=1 warning: arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi:49.10-55.5: Warning (unit_address_vs_reg): /fragment@2/__overlay__/ports/port@0: node has a unit name, but no reg or ranges property Signed-off-by: Marek Vasut Reviewed-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Link: https://patch.msgid.link/20260326042411.215241-4-marek.vasut+renesas@mailbox.org Signed-off-by: Geert Uytterhoeven --- arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi b/arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi index 4d2b0655859a..3feffa4f16a9 100644 --- a/arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi +++ b/arch/arm64/boot/dts/renesas/rz-smarc-cru-csi-ov5645.dtsi @@ -46,7 +46,12 @@ status = "okay"; ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + csi2_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; -- cgit v1.2.3 From ca743e8ac2b41c295d5ee12ed231fccb52161a0b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 26 Mar 2026 05:24:01 +0100 Subject: arm64: dts: renesas: rz-smarc-du-adv7513-smarc: Fix missing cells and reg in DU subnode Add missing cells and reg DT property in the DU subnode to fix the following DTC W=1 warning: arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi:29.10-33.5: Warning (unit_address_vs_reg): /fragment@1/__overlay__/ports/port@0: node has a unit name, but no reg or ranges property Signed-off-by: Marek Vasut Reviewed-by: Laurent Pinchart Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260326042411.215241-5-marek.vasut+renesas@mailbox.org Signed-off-by: Geert Uytterhoeven --- arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi b/arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi index 36707576030d..f5412578ee65 100644 --- a/arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi +++ b/arch/arm64/boot/dts/renesas/rz-smarc-du-adv7513.dtsi @@ -26,7 +26,12 @@ status = "okay"; ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + du_out_rgb: endpoint { remote-endpoint = <&adv7513_in>; }; -- cgit v1.2.3 From 1ca2d1af3826a6de6fd300f9b122d10d21a64266 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 28 Mar 2026 00:42:06 +0100 Subject: ARM: dts: renesas: r8a7778: Add missing unit address to bus node Add missing unit address to bus node to fix the following DTC W=1 warning: arch/arm/boot/dts/renesas/r8a7778.dtsi:43.12-48.4: Warning (unit_address_vs_reg): /bus: node has a reg or ranges property, but no unit name Signed-off-by: Marek Vasut Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260327234244.91707-2-marek.vasut+renesas@mailbox.org Signed-off-by: Geert Uytterhoeven --- arch/arm/boot/dts/renesas/r8a7778.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/renesas/r8a7778.dtsi b/arch/arm/boot/dts/renesas/r8a7778.dtsi index 859dd29dfce3..7db456b19795 100644 --- a/arch/arm/boot/dts/renesas/r8a7778.dtsi +++ b/arch/arm/boot/dts/renesas/r8a7778.dtsi @@ -40,7 +40,7 @@ spi2 = &hspi2; }; - lbsc: bus { + lbsc: bus@0 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; -- cgit v1.2.3 From fd62c046cdc8fb8b1b3e358e791317b70bbc1269 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 28 Mar 2026 00:42:07 +0100 Subject: ARM: dts: renesas: r8a7779: Add missing unit address to bus node Add missing unit address to bus node to fix the following DTC W=1 warning: arch/arm/boot/dts/renesas/r8a7779.dtsi:707.12-712.4: Warning (unit_address_vs_reg): /bus: node has a reg or ranges property, but no unit name Signed-off-by: Marek Vasut Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260327234244.91707-3-marek.vasut+renesas@mailbox.org Signed-off-by: Geert Uytterhoeven --- arch/arm/boot/dts/renesas/r8a7779.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/renesas/r8a7779.dtsi b/arch/arm/boot/dts/renesas/r8a7779.dtsi index e437c22f452d..9e8a7e190c89 100644 --- a/arch/arm/boot/dts/renesas/r8a7779.dtsi +++ b/arch/arm/boot/dts/renesas/r8a7779.dtsi @@ -704,7 +704,7 @@ }; }; - lbsc: bus { + lbsc: bus@0 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; -- cgit v1.2.3 From 78c459d057e970401f59781c73e1523bc1dec51f Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 28 Mar 2026 00:42:08 +0100 Subject: ARM: dts: renesas: r8a7792: Add missing unit address to bus node Add missing unit address to bus node to fix the following DTC W=1 warning: arch/arm/boot/dts/renesas/r8a7792.dtsi:89.12-94.4: Warning (unit_address_vs_reg): /bus: node has a reg or ranges property, but no unit name Signed-off-by: Marek Vasut Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260327234244.91707-4-marek.vasut+renesas@mailbox.org Signed-off-by: Geert Uytterhoeven --- arch/arm/boot/dts/renesas/r8a7792.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/renesas/r8a7792.dtsi b/arch/arm/boot/dts/renesas/r8a7792.dtsi index 9e0de69ac3a3..fbdbcff1cbed 100644 --- a/arch/arm/boot/dts/renesas/r8a7792.dtsi +++ b/arch/arm/boot/dts/renesas/r8a7792.dtsi @@ -86,7 +86,7 @@ bootph-all; }; - lbsc: bus { + lbsc: bus@0 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; -- cgit v1.2.3 From c5f21e57e7582572dbb2eed4eaa041cad5694c90 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 28 Mar 2026 00:42:09 +0100 Subject: ARM: dts: renesas: r7s72100: Add missing unit address to bus node Add missing unit address to bus node to fix the following DTC W=1 warning: arch/arm/boot/dts/renesas/r7s72100.dtsi:40.11-46.4: Warning (unit_address_vs_reg): /bus: node has a reg or ranges property, but no unit name Signed-off-by: Marek Vasut Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260327234244.91707-5-marek.vasut+renesas@mailbox.org Signed-off-by: Geert Uytterhoeven --- arch/arm/boot/dts/renesas/r7s72100.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/renesas/r7s72100.dtsi b/arch/arm/boot/dts/renesas/r7s72100.dtsi index 245c26bb8e03..6ec57ffa72e8 100644 --- a/arch/arm/boot/dts/renesas/r7s72100.dtsi +++ b/arch/arm/boot/dts/renesas/r7s72100.dtsi @@ -37,7 +37,7 @@ clock-div = <3>; }; - bsc: bus { + bsc: bus@0 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; -- cgit v1.2.3 From 714e1d6bba0e0abe5c87c8e189a35fa690540df4 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 28 Mar 2026 00:42:10 +0100 Subject: ARM: dts: renesas: genmai: Drop superfluous cells Drop superfluous address-cells and size-cells to fix DTC W=1 warning: arch/arm/boot/dts/renesas/r7s72100-genmai.dts:28.17-55.4: Warning (avoid_unnecessary_addr_size): /flash@18000000: unnecessary #address-cells/#size-cells without "ranges", "dma-ranges" or child "reg" or "ranges" property Signed-off-by: Marek Vasut Fixes: 30e0a8cf886cb459 ("ARM: dts: renesas: genmai: Add FLASH nodes") Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260327234244.91707-6-marek.vasut+renesas@mailbox.org Signed-off-by: Geert Uytterhoeven --- arch/arm/boot/dts/renesas/r7s72100-genmai.dts | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/boot/dts/renesas/r7s72100-genmai.dts b/arch/arm/boot/dts/renesas/r7s72100-genmai.dts index 3c3756509714..da552a66615e 100644 --- a/arch/arm/boot/dts/renesas/r7s72100-genmai.dts +++ b/arch/arm/boot/dts/renesas/r7s72100-genmai.dts @@ -34,9 +34,6 @@ clocks = <&mstp9_clks R7S72100_CLK_SPIBSC0>; power-domains = <&cpg_clocks>; - #address-cells = <1>; - #size-cells = <1>; - partitions { compatible = "fixed-partitions"; #address-cells = <1>; -- cgit v1.2.3 From ab83176d3cf1cf1c1f6e604432905bda4515d17f Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 28 Mar 2026 00:42:11 +0100 Subject: ARM: dts: renesas: rskrza1: Drop superfluous cells Drop superfluous address-cells and size-cells to fix DTC W=1 warning: arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts:32.17-72.4: Warning (avoid_unnecessary_addr_size): /flash@18000000: unnecessary #address-cells/#size-cells without "ranges", "dma-ranges" or child "reg" or "ranges" property Signed-off-by: Marek Vasut Fixes: 98537eb77d3ef185 ("ARM: dts: renesas: rskrza1: Add FLASH nodes") Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260327234244.91707-7-marek.vasut+renesas@mailbox.org Signed-off-by: Geert Uytterhoeven --- arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts b/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts index 91178fb9e721..3306bc9b7bc3 100644 --- a/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts +++ b/arch/arm/boot/dts/renesas/r7s72100-rskrza1.dts @@ -36,8 +36,6 @@ power-domains = <&cpg_clocks>; bank-width = <4>; device-width = <1>; - #address-cells = <1>; - #size-cells = <1>; partitions { compatible = "fixed-partitions"; -- cgit v1.2.3 From d6cdab742c0548b5ce3309da108bbf7a1fc6f68e Mon Sep 17 00:00:00 2001 From: Tommaso Merciai Date: Tue, 7 Apr 2026 17:34:28 +0200 Subject: arm64: dts: renesas: r9a09g057: Add #mux-state-cells to usb2{0,1}phyrst The renesas,rzv2h-usb2phy-reset binding schema defines #mux-state-cells as a required property. Add it to the usb20phyrst and usb21phyrst nodes to fix the following warnings: arch/arm64/boot/dts/renesas/r9a09g057h44-rzv2h-evk.dtb: usb20phy-reset@15830000 (renesas,r9a09g057-usb2phy-reset): '#mux-state-cells' is a required property arch/arm64/boot/dts/renesas/r9a09g057h44-rzv2h-evk.dtb: usb21phy-reset@15840000 (renesas,r9a09g057-usb2phy-reset): '#mux-state-cells' is a required property arch/arm64/boot/dts/renesas/r9a09g057h44-rzv2h-evk-cn15-emmc.dtb: usb20phy-reset@15830000 (renesas,r9a09g057-usb2phy-reset): '#mux-state-cells' is a required property arch/arm64/boot/dts/renesas/r9a09g057h44-rzv2h-evk-cn15-emmc.dtb: usb21phy-reset@15840000 (renesas,r9a09g057-usb2phy-reset): '#mux-state-cells' is a required property arch/arm64/boot/dts/renesas/r9a09g057h44-rzv2h-evk-cn15-sd.dtb: usb20phy-reset@15830000 (renesas,r9a09g057-usb2phy-reset): '#mux-state-cells' is a required property arch/arm64/boot/dts/renesas/r9a09g057h44-rzv2h-evk-cn15-sd.dtb: usb21phy-reset@15840000 (renesas,r9a09g057-usb2phy-reset): '#mux-state-cells' is a required property Fixes: 6a1b6f7e56dc ("dt-bindings: reset: renesas,rzv2h-usb2phy: Add '#mux-state-cells' property") Signed-off-by: Tommaso Merciai Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/22fb9a500cdbc3272dc23cd5e36bca5fbbec75fc.1775575276.git.tommaso.merciai.xr@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- arch/arm64/boot/dts/renesas/r9a09g057.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi index 9581af58024e..6f6fe5f36bef 100644 --- a/arch/arm64/boot/dts/renesas/r9a09g057.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a09g057.dtsi @@ -1345,6 +1345,7 @@ resets = <&cpg 0xaf>; power-domains = <&cpg>; #reset-cells = <0>; + #mux-state-cells = <1>; status = "disabled"; }; @@ -1355,6 +1356,7 @@ resets = <&cpg 0xaf>; power-domains = <&cpg>; #reset-cells = <0>; + #mux-state-cells = <1>; status = "disabled"; }; -- cgit v1.2.3 From 7e070a14beaf036588f164575bbaf7011dd26285 Mon Sep 17 00:00:00 2001 From: Tommaso Merciai Date: Tue, 7 Apr 2026 17:34:29 +0200 Subject: arm64: dts: renesas: r9a09g056: Add #mux-state-cells to usb20phyrst The renesas,rzv2h-usb2phy-reset binding schema defines #mux-state-cells as a required property. Add it to the usb20phyrst node to fix the following warnings: arch/arm64/boot/dts/renesas/r9a09g056n48-rzv2n-evk.dtb: usb20phy-reset@15830000 (renesas,r9a09g056-usb2phy-reset): '#mux-state-cells' is a required property arch/arm64/boot/dts/renesas/r9a09g056n48-rzv2n-evk-cn15-emmc.dtb: usb20phy-reset@15830000 (renesas,r9a09g056-usb2phy-reset): '#mux-state-cells' is a required property arch/arm64/boot/dts/renesas/r9a09g056n48-rzv2n-evk-cn15-sd.dtb: usb20phy-reset@15830000 (renesas,r9a09g056-usb2phy-reset): '#mux-state-cells' is a required property Fixes: 6a1b6f7e56dc ("dt-bindings: reset: renesas,rzv2h-usb2phy: Add '#mux-state-cells' property") Signed-off-by: Tommaso Merciai Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/31210e05f7189b466b30eedbdda3d11726dac279.1775575276.git.tommaso.merciai.xr@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- arch/arm64/boot/dts/renesas/r9a09g056.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/renesas/r9a09g056.dtsi b/arch/arm64/boot/dts/renesas/r9a09g056.dtsi index 40525470194e..7ccddd6a4a9a 100644 --- a/arch/arm64/boot/dts/renesas/r9a09g056.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a09g056.dtsi @@ -1327,6 +1327,7 @@ resets = <&cpg 0xaf>; power-domains = <&cpg>; #reset-cells = <0>; + #mux-state-cells = <1>; status = "disabled"; }; -- cgit v1.2.3 From 0cfe660559e857d7c00ab86c73e4510ce069086f Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Fri, 24 Apr 2026 15:39:00 -0400 Subject: KVM: s390: pci: Fix aisb calculation The current implementation of aisb calculation will erroneously index via an unsigned long * as well as multiply by 8B for every 64-bits in the offset; only one or the other is required. This throws off aisb calculations once the number of devices exceeds 64, and can result in out-of-bounds access as well as failure to indicate summary bits associated with those devices in guests. Fix this by converting to a physical address before applying the offset, as is already done in arch/s390/pci/pci_irq.c. Fixes: 3c5a1b6f0a18 ("KVM: s390: pci: provide routines for enabling/disabling interrupt forwarding") Signed-off-by: Matthew Rosato Reviewed-by: Niklas Schnelle Signed-off-by: Christian Borntraeger --- arch/s390/kvm/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index eed45af1a92d..5b075c38998e 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -166,7 +166,7 @@ static int kvm_zpci_set_airq(struct zpci_dev *zdev) fib.fmt0.noi = airq_iv_end(zdev->aibv); fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector); fib.fmt0.aibvo = 0; - fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8); + fib.fmt0.aisb = virt_to_phys(aift->sbv->vector) + (zdev->aisb / 64) * 8; fib.fmt0.aisbo = zdev->aisb & 63; fib.gd = zdev->gisa; @@ -308,7 +308,7 @@ static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib, /* Update guest FIB for re-issue */ fib->fmt0.aisbo = zdev->aisb & 63; - fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8); + fib->fmt0.aisb = virt_to_phys(aift->sbv->vector) + (zdev->aisb / 64) * 8; fib->fmt0.isc = gisc; /* Save some guest fib values in the host for later use */ -- cgit v1.2.3 From 6dba9b7268cc50166bce47608670192fd874e363 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Sat, 28 Mar 2026 09:05:45 +0000 Subject: pinctrl: renesas: rzg2l: Fix incorrect PUPD register offset for high pins during suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When saving/restoring pull-up/down register state during suspend/resume, the second PUPD register access was incorrectly using the same base offset as the first, effectively reading/writing the same register twice instead of the adjacent one. Add the correct + 4 byte offset to the second RZG2L_PCTRL_REG_ACCESS32 call so that pupd[1][port] is properly saved and restored from the next 32-bit register in the PUPD register pair, covering pins 4–7 of ports with 4 or more pins. Fixes: b2bd65fbb617 ("pinctrl: renesas: rzg2l: Add suspend/resume support for pull up/down") Signed-off-by: Biju Das Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260328090548.84124-1-biju.das.jz@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/pinctrl/renesas/pinctrl-rzg2l.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c index 561e6018fd89..68b94c748f53 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c +++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c @@ -3049,7 +3049,7 @@ static void rzg2l_pinctrl_pm_setup_regs(struct rzg2l_pinctrl *pctrl, bool suspen RZG2L_PCTRL_REG_ACCESS32(suspend, pctrl->base + PUPD(off), cache->pupd[0][port]); if (pincnt >= 4) { - RZG2L_PCTRL_REG_ACCESS32(suspend, pctrl->base + PUPD(off), + RZG2L_PCTRL_REG_ACCESS32(suspend, pctrl->base + PUPD(off) + 4, cache->pupd[1][port]); } } -- cgit v1.2.3 From c88ab9407986836820848128ce1f90f2fa49da95 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 13 Apr 2026 19:24:51 +0100 Subject: pinctrl: renesas: rzg2l: Fix SMT register cache handling Store SMT register cache per bank instead of using a single array. On RZ/V2H(P), RZ/V2N, and RZ/G3E, the SMT register is split across two 32-bit registers: bits 0/8/16/24 control pins 0-3, while pins 4-7 are controlled by the corresponding bits in the next register. The previous implementation cached only a single SMT register, leading to incomplete save/restore of SMT state. Convert cache->smt to a per-bank array and allocate storage for both halves. Update suspend/resume handling to save and restore both SMT registers when present. Fixes: 837afa592c623 ("pinctrl: renesas: rzg2l: Add suspend/resume support for Schmitt control registers") Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260413182456.811543-2-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/pinctrl/renesas/pinctrl-rzg2l.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c index 68b94c748f53..1c6b115e65d8 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c +++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c @@ -335,7 +335,7 @@ struct rzg2l_pinctrl_reg_cache { u32 *iolh[2]; u32 *ien[2]; u32 *pupd[2]; - u32 *smt; + u32 *smt[2]; u8 sd_ch[2]; u8 eth_poc[2]; u8 oen; @@ -2737,10 +2737,6 @@ static int rzg2l_pinctrl_reg_cache_alloc(struct rzg2l_pinctrl *pctrl) if (!cache->pfc) return -ENOMEM; - cache->smt = devm_kcalloc(pctrl->dev, nports, sizeof(*cache->smt), GFP_KERNEL); - if (!cache->smt) - return -ENOMEM; - for (u8 i = 0; i < 2; i++) { u32 n_dedicated_pins = pctrl->data->n_dedicated_pins; @@ -2759,6 +2755,11 @@ static int rzg2l_pinctrl_reg_cache_alloc(struct rzg2l_pinctrl *pctrl) if (!cache->pupd[i]) return -ENOMEM; + cache->smt[i] = devm_kcalloc(pctrl->dev, nports, sizeof(*cache->smt[i]), + GFP_KERNEL); + if (!cache->smt[i]) + return -ENOMEM; + /* Allocate dedicated cache. */ dedicated_cache->iolh[i] = devm_kcalloc(pctrl->dev, n_dedicated_pins, sizeof(*dedicated_cache->iolh[i]), @@ -3066,8 +3067,14 @@ static void rzg2l_pinctrl_pm_setup_regs(struct rzg2l_pinctrl *pctrl, bool suspen } } - if (has_smt) - RZG2L_PCTRL_REG_ACCESS32(suspend, pctrl->base + SMT(off), cache->smt[port]); + if (has_smt) { + RZG2L_PCTRL_REG_ACCESS32(suspend, pctrl->base + SMT(off), + cache->smt[0][port]); + if (pincnt >= 4) { + RZG2L_PCTRL_REG_ACCESS32(suspend, pctrl->base + SMT(off) + 4, + cache->smt[1][port]); + } + } } } -- cgit v1.2.3 From 3d4c2268bd7243c3780fe32bf24ff876da272acf Mon Sep 17 00:00:00 2001 From: Ashutosh Desai Date: Mon, 20 Apr 2026 01:36:37 +0000 Subject: drm/gem: Fix inconsistent plane dimension calculation in drm_gem_fb_init_with_funcs() drm_gem_fb_init_with_funcs() computes sub-sampled plane dimensions using plain integer division: unsigned int width = mode_cmd->width / (i ? info->hsub : 1); unsigned int height = mode_cmd->height / (i ? info->vsub : 1); However, the ioctl-level framebuffer_check() in drm_framebuffer.c uses drm_format_info_plane_width/height() which round up dimensions via DIV_ROUND_UP(). This inconsistency corrupts the subsequent GEM object size check for certain pixel format and dimension combinations. For example, with NV12 (vsub=2) and a 1-pixel-tall framebuffer the GEM size validation path sees height=0 instead of height=1. The expression (height - 1) then wraps to UINT_MAX as an unsigned int, causing min_size to overflow and wrap back to a small value. A tiny GEM object therefore passes the size guard, yet when the GPU accesses the chroma plane it will read or write memory beyond the object's bounds. Fix by replacing the open-coded divisions with drm_format_info_plane_width() and drm_format_info_plane_height(), which use DIV_ROUND_UP() and match the calculation already used in framebuffer_check(). Fixes: 4c3dbb2c312c ("drm: Add GEM backed framebuffer library") Cc: stable@vger.kernel.org # v4.14+ Reviewed-by: Thomas Zimmermann Signed-off-by: Ashutosh Desai Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/20260420013637.457751-1-ashutoshdesai993@gmail.com --- drivers/gpu/drm/drm_gem_framebuffer_helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 9166c353f131..88808e972cc1 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -172,8 +172,8 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, } for (i = 0; i < info->num_planes; i++) { - unsigned int width = mode_cmd->width / (i ? info->hsub : 1); - unsigned int height = mode_cmd->height / (i ? info->vsub : 1); + unsigned int width = drm_format_info_plane_width(info, mode_cmd->width, i); + unsigned int height = drm_format_info_plane_height(info, mode_cmd->height, i); unsigned int min_size; objs[i] = drm_gem_object_lookup(file, mode_cmd->handles[i]); -- cgit v1.2.3 From 4aa8110000b0d215deef8eed283565dd0c1def88 Mon Sep 17 00:00:00 2001 From: Yuho Choi Date: Sun, 19 Apr 2026 20:25:13 -0400 Subject: drm/sysfb: ofdrm: fix PCI device reference leaks display_get_pci_dev_of() gets a referenced PCI device via pci_get_device(). Drop that reference when pci_enable_device() fails and release it during the managed teardown path after pci_disable_device(). Without that, ofdrm leaks the pci_dev reference on both the error path and the normal cleanup path. Fixes: c8a17756c425 ("drm/ofdrm: Add ofdrm for Open Firmware framebuffers") Co-developed-by: Myeonghun Pak Signed-off-by: Myeonghun Pak Co-developed-by: Ijae Kim Signed-off-by: Ijae Kim Co-developed-by: Taegyu Kim Signed-off-by: Taegyu Kim Signed-off-by: Yuho Choi Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/20260420002513.216-1-dbgh9129@gmail.com --- drivers/gpu/drm/sysfb/ofdrm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/sysfb/ofdrm.c b/drivers/gpu/drm/sysfb/ofdrm.c index d38ba70f4e0d..247cf13c80a0 100644 --- a/drivers/gpu/drm/sysfb/ofdrm.c +++ b/drivers/gpu/drm/sysfb/ofdrm.c @@ -350,6 +350,7 @@ static void ofdrm_pci_release(void *data) struct pci_dev *pcidev = data; pci_disable_device(pcidev); + pci_dev_put(pcidev); } static int ofdrm_device_init_pci(struct ofdrm_device *odev) @@ -375,6 +376,7 @@ static int ofdrm_device_init_pci(struct ofdrm_device *odev) if (ret) { drm_err(dev, "pci_enable_device(%s) failed: %d\n", dev_name(&pcidev->dev), ret); + pci_dev_put(pcidev); return ret; } ret = devm_add_action_or_reset(&pdev->dev, ofdrm_pci_release, pcidev); -- cgit v1.2.3 From 87e63466c9fc30c3d95b8741c3df1f1ff01d7f23 Mon Sep 17 00:00:00 2001 From: Ravi Singh Date: Wed, 22 Apr 2026 07:39:59 +0000 Subject: xfs: flush delalloc blocks on ENOSPC in xfs_trans_alloc_icreate xfs_trans_alloc_icreate() can fail with ENOSPC when delalloc reservations have consumed most of the available block count (fdblocks). xfs_trans_alloc() already retries internally with xfs_blockgc_flush_all(), but that only trims post-EOF speculative preallocation and may not free enough space for the transaction reservation. Add a retry with xfs_flush_inodes() when xfs_trans_alloc() returns ENOSPC. This forces writeback of all dirty inodes via sync_inodes_sb(), converting delalloc reservations to real allocations and freeing the over-reserved portion back to fdblocks. This fixes all callers of xfs_trans_alloc_icreate() and removes the existing caller-level retry from xfs_create(), which is now handled centrally. Signed-off-by: Ravi Singh Reviewed-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_inode.c | 6 ------ fs/xfs/xfs_trans.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index beaa26ec62da..9978ac1422fc 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -699,12 +699,6 @@ xfs_create( */ error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks, &tp); - if (error == -ENOSPC) { - /* flush outstanding delalloc blocks and retry */ - xfs_flush_inodes(mp); - error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, - resblks, &tp); - } if (error) goto out_parent; diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index bcc470f56e46..148cc32449c1 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1199,10 +1199,21 @@ xfs_trans_alloc_icreate( { struct xfs_trans *tp; bool retried = false; + bool flushed = false; int error; retry: error = xfs_trans_alloc(mp, resv, dblocks, 0, 0, &tp); + if (error == -ENOSPC && !flushed) { + /* + * Flush all delalloc blocks to reclaim space from speculative + * preallocation. This is similar to the quota retry below + * but targets FS-wide ENOSPC. + */ + xfs_flush_inodes(mp); + flushed = true; + goto retry; + } if (error) return error; -- cgit v1.2.3 From aaaa684bab1f6d9ecfc49db328facb1771fd0eb2 Mon Sep 17 00:00:00 2001 From: Sasha Finkelstein Date: Mon, 20 Apr 2026 14:17:43 +0200 Subject: drm/appletbdrm: Use kvzalloc for big allocations This driver is attached to a ~2000x80 screen, which is a lot more than a single page. This causes out of memory errors in some rare cases. Reported-by: soopyc Closes: https://github.com/t2linux/fedora/issues/51 Signed-off-by: Sasha Finkelstein Signed-off-by: Thomas Zimmermann Reviewed-by: Aditya Garg Reviewed-by: Thomas Zimmermann Fixes: 0670c2f56e45 ("drm/tiny: add driver for Apple Touch Bars in x86 Macs") Cc: # v6.15+ Link: https://patch.msgid.link/20260420-x86-tb-vmalloc-v1-1-7757ff657223@chaosmail.tech --- drivers/gpu/drm/tiny/appletbdrm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/tiny/appletbdrm.c b/drivers/gpu/drm/tiny/appletbdrm.c index 3bae91d7eefe..278bb23fe4c8 100644 --- a/drivers/gpu/drm/tiny/appletbdrm.c +++ b/drivers/gpu/drm/tiny/appletbdrm.c @@ -353,7 +353,7 @@ static int appletbdrm_primary_plane_helper_atomic_check(struct drm_plane *plane, frames_size + sizeof(struct appletbdrm_fb_request_footer), 16); - appletbdrm_state->request = kzalloc(request_size, GFP_KERNEL); + appletbdrm_state->request = kvzalloc(request_size, GFP_KERNEL); if (!appletbdrm_state->request) return -ENOMEM; @@ -543,7 +543,7 @@ static void appletbdrm_primary_plane_destroy_state(struct drm_plane *plane, { struct appletbdrm_plane_state *appletbdrm_state = to_appletbdrm_plane_state(state); - kfree(appletbdrm_state->request); + kvfree(appletbdrm_state->request); kfree(appletbdrm_state->response); __drm_gem_destroy_shadow_plane_state(&appletbdrm_state->base); -- cgit v1.2.3 From 9d5a2b8f6281f6090002517fb9272ea07038afe8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 21 Apr 2026 09:48:32 +0200 Subject: drm/color-mgmt: Typo s/R332/RGB332/ Fix a typo of "RGB332" in kerneldoc for the drm_crtc_fill_palette_332() helper. Fixes: 7ff61177b7116825 ("drm/color-mgmt: Prepare for RGB332 palettes") Signed-off-by: Geert Uytterhoeven Reviewed-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/c413e45c8f752a532a4ff377f7a8b9eaab4a082a.1776757681.git.geert+renesas@glider.be --- drivers/gpu/drm/drm_color_mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index c598b99673fc..e7db4e4ea700 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -831,7 +831,7 @@ static void fill_palette_332(struct drm_crtc *crtc, u16 r, u16 g, u16 b, } /** - * drm_crtc_fill_palette_332 - Programs a default palette for R332-like formats + * drm_crtc_fill_palette_332 - Programs a default palette for RGB332-like formats * @crtc: The displaying CRTC * @set_palette: Callback for programming the hardware gamma LUT * -- cgit v1.2.3 From 163f6494233e1679ec6fa6a4803f74ae7b1c94db Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Apr 2026 12:38:30 +0200 Subject: ata: pata_parport: switch to dynamic root device Driver core expects devices to be dynamically allocated and will, for example, complain loudly when no release function has been provided. Use root_device_register() to allocate and register the root device instead of open coding using a static device. Note that this also fixes a reference leak in the unlikely event that device_register() ever fails. Signed-off-by: Johan Hovold Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/pata_parport/pata_parport.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/ata/pata_parport/pata_parport.c b/drivers/ata/pata_parport/pata_parport.c index a5b959891cb7..40baeac594a9 100644 --- a/drivers/ata/pata_parport/pata_parport.c +++ b/drivers/ata/pata_parport/pata_parport.c @@ -459,19 +459,11 @@ static void pata_parport_dev_release(struct device *dev) kfree(pi); } -static void pata_parport_bus_release(struct device *dev) -{ - /* nothing to do here but required to avoid warning on device removal */ -} - static const struct bus_type pata_parport_bus_type = { .name = DRV_NAME, }; -static struct device pata_parport_bus = { - .init_name = DRV_NAME, - .release = pata_parport_bus_release, -}; +static struct device *pata_parport_bus; static const struct scsi_host_template pata_parport_sht = { PATA_PARPORT_SHT("pata_parport") @@ -518,7 +510,7 @@ static struct pi_adapter *pi_init_one(struct parport *parport, } /* set up pi->dev before pi_probe_unit() so it can use dev_printk() */ - pi->dev.parent = &pata_parport_bus; + pi->dev.parent = pata_parport_bus; pi->dev.bus = &pata_parport_bus_type; pi->dev.driver = &pr->driver; pi->dev.release = pata_parport_dev_release; @@ -780,8 +772,9 @@ static __init int pata_parport_init(void) return error; } - error = device_register(&pata_parport_bus); - if (error) { + pata_parport_bus = root_device_register(DRV_NAME); + if (IS_ERR(pata_parport_bus)) { + error = PTR_ERR(pata_parport_bus); pr_err("failed to register pata_parport bus, error: %d\n", error); goto out_unregister_bus; } @@ -811,7 +804,7 @@ out_remove_del: out_remove_new: bus_remove_file(&pata_parport_bus_type, &bus_attr_new_device); out_unregister_dev: - device_unregister(&pata_parport_bus); + root_device_unregister(pata_parport_bus); out_unregister_bus: bus_unregister(&pata_parport_bus_type); return error; @@ -822,7 +815,7 @@ static __exit void pata_parport_exit(void) parport_unregister_driver(&pata_parport_driver); bus_remove_file(&pata_parport_bus_type, &bus_attr_new_device); bus_remove_file(&pata_parport_bus_type, &bus_attr_delete_device); - device_unregister(&pata_parport_bus); + root_device_unregister(pata_parport_bus); bus_unregister(&pata_parport_bus_type); } -- cgit v1.2.3 From 2454bd74cb989365edda476c4bd1c4e90eacf568 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Fri, 24 Apr 2026 17:59:14 +0000 Subject: MAINTAINERS, mailmap: update Aditya Garg's email address My Outlook email address often sends emails from kernel devs to the junk folder. Also, emails from some addresses (eg suse.de) are not received at all. Update the email to my alternate Proton Mail address. Signed-off-by: Aditya Garg Acked-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/20260424175846.15103-1-gargaditya08@proton.me --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 34acd34bbf9b..35be6d46bde3 100644 --- a/.mailmap +++ b/.mailmap @@ -19,6 +19,7 @@ Abhinav Kumar Ahmad Masri Adam Oldham Adam Radford +Aditya Garg Adriana Reus Adrian Bunk Ajay Kaher diff --git a/MAINTAINERS b/MAINTAINERS index 2fb1c75afd16..a86040ecad91 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7873,7 +7873,7 @@ F: drivers/gpu/drm/sun4i/sun8i* DRM DRIVER FOR APPLE TOUCH BARS M: Aun-Ali Zaidi -M: Aditya Garg +M: Aditya Garg L: dri-devel@lists.freedesktop.org S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git -- cgit v1.2.3 From 711a9c018ad252b2807f85d44e1267b595644f9b Mon Sep 17 00:00:00 2001 From: Rio Liu Date: Wed, 15 Apr 2026 16:57:13 +0000 Subject: wifi: mac80211: skip ieee80211_verify_sta_ht_mcs_support check in non-strict mode Some Xfinity XB8 firmware advertises >1 spatial stream MCS indexes in their basic HT-MCS set. On cards with lower spatial streams, the check would fail, and we'd be stuck with no HT when in fact work fine with its own supported rate. This change makes it so the check is only performed in strict mode. Fixes: 574faa0e936d ("wifi: mac80211: add HT and VHT basic set verification") Signed-off-by: Rio Liu Link: https://patch.msgid.link/99Mv9QEceyPrQhSP52MtAVmz0_kWJmzqotJjD9YW6LGLqk-AZloAueUyHCURilFkuqOh6Ecv8i2KKdSE1ujP3AnbU5QEouVisT1w_V3xdfc=@r26.me Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 160ae65a5c64..298ebff6bbf8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -437,6 +437,15 @@ ieee80211_verify_sta_ht_mcs_support(struct ieee80211_sub_if_data *sdata, memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); + /* + * Some Xfinity XB8 firmware advertises >1 spatial stream MCS indexes in + * their basic HT-MCS set. On cards with lower spatial streams, the check + * would fail, and we'd be stuck with no HT when it in fact work fine with + * its own supported rate. So check it only in strict mode. + */ + if (!ieee80211_hw_check(&sdata->local->hw, STRICT)) + return true; + /* * P802.11REVme/D7.0 - 6.5.4.2.4 * ... -- cgit v1.2.3 From c623b63580880cc742255eaed3d79804c1b91143 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 16 Apr 2026 11:33:39 +0200 Subject: wifi: brcmfmac: Fix potential use-after-free issue when stopping watchdog task Watchdog task might end between send_sig() and kthread_stop() calls, what results in the use-after-free issue. Fix this by increasing watchdog task reference count before calling send_sig() and dropping it by switching to kthread_stop_put(). Cc: stable@vger.kernel.org Fixes: 373c83a801f1 ("brcmfmac: stop watchdog before detach and free everything") Fixes: a9ffda88be74 ("brcm80211: fmac: abstract bus_stop interface function pointer") Signed-off-by: Marek Szyprowski Acked-by: Arend van Spriel Link: https://patch.msgid.link/20260416093339.2066829-1-m.szyprowski@samsung.com Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 30f6fcb68632..8fb595733b9c 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -2476,8 +2476,9 @@ static void brcmf_sdio_bus_stop(struct device *dev) brcmf_dbg(TRACE, "Enter\n"); if (bus->watchdog_tsk) { + get_task_struct(bus->watchdog_tsk); send_sig(SIGTERM, bus->watchdog_tsk, 1); - kthread_stop(bus->watchdog_tsk); + kthread_stop_put(bus->watchdog_tsk); bus->watchdog_tsk = NULL; } @@ -4567,8 +4568,9 @@ void brcmf_sdio_remove(struct brcmf_sdio *bus) if (bus) { /* Stop watchdog task */ if (bus->watchdog_tsk) { + get_task_struct(bus->watchdog_tsk); send_sig(SIGTERM, bus->watchdog_tsk, 1); - kthread_stop(bus->watchdog_tsk); + kthread_stop_put(bus->watchdog_tsk); bus->watchdog_tsk = NULL; } -- cgit v1.2.3 From 1f4f78bf8549e6ac4f04fba4176854f3a6e0c332 Mon Sep 17 00:00:00 2001 From: Tristan Madani Date: Fri, 17 Apr 2026 11:11:44 +0000 Subject: wifi: b43: enforce bounds check on firmware key index in b43_rx() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The firmware-controlled key index in b43_rx() can exceed the dev->key[] array size (58 entries). The existing B43_WARN_ON is non-enforcing in production builds, allowing an out-of-bounds read. Make the B43_WARN_ON check enforcing by dropping the frame when the firmware returns an invalid key index. Suggested-by: Jonas Gorski Acked-by: Michael Büsch Fixes: e4d6b7951812 ("[B43]: add mac80211-based driver for modern BCM43xx devices") Cc: stable@vger.kernel.org Signed-off-by: Tristan Madani Link: https://patch.msgid.link/20260417111145.2694196-1-tristmd@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/b43/xmit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43/xmit.c b/drivers/net/wireless/broadcom/b43/xmit.c index 7651b1bdb592..f0b082596637 100644 --- a/drivers/net/wireless/broadcom/b43/xmit.c +++ b/drivers/net/wireless/broadcom/b43/xmit.c @@ -702,7 +702,8 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr) * key index, but the ucode passed it slightly different. */ keyidx = b43_kidx_to_raw(dev, keyidx); - B43_WARN_ON(keyidx >= ARRAY_SIZE(dev->key)); + if (B43_WARN_ON(keyidx >= ARRAY_SIZE(dev->key))) + goto drop; if (dev->key[keyidx].algorithm != B43_SEC_ALGO_NONE) { wlhdr_len = ieee80211_hdrlen(fctl); -- cgit v1.2.3 From a035766f970bde2d4298346a31a80685be5c0205 Mon Sep 17 00:00:00 2001 From: Tristan Madani Date: Fri, 17 Apr 2026 11:11:45 +0000 Subject: wifi: b43legacy: enforce bounds check on firmware key index in RX path Same fix as b43: the firmware-controlled key index in b43legacy_rx() can exceed dev->max_nr_keys. The existing B43legacy_WARN_ON is non-enforcing in production builds, allowing an out-of-bounds read of dev->key[]. Make the check enforcing by dropping the frame for invalid indices. Fixes: 75388acd0cd8 ("[B43LEGACY]: add mac80211-based driver for legacy BCM43xx devices") Cc: stable@vger.kernel.org Signed-off-by: Tristan Madani Link: https://patch.msgid.link/20260417111145.2694196-2-tristmd@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/b43legacy/xmit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43legacy/xmit.c b/drivers/net/wireless/broadcom/b43legacy/xmit.c index efd63f4ce74f..ee199d4eaf03 100644 --- a/drivers/net/wireless/broadcom/b43legacy/xmit.c +++ b/drivers/net/wireless/broadcom/b43legacy/xmit.c @@ -476,7 +476,8 @@ void b43legacy_rx(struct b43legacy_wldev *dev, * key index, but the ucode passed it slightly different. */ keyidx = b43legacy_kidx_to_raw(dev, keyidx); - B43legacy_WARN_ON(keyidx >= dev->max_nr_keys); + if (B43legacy_WARN_ON(keyidx >= dev->max_nr_keys)) + goto drop; if (dev->key[keyidx].algorithm != B43legacy_SEC_ALGO_NONE) { /* Remove PROTECTED flag to mark it as decrypted. */ -- cgit v1.2.3 From 3994b4afd521d60e47e012fe2ed7b606aaec370b Mon Sep 17 00:00:00 2001 From: Amir Mohammad Jahangirzad Date: Sat, 18 Apr 2026 04:12:47 +0330 Subject: wifi: libertas: fix integer underflow in process_cmdrequest() The existing validation only checks if recvlength exceeds LBS_CMD_BUFFER_SIZE, but doesn't check the lower bound. When a USB device sends a response shorter than MESSAGE_HEADER_LEN, the subtraction (recvlength - MESSAGE_HEADER_LEN) wraps to a huge value, causing memcpy to corrupt the heap. Add the same lower bound check that libertas_tf already has. Signed-off-by: Amir Mohammad Jahangirzad Link: https://patch.msgid.link/20260418004247.368944-1-a.jahangirzad@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/libertas/if_usb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index 4fae0e335136..a00d53350fa9 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -633,9 +633,10 @@ static inline void process_cmdrequest(int recvlength, uint8_t *recvbuff, unsigned long flags; u8 i; - if (recvlength > LBS_CMD_BUFFER_SIZE) { + if (recvlength < MESSAGE_HEADER_LEN || + recvlength > LBS_CMD_BUFFER_SIZE) { lbs_deb_usbd(&cardp->udev->dev, - "The receive buffer is too large\n"); + "The receive buffer is invalid: %d\n", recvlength); kfree_skb(skb); return; } -- cgit v1.2.3 From 381cd547bc6e35a610c5dfebe554d891eea40f03 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Tue, 21 Apr 2026 18:45:52 -0400 Subject: wifi: nl80211: require admin perm on SET_PMK / DEL_PMK NL80211_CMD_SET_PMK and NL80211_CMD_DEL_PMK manage the offloaded 4-way-handshake PMK state used by drivers advertising NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X. The only in-tree driver that wires up both ->set_pmk / ->del_pmk and advertises the feature today is brcmfmac, so the practical reach of this patch is narrow. Both ops were introduced without a .flags gate, so the generic netlink layer dispatches them to an unprivileged caller instead of rejecting with -EPERM at the permission check. Every other connection-state op in the adjacent block (CONNECT, ASSOCIATE, AUTHENTICATE, SET_KEY, ...) carries GENL_UNS_ADMIN_PERM; SET_PMK / DEL_PMK were introduced without the flag in 2017 and left unchanged by later refactors. Johannes checked the original Intel submission history and confirmed there is no admin check in any prior revision either, so this seems likely to be a simple oversight rather than an intentional carve-out. Require GENL_UNS_ADMIN_PERM so the genl layer performs the same capable(CAP_NET_ADMIN) check as its siblings. wpa_supplicant already needs CAP_NET_ADMIN for every other nl80211 op it issues, so supplicant operation is unaffected. The worst case the missing gate enables today is an unprivileged local process on a multi-user system invalidating the offloaded PMK state of another user's 4-way-handshake session, forcing a full EAP re-auth on the next reconnect. Verified in UML: an unprivileged probe (uid=1000) sees SET_MULTICAST_TO_UNICAST (sibling op with GENL_UNS_ADMIN_PERM) return -EPERM on both pre- and post-fix kernels, while SET_PMK / DEL_PMK return -ENODEV from nl80211_pre_doit()'s wdev lookup pre- fix (proving dispatch crossed the genl permission check) and -EPERM post-fix (rejected at the genl layer as intended). Suggested-by: Johannes Berg Fixes: 3a00df5707b6 ("cfg80211: support 4-way handshake offloading for 802.1X") Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Acked-by: Arend van Spriel Link: https://patch.msgid.link/20260421224552.4044147-1-michael.bommarito@gmail.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f334cdef8958..67088804dcc7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -19828,6 +19828,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .cmd = NL80211_CMD_SET_PMK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_pmk, + .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, @@ -19835,6 +19836,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .cmd = NL80211_CMD_DEL_PMK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_pmk, + .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { -- cgit v1.2.3 From 9b55d5c1f5e481e391957f9096d798ca331c461b Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Tue, 21 Apr 2026 20:06:51 -0400 Subject: wifi: mac80211: check ieee80211_rx_data_set_link return in pubsta MLO path __ieee80211_rx_handle_packet() resolves the link via ieee80211_rx_data_set_link() on the pubsta->mlo path but ignores the helper's return value. Inside the helper, rx->link = rcu_dereference(rx->sdata->link[link_id]); can leave rx->link NULL if link_id references a slot already cleared by ieee80211_vif_set_links() during station-initiated ML reconfiguration (see mlme.c's ieee80211_ml_reconfiguration(), which invalidates sdata->link[] before the matching ieee80211_sta_remove_link() loop walks the link-sta hash). RX dispatch still resolves a link_sta from the hash and then drops into ieee80211_prepare_and_rx_handle(), which dereferences link->conf->addr. Every other user site of ieee80211_rx_data_set_link() checks the return and bails on failure; only this branch did not. Mirror the safe pattern. Fixes: e66b7920aa5a ("wifi: mac80211: fix initialization of rx->link and rx->link_sta") Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260422000651.4184602-1-michael.bommarito@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3e5d1c47a5b0..5a92413a911f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -5380,7 +5380,9 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (!link_sta) goto out; - ieee80211_rx_data_set_link(&rx, link_sta->link_id); + if (!ieee80211_rx_data_set_link(&rx, + link_sta->link_id)) + goto out; } if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) -- cgit v1.2.3 From 7a5b81e0c87a075afd572f659d8eb68c9c4cd2ba Mon Sep 17 00:00:00 2001 From: Catherine Date: Fri, 24 Apr 2026 21:14:36 +0800 Subject: wifi: mac80211: drop stray 'static' from fast-RX rx_result ieee80211_invoke_fast_rx() is documented as safe for parallel RX, but its per-invocation rx_result is declared static. Concurrent callers then share one instance and can overwrite each other's result between ieee80211_rx_mesh_data() and the switch on res. That can make a packet that was queued or consumed by ieee80211_rx_mesh_data() fall through into ieee80211_rx_8023(), or make a packet that should continue return as queued. Make res an automatic variable so each invocation keeps its own result. Fixes: 3468e1e0c639 ("wifi: mac80211: add mesh fast-rx support") Cc: stable@vger.kernel.org Signed-off-by: Catherine Link: https://patch.msgid.link/20260424131435.83212-2-enderaoelyther@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5a92413a911f..d18e962126ce 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4971,7 +4971,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, struct sk_buff *skb = rx->skb; struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - static ieee80211_rx_result res; + ieee80211_rx_result res; int orig_len = skb->len; int hdrlen = ieee80211_hdrlen(hdr->frame_control); int snap_offs = hdrlen; -- cgit v1.2.3 From 13917f71a95a6ee6132ecb9a544cd5bc7197c68d Mon Sep 17 00:00:00 2001 From: Ziran Zhang Date: Sat, 25 Apr 2026 22:29:43 +0800 Subject: docs: isofs: replace dead ECMA-119 FTP link The original link is no longer valid. Replace it with the official PDF of the 2nd edition. The new link points to the exact 2nd edition that the existing comment in isofs.rst refers to. Signed-off-by: Ziran Zhang Link: https://patch.msgid.link/20260425142943.6809-1-zhangcoder@yeah.net Signed-off-by: Jan Kara --- Documentation/filesystems/isofs.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/isofs.rst b/Documentation/filesystems/isofs.rst index 08fd469091d4..2a30999b024f 100644 --- a/Documentation/filesystems/isofs.rst +++ b/Documentation/filesystems/isofs.rst @@ -57,7 +57,7 @@ Mount options unique to the isofs filesystem. Recommended documents about ISO 9660 standard are located at: - http://www.y-adagio.com/ -- ftp://ftp.ecma.ch/ecma-st/Ecma-119.pdf +- https://ecma-international.org/wp-content/uploads/ECMA-119_2nd_edition_december_1987.pdf Quoting from the PDF "This 2nd Edition of Standard ECMA-119 is technically identical with ISO 9660.", so it is a valid and gratis substitute of the -- cgit v1.2.3 From 4023b7424ecd5d38cc75b650d6c1bf630ef8cb40 Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Mon, 13 Apr 2026 17:54:59 +0800 Subject: arm64/scs: Fix potential sign extension issue of advance_loc4 The expression (*opcode++ << 24) and exp * code_alignment_factor may overflow signed int and becomes negative. Fix this by casting each byte to u64 before shifting. Also fix the misaligned break statement while we are here. Example of the result can be seen here: Link: https://godbolt.org/z/zhY8d3595 It maybe not a real problem, but could be a issue in future. Fixes: d499e9627d70 ("arm64/scs: Fix handling of advance_loc4") Signed-off-by: Wentao Guan Signed-off-by: Catalin Marinas --- arch/arm64/kernel/pi/patch-scs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/pi/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c index dac568e4a54f..3944ad899021 100644 --- a/arch/arm64/kernel/pi/patch-scs.c +++ b/arch/arm64/kernel/pi/patch-scs.c @@ -196,9 +196,9 @@ static int scs_handle_fde_frame(const struct eh_frame *frame, loc += *opcode++ * code_alignment_factor; loc += (*opcode++ << 8) * code_alignment_factor; loc += (*opcode++ << 16) * code_alignment_factor; - loc += (*opcode++ << 24) * code_alignment_factor; + loc += ((u64)*opcode++ << 24) * code_alignment_factor; size -= 4; - break; + break; case DW_CFA_def_cfa: case DW_CFA_offset_extended: -- cgit v1.2.3 From 0faacc0841d66f3cf51989c10a83f3a82d52ff2c Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Thu, 23 Apr 2026 10:11:31 -0300 Subject: ALSA: hda: cs35l56: Propagate ASP TX source control errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cs35l56_hda_mixer_get() ignores regmap_read() and cs35l56_hda_mixer_put() ignores regmap_update_bits_check(). This makes the ASP TX source controls report success when a regmap access fails. The write path returns no change instead of an error, and the read path continues after a failed read instead of aborting the control callback. Propagate the regmap errors, matching the posture and volume controls in this driver. Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Reviewed-by: Richard Fitzgerald Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260423-alsa-cs35l56-asp-tx-source-errors-v1-1-17ea7c62ec31@gmail.com --- sound/hda/codecs/side-codecs/cs35l56_hda.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/sound/hda/codecs/side-codecs/cs35l56_hda.c b/sound/hda/codecs/side-codecs/cs35l56_hda.c index 1ace4beef508..dc25960a4f23 100644 --- a/sound/hda/codecs/side-codecs/cs35l56_hda.c +++ b/sound/hda/codecs/side-codecs/cs35l56_hda.c @@ -180,11 +180,15 @@ static int cs35l56_hda_mixer_get(struct snd_kcontrol *kcontrol, { struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol); unsigned int reg_val; - int i; + int i, ret; cs35l56_hda_wait_dsp_ready(cs35l56); - regmap_read(cs35l56->base.regmap, kcontrol->private_value, ®_val); + ret = regmap_read(cs35l56->base.regmap, kcontrol->private_value, + ®_val); + if (ret) + return ret; + reg_val &= CS35L56_ASP_TXn_SRC_MASK; for (i = 0; i < CS35L56_NUM_INPUT_SRC; ++i) { @@ -203,15 +207,20 @@ static int cs35l56_hda_mixer_put(struct snd_kcontrol *kcontrol, struct cs35l56_hda *cs35l56 = snd_kcontrol_chip(kcontrol); unsigned int item = ucontrol->value.enumerated.item[0]; bool changed; + int ret; if (item >= CS35L56_NUM_INPUT_SRC) return -EINVAL; cs35l56_hda_wait_dsp_ready(cs35l56); - regmap_update_bits_check(cs35l56->base.regmap, kcontrol->private_value, - CS35L56_INPUT_MASK, cs35l56_tx_input_values[item], - &changed); + ret = regmap_update_bits_check(cs35l56->base.regmap, + kcontrol->private_value, + CS35L56_INPUT_MASK, + cs35l56_tx_input_values[item], + &changed); + if (ret) + return ret; return changed; } -- cgit v1.2.3 From 58c0ac6125d89bf6ec65a521eaeb52a0e8e20a9f Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 20 Apr 2026 08:42:03 +0000 Subject: iommu/amd: Use maximum Event log buffer size when SNP is enabled on Family 0x19 Due to CVE-2023-20585, the Event log buffer must use the maximum supported size (512K) on Milan/Genoa (Family 0x19) systems when SNP is enabled, to mitigate a potential security vulnerability. All other systems continue to use the default Event log buffer size (8K). Apply the errata fix by making the following changes: * Introduce new global variable (amd_iommu_evtlog_size) to have event log buffer size. Adjust variable size for family 0x19. * Since 'iommu_snp_enable()' must be called after the core IOMMU subsystem is initialized, it cannot be moved to the early init stage. The SNP errata must also be applied after the 'iommu_snp_enable()' check. Therefore, 'alloc_event_buffer()' and 'iommu_enable_event_buffer()' are now called in the IOMMU_ENABLED state, after the errata is applied. * Adjust alloc_event_buffer() and iommu_enable_event_buffer() to handle all IOMMU instances. * Also rename EVT_* macros to make it more readable. Link: https://www.amd.com/en/resources/product-security/bulletin/amd-sb-3016.html Cc: Borislav Petkov Cc: Suravee Suthikulpanit Cc: Joerg Roedel Signed-off-by: Vasant Hegde Tested-by: Dheeraj Kumar Srivastava Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 2 + drivers/iommu/amd/amd_iommu_types.h | 10 +++- drivers/iommu/amd/init.c | 110 +++++++++++++++++++++++++----------- drivers/iommu/amd/iommu.c | 2 +- 4 files changed, 86 insertions(+), 38 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 1342e764a548..f1c486dcf0f3 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -11,6 +11,8 @@ #include "amd_iommu_types.h" +extern int amd_iommu_evtlog_size; + irqreturn_t amd_iommu_int_thread(int irq, void *data); irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data); irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index c685d3771436..c3430c09bc5c 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -141,7 +142,6 @@ #define MMIO_STATUS_GALOG_INT_MASK BIT(10) /* event logging constants */ -#define EVENT_ENTRY_SIZE 0x10 #define EVENT_TYPE_SHIFT 28 #define EVENT_TYPE_MASK 0xf #define EVENT_TYPE_ILL_DEV 0x1 @@ -259,8 +259,12 @@ #define MMIO_CMD_BUFFER_TAIL(x) FIELD_GET(MMIO_CMD_TAIL_MASK, (x)) /* constants for event buffer handling */ -#define EVT_BUFFER_SIZE 8192 /* 512 entries */ -#define EVT_LEN_MASK (0x9ULL << 56) +#define EVTLOG_ENTRY_SIZE 0x10 +#define EVTLOG_SIZE_SHIFT 56 +#define EVTLOG_SIZE_DEF SZ_8K /* 512 entries */ +#define EVTLOG_LEN_MASK_DEF (0x9ULL << EVTLOG_SIZE_SHIFT) +#define EVTLOG_SIZE_MAX SZ_512K /* 32K entries */ +#define EVTLOG_LEN_MASK_MAX (0xFULL << EVTLOG_SIZE_SHIFT) /* Constants for PPR Log handling */ #define PPR_LOG_ENTRIES 512 diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 56ad020df494..d8dc5c6db29d 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -132,6 +132,8 @@ struct ivhd_entry { u8 uid; } __attribute__((packed)); +int amd_iommu_evtlog_size = EVTLOG_SIZE_DEF; + /* * An AMD IOMMU memory definition structure. It defines things like exclusion * ranges for devices and regions that should be unity mapped. @@ -865,35 +867,47 @@ void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, } /* allocates the memory where the IOMMU will log its events to */ -static int __init alloc_event_buffer(struct amd_iommu *iommu) +static int __init alloc_event_buffer(void) { - iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL, - EVT_BUFFER_SIZE); + struct amd_iommu *iommu; - return iommu->evt_buf ? 0 : -ENOMEM; + for_each_iommu(iommu) { + iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL, + amd_iommu_evtlog_size); + if (!iommu->evt_buf) + return -ENOMEM; + } + + return 0; } -static void iommu_enable_event_buffer(struct amd_iommu *iommu) +static void iommu_enable_event_buffer(void) { + struct amd_iommu *iommu; u64 entry; - BUG_ON(iommu->evt_buf == NULL); + for_each_iommu(iommu) { + BUG_ON(iommu->evt_buf == NULL); - if (!is_kdump_kernel()) { - /* - * Event buffer is re-used for kdump kernel and setting - * of MMIO register is not required. - */ - entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; - memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, - &entry, sizeof(entry)); - } + if (!is_kdump_kernel()) { + /* + * Event buffer is re-used for kdump kernel and setting + * of MMIO register is not required. + */ + entry = iommu_virt_to_phys(iommu->evt_buf); + entry |= (amd_iommu_evtlog_size == EVTLOG_SIZE_DEF) ? + EVTLOG_LEN_MASK_DEF : EVTLOG_LEN_MASK_MAX; + + memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, + &entry, sizeof(entry)); + } - /* set head and tail to zero manually */ - writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); - writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); + /* set head and tail to zero manually */ + writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); - iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); + iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); + } } /* @@ -984,15 +998,20 @@ static int __init alloc_cwwb_sem(struct amd_iommu *iommu) return 0; } -static int __init remap_event_buffer(struct amd_iommu *iommu) +static int __init remap_event_buffer(void) { + struct amd_iommu *iommu; u64 paddr; pr_info_once("Re-using event buffer from the previous kernel\n"); - paddr = readq(iommu->mmio_base + MMIO_EVT_BUF_OFFSET) & PM_ADDR_MASK; - iommu->evt_buf = iommu_memremap(paddr, EVT_BUFFER_SIZE); + for_each_iommu(iommu) { + paddr = readq(iommu->mmio_base + MMIO_EVT_BUF_OFFSET) & PM_ADDR_MASK; + iommu->evt_buf = iommu_memremap(paddr, amd_iommu_evtlog_size); + if (!iommu->evt_buf) + return -ENOMEM; + } - return iommu->evt_buf ? 0 : -ENOMEM; + return 0; } static int __init remap_command_buffer(struct amd_iommu *iommu) @@ -1044,10 +1063,6 @@ static int __init alloc_iommu_buffers(struct amd_iommu *iommu) ret = remap_command_buffer(iommu); if (ret) return ret; - - ret = remap_event_buffer(iommu); - if (ret) - return ret; } else { ret = alloc_cwwb_sem(iommu); if (ret) @@ -1056,10 +1071,6 @@ static int __init alloc_iommu_buffers(struct amd_iommu *iommu) ret = alloc_command_buffer(iommu); if (ret) return ret; - - ret = alloc_event_buffer(iommu); - if (ret) - return ret; } return 0; @@ -2893,7 +2904,6 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_init_flags(iommu); iommu_set_device_table(iommu); iommu_enable_command_buffer(iommu); - iommu_enable_event_buffer(iommu); iommu_set_exclusion_range(iommu); iommu_enable_gt(iommu); iommu_enable_ga(iommu); @@ -2957,7 +2967,6 @@ static void early_enable_iommus(void) iommu_disable_event_buffer(iommu); iommu_disable_irtcachedis(iommu); iommu_enable_command_buffer(iommu); - iommu_enable_event_buffer(iommu); iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); @@ -3070,6 +3079,7 @@ static void amd_iommu_resume(void *data) for_each_iommu(iommu) early_enable_iommu(iommu); + iommu_enable_event_buffer(); amd_iommu_enable_interrupts(); } @@ -3399,6 +3409,23 @@ disable_snp: #endif } +static void amd_iommu_apply_erratum_snp(void) +{ +#ifdef CONFIG_KVM_AMD_SEV + if (!amd_iommu_snp_en) + return; + + /* Errata fix for Family 0x19 */ + if (boot_cpu_data.x86 != 0x19) + return; + + /* Set event log buffer size to max */ + amd_iommu_evtlog_size = EVTLOG_SIZE_MAX; + pr_info("Applying erratum: Increase Event log size to 0x%x\n", + amd_iommu_evtlog_size); +#endif +} + /**************************************************************************** * * AMD IOMMU Initialization State Machine @@ -3435,6 +3462,21 @@ static int __init state_next(void) case IOMMU_ENABLED: register_syscore(&amd_iommu_syscore); iommu_snp_enable(); + + amd_iommu_apply_erratum_snp(); + + /* Allocate/enable event log buffer */ + if (is_kdump_kernel()) + ret = remap_event_buffer(); + else + ret = alloc_event_buffer(); + + if (ret) { + init_state = IOMMU_INIT_ERROR; + break; + } + iommu_enable_event_buffer(); + ret = amd_iommu_init_pci(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; break; @@ -4037,7 +4079,7 @@ int amd_iommu_snp_disable(void) return 0; for_each_iommu(iommu) { - ret = iommu_make_shared(iommu->evt_buf, EVT_BUFFER_SIZE); + ret = iommu_make_shared(iommu->evt_buf, amd_iommu_evtlog_size); if (ret) return ret; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 01171361f9bc..157505d96fdd 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1010,7 +1010,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) iommu_print_event(iommu, iommu->evt_buf + head); /* Update head pointer of hardware ring-buffer */ - head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE; + head = (head + EVTLOG_ENTRY_SIZE) % amd_iommu_evtlog_size; writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); } -- cgit v1.2.3 From 1f44aab79bac31f459422dfb213e907bb386509c Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 20 Apr 2026 08:42:04 +0000 Subject: iommu/amd: Use maximum PPR log buffer size when SNP is enabled on Family 0x19 Due to CVE-2023-20585, the PPR log buffer must use the maximum supported size (512K) on Genoa (Family 0x19, model >= 0x10) systems when SNP is enabled, to mitigate a potential security vulnerability. Note that Family 0x19 models below 0x10 (Milan) do not support PPR when SNP is enabled. Hence the PPR log size increase is only applied for model >= 0x10. All other systems continue to use the default PPR log buffer size (8K). Apply the errata fix by making the following changes: - Introduce global new variable (amd_iommu_pprlog_size) to have PPR log buffer size. Adjust variable size for Genoa family. - Extend 'amd_iommu_apply_erratum_snp()' to also set the PPR log buffer size to maximum for Family 0x19 model >= 0x10 when SNP is enabled. - Rename PPR_* macros to make it more readable. Link: https://www.amd.com/en/resources/product-security/bulletin/amd-sb-3016.html Cc: Borislav Petkov Cc: Suravee Suthikulpanit Cc: Joerg Roedel Signed-off-by: Vasant Hegde Tested-by: Dheeraj Kumar Srivastava Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/amd_iommu_types.h | 11 ++++++----- drivers/iommu/amd/init.c | 13 ++++++++++++- drivers/iommu/amd/ppr.c | 8 +++++--- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index f1c486dcf0f3..834d8fabfba3 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -12,6 +12,7 @@ #include "amd_iommu_types.h" extern int amd_iommu_evtlog_size; +extern int amd_iommu_pprlog_size; irqreturn_t amd_iommu_int_thread(int irq, void *data); irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index c3430c09bc5c..f9f718087893 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -267,11 +267,12 @@ #define EVTLOG_LEN_MASK_MAX (0xFULL << EVTLOG_SIZE_SHIFT) /* Constants for PPR Log handling */ -#define PPR_LOG_ENTRIES 512 -#define PPR_LOG_SIZE_SHIFT 56 -#define PPR_LOG_SIZE_512 (0x9ULL << PPR_LOG_SIZE_SHIFT) -#define PPR_ENTRY_SIZE 16 -#define PPR_LOG_SIZE (PPR_ENTRY_SIZE * PPR_LOG_ENTRIES) +#define PPRLOG_ENTRY_SIZE 0x10 +#define PPRLOG_SIZE_SHIFT 56 +#define PPRLOG_SIZE_DEF SZ_8K /* 512 entries */ +#define PPRLOG_LEN_MASK_DEF (0x9ULL << PPRLOG_SIZE_SHIFT) +#define PPRLOG_SIZE_MAX SZ_512K /* 32K entries */ +#define PPRLOG_LEN_MASK_MAX (0xFULL << PPRLOG_SIZE_SHIFT) /* PAGE_SERVICE_REQUEST PPR Log Buffer Entry flags */ #define PPR_FLAG_EXEC 0x002 /* Execute permission requested */ diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index d8dc5c6db29d..3bdb380d23e9 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -133,6 +133,7 @@ struct ivhd_entry { } __attribute__((packed)); int amd_iommu_evtlog_size = EVTLOG_SIZE_DEF; +int amd_iommu_pprlog_size = PPRLOG_SIZE_DEF; /* * An AMD IOMMU memory definition structure. It defines things like exclusion @@ -3423,6 +3424,16 @@ static void amd_iommu_apply_erratum_snp(void) amd_iommu_evtlog_size = EVTLOG_SIZE_MAX; pr_info("Applying erratum: Increase Event log size to 0x%x\n", amd_iommu_evtlog_size); + + /* + * Set PPR log buffer size to max. + * (Family 0x19, model < 0x10 doesn't support PPR when SNP is enabled). + */ + if (boot_cpu_data.x86_model >= 0x10) { + amd_iommu_pprlog_size = PPRLOG_SIZE_MAX; + pr_info("Applying erratum: Increase PPR log size to 0x%x\n", + amd_iommu_pprlog_size); + } #endif } @@ -4083,7 +4094,7 @@ int amd_iommu_snp_disable(void) if (ret) return ret; - ret = iommu_make_shared(iommu->ppr_log, PPR_LOG_SIZE); + ret = iommu_make_shared(iommu->ppr_log, amd_iommu_pprlog_size); if (ret) return ret; diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c index e6767c057d01..1f8d2823bea4 100644 --- a/drivers/iommu/amd/ppr.c +++ b/drivers/iommu/amd/ppr.c @@ -20,7 +20,7 @@ int __init amd_iommu_alloc_ppr_log(struct amd_iommu *iommu) { iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, - PPR_LOG_SIZE); + amd_iommu_pprlog_size); return iommu->ppr_log ? 0 : -ENOMEM; } @@ -33,7 +33,9 @@ void amd_iommu_enable_ppr_log(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_PPR_EN); - entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; + entry = iommu_virt_to_phys(iommu->ppr_log); + entry |= (amd_iommu_pprlog_size == PPRLOG_SIZE_DEF) ? + PPRLOG_LEN_MASK_DEF : PPRLOG_LEN_MASK_MAX; memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, &entry, sizeof(entry)); @@ -201,7 +203,7 @@ void amd_iommu_poll_ppr_log(struct amd_iommu *iommu) raw[0] = raw[1] = 0UL; /* Update head pointer of hardware ring-buffer */ - head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; + head = (head + PPRLOG_ENTRY_SIZE) % amd_iommu_pprlog_size; writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); /* Handle PPR entry */ -- cgit v1.2.3 From 901ac0ff15edf9503162e2cf6579bd11a30f1ed4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 24 Apr 2026 13:21:55 +0200 Subject: ALSA: pcm: oss: Fix data race at accessing runtime.oss.trigger Currently the runtime.oss.trigger field may be accessed concurrently without protection, which may lead to the data race. And, in this case, it may lead to more severe problem because it's a bit field; as writing the data, it may overwrite other bit fields as well, which confuses the operation completely, as spotted by fuzzing. Fix it by covering runtime.oss.trigger bit fled also with the existing params_lock mutex in both snd_pcm_oss_get_trigger() and snd_pcm_oss_poll(). Reported-and-tested-by: Jaeyoung Chung Closes: https://lore.kernel.org/20260423145330.210035-1-jjy600901@snu.ac.kr Cc: Link: https://patch.msgid.link/20260424112205.123703-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_oss.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index a140a0d9abb8..33fd34f0d615 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -2155,10 +2155,16 @@ static int snd_pcm_oss_get_trigger(struct snd_pcm_oss_file *pcm_oss_file) psubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; csubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; - if (psubstream && psubstream->runtime && psubstream->runtime->oss.trigger) - result |= PCM_ENABLE_OUTPUT; - if (csubstream && csubstream->runtime && csubstream->runtime->oss.trigger) - result |= PCM_ENABLE_INPUT; + if (psubstream && psubstream->runtime) { + guard(mutex)(&psubstream->runtime->oss.params_lock); + if (psubstream->runtime->oss.trigger) + result |= PCM_ENABLE_OUTPUT; + } + if (csubstream && csubstream->runtime) { + guard(mutex)(&csubstream->runtime->oss.params_lock); + if (csubstream->runtime->oss.trigger) + result |= PCM_ENABLE_INPUT; + } return result; } @@ -2832,6 +2838,17 @@ static int snd_pcm_oss_capture_ready(struct snd_pcm_substream *substream) runtime->oss.period_frames; } +static bool need_input_retrigger(struct snd_pcm_runtime *runtime) +{ + bool ret; + + guard(mutex)(&runtime->oss.params_lock); + ret = runtime->oss.trigger; + if (ret) + runtime->oss.trigger = 0; + return ret; +} + static __poll_t snd_pcm_oss_poll(struct file *file, poll_table * wait) { struct snd_pcm_oss_file *pcm_oss_file; @@ -2864,11 +2881,11 @@ static __poll_t snd_pcm_oss_poll(struct file *file, poll_table * wait) snd_pcm_oss_capture_ready(csubstream)) mask |= EPOLLIN | EPOLLRDNORM; } - if (ostate != SNDRV_PCM_STATE_RUNNING && runtime->oss.trigger) { + if (ostate != SNDRV_PCM_STATE_RUNNING && + need_input_retrigger(runtime)) { struct snd_pcm_oss_file ofile; memset(&ofile, 0, sizeof(ofile)); ofile.streams[SNDRV_PCM_STREAM_CAPTURE] = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; - runtime->oss.trigger = 0; snd_pcm_oss_set_trigger(&ofile, PCM_ENABLE_INPUT); } } -- cgit v1.2.3 From e5c33cdc6f402eab8abd36ecf436b22c9d3a8aff Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Fri, 24 Apr 2026 09:48:41 -0300 Subject: ALSA: aloop: Fix peer runtime UAF during format-change stop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit loopback_check_format() may stop the capture side when playback starts with parameters that no longer match a running capture stream. Commit 826af7fa62e3 ("ALSA: aloop: Fix racy access at PCM trigger") moved the peer lookup under cable->lock, but the actual snd_pcm_stop() still runs after dropping that lock. A concurrent close can clear the capture entry from cable->streams[] and detach or free its runtime while the playback trigger path still holds a stale peer substream pointer. Keep a per-cable count of in-flight peer stops before dropping cable->lock, and make free_cable() wait for those stops before detaching the runtime. This preserves the existing behavior while making the peer runtime lifetime explicit. Reported-by: syzbot+8fa95c41eafbc9d2ff6f@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=8fa95c41eafbc9d2ff6f Fixes: 597603d615d2 ("ALSA: introduce the snd-aloop module for the PCM loopback") Cc: stable@vger.kernel.org Suggested-by: Takashi Iwai Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260424-alsa-aloop-peer-stop-uaf-v2-1-94e68101db8a@gmail.com Signed-off-by: Takashi Iwai --- sound/drivers/aloop.c | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index aa0d2fcb1a18..a37a1695f51c 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -99,6 +99,9 @@ struct loopback_ops { struct loopback_cable { spinlock_t lock; struct loopback_pcm *streams[2]; + /* in-flight peer stops running outside cable->lock */ + atomic_t stop_count; + wait_queue_head_t stop_wait; struct snd_pcm_hardware hw; /* flags */ unsigned int valid; @@ -366,8 +369,11 @@ static int loopback_check_format(struct loopback_cable *cable, int stream) return 0; if (stream == SNDRV_PCM_STREAM_CAPTURE) return -EIO; - else if (cruntime->state == SNDRV_PCM_STATE_RUNNING) + else if (cruntime->state == SNDRV_PCM_STATE_RUNNING) { + /* close must not free the peer runtime below */ + atomic_inc(&cable->stop_count); stop_capture = true; + } } setup = get_setup(dpcm_play); @@ -396,8 +402,11 @@ static int loopback_check_format(struct loopback_cable *cable, int stream) } } - if (stop_capture) + if (stop_capture) { snd_pcm_stop(dpcm_capt->substream, SNDRV_PCM_STATE_DRAINING); + if (atomic_dec_and_test(&cable->stop_count)) + wake_up(&cable->stop_wait); + } return 0; } @@ -1049,23 +1058,29 @@ static void free_cable(struct snd_pcm_substream *substream) struct loopback *loopback = substream->private_data; int dev = get_cable_index(substream); struct loopback_cable *cable; + struct loopback_pcm *dpcm; + bool other_alive; cable = loopback->cables[substream->number][dev]; if (!cable) return; - if (cable->streams[!substream->stream]) { - /* other stream is still alive */ - guard(spinlock_irq)(&cable->lock); - cable->streams[substream->stream] = NULL; - } else { - struct loopback_pcm *dpcm = substream->runtime->private_data; - if (cable->ops && cable->ops->close_cable && dpcm) - cable->ops->close_cable(dpcm); - /* free the cable */ - loopback->cables[substream->number][dev] = NULL; - kfree(cable); + scoped_guard(spinlock_irq, &cable->lock) { + cable->streams[substream->stream] = NULL; + other_alive = cable->streams[!substream->stream]; } + + /* Pair with the stop_count increment in loopback_check_format(). */ + wait_event(cable->stop_wait, !atomic_read(&cable->stop_count)); + if (other_alive) + return; + + dpcm = substream->runtime->private_data; + if (cable->ops && cable->ops->close_cable && dpcm) + cable->ops->close_cable(dpcm); + /* free the cable */ + loopback->cables[substream->number][dev] = NULL; + kfree(cable); } static int loopback_jiffies_timer_open(struct loopback_pcm *dpcm) @@ -1260,6 +1275,8 @@ static int loopback_open(struct snd_pcm_substream *substream) goto unlock; } spin_lock_init(&cable->lock); + atomic_set(&cable->stop_count, 0); + init_waitqueue_head(&cable->stop_wait); cable->hw = loopback_pcm_hardware; if (loopback->timer_source) cable->ops = &loopback_snd_timer_ops; -- cgit v1.2.3 From 26265dd69da32d88a88d21987853cec899d9e21f Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Fri, 24 Apr 2026 18:50:10 -0300 Subject: ALSA: usb-audio: Fix UAC3 cluster descriptor size check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The UAC3 cluster descriptor length check in snd_usb_get_audioformat_uac3()was added to make sure that the buffer is large enough for a struct uac3_cluster_header_descriptor before the returned data is cast and used. However, the check uses sizeof(cluster), where cluster is a pointer, not the size of the descriptor header. This makes the validation depend on the architecture pointer size and does not match the intended object size. Check against sizeof(*cluster) instead. Fixes: fb4e2a6e8f28 ("ALSA: usb-audio: Fix out-of-bounds read in snd_usb_get_audioformat_uac3()") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260424-alsa-usb-uac3-cluster-size-v1-1-99a5808898a3@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 2532bf97e05e..6c51226f771b 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -1003,7 +1003,7 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip, * and request Cluster Descriptor */ wLength = le16_to_cpu(hc_header.wLength); - if (wLength < sizeof(cluster)) + if (wLength < sizeof(*cluster)) return NULL; cluster = kzalloc(wLength, GFP_KERNEL); if (!cluster) -- cgit v1.2.3 From f9471dc1a7177f594acf8106cc4a6ab33bf0bcb6 Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Fri, 24 Apr 2026 11:50:51 +0000 Subject: iommu/pages: Fix iommu_pages_flush_incoherent() for non-x86 The dma_sync_single_for_device() function expects a dma_addr_t, but iommu_pages_flush_incoherent() was incorrectly passing a virtual address. Since iommu_pages_start_incoherent() enforces a 1:1 mapping between DMA addresses and physical addresses (checked via WARN_ON), we can convert the virtual address to a physical address before passing it to the DMA API. This also matches the behaviour of the other non-x86 in iommu_pages_free_incoherent(), which uses virt_to_phys(virt); Fixes: 36ae67b13976 ("iommu/pages: Add support for incoherent IOMMU page table walkers") Signed-off-by: Mostafa Saleh Reviewed-by: Lu Baolu Reviewed-by: Pranjal Shrivastava Reviewed-by: Jason Gunthorpe Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-pages.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommu-pages.h b/drivers/iommu/iommu-pages.h index ae9da4f571f6..e9e605b5fa3a 100644 --- a/drivers/iommu/iommu-pages.h +++ b/drivers/iommu/iommu-pages.h @@ -137,7 +137,7 @@ static inline void iommu_pages_flush_incoherent(struct device *dma_dev, void *virt, size_t offset, size_t len) { - dma_sync_single_for_device(dma_dev, (uintptr_t)virt + offset, len, + dma_sync_single_for_device(dma_dev, virt_to_phys(virt) + offset, len, DMA_TO_DEVICE); } void iommu_pages_stop_incoherent_list(struct iommu_pages_list *list, -- cgit v1.2.3 From 597aa74b0e73f5e0c915b5d0c95cb296774589bd Mon Sep 17 00:00:00 2001 From: Yuxuan Qiu Date: Fri, 24 Apr 2026 19:21:07 +0800 Subject: ALSA: hda/realtek: enable mute LED support on ThinkBook 16p On ThinkBook 16p systems the platform mute LED is present and bound to the audio-mute trigger, but it does not react to Master mute changes. The affected fixup chain sets up the DAC routing, but does not enable vmaster mute LED handling. Because of that, the generic HDA code does not mark Master Playback Switch with SNDRV_CTL_ELEM_ACCESS_SPK_LED, and the audio-mute trigger never receives speaker mute updates. Add a ThinkBook-specific wrapper around alc287_fixup_bind_dacs() and enable spec->gen.vmaster_mute_led during PRE_PROBE. This keeps the existing DAC binding logic unchanged while allowing the normal generic LED path to drive the mute LED. Signed-off-by: Yuxuan Qiu Link: https://patch.msgid.link/20260424112107.22206-1-yuxuanqiu596@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index d720565db4aa..5268fefce85e 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -3694,6 +3694,17 @@ static void alc287_fixup_lenovo_thinkpad_with_alc1318(struct hda_codec *codec, spec->power_hook = alc287_s4_power_gpio3_default; spec->gen.pcm_playback_hook = alc287_alc1318_playback_pcm_hook; } + +static void alc287_fixup_tb_vmaster_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) + spec->gen.vmaster_mute_led = 1; + + alc287_fixup_bind_dacs(codec, fix, action); +} /* GPIO2: mute led GPIO3: micmute led */ static void alc245_tas2781_spi_hp_fixup_muteled(struct hda_codec *codec, const struct hda_fixup *fix, int action) @@ -6448,7 +6459,7 @@ static const struct hda_fixup alc269_fixups[] = { }, [ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD] = { .type = HDA_FIXUP_FUNC, - .v.func = alc287_fixup_bind_dacs, + .v.func = alc287_fixup_tb_vmaster_led, .chained = true, .chain_id = ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI, }, -- cgit v1.2.3 From caecde119e341acd9819cbc1c54edf6caa6c6389 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Apr 2026 08:58:57 -0700 Subject: arm64/irqflags: __always_inline the arch_local_irq_*() helpers The arch_local_irq_*() wrappers in dispatch between two underlying primitives: the __daif_* path on most systems, and the __pmr_* path on builds that use GIC PMR-based masking (Pseudo-NMI). The leaf primitives are already __always_inline, but the wrappers themselves are plain "static inline". That is unsafe for noinstr callers: nothing prevents the compiler from emitting an out-of-line copy of e.g. arch_local_irq_disable(), and an out-of-line copy can be instrumented (ftrace, kcov, sanitizers), which breaks the noinstr contract on the entry/idle paths that rely on these helpers. x86 hit and fixed exactly this class of bug in commit 7a745be1cc90 ("x86/entry: __always_inline irqflags for noinstr"). Force-inline all of the arch_local_irq_*() wrappers so they cannot be emitted out-of-line: - arch_local_irq_enable() - arch_local_irq_disable() - arch_local_save_flags() - arch_irqs_disabled_flags() - arch_irqs_disabled() - arch_local_irq_save() - arch_local_irq_restore() The primary motivation is noinstr safety. There is a useful side effect for fleet-wide profiling: when the wrapper is emitted out-of-line, samples taken inside it during the post-WFI IRQ unmask in default_idle_call() are attributed to arch_local_irq_enable rather than default_idle_call(), and the FP-unwinder loses default_idle_call() from the chain. Signed-off-by: Breno Leitao Reviewed-by: Leonardo Bras Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/irqflags.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index d4d7451c2c12..a8cb5a5c93b7 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -40,7 +40,7 @@ static __always_inline void __pmr_local_irq_enable(void) barrier(); } -static inline void arch_local_irq_enable(void) +static __always_inline void arch_local_irq_enable(void) { if (system_uses_irq_prio_masking()) { __pmr_local_irq_enable(); @@ -68,7 +68,7 @@ static __always_inline void __pmr_local_irq_disable(void) barrier(); } -static inline void arch_local_irq_disable(void) +static __always_inline void arch_local_irq_disable(void) { if (system_uses_irq_prio_masking()) { __pmr_local_irq_disable(); @@ -90,7 +90,7 @@ static __always_inline unsigned long __pmr_local_save_flags(void) /* * Save the current interrupt enable state. */ -static inline unsigned long arch_local_save_flags(void) +static __always_inline unsigned long arch_local_save_flags(void) { if (system_uses_irq_prio_masking()) { return __pmr_local_save_flags(); @@ -109,7 +109,7 @@ static __always_inline bool __pmr_irqs_disabled_flags(unsigned long flags) return flags != GIC_PRIO_IRQON; } -static inline bool arch_irqs_disabled_flags(unsigned long flags) +static __always_inline bool arch_irqs_disabled_flags(unsigned long flags) { if (system_uses_irq_prio_masking()) { return __pmr_irqs_disabled_flags(flags); @@ -128,7 +128,7 @@ static __always_inline bool __pmr_irqs_disabled(void) return __pmr_irqs_disabled_flags(__pmr_local_save_flags()); } -static inline bool arch_irqs_disabled(void) +static __always_inline bool arch_irqs_disabled(void) { if (system_uses_irq_prio_masking()) { return __pmr_irqs_disabled(); @@ -160,7 +160,7 @@ static __always_inline unsigned long __pmr_local_irq_save(void) return flags; } -static inline unsigned long arch_local_irq_save(void) +static __always_inline unsigned long arch_local_irq_save(void) { if (system_uses_irq_prio_masking()) { return __pmr_local_irq_save(); @@ -187,7 +187,7 @@ static __always_inline void __pmr_local_irq_restore(unsigned long flags) /* * restore saved IRQ state */ -static inline void arch_local_irq_restore(unsigned long flags) +static __always_inline void arch_local_irq_restore(unsigned long flags) { if (system_uses_irq_prio_masking()) { __pmr_local_irq_restore(flags); -- cgit v1.2.3 From d830631a128b394793811d9de61550d4518906cd Mon Sep 17 00:00:00 2001 From: Naser Al-Asbahi Date: Sat, 25 Apr 2026 17:40:14 +0200 Subject: ALSA: hda/realtek: Add micmute LED quirk for Acer Aspire A315-44P The mic-mute LED on the Acer Aspire A315-44P (subsystem ID 0x10251640) does not light up when the microphone is muted. The LED is connected to GPIO3 of the Realtek ALC256 codec. Add a quirk entry using ALC256_FIXUP_ACER_SFG16_MICMUTE_LED, which configures GPIO3 (bitmask 0x04) as the micmute LED, identical to the Acer Swift SFG16. Tested by manually sending HDA verb commands directly to the codec and verifying that GPIO3 drives the LED while GPIO1 and GPIO2 do not. Signed-off-by: Naser Al-Asbahi Link: https://patch.msgid.link/20260425154014.83982-1-nasserqahtan0@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 5268fefce85e..a9cd03bb73c4 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6728,6 +6728,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x159c, "Acer Nitro 5 AN515-58", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1597, "Acer Nitro 5 AN517-55", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x160e, "Acer PT316-51S", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x1640, "Acer Aspire A315-44P", ALC256_FIXUP_ACER_SFG16_MICMUTE_LED), SND_PCI_QUIRK(0x1025, 0x1679, "Acer Nitro 16 AN16-41", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x169a, "Acer Swift SFG16", ALC256_FIXUP_ACER_SFG16_MICMUTE_LED), SND_PCI_QUIRK(0x1025, 0x171e, "Acer Nitro ANV15-51", ALC245_FIXUP_ACER_MICMUTE_LED), -- cgit v1.2.3 From b795666e1ee4101a949248bfff6074a6dce91824 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 25 Apr 2026 20:03:27 -0400 Subject: ALSA: hda: Remove duplicate cmedia entries in codecs Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kconfiglint reports: M004: 'snd-hda-codec-cmedia-y' assigned with ':=' but was already assigned at line 5; previous value is overwritten sound/hda/codecs/Makefile contains duplicate entries for the C-Media codec driver — both the composite module definition and the obj-* build target appear twice: Line 5: snd-hda-codec-cmedia-y := cmedia.o Line 10: snd-hda-codec-cmedia-y := cmedia.o (duplicate) Line 24: obj-$(CONFIG_SND_HDA_CODEC_CMEDIA) += snd-hda-codec-cmedia.o Line 29: obj-$(CONFIG_SND_HDA_CODEC_CMEDIA) += snd-hda-codec-cmedia.o (duplicate) This file was created by commit 6014e9021b28 ("ALSA: hda: Move codec drivers into sound/hda/codecs directory") which moved codec drivers from sound/pci/hda/ to sound/hda/codecs/. In that initial file, cmedia appeared once in each section. Immediately after, commit aeeb85f26c3b ("ALSA: hda: Split Realtek HD-audio codec driver") reordered the entries and inserted cmedia at new positions near the top of each section, as part of splitting out the Realtek driver. However, the original cmedia entries were not removed during this reordering, creating duplicates of both lines. The second assignment harmlessly overwrites the first with the same value, and the second obj-* line causes the module to be listed twice — neither causes a build failure, but both are dead code. Remove the duplicate entries (second occurrence of each). Assisted-by: Claude:claude-opus-4-6 kconfiglint Signed-off-by: Sasha Levin Link: https://patch.msgid.link/20260426000327.56079-1-sashal@kernel.org Signed-off-by: Takashi Iwai --- sound/hda/codecs/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/hda/codecs/Makefile b/sound/hda/codecs/Makefile index e7f03e281999..88d2f8a79467 100644 --- a/sound/hda/codecs/Makefile +++ b/sound/hda/codecs/Makefile @@ -7,7 +7,6 @@ snd-hda-codec-cm9825-y := cm9825.o snd-hda-codec-analog-y := analog.o snd-hda-codec-ca0110-y := ca0110.o snd-hda-codec-ca0132-y := ca0132.o -snd-hda-codec-cmedia-y := cmedia.o snd-hda-codec-conexant-y := conexant.o snd-hda-codec-idt-y := sigmatel.o snd-hda-codec-senarytech-y := senarytech.o @@ -26,7 +25,6 @@ obj-$(CONFIG_SND_HDA_CODEC_CM9825) += snd-hda-codec-cm9825.o obj-$(CONFIG_SND_HDA_CODEC_ANALOG) += snd-hda-codec-analog.o obj-$(CONFIG_SND_HDA_CODEC_CA0110) += snd-hda-codec-ca0110.o obj-$(CONFIG_SND_HDA_CODEC_CA0132) += snd-hda-codec-ca0132.o -obj-$(CONFIG_SND_HDA_CODEC_CMEDIA) += snd-hda-codec-cmedia.o obj-$(CONFIG_SND_HDA_CODEC_CONEXANT) += snd-hda-codec-conexant.o obj-$(CONFIG_SND_HDA_CODEC_SIGMATEL) += snd-hda-codec-idt.o obj-$(CONFIG_SND_HDA_CODEC_SENARYTECH) += snd-hda-codec-senarytech.o -- cgit v1.2.3 From 110189f0268d0eb85895721526328cac5804a739 Mon Sep 17 00:00:00 2001 From: Rosalie Wanders Date: Sun, 26 Apr 2026 04:55:19 +0200 Subject: ALSA: usb-audio: apply quirk for Playstation PDP Riffmaster This device, just like the Playstation 5's DualSense, has a volume that's too low, hid-playstation solves this by raising the minimum volume on the device itself by sending an output report, third party PS5 controllers/accessories do not support this output report format, so we apply a quirk to raise the minimum volume by 6dB. Signed-off-by: Rosalie Wanders Link: https://patch.msgid.link/20260426025520.3985-2-rosalie@mailbox.org Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 85653112e7f3..5fba456eb4a9 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1190,6 +1190,16 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, cval->res = 1; } break; + + case USB_ID(0x0e6f, 0x024a): /* PDP Riffmaster for PS4 */ + case USB_ID(0x0e6f, 0x0249): /* PDP Riffmaster for PS5 */ + if (!strcmp(kctl->id.name, "PCM Playback Volume")) { + usb_audio_info(chip, + "set volume quirk for PDP Riffmaster for PS4/PS5\n"); + cval->min = -2560; /* Mute under it */ + } + break; + case USB_ID(0x3302, 0x12db): /* MOONDROP Quark2 */ if (!strcmp(kctl->id.name, "PCM Playback Volume")) { usb_audio_info(chip, -- cgit v1.2.3 From d1f73f169c1014463b5060e3f60813e13ddc7b87 Mon Sep 17 00:00:00 2001 From: SeungJu Cheon Date: Sun, 26 Apr 2026 20:12:39 +0900 Subject: sound: ua101: fix division by zero at probe Add a missing sanity check for bNrChannels in detect_usb_format() to prevent a division by zero in playback_urb_complete() and capture_urb_complete(). USB core does not validate class-specific descriptor fields such as bNrChannels, so drivers must verify them before use. If a device provides bNrChannels = 0, frame_bytes becomes zero and is later used as a divisor in the URB completion handlers, leading to a kernel crash. Fixes: 63978ab3e3e9 ("sound: add Edirol UA-101 support") Cc: stable@vger.kernel.org Signed-off-by: SeungJu Cheon Link: https://patch.msgid.link/20260426111239.103296-1-suunj1331@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/misc/ua101.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/usb/misc/ua101.c b/sound/usb/misc/ua101.c index 49b3dd8d827d..d129b42eb979 100644 --- a/sound/usb/misc/ua101.c +++ b/sound/usb/misc/ua101.c @@ -974,6 +974,13 @@ static int detect_usb_format(struct ua101 *ua) ua->capture.channels = fmt_capture->bNrChannels; ua->playback.channels = fmt_playback->bNrChannels; + if (!ua->capture.channels || !ua->playback.channels) { + dev_err(&ua->dev->dev, + "invalid channel count: capture %u, playback %u\n", + ua->capture.channels, ua->playback.channels); + return -EINVAL; + } + ua->capture.frame_bytes = fmt_capture->bSubframeSize * ua->capture.channels; ua->playback.frame_bytes = -- cgit v1.2.3 From 26735dfdd8930d9ef1fa92e590a9bf77726efdf6 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 17 Apr 2026 13:13:31 +0200 Subject: pmdomain: core: Fix detach procedure for virtual devices in genpd If a device is attached to a PM domain through genpd_dev_pm_attach_by_id(), genpd calls pm_runtime_enable() for the corresponding virtual device that it registers. While this avoids boilerplate code in drivers, there is no corresponding call to pm_runtime_disable() in genpd_dev_pm_detach(). This means these virtual devices are typically detached from its genpd, while runtime PM remains enabled for them, which is not how things are designed to work. In worst cases it may lead to critical errors, like a NULL pointer dereference bug in genpd_runtime_suspend(), which was recently reported. For another case, we may end up keeping an unnecessary vote for a performance state for the device. To fix these problems, let's add this missing call to pm_runtime_disable() in genpd_dev_pm_detach(). Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdWapT40hV3c+CSBqFOW05aWcV1a6v_NiJYgoYi0i9_PDQ@mail.gmail.com/ Fixes: 3c095f32a92b ("PM / Domains: Add support for multi PM domains per device to genpd") Cc: stable@vger.kernel.org Tested-by: Geert Uytterhoeven Signed-off-by: Ulf Hansson --- drivers/pmdomain/core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c index 4d32fc676aaf..71e930e80178 100644 --- a/drivers/pmdomain/core.c +++ b/drivers/pmdomain/core.c @@ -3089,6 +3089,7 @@ static const struct bus_type genpd_bus_type = { static void genpd_dev_pm_detach(struct device *dev, bool power_off) { struct generic_pm_domain *pd; + bool is_virt_dev; unsigned int i; int ret = 0; @@ -3098,6 +3099,13 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off) dev_dbg(dev, "removing from PM domain %s\n", pd->name); + /* Check if the device was created by genpd at attach. */ + is_virt_dev = dev->bus == &genpd_bus_type; + + /* Disable runtime PM if we enabled it at attach. */ + if (is_virt_dev) + pm_runtime_disable(dev); + /* Drop the default performance state */ if (dev_gpd_data(dev)->default_pstate) { dev_pm_genpd_set_performance_state(dev, 0); @@ -3123,7 +3131,7 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off) genpd_queue_power_off_work(pd); /* Unregister the device if it was created by genpd. */ - if (dev->bus == &genpd_bus_type) + if (is_virt_dev) device_unregister(dev); } -- cgit v1.2.3 From 7a5f1cd22d47f8ca4b760b6334378ae42c1bd24b Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Sun, 26 Apr 2026 05:49:34 +0530 Subject: ALSA: caiaq: fix usb_dev refcount leak on probe failure create_card() takes a reference on the USB device with usb_get_dev() and stores the matching usb_put_dev() in card_free(), which is installed as the snd_card's ->private_free destructor. However, ->private_free is only assigned near the end of init_card(), after several failure points (usb_set_interface(), EP type checks, usb_submit_urb(), the EP1_CMD_GET_DEVICE_INFO exchange, and its timeout). When any of those fail, init_card() returns an error to snd_probe(), which calls snd_card_free(card). Because ->private_free is still NULL, card_free() never runs, the usb_get_dev() reference is not dropped, and the struct usb_device leaks along with its descriptor allocations and device_private. syzbot reproduces this with a malformed UAC3 device whose only valid altsetting is 0; init_card()'s usb_set_interface(usb_dev, 0, 1) call fails with -EIO and triggers the leak. Move the ->private_free assignment into create_card(), immediately after usb_get_dev(), so that every error path reaching snd_card_free() balances the reference. card_free()'s callees (snd_usb_caiaq_input_free, free_urbs, kfree) already tolerate the partially-initialized state because the chip private area is zero-initialized by snd_card_new(). Fixes: 80bb50e2d459 ("ALSA: caiaq: take a reference on the USB device in create_card()") Reported-by: syzbot+2afd7e71155c7e241560@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2afd7e71155c7e241560 Tested-by: syzbot+2afd7e71155c7e241560@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Deepanshu Kartikey Link: https://patch.msgid.link/20260426001934.70813-1-kartikey406@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/caiaq/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index 8af0c04041ee..ad9f744b496b 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -423,6 +423,7 @@ static int create_card(struct usb_device *usb_dev, cdev = caiaqdev(card); cdev->chip.dev = usb_get_dev(usb_dev); + card->private_free = card_free; cdev->chip.card = card; cdev->chip.usb_id = USB_ID(le16_to_cpu(usb_dev->descriptor.idVendor), le16_to_cpu(usb_dev->descriptor.idProduct)); @@ -511,7 +512,6 @@ static int init_card(struct snd_usb_caiaqdev *cdev) scnprintf(card->longname, sizeof(card->longname), "%s %s (%s)", cdev->vendor_name, cdev->product_name, usbpath); - card->private_free = card_free; err = setup_card(cdev); if (err < 0) return err; -- cgit v1.2.3 From 3ea4415015d690a51a3fb1f98dfc9a02f88f7bc4 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 20 Apr 2026 02:27:13 -0700 Subject: ACPI: arm64: cpuidle: Tolerate platforms with no deep PSCI idle states Commit cac173bea57d ("ACPI: processor: idle: Rework the handling of acpi_processor_ffh_lpi_probe()") moved the acpi_processor_ffh_lpi_probe() call from acpi_processor_setup_cpuidle_dev(), where its return value was ignored, to acpi_processor_get_power_info(), where it is now treated as a hard failure. As a result, platforms where psci_acpi_cpu_init_idle() returned -ENODEV stopped registering any cpuidle states, forcing CPUs to busy-poll when idle. On NVIDIA Grace (aarch64) systems with PSCIv1.1, pr->power.count is 1 (only WFI, no deep PSCI states beyond it), so the previous "count = pr->power.count - 1; if (count <= 0) return -ENODEV;" check returned -ENODEV for all 72 CPUs and disabled cpuidle entirely. The lpi_states count is already validated in acpi_processor_get_lpi_info(), so the check here is redundant. Simplify the loop to iterate over lpi_states[1..power.count). When only WFI is present, the loop body simply does not execute and the function returns 0, which is the correct outcome: there is nothing to validate for FFH and no error to report. Suggested-by: Huisong Li Cc: stable@vger.kernel.org Fixes: cac173bea57d ("ACPI: processor: idle: Rework the handling of acpi_processor_ffh_lpi_probe()") Signed-off-by: Breno Leitao Reviewed-by: Sudeep Holla Signed-off-by: Catalin Marinas --- drivers/acpi/arm64/cpuidle.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/arm64/cpuidle.c b/drivers/acpi/arm64/cpuidle.c index 801f9c450142..c68a5db8ebba 100644 --- a/drivers/acpi/arm64/cpuidle.c +++ b/drivers/acpi/arm64/cpuidle.c @@ -16,7 +16,7 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu) { - int i, count; + int i; struct acpi_lpi_state *lpi; struct acpi_processor *pr = per_cpu(processors, cpu); @@ -30,14 +30,10 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu) if (!psci_ops.cpu_suspend) return -EOPNOTSUPP; - count = pr->power.count - 1; - if (count <= 0) - return -ENODEV; - - for (i = 0; i < count; i++) { + for (i = 1; i < pr->power.count; i++) { u32 state; - lpi = &pr->power.lpi_states[i + 1]; + lpi = &pr->power.lpi_states[i]; /* * Only bits[31:0] represent a PSCI power_state while * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification -- cgit v1.2.3 From ec1fcddb3117d9452210e838fd37389ee61e10e8 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Wed, 8 Apr 2026 14:11:21 +0000 Subject: pmdomain: mediatek: fix use-after-free in scpsys_get_bus_protection_legacy() In scpsys_get_bus_protection_legacy(), of_find_node_with_property() returns a device node with its reference count incremented. The function then calls of_node_put(node) before checking whether syscon_regmap_lookup_by_phandle() returns an error. If an error occurs, dev_err_probe() dereferences the node pointer to print diagnostic information, but the node memory may have already been freed due to the earlier of_node_put(), leading to a use-after-free vulnerability. Fix this by moving the of_node_put() call after the error check, ensuring the node is still valid when accessed in the error path. Fixes: c29345fa5f66 ("pmdomain: mediatek: Refactor bus protection regmaps retrieval") Cc: stable@vger.kernel.org Signed-off-by: Wentao Liang Signed-off-by: Ulf Hansson --- drivers/pmdomain/mediatek/mtk-pm-domains.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pmdomain/mediatek/mtk-pm-domains.c b/drivers/pmdomain/mediatek/mtk-pm-domains.c index d2b8d0332951..e1cfd4223473 100644 --- a/drivers/pmdomain/mediatek/mtk-pm-domains.c +++ b/drivers/pmdomain/mediatek/mtk-pm-domains.c @@ -1015,6 +1015,7 @@ static int scpsys_get_bus_protection_legacy(struct device *dev, struct scpsys *s struct device_node *node, *smi_np; int num_regmaps = 0, i, j; struct regmap *regmap[3]; + int ret = 0; /* * Legacy code retrieves a maximum of three bus protection handles: @@ -1065,11 +1066,14 @@ static int scpsys_get_bus_protection_legacy(struct device *dev, struct scpsys *s if (node) { regmap[2] = syscon_regmap_lookup_by_phandle(node, "mediatek,infracfg-nao"); num_regmaps++; - of_node_put(node); - if (IS_ERR(regmap[2])) - return dev_err_probe(dev, PTR_ERR(regmap[2]), + if (IS_ERR(regmap[2])) { + ret = dev_err_probe(dev, PTR_ERR(regmap[2]), "%pOF: failed to get infracfg regmap\n", node); + of_node_put(node); + return ret; + } + of_node_put(node); } else { regmap[2] = NULL; } -- cgit v1.2.3 From 82d1f01292d3f09bf063f829f8ab8de12b4280a1 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 20 Apr 2026 13:47:26 +0200 Subject: vmalloc: fix buffer overflow in vrealloc_node_align() Commit 4c5d3365882d ("mm/vmalloc: allow to set node and align in vrealloc") added the ability to force a new allocation if the current pointer is on the wrong NUMA node, or if an alignment constraint is not met, even if the user is shrinking the allocation. On this path (need_realloc), the code allocates a new object of 'size' bytes and then memcpy()s 'old_size' bytes into it. If the request is to shrink the object (size < old_size), this results in an out-of-bounds write on the new buffer. Fix this by bounding the copy length by the new allocation size. Link: https://lore.kernel.org/20260420114805.3572606-2-elver@google.com Fixes: 4c5d3365882d ("mm/vmalloc: allow to set node and align in vrealloc") Signed-off-by: Marco Elver Reported-by: Harry Yoo (Oracle) Reviewed-by: Uladzislau Rezki (Sony) Acked-by: Vlastimil Babka (SUSE) Reviewed-by: Harry Yoo (Oracle) Cc: Signed-off-by: Andrew Morton --- mm/vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index aa08651ec0df..c31a8615a832 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -4361,7 +4361,7 @@ need_realloc: return NULL; if (p) { - memcpy(n, p, old_size); + memcpy(n, p, min(size, old_size)); vfree(p); } -- cgit v1.2.3 From 602655067e25030cb68c32a355ba1007a76a0c5a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 20 Apr 2026 18:15:50 +0300 Subject: mailmap: update entry for Dan Carpenter Update my mailmap entry to point to my current email address. Link: https://lore.kernel.org/ab2d502542c24491c191a76494717c340afb9a9b.1776691831.git.error27@gmail.com Signed-off-by: Dan Carpenter Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 34acd34bbf9b..a8fd13adf226 100644 --- a/.mailmap +++ b/.mailmap @@ -207,6 +207,7 @@ Claudiu Beznea Colin Ian King Corey Minyard Damian Hobson-Garcia +Dan Carpenter Dan Carpenter Dan Williams Daniel Borkmann -- cgit v1.2.3 From 0562b572ce591858749fc2f9477567e7e5c5d99f Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Wed, 15 Apr 2026 19:37:38 +0000 Subject: liveupdate: fix return value on session allocation failure When session allocation fails during deserialization, the global 'err' variable was not updated before returning. This caused subsequent calls to luo_session_deserialize() to incorrectly report success. Ensure 'err' is set to the error code from PTR_ERR(session). This ensures that an error is correctly returned to userspace when it attempts to open /dev/liveupdate in the new kernel if deserialization failed. Link: https://lore.kernel.org/20260415193738.515491-1-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Reviewed-by: Pratyush Yadav (Google) Cc: David Matlack Cc: Mike Rapoport Cc: Samiullah Khawaja Signed-off-by: Andrew Morton --- kernel/liveupdate/luo_session.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/liveupdate/luo_session.c b/kernel/liveupdate/luo_session.c index a3327a28fc1f..7a42385dabe2 100644 --- a/kernel/liveupdate/luo_session.c +++ b/kernel/liveupdate/luo_session.c @@ -514,11 +514,12 @@ int luo_session_deserialize(void) { struct luo_session_header *sh = &luo_session_global.incoming; static bool is_deserialized; - static int err; + static int saved_err; + int err; /* If has been deserialized, always return the same error code */ if (is_deserialized) - return err; + return saved_err; is_deserialized = true; if (!sh->active) @@ -547,7 +548,8 @@ int luo_session_deserialize(void) pr_warn("Failed to allocate session [%.*s] during deserialization %pe\n", (int)sizeof(sh->ser[i].name), sh->ser[i].name, session); - return PTR_ERR(session); + err = PTR_ERR(session); + goto save_err; } err = luo_session_insert(sh, session); @@ -555,7 +557,7 @@ int luo_session_deserialize(void) pr_warn("Failed to insert session [%s] %pe\n", session->name, ERR_PTR(err)); luo_session_free(session); - return err; + goto save_err; } scoped_guard(mutex, &session->mutex) { @@ -565,7 +567,7 @@ int luo_session_deserialize(void) if (err) { pr_warn("Failed to deserialize files for session [%s] %pe\n", session->name, ERR_PTR(err)); - return err; + goto save_err; } } @@ -574,6 +576,9 @@ int luo_session_deserialize(void) sh->ser = NULL; return 0; +save_err: + saved_err = err; + return err; } int luo_session_serialize(void) -- cgit v1.2.3 From 9ec95329894864170a1a7685b9a11b739393131a Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 10 Apr 2026 02:03:03 -0700 Subject: kho: fix error handling in kho_add_subtree() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix two error handling issues in kho_add_subtree(), where it doesn't handle the error path correctly. 1. If fdt_setprop() fails after the subnode has been created, the subnode is not removed. This leaves an incomplete node in the FDT (missing "preserved-data" or "blob-size" properties). 2. The fdt_setprop() return value (an FDT error code) is stored directly in err and returned to the caller, which expects -errno. Fix both by storing fdt_setprop() results in fdt_err, jumping to a new out_del_node label that removes the subnode on failure, and only setting err = 0 on the success path, otherwise returning -ENOMEM (instead of FDT_ERR_ errors that would come from fdt_setprop). No user-visible changes. This patch fixes error handling in the KHO (Kexec HandOver) subsystem, which is used to preserve data across kexec reboots. The fix only affects a rare failure path during kexec preparation — specifically when the kernel runs out of space in the Flattened Device Tree buffer while registering preserved memory regions. In the unlikely event that this error path was triggered, the old code would leave a malformed node in the device tree and return an incorrect error code to the calling subsystem, which could lead to confusing log messages or incorrect recovery decisions. With this fix, the incomplete node is properly cleaned up and the appropriate errno value is propagated, this error code is not returned to the user. Link: https://lore.kernel.org/20260410-kho_fix_send-v2-1-1b4debf7ee08@debian.org Fixes: 3dc92c311498 ("kexec: add Kexec HandOver (KHO) generation helpers") Signed-off-by: Breno Leitao Suggested-by: Pratyush Yadav Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Breno Leitao Cc: Pasha Tatashin Cc: Signed-off-by: Andrew Morton --- kernel/liveupdate/kexec_handover.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index 94762de1fe5f..18509d8082ea 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -762,19 +762,24 @@ int kho_add_subtree(const char *name, void *blob, size_t size) goto out_pack; } - err = fdt_setprop(root_fdt, off, KHO_SUB_TREE_PROP_NAME, - &phys, sizeof(phys)); - if (err < 0) - goto out_pack; + fdt_err = fdt_setprop(root_fdt, off, KHO_SUB_TREE_PROP_NAME, + &phys, sizeof(phys)); + if (fdt_err < 0) + goto out_del_node; - err = fdt_setprop(root_fdt, off, KHO_SUB_TREE_SIZE_PROP_NAME, - &size_u64, sizeof(size_u64)); - if (err < 0) - goto out_pack; + fdt_err = fdt_setprop(root_fdt, off, KHO_SUB_TREE_SIZE_PROP_NAME, + &size_u64, sizeof(size_u64)); + if (fdt_err < 0) + goto out_del_node; WARN_ON_ONCE(kho_debugfs_blob_add(&kho_out.dbg, name, blob, size, false)); + err = 0; + goto out_pack; + +out_del_node: + fdt_del_node(root_fdt, off); out_pack: fdt_pack(root_fdt); -- cgit v1.2.3 From 0437906841d0448121a7907b71b73c6cf2fc7afb Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 26 Mar 2026 16:46:29 -0700 Subject: mm: start background writeback based on per-wb threshold for strictlimit BDIs The proactive nr_dirty > gdtc->bg_thresh check in balance_dirty_pages() only checks the global dirty threshold to start background writeback while the writer is still free-running, but for strictlimit BDIs (eg fuse), the per-wb dirty count can exceed the per-wb background threshold while the global threshold is not yet exceeded, so background writeback for this case never gets proactively started. Add a per-wb threshold check for strictlimit BDIs so that background writeback is started when wb_dirty exceeds wb_bg_thresh, which drains dirty pages before the writer hits the throttle wall, matching the proactive behavior that the global check provides for non-strictlimit BDIs. fio runs on fuse show about a 3-4% improvement in perf for buffered writes: fio --name=writeback_test --ioengine=psync --rw=write --bs=128k \ --size=2G --numjobs=4 --ramp_time=10 --runtime=20 \ --time_based --group_reporting=1 --direct=0 Link: https://lore.kernel.org/20260326234629.840938-2-joannelkoong@gmail.com Signed-off-by: Joanne Koong Reviewed-by: Jan Kara Cc: Matthew Wilcox (Oracle) Cc: Miklos Szeredi Cc: Christoph Hellwig Cc: Johannes Weiner Signed-off-by: Andrew Morton --- mm/page-writeback.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 88cd53d4ba09..833f743f309f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1835,7 +1835,9 @@ static int balance_dirty_pages(struct bdi_writeback *wb, balance_domain_limits(mdtc, strictlimit); } - if (nr_dirty > gdtc->bg_thresh && !writeback_in_progress(wb)) + if (!writeback_in_progress(wb) && + (nr_dirty > gdtc->bg_thresh || + (strictlimit && gdtc->wb_dirty > gdtc->wb_bg_thresh))) wb_start_background_writeback(wb); /* @@ -1862,15 +1864,9 @@ free_running: * Unconditionally start background writeback if it's not * already in progress. We need to do this because the global * dirty threshold check above (nr_dirty > gdtc->bg_thresh) - * doesn't account for these cases: - * - * a) strictlimit BDIs: throttling is calculated using per-wb - * thresholds. The per-wb threshold can be exceeded even when - * nr_dirty < gdtc->bg_thresh - * - * b) memcg-based throttling: memcg uses its own dirty count and - * thresholds and can trigger throttling even when global - * nr_dirty < gdtc->bg_thresh + * doesn't account for the memcg-based throttling case. memcg + * uses its own dirty count and thresholds and can trigger + * throttling even when global nr_dirty < gdtc->bg_thresh * * Writeback needs to be started else the writer stalls in the * throttle loop waiting for dirty pages to be written back -- cgit v1.2.3 From 619eab23e1ce7c97e54bfc5a417306d94b3f6f13 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 21 Apr 2026 11:21:50 +0100 Subject: mm/vma: do not try to unmap a VMA if mmap_prepare() invoked from mmap() The mmap_prepare hook functionality includes the ability to invoke mmap_prepare() from the mmap() hook of existing 'stacked' drivers, that is ones which are capable of calling the mmap hooks of other drivers/file systems (e.g. overlayfs, shm). As part of the mmap_prepare action functionality, we deal with errors by unmapping the VMA should one arise. This works in the usual mmap_prepare case, as we invoke this action at the last moment, when the VMA is established in the maple tree. However, the mmap() hook passes a not-fully-established VMA pointer to the caller (which is the motivation behind the mmap_prepare() work), which is detached. So attempting to unmap a VMA in this state will be problematic, with the most obvious symptom being a warning in vma_mark_detached(), because the VMA is already detached. It's also unncessary - the mmap() handler will clean up the VMA on error. So to fix this issue, this patch propagates whether or not an mmap action is being completed via the compatibility layer or directly. If the former, then we do not attempt VMA cleanup, if the latter, then we do. This patch also updates the userland VMA tests to reflect the change. Link: https://lore.kernel.org/20260421102150.189982-1-ljs@kernel.org Fixes: ac0a3fc9c07d ("mm: add ability to take further action in vm_area_desc") Signed-off-by: Lorenzo Stoakes Reported-by: syzbot+db390288d141a1dccf96@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/69e69734.050a0220.24bfd3.0027.GAE@google.com/ Cc: David Hildenbrand Cc: Jann Horn Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Pedro Falcato Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- mm/util.c | 26 +++++++++++++++++--------- mm/vma.c | 3 ++- tools/testing/vma/include/dup.h | 2 +- tools/testing/vma/include/stubs.h | 3 ++- 5 files changed, 23 insertions(+), 13 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 0b776907152e..af23453e9dbd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4391,7 +4391,7 @@ static inline void mmap_action_map_kernel_pages_full(struct vm_area_desc *desc, int mmap_action_prepare(struct vm_area_desc *desc); int mmap_action_complete(struct vm_area_struct *vma, - struct mmap_action *action); + struct mmap_action *action, bool is_compat); /* Look up the first VMA which exactly match the interval vm_start ... vm_end */ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, diff --git a/mm/util.c b/mm/util.c index 232c3930a662..3cc949a0b7ed 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1232,7 +1232,7 @@ int __compat_vma_mmap(struct vm_area_desc *desc, /* Update the VMA from the descriptor. */ compat_set_vma_from_desc(vma, desc); /* Complete any specified mmap actions. */ - return mmap_action_complete(vma, &desc->action); + return mmap_action_complete(vma, &desc->action, /*is_compat=*/true); } EXPORT_SYMBOL(__compat_vma_mmap); @@ -1389,7 +1389,8 @@ static int call_vma_mapped(struct vm_area_struct *vma) } static int mmap_action_finish(struct vm_area_struct *vma, - struct mmap_action *action, int err) + struct mmap_action *action, int err, + bool is_compat) { size_t len; @@ -1400,8 +1401,12 @@ static int mmap_action_finish(struct vm_area_struct *vma, /* do_munmap() might take rmap lock, so release if held. */ maybe_rmap_unlock_action(vma, action); - if (!err) - return 0; + /* + * If this is invoked from the compatibility layer, post-mmap() hook + * logic will handle cleanup for us. + */ + if (!err || is_compat) + return err; /* * If an error occurs, unmap the VMA altogether and return an error. We @@ -1451,13 +1456,15 @@ EXPORT_SYMBOL(mmap_action_prepare); * mmap_action_complete - Execute VMA descriptor action. * @vma: The VMA to perform the action upon. * @action: The action to perform. + * @is_compat: Is this being invoked from the compatibility layer? * * Similar to mmap_action_prepare(). * - * Return: 0 on success, or error, at which point the VMA will be unmapped. + * Return: 0 on success, or error, at which point the VMA will be unmapped if + * !@is_compat. */ int mmap_action_complete(struct vm_area_struct *vma, - struct mmap_action *action) + struct mmap_action *action, bool is_compat) { int err = 0; @@ -1478,7 +1485,7 @@ int mmap_action_complete(struct vm_area_struct *vma, break; } - return mmap_action_finish(vma, action, err); + return mmap_action_finish(vma, action, err, is_compat); } EXPORT_SYMBOL(mmap_action_complete); #else @@ -1500,7 +1507,8 @@ int mmap_action_prepare(struct vm_area_desc *desc) EXPORT_SYMBOL(mmap_action_prepare); int mmap_action_complete(struct vm_area_struct *vma, - struct mmap_action *action) + struct mmap_action *action, + bool is_compat) { int err = 0; @@ -1517,7 +1525,7 @@ int mmap_action_complete(struct vm_area_struct *vma, break; } - return mmap_action_finish(vma, action, err); + return mmap_action_finish(vma, action, err, is_compat); } EXPORT_SYMBOL(mmap_action_complete); #endif diff --git a/mm/vma.c b/mm/vma.c index 377321b48734..d90791b00a7b 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -2780,7 +2780,8 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr, __mmap_complete(&map, vma); if (have_mmap_prepare && allocated_new) { - error = mmap_action_complete(vma, &desc.action); + error = mmap_action_complete(vma, &desc.action, + /*is_compat=*/false); if (error) return error; } diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h index b4864aad2db0..9e0dfd3a85b0 100644 --- a/tools/testing/vma/include/dup.h +++ b/tools/testing/vma/include/dup.h @@ -1330,7 +1330,7 @@ static inline int __compat_vma_mmap(struct vm_area_desc *desc, /* Update the VMA from the descriptor. */ compat_set_vma_from_desc(vma, desc); /* Complete any specified mmap actions. */ - return mmap_action_complete(vma, &desc->action); + return mmap_action_complete(vma, &desc->action, /*is_compat=*/true); } static inline int compat_vma_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/tools/testing/vma/include/stubs.h b/tools/testing/vma/include/stubs.h index a30b8bc84955..64164e25658f 100644 --- a/tools/testing/vma/include/stubs.h +++ b/tools/testing/vma/include/stubs.h @@ -87,7 +87,8 @@ static inline int mmap_action_prepare(struct vm_area_desc *desc) } static inline int mmap_action_complete(struct vm_area_struct *vma, - struct mmap_action *action) + struct mmap_action *action, + bool is_compat) { return 0; } -- cgit v1.2.3 From 69df7cda05bebbf790f6eb0f2fda5b89f596bd59 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Wed, 22 Apr 2026 13:37:26 +0100 Subject: MAINTAINERS: fix regex pattern in CORE MM category The pattern "include/linux/page[-_]*" matches every file that starts with "page", because it's a regex and not a glob (so it has the meaning of include/linux/page + match [-_] 0+ times). Fix it up into a more regex-correct expression. Doing so reduces CC's drastically in patches that touch pagemap.h (which is maintained as part of PAGE CACHE). As a side-effect, move linux/pageblock-flags.h explicitly under PAGE ALLOCATOR. Link: https://lore.kernel.org/linux-mm/20260422005608.342028-1-fmayle@google.com/ Link: https://lore.kernel.org/20260422123726.517220-1-pfalcato@suse.de Signed-off-by: Pedro Falcato Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand (Arm) Acked-by: Vlastimil Babka (SUSE) Reviewed-by: SeongJae Park Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2fb1c75afd16..d6f017581d54 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16805,7 +16805,7 @@ F: mm/sparse.c F: mm/util.c F: mm/vmpressure.c F: mm/vmstat.c -N: include/linux/page[-_]* +N: include\/linux\/page[-_][a-zA-Z]* MEMORY MANAGEMENT - EXECMEM M: Andrew Morton @@ -16962,6 +16962,7 @@ S: Maintained F: include/linux/compaction.h F: include/linux/gfp.h F: include/linux/page-isolation.h +F: include/linux/pageblock-flags.h F: mm/compaction.c F: mm/debug_page_alloc.c F: mm/debug_page_ref.c -- cgit v1.2.3 From 8f5ce56b76303c55b78a87af996e2e0f8535f979 Mon Sep 17 00:00:00 2001 From: Sang-Heon Jeon Date: Wed, 22 Apr 2026 23:33:53 +0900 Subject: mm/hugetlb_cma: round up per_node before logging it When the user requests a total hugetlb CMA size without per-node specification, hugetlb_cma_reserve() computes per_node from hugetlb_cma_size and the number of nodes that have memory per_node = DIV_ROUND_UP(hugetlb_cma_size, nodes_weight(hugetlb_bootmem_nodes)); The reservation loop later computes size = round_up(min(per_node, hugetlb_cma_size - reserved), PAGE_SIZE << order); So the actually reserved per_node size is multiple of (PAGE_SIZE << order), but the logged per_node is not rounded up, so it may be smaller than the actual reserved size. For example, as the existing comment describes, if a 3 GB area is requested on a machine with 4 NUMA nodes that have memory, 1 GB is allocated on the first three nodes, but the printed log is hugetlb_cma: reserve 3072 MiB, up to 768 MiB per node Round per_node up to (PAGE_SIZE << order) before logging so that the printed log always matches the actual reserved size. No functional change to the actual reservation size, as the following case analysis shows 1. remaining (hugetlb_cma_size - reserved) >= rounded per_node - AS-IS: min() picks unrounded per_node; round_up() returns rounded per_node - TO-BE: min() picks rounded per_node; round_up() returns rounded per_node (no-op) 2. remaining < unrounded per_node - AS-IS: min() picks remaining; round_up() returns round_up(remaining) - TO-BE: min() picks remaining; round_up() returns round_up(remaining) 3. unrounded per_node <= remaining < rounded per_node - AS-IS: min() picks unrounded per_node; round_up() returns rounded per_node - TO-BE: min() picks remaining; round_up() returns round_up(remaining) equals rounded per_node Link: https://lore.kernel.org/20260422143353.852257-1-ekffu200098@gmail.com Fixes: cf11e85fc08c ("mm: hugetlb: optionally allocate gigantic hugepages using cma") # 5.7 Signed-off-by: Sang-Heon Jeon Reviewed-by: Muchun Song Cc: David Hildenbrand Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/hugetlb_cma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c index f83ae4998990..7693ccefd0c6 100644 --- a/mm/hugetlb_cma.c +++ b/mm/hugetlb_cma.c @@ -204,6 +204,7 @@ void __init hugetlb_cma_reserve(void) */ per_node = DIV_ROUND_UP(hugetlb_cma_size, nodes_weight(hugetlb_bootmem_nodes)); + per_node = round_up(per_node, PAGE_SIZE << order); pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n", hugetlb_cma_size / SZ_1M, per_node / SZ_1M); } -- cgit v1.2.3 From 77a50e9652ac3c669c6690088bce97d960f5fd17 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 22 Apr 2026 14:43:10 -0400 Subject: MAINTAINERS: update Liam's email address Switching to private email address. Update all contact information Add an entry to mailmap at the same time. Link: https://lore.kernel.org/20260422184310.2682901-1-liam@infradead.org Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- .mailmap | 1 + MAINTAINERS | 20 ++++++++++---------- include/linux/maple_tree.h | 2 +- lib/maple_tree.c | 2 +- lib/test_maple_tree.c | 4 ++-- tools/testing/radix-tree/maple.c | 2 +- 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/.mailmap b/.mailmap index a8fd13adf226..f1eafd91d2c2 100644 --- a/.mailmap +++ b/.mailmap @@ -496,6 +496,7 @@ Leon Romanovsky Leon Romanovsky Leon Romanovsky Leo Yan +Liam R. Howlett Liam Mark Linas Vepstas Linus Lüssing diff --git a/MAINTAINERS b/MAINTAINERS index d6f017581d54..be2e017b3a9d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15399,7 +15399,7 @@ F: include/net/netns/mctp.h F: net/mctp/ MAPLE TREE -M: Liam R. Howlett +M: Liam R. Howlett R: Alice Ryhl R: Andrew Ballance L: maple-tree@lists.infradead.org @@ -16759,7 +16759,7 @@ MEMORY MANAGEMENT - CORE M: Andrew Morton M: David Hildenbrand R: Lorenzo Stoakes -R: Liam R. Howlett +R: Liam R. Howlett R: Vlastimil Babka R: Mike Rapoport R: Suren Baghdasaryan @@ -16895,7 +16895,7 @@ MEMORY MANAGEMENT - MISC M: Andrew Morton M: David Hildenbrand R: Lorenzo Stoakes -R: Liam R. Howlett +R: Liam R. Howlett R: Vlastimil Babka R: Mike Rapoport R: Suren Baghdasaryan @@ -16997,7 +16997,7 @@ M: Andrew Morton M: David Hildenbrand M: Lorenzo Stoakes R: Rik van Riel -R: Liam R. Howlett +R: Liam R. Howlett R: Vlastimil Babka R: Harry Yoo R: Jann Horn @@ -17044,7 +17044,7 @@ M: David Hildenbrand M: Lorenzo Stoakes R: Zi Yan R: Baolin Wang -R: Liam R. Howlett +R: Liam R. Howlett R: Nico Pache R: Ryan Roberts R: Dev Jain @@ -17082,7 +17082,7 @@ F: tools/testing/selftests/mm/uffd-*.[ch] MEMORY MANAGEMENT - RUST M: Alice Ryhl R: Lorenzo Stoakes -R: Liam R. Howlett +R: Liam R. Howlett L: linux-mm@kvack.org L: rust-for-linux@vger.kernel.org S: Maintained @@ -17096,7 +17096,7 @@ F: rust/kernel/page.rs MEMORY MAPPING M: Andrew Morton -M: Liam R. Howlett +M: Liam R. Howlett M: Lorenzo Stoakes R: Vlastimil Babka R: Jann Horn @@ -17128,7 +17128,7 @@ F: tools/testing/vma/ MEMORY MAPPING - LOCKING M: Andrew Morton M: Suren Baghdasaryan -M: Liam R. Howlett +M: Liam R. Howlett M: Lorenzo Stoakes R: Vlastimil Babka R: Shakeel Butt @@ -17143,7 +17143,7 @@ F: mm/mmap_lock.c MEMORY MAPPING - MADVISE (MEMORY ADVICE) M: Andrew Morton -M: Liam R. Howlett +M: Liam R. Howlett M: Lorenzo Stoakes M: David Hildenbrand R: Vlastimil Babka @@ -23370,7 +23370,7 @@ RUST [ALLOC] M: Danilo Krummrich R: Lorenzo Stoakes R: Vlastimil Babka -R: Liam R. Howlett +R: Liam R. Howlett R: Uladzislau Rezki L: rust-for-linux@vger.kernel.org S: Maintained diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 0c464eade1d6..4a5631906aff 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -4,7 +4,7 @@ /* * Maple Tree - An RCU-safe adaptive tree for storing ranges * Copyright (c) 2018-2022 Oracle - * Authors: Liam R. Howlett + * Authors: Liam R. Howlett * Matthew Wilcox */ diff --git a/lib/maple_tree.c b/lib/maple_tree.c index d18d7ed9ab67..60ae5e6fc1ee 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -2,7 +2,7 @@ /* * Maple Tree implementation * Copyright (c) 2018-2022 Oracle Corporation - * Authors: Liam R. Howlett + * Authors: Liam R. Howlett * Matthew Wilcox * Copyright (c) 2023 ByteDance * Author: Peng Zhang diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 434d8a2fdd99..b9367c61e8b5 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -2,7 +2,7 @@ /* * test_maple_tree.c: Test the maple tree API * Copyright (c) 2018-2022 Oracle Corporation - * Author: Liam R. Howlett + * Author: Liam R. Howlett * * Any tests that only require the interface of the tree. */ @@ -4021,6 +4021,6 @@ static void __exit maple_tree_harvest(void) module_init(maple_tree_seed); module_exit(maple_tree_harvest); -MODULE_AUTHOR("Liam R. Howlett "); +MODULE_AUTHOR("Liam R. Howlett "); MODULE_DESCRIPTION("maple tree API test module"); MODULE_LICENSE("GPL"); diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index feedd5ab7058..0607913a3022 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -2,7 +2,7 @@ /* * maple_tree.c: Userspace testing for maple tree test-suite * Copyright (c) 2018-2022 Oracle Corporation - * Author: Liam R. Howlett + * Author: Liam R. Howlett * * Any tests that require internal knowledge of the tree or threads and other * difficult to handle in kernel tests. -- cgit v1.2.3 From adeeacf696b2b2e8e4a42089f09c76578b42d92f Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Thu, 23 Apr 2026 15:16:28 +0800 Subject: MAINTAINERS, mailmap: update email address for Qi Zheng Update my email address to qi.zheng@linux.dev. Link: https://lore.kernel.org/20260423071628.44044-1-qi.zheng@linux.dev Signed-off-by: Qi Zheng Signed-off-by: Andrew Morton --- .mailmap | 1 + MAINTAINERS | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index f1eafd91d2c2..ed8f79dac6ae 100644 --- a/.mailmap +++ b/.mailmap @@ -689,6 +689,7 @@ Punit Agrawal Puranjay Mohan Qais Yousef Qais Yousef +Qi Zheng Quentin Monnet Quentin Monnet Quentin Perret diff --git a/MAINTAINERS b/MAINTAINERS index be2e017b3a9d..c99c6d04bbc6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16984,7 +16984,7 @@ M: Andrew Morton M: Johannes Weiner R: David Hildenbrand R: Michal Hocko -R: Qi Zheng +R: Qi Zheng R: Shakeel Butt R: Lorenzo Stoakes L: linux-mm@kvack.org @@ -24315,7 +24315,7 @@ F: include/media/i2c/rj54n1cb0c.h SHRINKER M: Andrew Morton M: Dave Chinner -R: Qi Zheng +R: Qi Zheng R: Roman Gushchin R: Muchun Song L: linux-mm@kvack.org -- cgit v1.2.3 From 2aeb64a2b90da8ed1bb71d1a6dce61a11f69f3cf Mon Sep 17 00:00:00 2001 From: Li Wang Date: Thu, 23 Apr 2026 21:26:49 +0800 Subject: MAINTAINERS: update Li Wang's email address Update my email address as my work email account is no longer in use. Also update .mailmap. Link: https://lore.kernel.org/20260423132649.31126-1-li.wang@linux.dev Signed-off-by: Li Wang Reviewed-by: Cyril Hrubis Reviewed-by: Petr Vorel Cc: Arnd Bergmann Cc: Jakub Kacinski Cc: Li Wang Cc: Martin Kepplinger Cc: Shannon Nelson Signed-off-by: Andrew Morton --- .mailmap | 2 ++ MAINTAINERS | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index ed8f79dac6ae..8a3204280eb2 100644 --- a/.mailmap +++ b/.mailmap @@ -507,6 +507,8 @@ Linus Walleij Linus Walleij Linus Walleij +Li Wang +Li Wang Li Yang Li Yang Lior David diff --git a/MAINTAINERS b/MAINTAINERS index c99c6d04bbc6..30f21a90b1da 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15252,7 +15252,7 @@ M: Andrea Cervesato M: Cyril Hrubis M: Jan Stancek M: Petr Vorel -M: Li Wang +M: Li Wang M: Yang Xu M: Xiao Yang L: ltp@lists.linux.it (subscribers-only) -- cgit v1.2.3 From 1e68eb96e8beb1abefd12dd22c5637795d8a877e Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 23 Apr 2026 08:02:51 -0700 Subject: mm/damon/sysfs-schemes: protect memcg_path kfree() with damon_sysfs_lock Patch series "mm/damon/sysfs-schemes: fix use-after-free for [memcg_]path". Reads of 'memcg_path' and 'path' files in DAMON sysfs interface could race with their writes, results in use-after-free. Fix those. This patch (of 2): damon_sysfs_scheme_filter->mmecg_path can be read and written by users, via DAMON sysfs memcg_path file. It can also be indirectly read, for the parameters {on,off}line committing to DAMON. The reads for parameters committing are protected by damon_sysfs_lock to avoid the sysfs files being destroyed while any of the parameters are being read. But the user-driven direct reads and writes are not protected by any lock, while the write is deallocating the memcg_path-pointing buffer. As a result, the readers could read the already freed buffer (user-after-free). Note that the user-reads don't race when the same open file is used by the writer, due to kernfs's open file locking. Nonetheless, doing the reads and writes with separate open files would be common. Fix it by protecting both the user-direct reads and writes with damon_sysfs_lock. Link: https://lore.kernel.org/20260423150253.111520-1-sj@kernel.org Link: https://lore.kernel.org/20260423150253.111520-2-sj@kernel.org Fixes: 4f489fe6afb3 ("mm/damon/sysfs-schemes: free old damon_sysfs_scheme_filter->memcg_path on write") Co-developed-by: Junxi Qian Signed-off-by: Junxi Qian Signed-off-by: SeongJae Park Cc: # 6.16.x Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 5186966dafb3..8d32a20531d4 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -533,9 +533,14 @@ static ssize_t memcg_path_show(struct kobject *kobj, { struct damon_sysfs_scheme_filter *filter = container_of(kobj, struct damon_sysfs_scheme_filter, kobj); + int len; - return sysfs_emit(buf, "%s\n", + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + len = sysfs_emit(buf, "%s\n", filter->memcg_path ? filter->memcg_path : ""); + mutex_unlock(&damon_sysfs_lock); + return len; } static ssize_t memcg_path_store(struct kobject *kobj, @@ -550,8 +555,13 @@ static ssize_t memcg_path_store(struct kobject *kobj, return -ENOMEM; strscpy(path, buf, count + 1); + if (!mutex_trylock(&damon_sysfs_lock)) { + kfree(path); + return -EBUSY; + } kfree(filter->memcg_path); filter->memcg_path = path; + mutex_unlock(&damon_sysfs_lock); return count; } -- cgit v1.2.3 From cf3b71421ca00807328c6d9cd242f9de3b77a4bf Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 23 Apr 2026 08:02:52 -0700 Subject: mm/damon/sysfs-schemes: protect path kfree() with damon_sysfs_lock damon_sysfs_quot_goal->path can be read and written by users, via DAMON sysfs 'path' file. It can also be indirectly read, for the parameters {on,off}line committing to DAMON. The reads for parameters committing are protected by damon_sysfs_lock to avoid the sysfs files being destroyed while any of the parameters are being read. But the user-driven direct reads and writes are not protected by any lock, while the write is deallocating the path-pointing buffer. As a result, the readers could read the already freed buffer (user-after-free). Note that the user-reads don't race when the same open file is used by the writer, due to kernfs's open file locking. Nonetheless, doing the reads and writes with separate open files would be common. Fix it by protecting both the user-direct reads and writes with damon_sysfs_lock. Link: https://lore.kernel.org/20260423150253.111520-3-sj@kernel.org Fixes: c41e253a411e ("mm/damon/sysfs-schemes: implement path file under quota goal directory") Co-developed-by: Junxi Qian Signed-off-by: Junxi Qian Signed-off-by: SeongJae Park Cc: # 6.19.x Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 8d32a20531d4..245d63808411 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -1197,8 +1197,13 @@ static ssize_t path_show(struct kobject *kobj, { struct damos_sysfs_quota_goal *goal = container_of(kobj, struct damos_sysfs_quota_goal, kobj); + int len; - return sysfs_emit(buf, "%s\n", goal->path ? goal->path : ""); + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + len = sysfs_emit(buf, "%s\n", goal->path ? goal->path : ""); + mutex_unlock(&damon_sysfs_lock); + return len; } static ssize_t path_store(struct kobject *kobj, @@ -1213,8 +1218,13 @@ static ssize_t path_store(struct kobject *kobj, return -ENOMEM; strscpy(path, buf, count + 1); + if (!mutex_trylock(&damon_sysfs_lock)) { + kfree(path); + return -EBUSY; + } kfree(goal->path); goal->path = path; + mutex_unlock(&damon_sysfs_lock); return count; } -- cgit v1.2.3 From a3907a3169d09ebaeef9631ab6a4534314545ef7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 16 Apr 2026 19:40:56 +0100 Subject: selftests/mm: specify requirement for PROC_MEM_ALWAYS_FORCE=y Several of the mm selftests made use of /proc/pid/mem as part of their operation but we do not specify this in the config fragment for them, at least mkdirty and ksm_functional_tests have this requirement. This has been working fine in practice since PROC_MEM_ALWAYS_FORCE was the default setting but commit 599bbba5a36f ("proc: make PROC_MEM_FORCE_PTRACE the Kconfig default") that is no longer the case, meaning that tests run on kernels built based on defconfigs have started having the new more restrictive default and failing. Add PROC_MEM_ALWAYS_FORCE to the config fragment for the mm selftests. Thanks to Aishwarya TCV for spotting the issue and identifying the commit that introduced it. Link: https://lore.kernel.org/20260416-selftests-mm-proc-mem-always-force-v1-1-3f5865153c67@kernel.org Fixes: 599bbba5a36f ("proc: make PROC_MEM_FORCE_PTRACE the Kconfig default") Signed-off-by: Mark Brown Reported-by: Aishwarya TCV Reviewed-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand (Arm) Reviewed-by: Anshuman Khandual Reviewed-by: Dev Jain Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Mike Rapoport Cc: Shuah Khan Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config index 1dbe2b4558ab..06f78bd232e2 100644 --- a/tools/testing/selftests/mm/config +++ b/tools/testing/selftests/mm/config @@ -13,3 +13,4 @@ CONFIG_PROFILING=y CONFIG_UPROBES=y CONFIG_MEMORY_FAILURE=y CONFIG_HWPOISON_INJECT=m +CONFIG_PROC_MEM_ALWAYS_FORCE=y -- cgit v1.2.3 From 64a140afa5ed1c6f5ba6d451512cbdbbab1ba339 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 19 Apr 2026 09:10:00 -0700 Subject: mm/damon/reclaim: detect and use fresh enabled and kdamond_pid values Patch series "mm/damon/modules: detect and use fresh status", v3. DAMON modules including DAMON_RECLAIM, DAMON_LRU_SORT and DAMON_STAT commonly expose the kdamond running status via their parameters. Under certain scenarios including wrong user inputs and memory allocation failures, those parameter values can be stale. It can confuse users. For DAMON_RECLAIM and DAMON_LRU_SORT, it even makes the kdamond unable to be restarted before the system reboot. The problem comes from the fact that there are multiple events for the status changes and it is difficult to follow up all the scenarios. Fix the issue by detecting and using the status on demand, instead of using a cached status that is difficult to be updated. Patches 1-3 fix the bugs in DAMON_RECLAIM, DAMON_LRU_SORT and DAMON_STAT in the order. This patch (of 3): DAMON_RECLAIM updates 'enabled' and 'kdamond_pid' parameter values, which represents the running status of its kdamond, when the user explicitly requests start/stop of the kdamond. The kdamond can, however, be stopped in events other than the explicit user request in the following three events. 1. ctx->regions_score_histogram allocation failure at beginning of the execution, 2. damon_commit_ctx() failure due to invalid user input, and 3. damon_commit_ctx() failure due to its internal allocation failures. Hence, if the kdamond is stopped by the above three events, the values of the status parameters can be stale. Users could show the stale values and be confused. This is already bad, but the real consequence is worse. DAMON_RECLAIM avoids unnecessary damon_start() and damon_stop() calls based on the 'enabled' parameter value. And the update of 'enabled' parameter value depends on the damon_start() and damon_stop() call results. Hence, once the kdamond has stopped by the unintentional events, the user cannot restart the kdamond before the system reboot. For example, the issue can be reproduced via below steps. # cd /sys/module/damon_reclaim/parameters # # # start DAMON_RECLAIM # echo Y > enabled # ps -ef | grep kdamond root 806 2 0 17:53 ? 00:00:00 [kdamond.0] root 808 803 0 17:53 pts/4 00:00:00 grep kdamond # # # commit wrong input to stop kdamond withou explicit stop request # echo 3 > addr_unit # echo Y > commit_inputs bash: echo: write error: Invalid argument # # # confirm kdamond is stopped # ps -ef | grep kdamond root 811 803 0 17:53 pts/4 00:00:00 grep kdamond # # # users casn now show stable status # cat enabled Y # cat kdamond_pid 806 # # # even after fixing the wrong parameter, # # kdamond cannot be restarted. # echo 1 > addr_unit # echo Y > enabled # ps -ef | grep kdamond root 815 803 0 17:54 pts/4 00:00:00 grep kdamond The problem will only rarely happen in real and common setups for the following reasons. The allocation failures are unlikely in such setups since those allocations are arguably too small to fail. Also sane users on real production environments may not commit wrong input parameters. But once it happens, the consequence is quite bad. And the bug is a bug. The issue stems from the fact that there are multiple events that can change the status, and following all the events is challenging. Dynamically detect and use the fresh status for the parameters when those are requested. Link: https://lore.kernel.org/20260419161003.79176-1-sj@kernel.org Link: https://lore.kernel.org/20260419161003.79176-2-sj@kernel.org Fixes: e035c280f6df ("mm/damon/reclaim: support online inputs update") Co-developed-by: Liew Rui Yan Signed-off-by: Liew Rui Yan Signed-off-by: SeongJae Park Cc: # 5.19.x Signed-off-by: Andrew Morton --- mm/damon/reclaim.c | 85 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 30 deletions(-) diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 86da14778658..fe7fce26cf6c 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -144,15 +144,6 @@ static unsigned long addr_unit __read_mostly = 1; static bool skip_anon __read_mostly; module_param(skip_anon, bool, 0600); -/* - * PID of the DAMON thread - * - * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. - * Else, -1. - */ -static int kdamond_pid __read_mostly = -1; -module_param(kdamond_pid, int, 0400); - static struct damos_stat damon_reclaim_stat; DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_reclaim_stat, reclaim_tried_regions, reclaimed_regions, quota_exceeds); @@ -288,12 +279,8 @@ static int damon_reclaim_turn(bool on) { int err; - if (!on) { - err = damon_stop(&ctx, 1); - if (!err) - kdamond_pid = -1; - return err; - } + if (!on) + return damon_stop(&ctx, 1); err = damon_reclaim_apply_parameters(); if (err) @@ -302,9 +289,6 @@ static int damon_reclaim_turn(bool on) err = damon_start(&ctx, 1, true); if (err) return err; - kdamond_pid = damon_kdamond_pid(ctx); - if (kdamond_pid < 0) - return kdamond_pid; return damon_call(ctx, &call_control); } @@ -332,42 +316,83 @@ module_param_cb(addr_unit, &addr_unit_param_ops, &addr_unit, 0600); MODULE_PARM_DESC(addr_unit, "Scale factor for DAMON_RECLAIM to ops address conversion (default: 1)"); +static bool damon_reclaim_enabled(void) +{ + if (!ctx) + return false; + return damon_is_running(ctx); +} + static int damon_reclaim_enabled_store(const char *val, const struct kernel_param *kp) { - bool is_enabled = enabled; - bool enable; int err; - err = kstrtobool(val, &enable); + err = kstrtobool(val, &enabled); if (err) return err; - if (is_enabled == enable) + if (damon_reclaim_enabled() == enabled) return 0; /* Called before init function. The function will handle this. */ if (!damon_initialized()) - goto set_param_out; + return 0; - err = damon_reclaim_turn(enable); - if (err) - return err; + return damon_reclaim_turn(enabled); +} -set_param_out: - enabled = enable; - return err; +static int damon_reclaim_enabled_load(char *buffer, + const struct kernel_param *kp) +{ + return sprintf(buffer, "%c\n", damon_reclaim_enabled() ? 'Y' : 'N'); } static const struct kernel_param_ops enabled_param_ops = { .set = damon_reclaim_enabled_store, - .get = param_get_bool, + .get = damon_reclaim_enabled_load, }; module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); MODULE_PARM_DESC(enabled, "Enable or disable DAMON_RECLAIM (default: disabled)"); +static int damon_reclaim_kdamond_pid_store(const char *val, + const struct kernel_param *kp) +{ + /* + * kdamond_pid is read-only, but kernel command line could write it. + * Do nothing here. + */ + return 0; +} + +static int damon_reclaim_kdamond_pid_load(char *buffer, + const struct kernel_param *kp) +{ + int kdamond_pid = -1; + + if (ctx) { + kdamond_pid = damon_kdamond_pid(ctx); + if (kdamond_pid < 0) + kdamond_pid = -1; + } + return sprintf(buffer, "%d\n", kdamond_pid); +} + +static const struct kernel_param_ops kdamond_pid_param_ops = { + .set = damon_reclaim_kdamond_pid_store, + .get = damon_reclaim_kdamond_pid_load, +}; + +/* + * PID of the DAMON thread + * + * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. + * Else, -1. + */ +module_param_cb(kdamond_pid, &kdamond_pid_param_ops, NULL, 0400); + static int __init damon_reclaim_init(void) { int err; -- cgit v1.2.3 From b98b7ff6025ae82570d4915e083f0cbd8d48b3cf Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 19 Apr 2026 09:10:01 -0700 Subject: mm/damon/lru_sort: detect and use fresh enabled and kdamond_pid values DAMON_LRU_SORT updates 'enabled' and 'kdamond_pid' parameter values, which represents the running status of its kdamond, when the user explicitly requests start/stop of the kdamond. The kdamond can, however, be stopped in events other than the explicit user request in the following three events. 1. ctx->regions_score_histogram allocation failure at beginning of the execution, 2. damon_commit_ctx() failure due to invalid user input, and 3. damon_commit_ctx() failure due to its internal allocation failures. Hence, if the kdamond is stopped by the above three events, the values of the status parameters can be stale. Users could show the stale values and be confused. This is already bad, but the real consequence is worse. DAMON_LRU_SORT avoids unnecessary damon_start() and damon_stop() calls based on the 'enabled' parameter value. And the update of 'enabled' parameter value depends on the damon_start() and damon_stop() call results. Hence, once the kdamond has stopped by the unintentional events, the user cannot restart the kdamond before the system reboot. For example, the issue can be reproduced via below steps. # cd /sys/module/damon_lru_sort/parameters # # # start DAMON_LRU_SORT # echo Y > enabled # ps -ef | grep kdamond root 806 2 0 17:53 ? 00:00:00 [kdamond.0] root 808 803 0 17:53 pts/4 00:00:00 grep kdamond # # # commit wrong input to stop kdamond withou explicit stop request # echo 3 > addr_unit # echo Y > commit_inputs bash: echo: write error: Invalid argument # # # confirm kdamond is stopped # ps -ef | grep kdamond root 811 803 0 17:53 pts/4 00:00:00 grep kdamond # # # users casn now show stable status # cat enabled Y # cat kdamond_pid 806 # # # even after fixing the wrong parameter, # # kdamond cannot be restarted. # echo 1 > addr_unit # echo Y > enabled # ps -ef | grep kdamond root 815 803 0 17:54 pts/4 00:00:00 grep kdamond The problem will only rarely happen in real and common setups for the following reasons. The allocation failures are unlikely in such setups since those allocations are arguably too small to fail. Also sane users on real production environments may not commit wrong input parameters. But once it happens, the consequence is quite bad. And the bug is a bug. The issue stems from the fact that there are multiple events that can change the status, and following all the events is challenging. Dynamically detect and use the fresh status for the parameters when those are requested. Link: https://lore.kernel.org/20260419161003.79176-3-sj@kernel.org Fixes: 40e983cca927 ("mm/damon: introduce DAMON-based LRU-lists Sorting") Co-developed-by: Liew Rui Yan Signed-off-by: Liew Rui Yan Signed-off-by: SeongJae Park Cc: # 6.0.x Signed-off-by: Andrew Morton --- mm/damon/lru_sort.c | 85 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 30 deletions(-) diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index 554559d72976..8494040b1ee4 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -161,15 +161,6 @@ module_param(monitor_region_end, ulong, 0600); */ static unsigned long addr_unit __read_mostly = 1; -/* - * PID of the DAMON thread - * - * If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread. - * Else, -1. - */ -static int kdamond_pid __read_mostly = -1; -module_param(kdamond_pid, int, 0400); - static struct damos_stat damon_lru_sort_hot_stat; DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_lru_sort_hot_stat, lru_sort_tried_hot_regions, lru_sorted_hot_regions, @@ -386,12 +377,8 @@ static int damon_lru_sort_turn(bool on) { int err; - if (!on) { - err = damon_stop(&ctx, 1); - if (!err) - kdamond_pid = -1; - return err; - } + if (!on) + return damon_stop(&ctx, 1); err = damon_lru_sort_apply_parameters(); if (err) @@ -400,9 +387,6 @@ static int damon_lru_sort_turn(bool on) err = damon_start(&ctx, 1, true); if (err) return err; - kdamond_pid = damon_kdamond_pid(ctx); - if (kdamond_pid < 0) - return kdamond_pid; return damon_call(ctx, &call_control); } @@ -430,42 +414,83 @@ module_param_cb(addr_unit, &addr_unit_param_ops, &addr_unit, 0600); MODULE_PARM_DESC(addr_unit, "Scale factor for DAMON_LRU_SORT to ops address conversion (default: 1)"); +static bool damon_lru_sort_enabled(void) +{ + if (!ctx) + return false; + return damon_is_running(ctx); +} + static int damon_lru_sort_enabled_store(const char *val, const struct kernel_param *kp) { - bool is_enabled = enabled; - bool enable; int err; - err = kstrtobool(val, &enable); + err = kstrtobool(val, &enabled); if (err) return err; - if (is_enabled == enable) + if (damon_lru_sort_enabled() == enabled) return 0; /* Called before init function. The function will handle this. */ if (!damon_initialized()) - goto set_param_out; + return 0; - err = damon_lru_sort_turn(enable); - if (err) - return err; + return damon_lru_sort_turn(enabled); +} -set_param_out: - enabled = enable; - return err; +static int damon_lru_sort_enabled_load(char *buffer, + const struct kernel_param *kp) +{ + return sprintf(buffer, "%c\n", damon_lru_sort_enabled() ? 'Y' : 'N'); } static const struct kernel_param_ops enabled_param_ops = { .set = damon_lru_sort_enabled_store, - .get = param_get_bool, + .get = damon_lru_sort_enabled_load, }; module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); MODULE_PARM_DESC(enabled, "Enable or disable DAMON_LRU_SORT (default: disabled)"); +static int damon_lru_sort_kdamond_pid_store(const char *val, + const struct kernel_param *kp) +{ + /* + * kdamond_pid is read-only, but kernel command line could write it. + * Do nothing here. + */ + return 0; +} + +static int damon_lru_sort_kdamond_pid_load(char *buffer, + const struct kernel_param *kp) +{ + int kdamond_pid = -1; + + if (ctx) { + kdamond_pid = damon_kdamond_pid(ctx); + if (kdamond_pid < 0) + kdamond_pid = -1; + } + return sprintf(buffer, "%d\n", kdamond_pid); +} + +static const struct kernel_param_ops kdamond_pid_param_ops = { + .set = damon_lru_sort_kdamond_pid_store, + .get = damon_lru_sort_kdamond_pid_load, +}; + +/* + * PID of the DAMON thread + * + * If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread. + * Else, -1. + */ +module_param_cb(kdamond_pid, &kdamond_pid_param_ops, NULL, 0400); + static int __init damon_lru_sort_init(void) { int err; -- cgit v1.2.3 From f98590bc08d4aea435e1c2213e38bae0d9e9a7bb Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 19 Apr 2026 09:10:02 -0700 Subject: mm/damon/stat: detect and use fresh enabled value DAMON_STAT updates 'enabled' parameter value, which represents the running status of its kdamond, when the user explicitly requests start/stop of the kdamond. The kdamond can, however, be stopped even if the user explicitly requested the stop, if ctx->regions_score_histogram allocation failure at beginning of the execution of the kdamond. Hence, if the kdamond is stopped by the allocation failure, the value of the parameter can be stale. Users could show the stale value and be confused. The problem will only rarely happen in real and common setups because the allocation is arguably too small to fail. Also, unlike the similar bugs that are now fixed in DAMON_RECLAIM and DAMON_LRU_SORT, kdamond can be restarted in this case, because DAMON_STAT force-updates the enabled parameter value for user inputs. The bug is a bug, though. The issue stems from the fact that there are multiple events that can change the status, and following all the events is challenging. Dynamically detect and use the fresh status for the parameters when those are requested. The issue was dicovered [1] by Sashiko. Link: https://lore.kernel.org/20260419161003.79176-4-sj@kernel.org Link: https://lore.kernel.org/20260416040602.88665-1-sj@kernel.org [1] Fixes: 369c415e6073 ("mm/damon: introduce DAMON_STAT module") Signed-off-by: SeongJae Park Cc: Liew Rui Yan Cc: # 6.17.x Signed-off-by: Andrew Morton --- mm/damon/stat.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/mm/damon/stat.c b/mm/damon/stat.c index 99ba346f9e32..3951b762cbdd 100644 --- a/mm/damon/stat.c +++ b/mm/damon/stat.c @@ -19,14 +19,17 @@ static int damon_stat_enabled_store( const char *val, const struct kernel_param *kp); +static int damon_stat_enabled_load(char *buffer, + const struct kernel_param *kp); + static const struct kernel_param_ops enabled_param_ops = { .set = damon_stat_enabled_store, - .get = param_get_bool, + .get = damon_stat_enabled_load, }; static bool enabled __read_mostly = IS_ENABLED( CONFIG_DAMON_STAT_ENABLED_DEFAULT); -module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); +module_param_cb(enabled, &enabled_param_ops, NULL, 0600); MODULE_PARM_DESC(enabled, "Enable of disable DAMON_STAT"); static unsigned long estimated_memory_bandwidth __read_mostly; @@ -273,17 +276,23 @@ static void damon_stat_stop(void) damon_stat_context = NULL; } +static bool damon_stat_enabled(void) +{ + if (!damon_stat_context) + return false; + return damon_is_running(damon_stat_context); +} + static int damon_stat_enabled_store( const char *val, const struct kernel_param *kp) { - bool is_enabled = enabled; int err; err = kstrtobool(val, &enabled); if (err) return err; - if (is_enabled == enabled) + if (damon_stat_enabled() == enabled) return 0; if (!damon_initialized()) @@ -293,16 +302,17 @@ static int damon_stat_enabled_store( */ return 0; - if (enabled) { - err = damon_stat_start(); - if (err) - enabled = false; - return err; - } + if (enabled) + return damon_stat_start(); damon_stat_stop(); return 0; } +static int damon_stat_enabled_load(char *buffer, const struct kernel_param *kp) +{ + return sprintf(buffer, "%c\n", damon_stat_enabled() ? 'Y' : 'N'); +} + static int __init damon_stat_init(void) { int err = 0; -- cgit v1.2.3 From ba13b28decbb09ce5296a3303e85235e120147f2 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Sat, 18 Apr 2026 13:32:26 +0530 Subject: MAINTAINERS: remove stale kdump project URL The kdump project URL in MAINTAINERS points to http://lse.sourceforge.net/kdump/, but it is no longer maintained. Remove this outdated link to avoid confusion and keep the file up to date. Discussion to remove this link: https://lore.kernel.org/all/e1e9e200-17d7-4ae9-b0eb-71300f4eb1ac@linux.ibm.com/ Link: https://lore.kernel.org/20260418080226.40415-1-sourabhjain@linux.ibm.com Signed-off-by: Sourabh Jain Acked-by: Baoquan He Cc: Dave Young Cc: Vivek Goyal Cc: Mike Rapoport Cc: Pasha Tatashin Cc: Pratyush Yadav Signed-off-by: Andrew Morton --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 30f21a90b1da..710c3acb5d9c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13860,7 +13860,6 @@ M: Pratyush Yadav R: Dave Young L: kexec@lists.infradead.org S: Maintained -W: http://lse.sourceforge.net/kdump/ F: Documentation/admin-guide/kdump/ F: fs/proc/vmcore.c F: include/linux/crash_core.h -- cgit v1.2.3 From 292411fda25bdcb8cd0cb513fa5583a36dd36354 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Fri, 24 Apr 2026 19:36:38 +0100 Subject: mm/userfaultfd: detect VMA type change after copy retry in mfill_copy_folio_retry() mfill_copy_folio_retry() drops mmap_lock for the copy_from_user() call. During this window, the VMA can be replaced with a different type (e.g. hugetlb), making the caller's ops pointer stale. Subsequent use of the stale ops would dispatch into the wrong per-vma handlers. Capture the VMA's ops via vma_uffd_ops() before dropping the lock and compare against the current vma_uffd_ops() after re-acquiring it. Return -EAGAIN if they differ so the operation can be retried. This avoids comparing against the caller's ops which may have been overridden to anon_uffd_ops for MAP_PRIVATE file-backed mappings. Link: https://lore.kernel.org/20260424183638.196227-1-devnexen@gmail.com Fixes: 6ab703034f14 ("userfaultfd: mfill_atomic(): remove retry logic") Reported-by: Usama Arif Closes: https://lore.kernel.org/all/20260410114809.3592720-1-usama.arif@linux.dev/ Signed-off-by: David Carlier Acked-by: Mike Rapoport (Microsoft) Cc: Jann Horn Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Peter Xu Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 885da1e56466..180bad42fc79 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -443,8 +443,10 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr) return ret; } -static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio) +static int mfill_copy_folio_retry(struct mfill_state *state, + struct folio *folio) { + const struct vm_uffd_ops *orig_ops = vma_uffd_ops(state->vma); unsigned long src_addr = state->src_addr; void *kaddr; int err; @@ -465,6 +467,14 @@ static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio if (err) return err; + /* + * The VMA type may have changed while the lock was dropped + * (e.g. replaced with a hugetlb mapping), making the caller's + * ops pointer stale. + */ + if (vma_uffd_ops(state->vma) != orig_ops) + return -EAGAIN; + err = mfill_establish_pmd(state); if (err) return err; -- cgit v1.2.3 From e47029b977e747cb3a9174308fd55762cce70147 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 17 Apr 2026 15:24:39 +0000 Subject: mtd: spi-nor: debugfs: fix out-of-bounds read in spi_nor_params_show() Sashiko noticed an out-of-bounds read [1]. In spi_nor_params_show(), the snor_f_names array is passed to spi_nor_print_flags() using sizeof(snor_f_names). Since snor_f_names is an array of pointers, sizeof() returns the total number of bytes occupied by the pointers (element_count * sizeof(void *)) rather than the element count itself. On 64-bit systems, this makes the passed length 8x larger than intended. Inside spi_nor_print_flags(), the 'names_len' argument is used to bounds-check the 'names' array access. An out-of-bounds read occurs if a flag bit is set that exceeds the array's actual element count but is within the inflated byte-size count. Correct this by using ARRAY_SIZE() to pass the actual number of string pointers in the array. Cc: stable@vger.kernel.org Fixes: 0257be79fc4a ("mtd: spi-nor: expose internal parameters via debugfs") Closes: https://sashiko.dev/#/patchset/20260417-die-erase-fix-v2-1-73bb7004ebad%40infineon.com [1] Signed-off-by: Tudor Ambarus Reviewed-by: Takahiro Kuwano Reviewed-by: Michael Walle Reviewed-by: Pratyush Yadav Signed-off-by: Miquel Raynal --- drivers/mtd/spi-nor/debugfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/debugfs.c b/drivers/mtd/spi-nor/debugfs.c index fa6956144d2e..14ba1680c315 100644 --- a/drivers/mtd/spi-nor/debugfs.c +++ b/drivers/mtd/spi-nor/debugfs.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -92,7 +93,8 @@ static int spi_nor_params_show(struct seq_file *s, void *data) seq_printf(s, "address nbytes\t%u\n", nor->addr_nbytes); seq_puts(s, "flags\t\t"); - spi_nor_print_flags(s, nor->flags, snor_f_names, sizeof(snor_f_names)); + spi_nor_print_flags(s, nor->flags, snor_f_names, + ARRAY_SIZE(snor_f_names)); seq_puts(s, "\n"); seq_puts(s, "\nopcodes\n"); -- cgit v1.2.3 From 5e25407b68f460142539536e31fa20338db6146f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 10 Apr 2026 19:41:03 +0200 Subject: mtd: spinand: Add support for packed read data ODTR commands Some devices stuff address bits in the double byte opcode (in place of the repeated byte) in order to be able to increase the size of the devices, without adding extra address bytes. Create a flag to identify those devices. When the flag is set, use the "packed" variant for the read data operation. Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/core.c | 24 +++++++++++++++++++++--- include/linux/mtd/spinand.h | 7 +++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 8aa3753aaaa1..0b076790bd9d 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -100,6 +100,17 @@ spinand_fill_page_read_op(struct spinand_device *spinand, u64 addr) return op; } +static struct spi_mem_op +spinand_fill_page_read_packed_op(struct spinand_device *spinand, u64 addr) +{ + struct spi_mem_op op = spinand->op_templates->page_read; + + op.cmd.opcode |= addr >> 16; + op.addr.val = addr & 0xFFFF; + + return op; +} + struct spi_mem_op spinand_fill_prog_exec_op(struct spinand_device *spinand, u64 addr) { @@ -453,7 +464,10 @@ static int spinand_load_page_op(struct spinand_device *spinand, { struct nand_device *nand = spinand_to_nand(spinand); unsigned int row = nanddev_pos_to_row(nand, &req->pos); - struct spi_mem_op op = SPINAND_OP(spinand, page_read, row); + bool packed = spinand->flags & SPINAND_ODTR_PACKED_PAGE_READ; + struct spi_mem_op op = packed ? + SPINAND_OP(spinand, page_read_packed, row) : + SPINAND_OP(spinand, page_read, row); return spi_mem_exec_op(spinand->spimem, &op); } @@ -1489,9 +1503,13 @@ static int spinand_init_odtr_instruction_set(struct spinand_device *spinand) if (!spi_mem_supports_op(spinand->spimem, &tmpl->blk_erase)) return -EOPNOTSUPP; - tmpl->page_read = (struct spi_mem_op)SPINAND_PAGE_READ_8D_8D_0_OP(0); - if (!spi_mem_supports_op(spinand->spimem, &tmpl->page_read)) + if (spinand->flags & SPINAND_ODTR_PACKED_PAGE_READ) + tmpl->page_read = (struct spi_mem_op)SPINAND_PAGE_READ_PACKED_8D_8D_0_OP(0); + else + tmpl->page_read = (struct spi_mem_op)SPINAND_PAGE_READ_8D_8D_0_OP(0); + if (!spi_mem_supports_op(spinand->spimem, &tmpl->page_read)) { return -EOPNOTSUPP; + } tmpl->prog_exec = (struct spi_mem_op)SPINAND_PROG_EXEC_8D_8D_0_OP(0); if (!spi_mem_supports_op(spinand->spimem, &tmpl->prog_exec)) diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 58abd306ebe3..782984ba3a20 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -290,6 +290,12 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) +#define SPINAND_PAGE_READ_PACKED_8D_8D_0_OP(addr) \ + SPI_MEM_OP(SPI_MEM_DTR_OP_PACKED_CMD(0x13, addr >> 16, 8), \ + SPI_MEM_DTR_OP_ADDR(2, addr & 0xffff, 8), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_NO_DATA) + #define SPINAND_PAGE_READ_FROM_CACHE_8D_8D_8D_OP(addr, ndummy, buf, len, freq) \ SPI_MEM_OP(SPI_MEM_DTR_OP_RPT_CMD(0x9d, 8), \ SPI_MEM_DTR_OP_ADDR(2, addr, 8), \ @@ -483,6 +489,7 @@ struct spinand_ecc_info { #define SPINAND_HAS_PROG_PLANE_SELECT_BIT BIT(2) #define SPINAND_HAS_READ_PLANE_SELECT_BIT BIT(3) #define SPINAND_NO_RAW_ACCESS BIT(4) +#define SPINAND_ODTR_PACKED_PAGE_READ BIT(5) /** * struct spinand_ondie_ecc_conf - private SPI-NAND on-die ECC engine structure -- cgit v1.2.3 From 8d655748aba1b603c54053a20322401dc1e5d782 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 10 Apr 2026 19:41:04 +0200 Subject: mtd: spinand: winbond: Set the packed page read flag to W35N02/04JW Both W35N02JW and W35N04JW diverge from W35N01JW when it comes to the "data read" operation in ODTR mode. In order to stuff more address bits (up to 18), the second command byte is replaced by the most significant address bits, keeping the number of address bytes to 2. Fixes: 44a2f49b9bdc ("mtd: spinand: winbond: W35N octal DTR support") Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/winbond.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c index ad22774096e6..ea62fecea661 100644 --- a/drivers/mtd/nand/spi/winbond.c +++ b/drivers/mtd/nand/spi/winbond.c @@ -518,7 +518,7 @@ static const struct spinand_info winbond_spinand_table[] = { SPINAND_INFO_OP_VARIANTS(&read_cache_octal_variants, &write_cache_octal_variants, &update_cache_octal_variants), - 0, + SPINAND_ODTR_PACKED_PAGE_READ, SPINAND_INFO_VENDOR_OPS(&winbond_w35_ops), SPINAND_ECCINFO(&w35n01jw_ooblayout, NULL), SPINAND_CONFIGURE_CHIP(w35n0xjw_vcr_cfg)), @@ -529,7 +529,7 @@ static const struct spinand_info winbond_spinand_table[] = { SPINAND_INFO_OP_VARIANTS(&read_cache_octal_variants, &write_cache_octal_variants, &update_cache_octal_variants), - 0, + SPINAND_ODTR_PACKED_PAGE_READ, SPINAND_INFO_VENDOR_OPS(&winbond_w35_ops), SPINAND_ECCINFO(&w35n01jw_ooblayout, NULL), SPINAND_CONFIGURE_CHIP(w35n0xjw_vcr_cfg)), -- cgit v1.2.3 From 135ac3b84bcedae1860e7a9512d63166f42b736e Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 10 Apr 2026 19:41:05 +0200 Subject: mtd: spinand: winbond: Fix ODTR write VCR on W35NxxJW In most scenarios this variant is actually unused (VCR is written in SSDR mode), but we need to provide an octal variant. The address is 24 bits but is sent over 4 bytes MSB first. This means we need to shift the register address by one extra byte for the address to be correct. I didn't catch this initially because the volatile register region is 256 bytes wide, so the write-then-read procedure did work with the small register addresses I was using at that time: 0 and 1. Fixes: 44a2f49b9bdc ("mtd: spinand: winbond: W35N octal DTR support") Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/winbond.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c index ea62fecea661..7cc0f0091430 100644 --- a/drivers/mtd/nand/spi/winbond.c +++ b/drivers/mtd/nand/spi/winbond.c @@ -99,7 +99,7 @@ static SPINAND_OP_VARIANTS(update_cache_variants, #define SPINAND_WINBOND_WRITE_VCR_8D_8D_8D(reg, buf) \ SPI_MEM_OP(SPI_MEM_DTR_OP_RPT_CMD(0x81, 8), \ - SPI_MEM_DTR_OP_ADDR(4, reg, 8), \ + SPI_MEM_DTR_OP_ADDR(4, reg << 8, 8), \ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_DTR_OP_DATA_OUT(2, buf, 8)) -- cgit v1.2.3 From 5dfd429591f8d7185bf63a08b5c30863fb605611 Mon Sep 17 00:00:00 2001 From: Brajesh Gupta Date: Mon, 27 Apr 2026 11:01:37 +0530 Subject: drm/imagination: Fix segfault when updating ftrace mask Fix invalid data access by passing right data for debugfs entry. [ 171.549793] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 171.559248] Mem abort info: [ 171.562173] ESR = 0x0000000096000044 [ 171.566227] EC = 0x25: DABT (current EL), IL = 32 bits [ 171.573108] SET = 0, FnV = 0 [ 171.576448] EA = 0, S1PTW = 0 [ 171.579745] FSC = 0x04: level 0 translation fault [ 171.584760] Data abort info: [ 171.588012] ISV = 0, ISS = 0x00000044, ISS2 = 0x00000000 [ 171.593734] CM = 0, WnR = 1, TnD = 0, TagAccess = 0 [ 171.598962] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 171.604471] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000083837000 [ 171.611358] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 [ 171.618500] Internal error: Oops: 0000000096000044 [#1] SMP [ 171.624222] Modules linked in: powervr drm_shmem_helper drm_gpuvm... [ 171.656580] CPU: 0 UID: 0 PID: 549 Comm: bash Not tainted 7.0.0-rc2-g730b257ba723-dirty #13 PREEMPT [ 171.665773] Hardware name: BeagleBoard.org BeaglePlay (DT) [ 171.671296] pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 171.678306] pc : pvr_fw_trace_mask_set+0x78/0x154 [powervr] [ 171.683959] lr : pvr_fw_trace_mask_set+0x4c/0x154 [powervr] [ 171.689593] sp : ffff8000835ebb90 [ 171.692929] x29: ffff8000835ebc00 x28: ffff000005c60f80 x27: 0000000000000000 [ 171.700130] x26: 0000000000000000 x25: ffff00000504af28 x24: 0000000000000000 [ 171.707324] x23: ffff00000504af50 x22: 0000000000000203 x21: 0000000000000000 [ 171.714518] x20: ffff000005c44a80 x19: ffff000005c457b8 x18: 0000000000000000 [ 171.721715] x17: 0000000000000000 x16: 0000000000000000 x15: 0000aaaae8887580 [ 171.728908] x14: 0000000000000000 x13: 0000000000000000 x12: ffff8000835ebc30 [ 171.736095] x11: ffff00000504af2a x10: ffff00008504af29 x9 : 0fffffffffffffff [ 171.743286] x8 : ffff8000835ebbf8 x7 : 0000000000000000 x6 : 000000000000002a [ 171.750479] x5 : ffff00000504af2e x4 : 0000000000000000 x3 : 0000000000000010 [ 171.757674] x2 : 0000000000000203 x1 : 0000000000000000 x0 : ffff8000835ebba0 [ 171.764871] Call trace: [ 171.767342] pvr_fw_trace_mask_set+0x78/0x154 [powervr] (P) [ 171.772984] simple_attr_write_xsigned.isra.0+0xe0/0x19c [ 171.778341] simple_attr_write+0x18/0x24 [ 171.782296] debugfs_attr_write+0x50/0x98 [ 171.786341] full_proxy_write+0x6c/0xa8 [ 171.790208] vfs_write+0xd4/0x350 [ 171.793561] ksys_write+0x70/0x108 [ 171.796995] __arm64_sys_write+0x1c/0x28 [ 171.800952] invoke_syscall+0x48/0x10c [ 171.804740] el0_svc_common.constprop.0+0x40/0xe0 [ 171.809487] do_el0_svc+0x1c/0x28 [ 171.812834] el0_svc+0x34/0x108 [ 171.816013] el0t_64_sync_handler+0xa0/0xe4 [ 171.820237] el0t_64_sync+0x198/0x19c [ 171.823939] Code: 32000262 b90ac293 1a931056 9134e293 (b9000036) [ 171.830073] ---[ end trace 0000000000000000 ]--- Fixes: a331631496a0 ("drm/imagination: Simplify module parameters") Signed-off-by: Brajesh Gupta Reviewed-by: Alessio Belle Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260427-ftrace_fix-v3-1-e081530759a8@imgtec.com Signed-off-by: Matt Coster --- drivers/gpu/drm/imagination/pvr_fw_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.c b/drivers/gpu/drm/imagination/pvr_fw_trace.c index e154cb35f604..6193811ef7be 100644 --- a/drivers/gpu/drm/imagination/pvr_fw_trace.c +++ b/drivers/gpu/drm/imagination/pvr_fw_trace.c @@ -558,6 +558,6 @@ pvr_fw_trace_debugfs_init(struct pvr_device *pvr_dev, struct dentry *dir) &pvr_fw_trace_fops); } - debugfs_create_file("trace_mask", 0600, dir, fw_trace, + debugfs_create_file("trace_mask", 0600, dir, pvr_dev, &pvr_fw_trace_mask_fops); } -- cgit v1.2.3 From b5198fcdc195fa531adff7bbfbe40dd27c8d0e89 Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Sun, 26 Apr 2026 13:02:31 +0900 Subject: ntfs: fix NULL dereference in ntfs_index_walk_down() ntfs_index_walk_down() allocates ictx->ib when descending from the root into an index allocation block. If that allocation fails, the old code still passes the NULL buffer to ntfs_ib_read(), which can write through it via ntfs_inode_attr_pread(). Allocate the index block into a temporary pointer and return -ENOMEM before changing the index context on allocation failure. Also propagate ERR_PTR() through ntfs_index_next() and ntfs_readdir() so walk-down allocation or index block read failures are not mistaken for normal index iteration inside the filesystem. ntfs_readdir() keeps the existing userspace-visible behavior of suppressing readdir errors after marking end_in_iterate; this change only prevents the walk-down failure path from dereferencing NULL internally. The failure was reproduced with failslab fail-nth injection on getdents64; the original module hits a NULL pointer dereference in memcpy_orig through ntfs_ib_read(), while the patched module reaches the same ntfs_index_walk_down() allocation failure without crashing. Fixes: 0a8ac0c1fa0b ("ntfs: update directory operations") Signed-off-by: DaeMyung Kang Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/dir.c | 13 ++++++++++--- fs/ntfs/index.c | 17 +++++++++++++---- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index bfa904d2ce66..20f5c7074bdd 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -911,8 +911,8 @@ static int ntfs_readdir(struct file *file, struct dir_context *actor) if (next->flags & INDEX_ENTRY_NODE) { next = ntfs_index_walk_down(next, ictx); - if (!next) { - err = -EIO; + if (IS_ERR(next)) { + err = PTR_ERR(next); goto out; } } @@ -920,7 +920,14 @@ static int ntfs_readdir(struct file *file, struct dir_context *actor) if (next && !(next->flags & INDEX_ENTRY_END)) goto nextdir; - while ((next = ntfs_index_next(next, ictx)) != NULL) { + while (1) { + next = ntfs_index_next(next, ictx); + if (IS_ERR(next)) { + err = PTR_ERR(next); + goto out; + } + if (!next) + break; nextdir: /* Check the consistency of an index entry */ if (ntfs_index_entry_inconsistent(ictx, vol, next, COLLATION_FILE_NAME, diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c index 2080f3969137..a547bdcfa456 100644 --- a/fs/ntfs/index.c +++ b/fs/ntfs/index.c @@ -1969,15 +1969,19 @@ err_out: struct index_entry *ntfs_index_walk_down(struct index_entry *ie, struct ntfs_index_context *ictx) { struct index_entry *entry; + struct index_block *ib; s64 vcn; entry = ie; do { vcn = ntfs_ie_get_vcn(entry); if (ictx->is_in_root) { + ib = kvzalloc(ictx->block_size, GFP_NOFS); + if (!ib) + return ERR_PTR(-ENOMEM); /* down from level zero */ ictx->ir = NULL; - ictx->ib = kvzalloc(ictx->block_size, GFP_NOFS); + ictx->ib = ib; ictx->pindex = 1; ictx->is_in_root = false; } else { @@ -1991,8 +1995,8 @@ struct index_entry *ntfs_index_walk_down(struct index_entry *ie, struct ntfs_ind ictx->entry = ntfs_ie_get_first(&ictx->ib->index); entry = ictx->entry; } else - entry = NULL; - } while (entry && (entry->flags & INDEX_ENTRY_NODE)); + entry = ERR_PTR(-EIO); + } while (!IS_ERR(entry) && (entry->flags & INDEX_ENTRY_NODE)); return entry; } @@ -2097,10 +2101,15 @@ struct index_entry *ntfs_index_next(struct index_entry *ie, struct ntfs_index_co /* walk down if it has a subnode */ if (flags & INDEX_ENTRY_NODE) { - if (!ictx->ia_ni) + if (!ictx->ia_ni) { ictx->ia_ni = ntfs_ia_open(ictx, ictx->idx_ni); + if (!ictx->ia_ni) + return ERR_PTR(-EIO); + } next = ntfs_index_walk_down(next, ictx); + if (IS_ERR(next)) + return next; } else { /* walk up it has no subnode, nor data */ -- cgit v1.2.3 From 2dd8c1662e38f7bb68a102f1acad9b518c09aeab Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Sun, 26 Apr 2026 13:02:32 +0900 Subject: ntfs: fix WSL symlink target leak on reparse failure ntfs_reparse_set_wsl_symlink() converts the symlink target into an allocated NLS string and transfers ownership to ni->target only after ntfs_set_ntfs_reparse_data() succeeds. If setting the reparse data fails, the converted target is left unreferenced and leaks. Free the converted target on the reparse update failure path. Use kfree() for the other local failure path as well, matching the ntfs_ucstonls() allocation contract. Fixes: fc053f05ca28 ("ntfs: add reparse and ea operations") Signed-off-by: DaeMyung Kang Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/reparse.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ntfs/reparse.c b/fs/ntfs/reparse.c index 8f60ec6f66c1..74713716813f 100644 --- a/fs/ntfs/reparse.c +++ b/fs/ntfs/reparse.c @@ -505,7 +505,6 @@ int ntfs_reparse_set_wsl_symlink(struct ntfs_inode *ni, struct reparse_point *reparse; struct wsl_link_reparse_data *data; - utarget = (char *)NULL; len = ntfs_ucstonls(ni->vol, target, target_len, &utarget, 0); if (len <= 0) return -EINVAL; @@ -514,7 +513,7 @@ int ntfs_reparse_set_wsl_symlink(struct ntfs_inode *ni, reparse = kvzalloc(reparse_len, GFP_NOFS); if (!reparse) { err = -ENOMEM; - kvfree(utarget); + kfree(utarget); } else { data = (struct wsl_link_reparse_data *)reparse->reparse_data; reparse->reparse_tag = IO_REPARSE_TAG_LX_SYMLINK; @@ -528,6 +527,8 @@ int ntfs_reparse_set_wsl_symlink(struct ntfs_inode *ni, kvfree(reparse); if (!err) ni->target = utarget; + else + kfree(utarget); } return err; } -- cgit v1.2.3 From cad7c6f0a5147680dd2081256cf8da54fb445d94 Mon Sep 17 00:00:00 2001 From: Zhan Xusheng Date: Thu, 23 Apr 2026 12:52:26 +0800 Subject: ntfs: fix VCN overflow in ntfs_mapping_pairs_decompress() In ntfs_mapping_pairs_decompress(), lowest_vcn is read from on-disk metadata and used as the initial vcn without validation. A malformed value can introduce an invalid (e.g. negative) vcn, corrupting the runlist from the start. Additionally, the accumulation vcn += deltaxcn does not check for s64 overflow. A crafted mapping pairs array can wrap vcn to a negative value, breaking the monotonically- increasing invariant relied upon by ntfs_rl_vcn_to_lcn() and related helpers. Fix this by validating lowest_vcn and using check_add_overflow() for vcn accumulation. Signed-off-by: Zhan Xusheng Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/runlist.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c index b213b4976d2b..be6ca3d374bb 100644 --- a/fs/ntfs/runlist.c +++ b/fs/ntfs/runlist.c @@ -15,6 +15,8 @@ * Copyright (c) 2007-2022 Jean-Pierre Andre */ +#include + #include "ntfs.h" #include "attrib.h" @@ -739,6 +741,7 @@ struct runlist_element *ntfs_mapping_pairs_decompress(const struct ntfs_volume * int rlsize; /* Size of runlist buffer. */ u16 rlpos; /* Current runlist position in units of struct runlist_elements. */ u8 b; /* Current byte offset in buf. */ + u64 lowest_vcn; /* Raw on-disk lowest_vcn. */ #ifdef DEBUG /* Make sure attr exists and is non-resident. */ @@ -747,8 +750,14 @@ struct runlist_element *ntfs_mapping_pairs_decompress(const struct ntfs_volume * return ERR_PTR(-EINVAL); } #endif + lowest_vcn = le64_to_cpu(attr->data.non_resident.lowest_vcn); + /* Validate lowest_vcn from on-disk metadata to ensure it is sane. */ + if (overflows_type(lowest_vcn, vcn)) { + ntfs_error(vol->sb, "Invalid lowest_vcn in mapping pairs."); + goto err_out; + } /* Start at vcn = lowest_vcn and lcn 0. */ - vcn = le64_to_cpu(attr->data.non_resident.lowest_vcn); + vcn = lowest_vcn; lcn = 0; /* Get start of the mapping pairs array. */ buf = (u8 *)attr + @@ -823,8 +832,17 @@ struct runlist_element *ntfs_mapping_pairs_decompress(const struct ntfs_volume * * element. */ rl[rlpos].length = deltaxcn; - /* Increment the current vcn by the current run length. */ - vcn += deltaxcn; + /* + * Increment the current vcn by the current run length. + * Guard against s64 overflow from a crafted mapping + * pairs array to preserve the monotonically-increasing + * vcn invariant. + */ + if (unlikely(check_add_overflow(vcn, deltaxcn, &vcn))) { + ntfs_error(vol->sb, "VCN overflow in mapping pairs array."); + goto err_out; + } + /* * There might be no lcn change at all, as is the case for * sparse clusters on NTFS 3.0+, in which case we set the lcn -- cgit v1.2.3 From 785bc568161d96fdbd4326294d427a48e66fe60f Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 27 Apr 2026 22:58:52 +0900 Subject: ntfs: fix error handling in ntfs_write_iomap_end_resident() When ntfs_attr_get_search_ctx() fails and returns NULL, the function returned early without calling put_page(ipage). Fix this by jumping to err_out label on error. The err_out path now properly releases the page and the mutex, with a NULL check for the search context. Reported-by: DaeMyung Kang Signed-off-by: Namjae Jeon --- fs/ntfs/iomap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ntfs/iomap.c b/fs/ntfs/iomap.c index 74a4d3e971f4..dc7d8c893a69 100644 --- a/fs/ntfs/iomap.c +++ b/fs/ntfs/iomap.c @@ -788,8 +788,7 @@ static int ntfs_write_iomap_end_resident(struct inode *inode, loff_t pos, ctx = ntfs_attr_get_search_ctx(ni, NULL); if (!ctx) { written = -ENOMEM; - mutex_unlock(&ni->mrec_lock); - return written; + goto err_out; } err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, @@ -810,7 +809,8 @@ static int ntfs_write_iomap_end_resident(struct inode *inode, loff_t pos, memcpy(kattr + pos, iomap_inline_data(iomap, pos), written); mark_mft_record_dirty(ctx->ntfs_ino); err_out: - ntfs_attr_put_search_ctx(ctx); + if (ctx) + ntfs_attr_put_search_ctx(ctx); put_page(ipage); mutex_unlock(&ni->mrec_lock); return written; -- cgit v1.2.3 From 3f91484f6c13c434bd573ca6b6779c26adb0ddab Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Mon, 13 Apr 2026 21:49:12 +0300 Subject: USB: omap_udc: DMA: Don't enable burst 4 mode Commit 65111084c63d7 ("USB: more omap_udc updates (dma and omap1710)") added setting for DMA burst 4 mode. But I think this should be undone for two reasons: - It breaks DMA on 15xx boards - transfers just silently stall. - On newer OMAP1 boards, like Nokia 770 (omap1710), there is no measurable performance impact when testing TCP throughput with g_ether with large 15000 byte MTU size. It's also worth noting that when the original change was made, the OMAP_DMA_DATA_BURST_4 handling in arch/arm/plat-omap/dma.c was broken, and actually resulted in the same as the OMAP_DMA_DATA_BURST_DIS i.e. burst disabled. This was fixed not until a couple kernel releases later in an unrelated commit 1a8bfa1eb998a ("[ARM] 3142/1: OMAP 2/5: Update files common to omap1 and omap2"). So based on this it seems there was never really a very good reason to enable this burst mode in omap_udc, so remove it now to allow 15xx DMA to work again (it provides 2x throughput compared to PIO mode). Fixes: 65111084c63d ("[PATCH] USB: more omap_udc updates (dma and omap1710)") Cc: stable Signed-off-by: Aaro Koskinen Link: https://patch.msgid.link/ad06qHLclWHeSGnV@darkstar.musicnaut.iki.fi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/omap_udc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index 91139ae668f4..f3ca79cece1b 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -733,8 +733,6 @@ static void dma_channel_claim(struct omap_ep *ep, unsigned channel) if (status == 0) { omap_writew(reg, UDC_TXDMA_CFG); /* EMIFF or SDRC */ - omap_set_dma_src_burst_mode(ep->lch, - OMAP_DMA_DATA_BURST_4); omap_set_dma_src_data_pack(ep->lch, 1); /* TIPB */ omap_set_dma_dest_params(ep->lch, @@ -756,8 +754,6 @@ static void dma_channel_claim(struct omap_ep *ep, unsigned channel) UDC_DATA_DMA, 0, 0); /* EMIFF or SDRC */ - omap_set_dma_dest_burst_mode(ep->lch, - OMAP_DMA_DATA_BURST_4); omap_set_dma_dest_data_pack(ep->lch, 1); } } -- cgit v1.2.3 From 0b9fcab1b8608d429e5f239afb197de928d4de7d Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 7 Apr 2026 21:21:22 +0800 Subject: usb: ulpi: fix memory leak on ulpi_register() error paths Commit 01af542392b5 ("usb: ulpi: fix double free in ulpi_register_interface() error path") removed kfree(ulpi) from ulpi_register_interface() to fix a double-free when device_register() fails. But when ulpi_of_register() or ulpi_read_id() fail before device_register() is called, the ulpi allocation is leaked. Add kfree(ulpi) on both error paths to properly clean up the allocation. Fixes: 01af542392b5 ("usb: ulpi: fix double free in ulpi_register_interface() error path") Cc: stable Signed-off-by: Felix Gu Reviewed-by: Heikki Krogerus Link: https://patch.msgid.link/20260407-ulpi-v1-1-f3fafe53f7b2@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/ulpi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index b34fb65813c4..9b69148128e5 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -286,12 +286,15 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi) ACPI_COMPANION_SET(&ulpi->dev, ACPI_COMPANION(dev)); ret = ulpi_of_register(ulpi); - if (ret) + if (ret) { + kfree(ulpi); return ret; + } ret = ulpi_read_id(ulpi); if (ret) { of_node_put(ulpi->dev.of_node); + kfree(ulpi); return ret; } -- cgit v1.2.3 From 2909f0d4994fb4306bf116df5ccee797791fce2c Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Tue, 14 Apr 2026 00:58:32 +0000 Subject: usb: typec: tcpm: reset internal port states on soft reset AMS Reset internal port states (such as vdm_sm_running and explicit_contract) on soft reset AMS as the port needs to negotiate a new contract. The consequence of leaving the states in as-is cond are as follows: * port is in SRC power role and an explicit contract is negotiated with the port partner (in sink role) * port partner sends a Soft Reset AMS while VDM State Machine is running * port accepts the Soft Reset request and the port advertises src caps * port partner sends a Request message but since the explicit_contract and vdm_sm_running are true from previous negotiation, the port ends up sending Soft Reset instead of Accept msg. Stub Log: [ 203.653942] AMS DISCOVER_IDENTITY start [ 203.653947] PD TX, header: 0x176f [ 203.655901] PD TX complete, status: 0 [ 203.657470] PD RX, header: 0x124f [1] [ 203.657477] Rx VDM cmd 0xff008081 type 2 cmd 1 len 1 [ 203.657482] AMS DISCOVER_IDENTITY finished [ 203.657484] cc:=4 [ 204.155698] PD RX, header: 0x144f [1] [ 204.155718] Rx VDM cmd 0xeeee8001 type 0 cmd 1 len 1 [ 204.155741] PD TX, header: 0x196f [ 204.157622] PD TX complete, status: 0 [ 204.160060] PD RX, header: 0x4d [1] [ 204.160066] state change SRC_READY -> SOFT_RESET [rev2 SOFT_RESET_AMS] [ 204.160076] PD TX, header: 0x163 [ 204.162486] PD TX complete, status: 0 [ 204.162832] AMS SOFT_RESET_AMS finished [ 204.162840] cc:=4 [ 204.162891] AMS POWER_NEGOTIATION start [ 204.162896] state change SOFT_RESET -> AMS_START [rev2 POWER_NEGOTIATION] [ 204.162908] state change AMS_START -> SRC_SEND_CAPABILITIES [rev2 POWER_NEGOTIATION] [ 204.162913] PD TX, header: 0x1361 [ 204.165529] PD TX complete, status: 0 [ 204.165571] pending state change SRC_SEND_CAPABILITIES -> SRC_SEND_CAPABILITIES_TIMEOUT @ 60 ms [rev2 POWER_NEGOTIATION] [ 204.166996] PD RX, header: 0x1242 [1] [ 204.167009] state change SRC_SEND_CAPABILITIES -> SRC_SOFT_RESET_WAIT_SNK_TX [rev2 POWER_NEGOTIATION] [ 204.167019] AMS POWER_NEGOTIATION finished [ 204.167020] cc:=4 [ 204.167083] AMS SOFT_RESET_AMS start [ 204.167086] state change SRC_SOFT_RESET_WAIT_SNK_TX -> SOFT_RESET_SEND [rev2 SOFT_RESET_AMS] [ 204.167092] PD TX, header: 0x16d [ 204.168824] PD TX complete, status: 0 [ 204.168854] pending state change SOFT_RESET_SEND -> HARD_RESET_SEND @ 60 ms [rev2 SOFT_RESET_AMS] [ 204.171876] PD RX, header: 0x43 [1] [ 204.171879] AMS SOFT_RESET_AMS finished This causes COMMON.PROC.PD.11.2 check failure for TEST.PD.VDM.SRC.2_Rev2Src test on the PD compliance tester. Signed-off-by: Amit Sunil Dhamne Fixes: 8d3a0578ad1a ("usb: typec: tcpm: Respond Wait if VDM state machine is running") Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)") Cc: stable Reviewed-by: Badhri Jagan Sridharan Acked-by: Heikki Krogerus Link: https://patch.msgid.link/20260414-fix-soft-reset-v1-1-01d7cb9764e2@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index dfbb94ddc98a..cd5560d6f19e 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5935,6 +5935,8 @@ static void run_state_machine(struct tcpm_port *port) /* remove existing capabilities */ tcpm_partner_source_caps_reset(port); tcpm_pd_send_control(port, PD_CTRL_ACCEPT, TCPC_TX_SOP); + port->vdm_sm_running = false; + port->explicit_contract = false; tcpm_ams_finish(port); if (port->pwr_role == TYPEC_SOURCE) { port->upcoming_state = SRC_SEND_CAPABILITIES; -- cgit v1.2.3 From f6ec9bb4acc7182b25a793ad094a764e1cb819a7 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 24 Apr 2026 15:40:09 +0800 Subject: usb: typec: tcpm: fix debug accessory mode detection for sink ports The port in debug accessory mode can be either a source or sink. The previous tcpm_port_is_debug() function only checked for source port. Commit 8db73e6a42b6 ("usb: typec: tcpm: allow sink (ufp) to toggle into accessory mode debug") changed the detection logic to support both roles, but left some logic in _tcpm_cc_change() unchanged, This causes the state machine to transition to an incorrect state when operating as a sink in debug accessory mode. Log as below: [ 978.637541] CC1: 0 -> 5, CC2: 0 -> 5 [state TOGGLING, polarity 0, connected] [ 978.637567] state change TOGGLING -> SRC_ATTACH_WAIT [rev1 NONE_AMS] [ 978.637596] pending state change SRC_ATTACH_WAIT -> DEBUG_ACC_ATTACHED @ 180 ms [rev1 NONE_AMS] [ 978.647098] CC1: 5 -> 0, CC2: 5 -> 5 [state SRC_ATTACH_WAIT, polarity 0, connected] [ 978.647115] state change SRC_ATTACH_WAIT -> SRC_ATTACH_WAIT [rev1 NONE_AMS] It should go to SNK_ATTACH_WAIT instead of SRC_ATTACH_WAIT state. To fix this, add tcpm_port_is_debug_source() and tcpm_port_is_debug_sink() helper to explicitly identify the power mode in debug accessory mode. Update the state transition logic in _tcpm_cc_change() to ensure the state machine transitions comply with Type-C specification. Also update the logic in run_state_machine() to keep consistency. Fixes: 8db73e6a42b6 ("usb: typec: tcpm: allow sink (ufp) to toggle into accessory mode debug") Cc: stable Signed-off-by: Xu Yang Acked-by: Heikki Krogerus Reviewed-by: Amit Sunil Dhamne Link: https://patch.msgid.link/20260424074009.2979266-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index cd5560d6f19e..55fee96d3342 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -732,9 +732,14 @@ static const char * const pd_rev[] = { (tcpm_cc_is_source((port)->cc2) && \ !tcpm_cc_is_source((port)->cc1))) +#define tcpm_port_is_debug_source(port) \ + (tcpm_cc_is_source((port)->cc1) && tcpm_cc_is_source((port)->cc2)) + +#define tcpm_port_is_debug_sink(port) \ + (tcpm_cc_is_sink((port)->cc1) && tcpm_cc_is_sink((port)->cc2)) + #define tcpm_port_is_debug(port) \ - ((tcpm_cc_is_source((port)->cc1) && tcpm_cc_is_source((port)->cc2)) || \ - (tcpm_cc_is_sink((port)->cc1) && tcpm_cc_is_sink((port)->cc2))) + (tcpm_port_is_debug_source(port) || tcpm_port_is_debug_sink(port)) #define tcpm_port_is_audio(port) \ (tcpm_cc_is_audio((port)->cc1) && tcpm_cc_is_audio((port)->cc2)) @@ -5176,7 +5181,7 @@ static void run_state_machine(struct tcpm_port *port) tcpm_set_state(port, SNK_UNATTACHED, PD_T_DRP_SNK); break; case SRC_ATTACH_WAIT: - if (tcpm_port_is_debug(port)) + if (tcpm_port_is_debug_source(port)) tcpm_set_state(port, DEBUG_ACC_ATTACHED, port->timings.cc_debounce_time); else if (tcpm_port_is_audio(port)) @@ -5434,7 +5439,7 @@ static void run_state_machine(struct tcpm_port *port) tcpm_set_state(port, SRC_UNATTACHED, PD_T_DRP_SRC); break; case SNK_ATTACH_WAIT: - if (tcpm_port_is_debug(port)) + if (tcpm_port_is_debug_sink(port)) tcpm_set_state(port, DEBUG_ACC_ATTACHED, PD_T_CC_DEBOUNCE); else if (tcpm_port_is_audio(port)) @@ -5454,7 +5459,7 @@ static void run_state_machine(struct tcpm_port *port) if (tcpm_port_is_disconnected(port)) tcpm_set_state(port, SNK_UNATTACHED, PD_T_PD_DEBOUNCE); - else if (tcpm_port_is_debug(port)) + else if (tcpm_port_is_debug_sink(port)) tcpm_set_state(port, DEBUG_ACC_ATTACHED, PD_T_CC_DEBOUNCE); else if (tcpm_port_is_audio(port)) @@ -6362,10 +6367,10 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, switch (port->state) { case TOGGLING: - if (tcpm_port_is_debug(port) || tcpm_port_is_audio(port) || + if (tcpm_port_is_debug_source(port) || tcpm_port_is_audio(port) || tcpm_port_is_source(port)) tcpm_set_state(port, SRC_ATTACH_WAIT, 0); - else if (tcpm_port_is_sink(port)) + else if (tcpm_port_is_debug_sink(port) || tcpm_port_is_sink(port)) tcpm_set_state(port, SNK_ATTACH_WAIT, 0); break; case CHECK_CONTAMINANT: @@ -6373,9 +6378,11 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, break; case SRC_UNATTACHED: case ACC_UNATTACHED: - if (tcpm_port_is_debug(port) || tcpm_port_is_audio(port) || + if (tcpm_port_is_debug_source(port) || tcpm_port_is_audio(port) || tcpm_port_is_source(port)) tcpm_set_state(port, SRC_ATTACH_WAIT, 0); + else if (tcpm_port_is_debug_sink(port)) + tcpm_set_state(port, SNK_ATTACH_WAIT, 0); break; case SRC_ATTACH_WAIT: if (tcpm_port_is_disconnected(port) || @@ -6397,7 +6404,7 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, } break; case SNK_UNATTACHED: - if (tcpm_port_is_debug(port) || tcpm_port_is_audio(port) || + if (tcpm_port_is_debug_sink(port) || tcpm_port_is_audio(port) || tcpm_port_is_sink(port)) tcpm_set_state(port, SNK_ATTACH_WAIT, 0); break; -- cgit v1.2.3 From aad35f9c926ec220b0742af1ada45666ae667956 Mon Sep 17 00:00:00 2001 From: Selvarasu Ganesan Date: Fri, 17 Apr 2026 12:03:11 +0530 Subject: usb: dwc3: Move GUID programming after PHY initialization The Linux Version Code is currently written to the GUID register before PHY initialization. Certain PHY implementations (such as Synopsys eUSB PHY performing link_sw_reset) clear the GUID register to its default value during initialization, causing the kernel version information to be lost. Move the GUID register programming to occur after PHY initialization completes to ensure the Linux version information persists. Fixes: fa0ea13e9f1c ("usb: dwc3: core: write LINUX_VERSION_CODE to our GUID register") Cc: stable Reported-by: Pritam Manohar Sutar Signed-off-by: Selvarasu Ganesan Acked-by: Thinh Nguyen Link: https://patch.msgid.link/20260417063314.2359-1-selvarasu.g@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 58899b1fa96d..65213896de99 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1359,12 +1359,6 @@ int dwc3_core_init(struct dwc3 *dwc) hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0); - /* - * Write Linux Version Code to our GUID register so it's easy to figure - * out which kernel version a bug was found. - */ - dwc3_writel(dwc, DWC3_GUID, LINUX_VERSION_CODE); - ret = dwc3_phy_setup(dwc); if (ret) return ret; @@ -1398,6 +1392,12 @@ int dwc3_core_init(struct dwc3 *dwc) if (ret) goto err_exit_phy; + /* + * Write Linux Version Code to our GUID register so it's easy to figure + * out which kernel version a bug was found. + */ + dwc3_writel(dwc, DWC3_GUID, LINUX_VERSION_CODE); + dwc3_core_setup_global_control(dwc); dwc3_core_num_eps(dwc); -- cgit v1.2.3 From 7a400c6fe3617e31e690e3f7ca37bb335e0498f3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 20 Apr 2026 18:11:03 +0200 Subject: usb: usblp: fix heap leak in IEEE 1284 device ID via short response usblp_ctrl_msg() collapses the usb_control_msg() return value to 0/-errno, discarding the actual number of bytes transferred. A broken printer can complete the GET_DEVICE_ID control transfer short and the driver has no way to know. usblp_cache_device_id_string() reads the 2-byte big-endian length prefix from the response and trusts it (clamped only to the buffer bounds). The buffer is kmalloc(1024) at probe time. A device that sends exactly two bytes (e.g. 0x03 0xFF, claiming a 1023-byte ID) leaves device_id_string[2..1022] holding stale kmalloc heap. That stale data is then exposed: - via the ieee1284_id sysfs attribute (sprintf("%s", buf+2), truncated at the first NUL in the stale heap), and - via the IOCNR_GET_DEVICE_ID ioctl, which copy_to_user()s the full claimed length regardless of NULs, up to 1021 bytes of uninitialized heap, with the leak size chosen by the device. Fix this up by just zapping the buffer with zeros before each request sent to the device. Cc: Pete Zaitcev Assisted-by: gkh_clanker_t1000 Cc: stable Link: https://patch.msgid.link/2026042002-unicorn-greedily-3c63@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 669b9e6879bf..e9b848622a3a 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -1377,6 +1377,7 @@ static int usblp_cache_device_id_string(struct usblp *usblp) { int err, length; + memset(usblp->device_id_string, 0, USBLP_DEVICE_ID_SIZE); err = usblp_get_id(usblp, 0, usblp->device_id_string, USBLP_DEVICE_ID_SIZE - 1); if (err < 0) { dev_dbg(&usblp->intf->dev, -- cgit v1.2.3 From b38e53cbfb9d84732e5984fbd73e128d592415c5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 20 Apr 2026 18:11:04 +0200 Subject: usb: usblp: fix uninitialized heap leak via LPGETSTATUS ioctl Just like in a previous problem in this driver, usblp_ctrl_msg() will collapse the usb_control_msg() return value to 0/-errno, discarding the actual number of bytes transferred. Ideally that short command should be detected and error out, but many printers are known to send "incorrect" responses back so we can't just do that. statusbuf is kmalloc(8) at probe time and never filled before the first LPGETSTATUS ioctl. usblp_read_status() requests 1 byte. If a malicious printer responds with zero bytes, *statusbuf is one byte of stale kmalloc heap, sign-extended into the local int status, which the LPGETSTATUS path then copy_to_user()s directly to the ioctl caller. Fix this all by just zapping out the memory buffer when allocated at probe time. If a later call does a short read, the data will be identical to what the device sent it the last time, so there is no "leak" of information happening. Cc: Pete Zaitcev Assisted-by: gkh_clanker_t1000 Cc: stable Link: https://patch.msgid.link/2026042011-shredder-savage-48c6@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index e9b848622a3a..746414763da5 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -1178,7 +1178,7 @@ static int usblp_probe(struct usb_interface *intf, } /* Allocate buffer for printer status */ - usblp->statusbuf = kmalloc(STATUS_BUF_SIZE, GFP_KERNEL); + usblp->statusbuf = kzalloc(STATUS_BUF_SIZE, GFP_KERNEL); if (!usblp->statusbuf) { retval = -ENOMEM; goto abort; -- cgit v1.2.3 From 74f192205c48333de054620a79d7ce9f4515fb0b Mon Sep 17 00:00:00 2001 From: Sarthak Sharma Date: Mon, 27 Apr 2026 16:54:47 +0530 Subject: selftests: kselftest: fix wrong test number in ksft_exit_skip ksft_exit_skip() increments ksft_xskip before printing the KTAP result. As a result, ksft_test_num() already includes the skipped test. Adding 1 to ksft_test_num() increments the printed test number again, producing an incorrect test number and wrong KTAP output. Drop the extra increment and print ksft_test_num() directly. Link: https://lore.kernel.org/r/20260427112447.147985-1-sarthak.sharma@arm.com Fixes: b85d387c9b09 ("kselftest: fix TAP output for skipped tests") Signed-off-by: Sarthak Sharma Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 6d809f08ab7b..60838b61a2da 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -450,7 +450,7 @@ static inline __noreturn __printf(1, 2) void ksft_exit_skip(const char *msg, ... */ if (ksft_plan || ksft_test_num()) { ksft_cnt.ksft_xskip++; - printf("ok %u # SKIP ", 1 + ksft_test_num()); + printf("ok %u # SKIP ", ksft_test_num()); } else { printf("1..0 # SKIP "); } -- cgit v1.2.3 From 465b05bae5ac553c13315681c1490dc565337771 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 22 Apr 2026 14:32:33 +0200 Subject: selftests: harness: Restore order of test functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recent addition of explicit constructor orders for fixture tests broke the ordering of those relative to non-fixture tests and the reverse-constructor-order detection. Restore the ordering of the test functions relative to each other by using the same explicit test order for all test registrations and __constructor_order_first(). Rename the constant, as it is not specific to TEST_F() anymore. Link: https://lore.kernel.org/r/20260422-kselftests-harness-order-v2-1-93ea980ea3ac@linutronix.de Fixes: 6be268151426 ("selftests/harness: order TEST_F and XFAIL_ADD constructors") Signed-off-by: Thomas Weißschuh Reviewed-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/kselftest_harness.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 75fb016cd190..cfdce9cd252e 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -76,7 +76,7 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n) memset(s, c, n); } -#define KSELFTEST_PRIO_TEST_F 20000 +#define KSELFTEST_PRIO_TEST 20000 #define KSELFTEST_PRIO_XFAIL 20001 #define TEST_TIMEOUT_DEFAULT 30 @@ -194,7 +194,7 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n) .fixture = &_fixture_global, \ .termsig = _signal, \ .timeout = TEST_TIMEOUT_DEFAULT, }; \ - static void __attribute__((constructor)) _register_##test_name(void) \ + static void __attribute__((constructor(KSELFTEST_PRIO_TEST))) _register_##test_name(void) \ { \ __register_test(&_##test_name##_object); \ } \ @@ -238,7 +238,7 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n) FIXTURE_VARIANT(fixture_name); \ static struct __fixture_metadata _##fixture_name##_fixture_object = \ { .name = #fixture_name, }; \ - static void __attribute__((constructor)) \ + static void __attribute__((constructor(KSELFTEST_PRIO_TEST))) \ _register_##fixture_name##_data(void) \ { \ __register_fixture(&_##fixture_name##_fixture_object); \ @@ -364,7 +364,7 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n) _##fixture_name##_##variant_name##_object = \ { .name = #variant_name, \ .data = &_##fixture_name##_##variant_name##_variant}; \ - static void __attribute__((constructor)) \ + static void __attribute__((constructor(KSELFTEST_PRIO_TEST))) \ _register_##fixture_name##_##variant_name(void) \ { \ __register_fixture_variant(&_##fixture_name##_fixture_object, \ @@ -468,7 +468,7 @@ static inline void __kselftest_memset_safe(void *s, int c, size_t n) fixture_name##_teardown(_metadata, self, variant); \ } \ static struct __test_metadata *_##fixture_name##_##test_name##_object; \ - static void __attribute__((constructor(KSELFTEST_PRIO_TEST_F))) \ + static void __attribute__((constructor(KSELFTEST_PRIO_TEST))) \ _register_##fixture_name##_##test_name(void) \ { \ struct __test_metadata *object = mmap(NULL, sizeof(*object), \ @@ -1323,7 +1323,7 @@ static int test_harness_run(int argc, char **argv) return KSFT_FAIL; } -static void __attribute__((constructor)) __constructor_order_first(void) +static void __attribute__((constructor(KSELFTEST_PRIO_TEST))) __constructor_order_first(void) { __constructor_order_forward = true; } -- cgit v1.2.3 From 981cd338614c96070cf9854679014fd027c1fb1d Mon Sep 17 00:00:00 2001 From: Petr Vaněk Date: Sat, 25 Apr 2026 10:03:54 +0200 Subject: docs: cgroup: fix typo 'protetion' -> 'protection' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a small typo in the description of the memory_hugetlb_accounting mount option. Signed-off-by: Petr Vaněk Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 8ad0b2781317..6efd0095ed99 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -220,7 +220,7 @@ cgroup v2 currently supports the following mount options. memory_hugetlb_accounting Count HugeTLB memory usage towards the cgroup's overall memory usage for the memory controller (for the purpose of - statistics reporting and memory protetion). This is a new + statistics reporting and memory protection). This is a new behavior that could regress existing setups, so it must be explicitly opted in with this mount option. -- cgit v1.2.3 From 3e256d4c40742e98132c0ef830b8cad4d50502d0 Mon Sep 17 00:00:00 2001 From: Jai Luthra Date: Mon, 20 Apr 2026 18:48:07 +0530 Subject: riscv: dts: starfive: jh7110: Drop CAMSS node The starfive-camss driver and bindings were dropped, as they were no longer being worked upon for destaging. Drop the relevant node as well to avoid the following build warning: "failed to match any schema with compatible: ['starfive,jh7110-camss']" Reported-by: Conor Dooley Closes: https://lore.kernel.org/all/20260420-very-cartel-645595ffd1c7@spud/ Signed-off-by: Jai Luthra Reviewed-by: Changhuang Liang Reviewed-by: Laurent Pinchart Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/starfive/jh7110-common.dtsi | 27 +----------------------- arch/riscv/boot/dts/starfive/jh7110.dtsi | 28 ------------------------- 2 files changed, 1 insertion(+), 54 deletions(-) diff --git a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi index 8cfe8033305d..a7a1c09a2c90 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi @@ -135,29 +135,6 @@ clock-frequency = <49152000>; }; -&camss { - assigned-clocks = <&ispcrg JH7110_ISPCLK_DOM4_APB_FUNC>, - <&ispcrg JH7110_ISPCLK_MIPI_RX0_PXL>; - assigned-clock-rates = <49500000>, <198000000>; - - ports { - #address-cells = <1>; - #size-cells = <0>; - - port@0 { - reg = <0>; - }; - - port@1 { - reg = <1>; - - camss_from_csi2rx: endpoint { - remote-endpoint = <&csi2rx_to_camss>; - }; - }; - }; -}; - &csi2rx { assigned-clocks = <&ispcrg JH7110_ISPCLK_VIN_SYS>; assigned-clock-rates = <297000000>; @@ -175,9 +152,7 @@ port@1 { reg = <1>; - csi2rx_to_camss: endpoint { - remote-endpoint = <&camss_from_csi2rx>; - }; + /* remote CAMSS endpoint */ }; }; }; diff --git a/arch/riscv/boot/dts/starfive/jh7110.dtsi b/arch/riscv/boot/dts/starfive/jh7110.dtsi index 6e56e9d20bb0..9c3e4598747e 100644 --- a/arch/riscv/boot/dts/starfive/jh7110.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110.dtsi @@ -1199,34 +1199,6 @@ #phy-cells = <0>; }; - camss: isp@19840000 { - compatible = "starfive,jh7110-camss"; - reg = <0x0 0x19840000 0x0 0x10000>, - <0x0 0x19870000 0x0 0x30000>; - reg-names = "syscon", "isp"; - clocks = <&ispcrg JH7110_ISPCLK_DOM4_APB_FUNC>, - <&ispcrg JH7110_ISPCLK_ISPV2_TOP_WRAPPER_C>, - <&ispcrg JH7110_ISPCLK_DVP_INV>, - <&ispcrg JH7110_ISPCLK_VIN_P_AXI_WR>, - <&ispcrg JH7110_ISPCLK_MIPI_RX0_PXL>, - <&syscrg JH7110_SYSCLK_ISP_TOP_CORE>, - <&syscrg JH7110_SYSCLK_ISP_TOP_AXI>; - clock-names = "apb_func", "wrapper_clk_c", "dvp_inv", - "axiwr", "mipi_rx0_pxl", "ispcore_2x", - "isp_axi"; - resets = <&ispcrg JH7110_ISPRST_ISPV2_TOP_WRAPPER_P>, - <&ispcrg JH7110_ISPRST_ISPV2_TOP_WRAPPER_C>, - <&ispcrg JH7110_ISPRST_VIN_P_AXI_RD>, - <&ispcrg JH7110_ISPRST_VIN_P_AXI_WR>, - <&syscrg JH7110_SYSRST_ISP_TOP>, - <&syscrg JH7110_SYSRST_ISP_TOP_AXI>; - reset-names = "wrapper_p", "wrapper_c", "axird", - "axiwr", "isp_top_n", "isp_top_axi"; - power-domains = <&pwrc JH7110_PD_ISP>; - interrupts = <92>, <87>, <90>, <88>; - status = "disabled"; - }; - voutcrg: clock-controller@295c0000 { compatible = "starfive,jh7110-voutcrg"; reg = <0x0 0x295c0000 0x0 0x10000>; -- cgit v1.2.3 From 0df8aa2b9aec5cd21e8c71d9cc1227e57bea43b3 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 20 Apr 2026 12:14:31 +0100 Subject: riscv: dts: microchip: fix icicle i2c pinctrl configuration Unfortunately, an erratum with engineering sample that I was not aware of was exposed by adding pinctrl configuration to the icicle kit. When routed to MSS IOs, i2c signals are never anything other than tied low. Being an FPGA, a Libero workaround for this problem was created, that involves routing i2c signals to the FPGA fabric when the MSS IO option is selected in the configurator and then back to the intended pin using the debug "fabric test" capability. This is invisible to user facing information in the tooling and not mentioned in reference designs documentation. It manifests solely in the .xml output from the MSS configuration that the HSS firmware uses to configure the device, which Linux now overwrites using the pinctrl information. As a result, I never noticed this. My original submission had the engineering sample configuration, but I modified it on application after I was told it didn't work, not realising that the report came from a colleague with a production device, where the erratum was fixed and the workaround not automatically implemented by Libero when creating a design. Move this part of the pinctrl configuration out of the shared portion of the icicle device trees, into the portions that are specific to engineering sample and production devices so that the different settings for i2c pins can be dealt with. Although the reference design only has this workaround in place for i2c1, as i2c0 is genuinely fabric routed, move it too since the erratum affects both controllers. Link: https://ww1.microchip.com/downloads/aemDocuments/documents/FPGA/ProductDocuments/Errata/polarfiresoc/microsemi_polarfire_soc_fpga_egineering_samples_errata_er0219_v1.pdf [3.3] Fixes: 123f4276b521a ("riscv: dts: microchip: add pinctrl nodes for mpfs/icicle kit") Signed-off-by: Conor Dooley --- .../boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi | 10 ---------- .../riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts | 10 ++++++++++ arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts | 19 +++++++++++++++++++ 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi index 2d14e92f068d..9078e5b1e49c 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-fabric.dtsi @@ -101,16 +101,6 @@ status = "okay"; }; -&i2c0 { - pinctrl-names = "default"; - pinctrl-0 = <&i2c0_fabric>; -}; - -&i2c1 { - pinctrl-names = "default"; - pinctrl-0 = <&i2c1_mssio>; -}; - &mmuart1 { pinctrl-names = "default"; pinctrl-0 = <&uart1_fabric>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts index 8afedece89d1..636493f6584d 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit-prod.dts @@ -14,6 +14,16 @@ "microchip,mpfs"; }; +&i2c0 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c0_fabric>; +}; + +&i2c1 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c1_mssio>; +}; + &syscontroller { microchip,bitstream-flash = <&sys_ctrl_flash>; }; diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts index 556aa9638282..6fadce815c9a 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts @@ -11,3 +11,22 @@ "microchip,mpfs-icicle-kit", "microchip,mpfs"; }; + +&i2c0 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c0_fabric>; +}; + +/* + * Due to silicon errata, routing via MSS IOs doesn't work on ES devices. + * Instead, i2c1, appearing on B1/C1, which are normally MSS IOs, is routed + * via the fabric and back to B1/C1 via "fabric-test" functionality. + * This is done silently by Libero, so the iomux0 setting for i2c1 has to + * be fabric IO, despite tooling etc saying that MSS IOs are used. + * + * See Section 3.3 of https://ww1.microchip.com/downloads/aemDocuments/documents/FPGA/ProductDocuments/Errata/polarfiresoc/microsemi_polarfire_soc_fpga_egineering_samples_errata_er0219_v1.pdf + */ +&i2c1 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c1_fabric>; +}; -- cgit v1.2.3 From 1f6008538384453eb4c13a3d7ff9e37ee8aee6b9 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 21 Apr 2026 08:02:15 -0700 Subject: ACPICA: Provide #defines for EINJV2 error types EINJV2 defined new error types by moving the severity (correctable, uncorrectable non-fatal, uncorrectable fatal) out of the "type". ACPI 6.5 introduced EINJV2 and defined a vendor defined error type using bit 31. This was dropped in ACPI 6.6. Link: https://github.com/acpica/acpica/commit/e82d2d2fd145 Signed-off-by: Tony Luck Link: https://patch.msgid.link/20260421150216.11666-2-tony.luck@intel.com Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl1.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 4e15583e0d25..f72e00517eb3 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1386,6 +1386,12 @@ enum acpi_einj_command_status { #define ACPI_EINJ_CXL_MEM_FATAL (1<<17) #define ACPI_EINJ_VENDOR_DEFINED (1<<31) +/* EINJV2 error types from EINJV2_GET_ERROR_TYPE (ACPI 6.6) */ + +#define ACPI_EINJV2_PROCESSOR (1) +#define ACPI_EINJV2_MEMORY (1<<1) +#define ACPI_EINJV2_PCIE (1<<2) + /******************************************************************************* * * ERST - Error Record Serialization Table (ACPI 4.0) -- cgit v1.2.3 From 0c00cfbcfcffa7085e4f0c7fd7a4caada4e7a90f Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 21 Apr 2026 08:02:16 -0700 Subject: ACPI: APEI: EINJ: Fix EINJV2 memory error injection Error types in EINJV2 use different bit positions for each flavor of injection from legacy EINJ. Two issues: 1) The address sanity checks in einj_error_inject() were skipped for EINJV2 injections. Noted by sashiko[1] 2) __einj_error_trigger() failed to drop the entry of the target physical address from the list of resources that need to be requested. Add a helper function that checks if an injection is to memory and use it to solve each of these issues. Note that the old test in __einj_error_trigger() checked that param2 was not zero. This isn't needed because the sanity checks in einj_error_inject() reject memory injections with param2 == 0. Fixes: b47610296d17 ("ACPI: APEI: EINJ: Enable EINJv2 error injections") Reported-by: sashiko Reported-by: Herman Li Signed-off-by: Tony Luck Tested-by: "Lai, Yi1" Link: https://sashiko.dev/#/patchset/20260415163620.12957-1-tony.luck%40intel.com # [1] Reviewed-by: Jiaqi Yan Reviewed-by: Zaid Alali Link: https://patch.msgid.link/20260421150216.11666-3-tony.luck@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/einj-core.c | 45 ++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index a9248af078f6..1f3fa2278584 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -401,8 +401,18 @@ static struct acpi_generic_address *einj_get_trigger_parameter_region( return NULL; } + +static bool is_memory_injection(u32 type, u32 flags) +{ + if (flags & SETWA_FLAGS_EINJV2) + return !!(type & ACPI_EINJV2_MEMORY); + if (type & ACPI5_VENDOR_BIT) + return !!(vendor_flags & SETWA_FLAGS_MEM); + return !!(type & MEM_ERROR_MASK) || !!(flags & SETWA_FLAGS_MEM); +} + /* Execute instructions in trigger error action table */ -static int __einj_error_trigger(u64 trigger_paddr, u32 type, +static int __einj_error_trigger(u64 trigger_paddr, u32 type, u32 flags, u64 param1, u64 param2) { struct acpi_einj_trigger trigger_tab; @@ -480,7 +490,7 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type, * This will cause resource conflict with regular memory. So * remove it from trigger table resources. */ - if ((param_extension || acpi5) && (type & MEM_ERROR_MASK) && param2) { + if ((param_extension || acpi5) && is_memory_injection(type, flags)) { struct apei_resources addr_resources; apei_resources_init(&addr_resources); @@ -660,7 +670,7 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, return rc; trigger_paddr = apei_exec_ctx_get_output(&ctx); if (notrigger == 0) { - rc = __einj_error_trigger(trigger_paddr, type, param1, param2); + rc = __einj_error_trigger(trigger_paddr, type, flags, param1, param2); if (rc) return rc; } @@ -718,35 +728,30 @@ int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3, SETWA_FLAGS_PCIE_SBDF | SETWA_FLAGS_EINJV2))) return -EINVAL; + /* + * Injections targeting a CXL 1.0/1.1 port have to be injected + * via the einj_cxl_rch_error_inject() path as that does the proper + * validation of the given RCRB base (MMIO) address. + */ + if (einj_is_cxl_error_type(type) && (flags & SETWA_FLAGS_MEM)) + return -EINVAL; + /* check if type is a valid EINJv2 error type */ if (is_v2) { if (!(type & available_error_type_v2)) return -EINVAL; } - /* - * We need extra sanity checks for memory errors. - * Other types leap directly to injection. - */ /* ensure param1/param2 existed */ if (!(param_extension || acpi5)) goto inject; - /* ensure injection is memory related */ - if (type & ACPI5_VENDOR_BIT) { - if (vendor_flags != SETWA_FLAGS_MEM) - goto inject; - } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) { - goto inject; - } - /* - * Injections targeting a CXL 1.0/1.1 port have to be injected - * via the einj_cxl_rch_error_inject() path as that does the proper - * validation of the given RCRB base (MMIO) address. + * We need extra sanity checks for memory errors. + * Other types leap directly to injection. */ - if (einj_is_cxl_error_type(type) && (flags & SETWA_FLAGS_MEM)) - return -EINVAL; + if (!is_memory_injection(type, flags)) + goto inject; /* * Disallow crazy address masks that give BIOS leeway to pick -- cgit v1.2.3 From 75141a770f4f8225d316f6c7e146723a32e9720e Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Fri, 17 Apr 2026 12:01:12 +0800 Subject: ACPI: CPPC: Fix related_cpus inconsistency during CPU hotplug When concurrently bringing up and down two SMT threads of a physical core, many warning call traces occur as below: The issue timeline is as follows: 1. When the system starts, cpufreq: CPU: 220, policy->related_cpus: 220-221, policy->cpus: 220-221 2. Offline CPU 220 and CPU 221. 3. Online CPU 220 - CPU 221 is now offline, as acpi_get_psd_map() use for_each_online_cpu(), so the cpu_data->shared_cpu_map, policy->cpus, and related_cpus has only CPU 220. cpufreq: CPU: 220, policy->related_cpus: 220, policy->cpus: 220 4. Offline CPU 220 5. Online CPU 221, the below call trace occurs: - Since CPU 220 and CPU 221 share one policy, and policy->related_cpus = 220 after step 3, so CPU 221 is not in policy->related_cpus but per_cpu(cpufreq_cpu_data, cpu221) is not NULL. After reverting commit 56eb0c0ed345 ("ACPI: CPPC: Fix remaining for_each_possible_cpu() to use online CPUs"), the issue disappeared. The _PSD (P-State Dependency) defines the hardware-level dependency of frequency control across CPU cores. Since this relationship is a physical attribute of the hardware topology, it remains constant regardless of the online or offline status of the CPUs. Using for_each_online_cpu() in acpi_get_psd_map() is problematic. If a CPU is offline, it will be excluded from the shared_cpu_map. Consequently, if that CPU is brought online later, the kernel will fail to recognize it as part of any shared frequency domain. Switch back to for_each_possible_cpu() to ensure that all cores defined in the ACPI tables are correctly mapped into their respective performance domains from the start. This aligns with the logic of policy->related_cpus, which must encompass all potentially available cores in the domain to prevent logic gaps during CPU hotplug operations. To resolve the original issue regarding the "nosmt" or "nosmt=force" boot parameter, as send_pcc_cmd() function already does if (!desc) continue, so reverting that loop back to for_each_possible_cpu() is ok, only need to change the match_cpc_ptr NULL case in acpi_get_psd_map() to continue as Sean suggested. How to reproduce, on arm64 machine with SMT support which use acpi cppc cpufreq driver: bash test.sh 220 & bash test.sh 221 & The test.sh is as below: while true do echo 0 > /sys/devices/system/cpu/cpu${1}/online sleep 0.5 cat /sys/devices/system/cpu/cpu${1}/cpufreq/related_cpus echo 1 > /sys/devices/system/cpu/cpu${1}/online cat /sys/devices/system/cpu/cpu${1}/cpufreq/related_cpus done CPU: 221 PID: 1119 Comm: cpuhp/221 Kdump: loaded Not tainted 6.6.0debug+ #5 Hardware name: To be filled by O.E.M. S920X20/BC83AMDA01-7270Z, BIOS 20.39 09/04/2024 pstate: a1400009 (NzCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) pc : cpufreq_online+0x8ac/0xa90 lr : cpuhp_cpufreq_online+0x18/0x30 sp : ffff80008739bce0 x29: ffff80008739bce0 x28: 0000000000000000 x27: ffff28400ca32200 x26: 0000000000000000 x25: 0000000000000003 x24: ffffd483503ff000 x23: ffffd483504051a0 x22: ffffd48350024a00 x21: 00000000000000dd x20: 000000000000001d x19: ffff28400ca32000 x18: 0000000000000000 x17: 0000000000000020 x16: ffffd4834e6a3fc8 x15: 0000000000000020 x14: 0000000000000008 x13: 0000000000000001 x12: 00000000ffffffff x11: 0000000000000040 x10: ffffd48350430728 x9 : ffffd4834f087c78 x8 : 0000000000000001 x7 : ffff2840092bdf00 x6 : ffffd483504264f0 x5 : ffffd48350405000 x4 : ffff283f7f95cc60 x3 : 0000000000000000 x2 : ffff53bc2f94b000 x1 : 00000000000000dd x0 : 0000000000000000 Call trace: cpufreq_online+0x8ac/0xa90 cpuhp_cpufreq_online+0x18/0x30 cpuhp_invoke_callback+0x128/0x580 cpuhp_thread_fun+0x110/0x1b0 smpboot_thread_fn+0x140/0x190 kthread+0xec/0x100 ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- Cc: All applicable Fixes: 56eb0c0ed345 ("ACPI: CPPC: Fix remaining for_each_possible_cpu() to use online CPUs") Co-developed-by: Sean Kelley Signed-off-by: Sean Kelley Signed-off-by: Jinjie Ruan [ rjw: Changelog edits ] Link: https://patch.msgid.link/20260417040112.3727756-1-ruanjinjie@huawei.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 2e91c5a97761..f370be8715ae 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -362,7 +362,7 @@ static int send_pcc_cmd(int pcc_ss_id, u16 cmd) end: if (cmd == CMD_WRITE) { if (unlikely(ret)) { - for_each_online_cpu(i) { + for_each_possible_cpu(i) { struct cpc_desc *desc = per_cpu(cpc_desc_ptr, i); if (!desc) @@ -524,13 +524,13 @@ int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data) else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY) cpu_data->shared_type = CPUFREQ_SHARED_TYPE_ANY; - for_each_online_cpu(i) { + for_each_possible_cpu(i) { if (i == cpu) continue; match_cpc_ptr = per_cpu(cpc_desc_ptr, i); if (!match_cpc_ptr) - goto err_fault; + continue; match_pdomain = &(match_cpc_ptr->domain_info); if (match_pdomain->domain != pdomain->domain) -- cgit v1.2.3 From 88b2670ea6505c6cfd1478ba34b041c60c4281dc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 22 Apr 2026 17:24:08 +0200 Subject: ACPI: TAD: Use __ATTRIBUTE_GROUPS() macro Recent commit 93afe8ba9b01 ("ACPI: TAD: Use dev_groups in struct device_driver") switched over the ACPI TAD driver to using device attribute groups instead of creating and removing the device sysfs attributes directly, but it might go one step farther and use the __ATTRIBUTE_GROUPS() macro which would reduce the code size slightly. Do it now. Signed-off-by: Rafael J. Wysocki [ rjw: Fixed typo in the changelog ] Link: https://patch.msgid.link/1961102.tdWV9SEqCh@rafael.j.wysocki Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_tad.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/acpi_tad.c b/drivers/acpi/acpi_tad.c index b406d7a98996..91bdbf669aaf 100644 --- a/drivers/acpi/acpi_tad.c +++ b/drivers/acpi/acpi_tad.c @@ -605,15 +605,12 @@ static umode_t acpi_tad_attr_is_visible(struct kobject *kobj, return 0; } -static const struct attribute_group acpi_tad_attr_group = { +static const struct attribute_group acpi_tad_group = { .attrs = acpi_tad_attrs, .is_visible = acpi_tad_attr_is_visible, }; -static const struct attribute_group *acpi_tad_attr_groups[] = { - &acpi_tad_attr_group, - NULL, -}; +__ATTRIBUTE_GROUPS(acpi_tad); #ifdef CONFIG_RTC_CLASS /* RTC class device interface */ @@ -885,7 +882,7 @@ static struct platform_driver acpi_tad_driver = { .driver = { .name = "acpi-tad", .acpi_match_table = acpi_tad_ids, - .dev_groups = acpi_tad_attr_groups, + .dev_groups = acpi_tad_groups, }, .probe = acpi_tad_probe, .remove = acpi_tad_remove, -- cgit v1.2.3 From 77dd14ab1b575328745644136ba758d394e10fbc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 22 Apr 2026 17:25:46 +0200 Subject: ACPI: TAD: Use devres for all driver cleanup The code in acpi_tad_remove() needs to run after the unregistration of the devres-managed RTC class device so that it doesn't race with the class callbacks of the latter. To make that happen, pass it to devm_add_action_or_reset() before registering the RTC class device. Fixes: 7572dcabe38d ("ACPI: TAD: Add alarm support to the RTC class device interface") Fixes: 8a1e7f4b1764 ("ACPI: TAD: Add RTC class device interface") Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/14001754.uLZWGnKmhe@rafael.j.wysocki --- drivers/acpi/acpi_tad.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpi_tad.c b/drivers/acpi/acpi_tad.c index 91bdbf669aaf..2aaef50a2d0f 100644 --- a/drivers/acpi/acpi_tad.c +++ b/drivers/acpi/acpi_tad.c @@ -792,9 +792,9 @@ static int acpi_tad_disable_timer(struct device *dev, u32 timer_id) return acpi_tad_wake_set(dev, "_STV", timer_id, ACPI_TAD_WAKE_DISABLED); } -static void acpi_tad_remove(struct platform_device *pdev) +static void acpi_tad_remove(void *data) { - struct device *dev = &pdev->dev; + struct device *dev = data; struct acpi_tad_driver_data *dd = dev_get_drvdata(dev); device_init_wakeup(dev, false); @@ -821,6 +821,7 @@ static int acpi_tad_probe(struct platform_device *pdev) struct acpi_tad_driver_data *dd; acpi_status status; unsigned long long caps; + int ret; /* * Initialization failure messages are mostly about firmware issues, so @@ -867,6 +868,14 @@ static int acpi_tad_probe(struct platform_device *pdev) pm_runtime_enable(dev); pm_runtime_suspend(dev); + /* + * acpi_tad_remove() needs to run after unregistering the RTC class + * device to avoid racing with the latter's callbacks. + */ + ret = devm_add_action_or_reset(&pdev->dev, acpi_tad_remove, &pdev->dev); + if (ret) + return ret; + if (caps & ACPI_TAD_RT) acpi_tad_register_rtc(dev, caps); @@ -885,7 +894,6 @@ static struct platform_driver acpi_tad_driver = { .dev_groups = acpi_tad_groups, }, .probe = acpi_tad_probe, - .remove = acpi_tad_remove, }; MODULE_DEVICE_TABLE(acpi, acpi_tad_ids); -- cgit v1.2.3 From e0d219010477fb19d23b60970b2c03fe5985717c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 22 Apr 2026 17:26:49 +0200 Subject: ACPI: TAD: RTC: Refine timer value computations and checks Since rtc_tm_to_ktime() may overflow for large RTC time values and full second granularity is sufficient in timer value computations in acpi_tad_rtc_set_alarm() and acpi_tad_rtc_read_alarm(), use rtc_tm_to_time64() instead of that function, which also allows the computations to be simplified. Moreover, U32_MAX is a special "timer disabled" value, so make acpi_tad_rtc_set_alarm() reject it when attempting to program the alarm timers. Fixes: 7572dcabe38d ("ACPI: TAD: Add alarm support to the RTC class device interface") Signed-off-by: Rafael J. Wysocki Reviewed-by: Alexandre Belloni Link: https://patch.msgid.link/3414608.aeNJFYEL58@rafael.j.wysocki --- drivers/acpi/acpi_tad.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/acpi_tad.c b/drivers/acpi/acpi_tad.c index 2aaef50a2d0f..bdf8695c00f3 100644 --- a/drivers/acpi/acpi_tad.c +++ b/drivers/acpi/acpi_tad.c @@ -680,9 +680,8 @@ static int acpi_tad_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *t) acpi_tad_rt_to_tm(&rt, &tm_now); - value = ktime_divns(ktime_sub(rtc_tm_to_ktime(t->time), - rtc_tm_to_ktime(tm_now)), NSEC_PER_SEC); - if (value <= 0 || value > U32_MAX) + value = rtc_tm_to_time64(&t->time) - rtc_tm_to_time64(&tm_now); + if (value <= 0 || value >= U32_MAX) return -EINVAL; } @@ -745,8 +744,7 @@ static int acpi_tad_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *t) if (retval != ACPI_TAD_WAKE_DISABLED) { t->enabled = 1; - t->time = rtc_ktime_to_tm(ktime_add_ns(rtc_tm_to_ktime(tm_now), - (u64)retval * NSEC_PER_SEC)); + rtc_time64_to_tm(rtc_tm_to_time64(&tm_now) + retval, &t->time); } else { t->enabled = 0; t->time = tm_now; -- cgit v1.2.3 From ad034486ada5860081565e2d7a2e41aa91c302c2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 22 Apr 2026 17:27:32 +0200 Subject: ACPI: TAD: Fix up a comment in acpi_tad_probe() Fix grammar in the comment preceding the pm_runtime_set_active() call in acpi_tad_probe(). Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/8678306.T7Z3S40VBb@rafael.j.wysocki --- drivers/acpi/acpi_tad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_tad.c b/drivers/acpi/acpi_tad.c index bdf8695c00f3..cac07e997028 100644 --- a/drivers/acpi/acpi_tad.c +++ b/drivers/acpi/acpi_tad.c @@ -859,8 +859,8 @@ static int acpi_tad_probe(struct platform_device *pdev) } /* - * The platform bus type layer tells the ACPI PM domain powers up the - * device, so set the runtime PM status of it to "active". + * The platform bus type probe callback tells the ACPI PM domain to + * power up the device, so set the runtime PM status of it to "active". */ pm_runtime_set_active(dev); pm_runtime_enable(dev); -- cgit v1.2.3 From 4b506ea5351a1f5937ac632a4a5c35f6f796cc41 Mon Sep 17 00:00:00 2001 From: Shivam Kalra Date: Sun, 26 Apr 2026 19:38:41 +0530 Subject: ACPI: video: force native backlight on HP OMEN 16 (8A44) The HP OMEN 16 Gaming Laptop (board name 8A44) has a mux-less hybrid GPU configuration with AMD Rembrandt (Radeon 680M) and NVIDIA GA104 (RTX 3070 Ti). The internal eDP panel is wired to the AMD iGPU. When Nouveau loads without GSP firmware, the ACPI video backlight device (acpi_video0) gets registered alongside the native AMD backlight (amdgpu_bl2). In this state, writes to amdgpu_bl2 update the software brightness value but fail to change the physical panel brightness. Force native backlight to prevent acpi_video0 from registering. Confirmed that booting with acpi_backlight=native resolves the issue. Cc: All applicable Signed-off-by: Shivam Kalra Link: https://patch.msgid.link/20260426-omen-16-backlight-fix-v1-1-62364f268ea6@zohomail.in Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 0a3c8232d15d..458efa4fe9d4 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -916,6 +916,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82K8"), }, }, + { + .callback = video_detect_force_native, + /* HP OMEN Gaming Laptop 16-n0xxx */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "OMEN by HP Gaming Laptop 16-n0xxx"), + }, + }, /* * x86 android tablets which directly control the backlight through -- cgit v1.2.3 From ea216d3ae7305ad2c8256524e65b7219492d8685 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 27 Apr 2026 13:22:38 +0200 Subject: ACPI: bus: add missing forward declaration to acpi_bus.h The header references struct notifier_block but neither includes linux/notifier.h nor contains the relevant forward declaration. Add the latter for correctness. Signed-off-by: Bartosz Golaszewski [ rjw: Subject tweak ] Link: https://patch.msgid.link/20260427112238.132419-1-bartosz.golaszewski@oss.qualcomm.com Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index b701b5f972cb..c41d9a7565cf 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -17,6 +17,8 @@ #include #include +struct notifier_block; + struct acpi_handle_list { u32 count; acpi_handle *handles; -- cgit v1.2.3 From 0898a817621a2f0cddca8122d9b974003fe5036d Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Mon, 27 Apr 2026 22:01:39 +0100 Subject: cdrom, scsi: sr: propagate read-only status to block layer via set_disk_ro() The cdrom core never calls set_disk_ro() for a registered device, so BLKROGET on a CD-ROM device always returns 0 (writable), even when the drive has no write capabilities and writes will inevitably fail. This causes problems for userspace that relies on BLKROGET to determine whether a block device is read-only. For example, systemd's loop device setup uses BLKROGET to decide whether to create a loop device with LO_FLAGS_READ_ONLY. Without the read-only flag, writes pass through the loop device to the CD-ROM and fail with I/O errors. systemd-fsck similarly checks BLKROGET to decide whether to run fsck in no-repair mode (-n). The write-capability bits in cdi->mask come from two different sources: CDC_DVD_RAM and CDC_CD_RW are populated by the driver from the MODE SENSE capabilities page (page 0x2A) before register_cdrom() is called, while CDC_MRW_W and CDC_RAM require the MMC GET CONFIGURATION command and were only probed by cdrom_open_write() at device open time. This meant that any attempt to compute the writable state from the full mask at probe time was incorrect, because the GET CONFIGURATION bits were still unset (and cdi->mask is initialized such that capabilities are assumed present). Fix this by factoring the GET CONFIGURATION probing out of cdrom_open_write() into a new exported helper, cdrom_probe_write_features(), and having sr call it from sr_probe() right after get_capabilities() has populated the MODE SENSE bits. register_cdrom() then calls set_disk_ro() based on the full write-capability mask (CDC_DVD_RAM | CDC_MRW_W | CDC_RAM | CDC_CD_RW) so the block layer reflects the drive's actual write support. The feature queries used (CDF_MRW and CDF_RWRT via GET CONFIGURATION with RT=00) report drive-level capabilities that are persistent across media, so a single probe before register_cdrom() is sufficient and the redundant probe at open time is dropped. With set_disk_ro() now accurate, the long-vestigial cd->writeable flag in sr can go: get_capabilities() used to set cd->writeable based on the same four mask bits, but because CDC_MRW_W and CDC_RAM default to "capability present" in cdi->mask and aren't touched by MODE SENSE, the condition that gated cd->writeable was always true, making it unconditionally 1. Replace the corresponding gate in sr_init_command() with get_disk_ro(cd->disk), which turns a previously no-op check into a real one and also catches kernel-internal bio writers that bypass blkdev_write_iter()'s bdev_read_only() check. The sd driver (SCSI disks) does not have this problem because it checks the MODE SENSE Write Protect bit and calls set_disk_ro() accordingly. The sr driver cannot use the same approach because the MMC specification does not define the WP bit in the MODE SENSE device-specific parameter byte for CD-ROM devices. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Daan De Meyer Reviewed-by: Phillip Potter Reviewed-by: Martin K. Petersen Signed-off-by: Phillip Potter Link: https://patch.msgid.link/20260427210139.1400-2-phil@philpotter.co.uk Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 73 +++++++++++++++++++++++++++++++++------------------ drivers/scsi/sr.c | 11 ++------ drivers/scsi/sr.h | 1 - include/linux/cdrom.h | 1 + 4 files changed, 51 insertions(+), 35 deletions(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index fc049612d6dc..62934cf4b10d 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -631,6 +631,16 @@ int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi) WARN_ON(!cdo->generic_packet); + /* + * Propagate the drive's write support to the block layer so BLKROGET + * reflects actual write capability. Drivers that use GET CONFIGURATION + * features (CDC_MRW_W, CDC_RAM) must have called + * cdrom_probe_write_features() before register_cdrom() so the mask is + * complete here. + */ + set_disk_ro(disk, !CDROM_CAN(CDC_DVD_RAM | CDC_MRW_W | CDC_RAM | + CDC_CD_RW)); + cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" registered\n", cdi->name); mutex_lock(&cdrom_mutex); list_add(&cdi->list, &cdrom_list); @@ -742,6 +752,44 @@ static int cdrom_is_random_writable(struct cdrom_device_info *cdi, int *write) return 0; } +/* + * Probe write-related MMC features via GET CONFIGURATION and update + * cdi->mask accordingly. Drivers that populate cdi->mask from the MODE SENSE + * capabilities page (e.g. sr) should call this after those MODE SENSE bits + * have been set but before register_cdrom(), so that the full set of + * write-capability bits is known by the time register_cdrom() decides on the + * initial read-only state of the disk. + */ +void cdrom_probe_write_features(struct cdrom_device_info *cdi) +{ + int mrw, mrw_write, ram_write; + + mrw = 0; + if (!cdrom_is_mrw(cdi, &mrw_write)) + mrw = 1; + + if (CDROM_CAN(CDC_MO_DRIVE)) + ram_write = 1; + else + (void) cdrom_is_random_writable(cdi, &ram_write); + + if (mrw) + cdi->mask &= ~CDC_MRW; + else + cdi->mask |= CDC_MRW; + + if (mrw_write) + cdi->mask &= ~CDC_MRW_W; + else + cdi->mask |= CDC_MRW_W; + + if (ram_write) + cdi->mask &= ~CDC_RAM; + else + cdi->mask |= CDC_RAM; +} +EXPORT_SYMBOL(cdrom_probe_write_features); + static int cdrom_media_erasable(struct cdrom_device_info *cdi) { disc_information di; @@ -894,33 +942,8 @@ static int cdrom_is_dvd_rw(struct cdrom_device_info *cdi) */ static int cdrom_open_write(struct cdrom_device_info *cdi) { - int mrw, mrw_write, ram_write; int ret = 1; - mrw = 0; - if (!cdrom_is_mrw(cdi, &mrw_write)) - mrw = 1; - - if (CDROM_CAN(CDC_MO_DRIVE)) - ram_write = 1; - else - (void) cdrom_is_random_writable(cdi, &ram_write); - - if (mrw) - cdi->mask &= ~CDC_MRW; - else - cdi->mask |= CDC_MRW; - - if (mrw_write) - cdi->mask &= ~CDC_MRW_W; - else - cdi->mask |= CDC_MRW_W; - - if (ram_write) - cdi->mask &= ~CDC_RAM; - else - cdi->mask |= CDC_RAM; - if (CDROM_CAN(CDC_MRW_W)) ret = cdrom_mrw_open_write(cdi); else if (CDROM_CAN(CDC_DVD_RAM)) diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 7adb2573f50d..c36c54ecd354 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -395,7 +395,7 @@ static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt) switch (req_op(rq)) { case REQ_OP_WRITE: - if (!cd->writeable) + if (get_disk_ro(cd->disk)) goto out; SCpnt->cmnd[0] = WRITE_10; cd->cdi.media_written = 1; @@ -681,6 +681,7 @@ static int sr_probe(struct scsi_device *sdev) error = -ENOMEM; if (get_capabilities(cd)) goto fail_minor; + cdrom_probe_write_features(&cd->cdi); sr_vendor_init(cd); set_capacity(disk, cd->capacity); @@ -899,14 +900,6 @@ static int get_capabilities(struct scsi_cd *cd) /*else I don't think it can close its tray cd->cdi.mask |= CDC_CLOSE_TRAY; */ - /* - * if DVD-RAM, MRW-W or CD-RW, we are randomly writable - */ - if ((cd->cdi.mask & (CDC_DVD_RAM | CDC_MRW_W | CDC_RAM | CDC_CD_RW)) != - (CDC_DVD_RAM | CDC_MRW_W | CDC_RAM | CDC_CD_RW)) { - cd->writeable = 1; - } - kfree(buffer); return 0; } diff --git a/drivers/scsi/sr.h b/drivers/scsi/sr.h index dc899277b3a4..2d92f9cb6fec 100644 --- a/drivers/scsi/sr.h +++ b/drivers/scsi/sr.h @@ -35,7 +35,6 @@ typedef struct scsi_cd { struct scsi_device *device; unsigned int vendor; /* vendor code, see sr_vendor.c */ unsigned long ms_offset; /* for reading multisession-CD's */ - unsigned writeable : 1; unsigned use:1; /* is this device still supportable */ unsigned xa_flag:1; /* CD has XA sectors ? */ unsigned readcd_known:1; /* drive supports READ_CD (0xbe) */ diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index b907e6c2307d..260d7968cf72 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -108,6 +108,7 @@ int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev, extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi, unsigned int clearing); +extern void cdrom_probe_write_features(struct cdrom_device_info *cdi); extern int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi); extern void unregister_cdrom(struct cdrom_device_info *cdi); -- cgit v1.2.3 From cfcbfe5cb11650d53f7cafd7adfd556690b77114 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 21 Apr 2026 08:06:44 -0700 Subject: PCI: Don't fallback to bus reset after failed slot reset If a bus has hotplug slots that implement the slot's reset_slot callback, it is not safe to do the non-slot specific bus reset, so don't fallback to it. If a slot reset does fail, the subsequent bus reset will attempt a 2nd link reset on top of previous and fail to handle the hotplug events. Fixes: 8238cb69c01fe ("PCI: Make reset_subordinate hotplug safe") Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20260421150644.3543733-1-kbusch@meta.com --- drivers/pci/pci.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 8f7cfcc00090..d34266651ad0 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5607,13 +5607,14 @@ static int pci_try_reset_bus(struct pci_bus *bus) * reset for affected devices * * This function will first try to reset the slots on this bus if the method is - * available. If slot reset fails or is not available, this will fall back to a + * available. If slot reset is not available, this will fall back to a * secondary bus reset. */ static int pci_reset_bridge(struct pci_dev *bridge, bool restore) { struct pci_bus *bus = bridge->subordinate; struct pci_slot *slot; + int ret = 0; if (!bus) return -ENOTTY; @@ -5627,19 +5628,17 @@ static int pci_reset_bridge(struct pci_dev *bridge, bool restore) goto bus_reset; list_for_each_entry(slot, &bus->slots, list) { - int ret; - if (restore) ret = pci_try_reset_slot(slot); else ret = pci_slot_reset(slot, PCI_RESET_DO_RESET); if (ret) - goto bus_reset; + break; } mutex_unlock(&pci_slot_mutex); - return 0; + return ret; bus_reset: mutex_unlock(&pci_slot_mutex); -- cgit v1.2.3 From 032e70aff025d7c519af9ab791cd084380619263 Mon Sep 17 00:00:00 2001 From: Zongyao Chen Date: Fri, 24 Apr 2026 15:37:53 +0800 Subject: selinux: use sk blob accessor in socket permission helpers SELinux socket state lives in the composite LSM socket blob. sock_has_perm() and nlmsg_sock_has_extended_perms() currently dereference sk->sk_security directly, which assumes the SELinux socket blob is at offset zero. In stacked configurations that assumption does not hold. If another LSM allocates socket blob storage before SELinux, these helpers may read the wrong blob and feed invalid SID and class values into AVC checks. Use selinux_sock() instead of accessing sk->sk_security directly. Fixes: d1d991efaf34 ("selinux: Add netlink xperm support") Cc: stable@vger.kernel.org # v6.13+ Signed-off-by: Zongyao Chen Signed-off-by: Paul Moore --- security/selinux/hooks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 97801966bf32..49c482e3fa3f 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4920,7 +4920,7 @@ static bool sock_skip_has_perm(u32 sid) static int sock_has_perm(struct sock *sk, u32 perms) { - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); struct common_audit_data ad; struct lsm_network_audit net; @@ -6227,7 +6227,7 @@ static unsigned int selinux_ip_postroute(void *priv, static int nlmsg_sock_has_extended_perms(struct sock *sk, u32 perms, u16 nlmsg_type) { - struct sk_security_struct *sksec = sk->sk_security; + struct sk_security_struct *sksec = selinux_sock(sk); struct common_audit_data ad; u8 driver; u8 xperm; -- cgit v1.2.3 From 3618442d54f366eeee8f6c83a47861ca22918dfe Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 23 Apr 2026 15:08:57 -0700 Subject: MAINTAINERS: add pcnet_cs to PCMCIA Per discussion under the Link make sure Dominik can help with the patches to drivers/net/ethernet/8390/pcnet_cs.c cc: linux@dominikbrodowski.net Link: https://lore.kernel.org/aeomUh5JqFvkLTH7@scops.dominikbrodowski.net Acked-by: Dominik Brodowski Link: https://patch.msgid.link/20260423220857.3490118-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2fb1c75afd16..21288a3a7d93 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20774,6 +20774,7 @@ M: Dominik Brodowski S: Odd Fixes T: git git://git.kernel.org/pub/scm/linux/kernel/git/brodo/linux.git F: Documentation/pcmcia/ +F: drivers/net/ethernet/8390/pcnet_cs.c F: drivers/pcmcia/ F: include/pcmcia/ F: tools/pcmcia/ -- cgit v1.2.3 From 1e5a8eed7821e7a43a31b4c1b3675a91be6bc6f6 Mon Sep 17 00:00:00 2001 From: David Windsor Date: Sun, 26 Apr 2026 19:23:49 -0400 Subject: selinux: don't reserve xattr slot when we won't fill it Move lsm_get_xattr_slot() below the SBLABEL_MNT check so we don't leave a NULL-named slot in the array when returning -EOPNOTSUPP; filesystem initxattrs() callbacks stop iterating at the first NULL ->name, silently dropping xattrs installed by later LSMs. Cc: stable@vger.kernel.org Signed-off-by: David Windsor Signed-off-by: Paul Moore --- security/selinux/hooks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 49c482e3fa3f..59942d39ada7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2966,7 +2966,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, { const struct cred_security_struct *crsec = selinux_cred(current_cred()); struct superblock_security_struct *sbsec; - struct xattr *xattr = lsm_get_xattr_slot(xattrs, xattr_count); + struct xattr *xattr; u32 newsid, clen; u16 newsclass; int rc; @@ -2992,6 +2992,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, !(sbsec->flags & SBLABEL_MNT)) return -EOPNOTSUPP; + xattr = lsm_get_xattr_slot(xattrs, xattr_count); if (xattr) { rc = security_sid_to_context_force(newsid, &context, &clen); -- cgit v1.2.3 From f5c6a272b699b9a0698535e1a56e683207e50030 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 28 Apr 2026 00:33:04 +0800 Subject: spi: axiado: replace usleep_range() with udelay() in IRQ path ax_spi_fill_tx_fifo() can be called from ax_spi_irq() which is a hard irq handler. Replace usleep_range(10, 10) with udelay(10) in atomic context. Fixes: e75a6b00ad79 ("spi: axiado: Add driver for Axiado SPI DB controller") Signed-off-by: Felix Gu Link: https://patch.msgid.link/20260428-axiado-v1-1-cd767500af72@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-axiado.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-axiado.c b/drivers/spi/spi-axiado.c index 9057a0a8df4a..649f149617ce 100644 --- a/drivers/spi/spi-axiado.c +++ b/drivers/spi/spi-axiado.c @@ -201,7 +201,7 @@ static void ax_spi_fill_tx_fifo(struct ax_spi *xspi) * then spi control did't work thoroughly, add one byte delay */ if (ax_spi_read(xspi, AX_SPI_IVR) & AX_SPI_IVR_TFOV) - usleep_range(10, 10); + udelay(10); if (xspi->tx_buf) ax_spi_write_b(xspi, AX_SPI_TXFIFO, *xspi->tx_buf++); else -- cgit v1.2.3 From 35eaa6d8d6c2ee65e96f507add856e0eacf24591 Mon Sep 17 00:00:00 2001 From: "Nikola Z. Ivanov" Date: Sun, 26 Apr 2026 23:14:34 +0300 Subject: netdevsim: zero initialize struct iphdr in dummy sk_buff Syzbot reports a KMSAN uninit-value originating from nsim_dev_trap_skb_build, with the allocation also being performed in the same function. Fix this by calling skb_put_zero instead of skb_put to guarantee zero initialization of the whole IP header. Closes: https://syzkaller.appspot.com/bug?extid=23d7fcd204e3837866ff Fixes: da58f90f11f5 ("netdevsim: Add devlink-trap support") Signed-off-by: Nikola Z. Ivanov Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260426201434.742030-1-zlatistiv@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 1e06e781c835..f00fc2f9ebde 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -829,7 +829,7 @@ static struct sk_buff *nsim_dev_trap_skb_build(void) skb->protocol = htons(ETH_P_IP); skb_set_network_header(skb, skb->len); - iph = skb_put(skb, sizeof(struct iphdr)); + iph = skb_put_zero(skb, sizeof(struct iphdr)); iph->protocol = IPPROTO_UDP; iph->saddr = in_aton("192.0.2.1"); iph->daddr = in_aton("198.51.100.1"); -- cgit v1.2.3 From 732b463449fd0ef90acd13cda68eab1c91adb00c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 17 Apr 2026 20:19:39 -0700 Subject: net/sched: netem: fix probability gaps in 4-state loss model The 4-state Markov chain in loss_4state() has gaps at the boundaries between transition probability ranges. The comparisons use: if (rnd < a4) else if (a4 < rnd && rnd < a1 + a4) When rnd equals a boundary value exactly, neither branch matches and no state transition occurs. The redundant lower-bound check (a4 < rnd) is already implied by being in the else branch. Remove the unnecessary lower-bound comparisons so the ranges are contiguous and every random value produces a transition, matching the GI (General and Intuitive) loss model specification. This bug goes back to original implementation of this model. Fixes: 661b79725fea ("netem: revised correlated loss generator") Signed-off-by: Stephen Hemminger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260418032027.900913-2-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 20df1c08b1e9..8ee72cac1faf 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -227,10 +227,10 @@ static bool loss_4state(struct netem_sched_data *q) if (rnd < clg->a4) { clg->state = LOST_IN_GAP_PERIOD; return true; - } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) { + } else if (rnd < clg->a1 + clg->a4) { clg->state = LOST_IN_BURST_PERIOD; return true; - } else if (clg->a1 + clg->a4 < rnd) { + } else { clg->state = TX_IN_GAP_PERIOD; } @@ -247,9 +247,9 @@ static bool loss_4state(struct netem_sched_data *q) case LOST_IN_BURST_PERIOD: if (rnd < clg->a3) clg->state = TX_IN_BURST_PERIOD; - else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) { + else if (rnd < clg->a2 + clg->a3) { clg->state = TX_IN_GAP_PERIOD; - } else if (clg->a2 + clg->a3 < rnd) { + } else { clg->state = LOST_IN_BURST_PERIOD; return true; } -- cgit v1.2.3 From 4185701fcce6b426b6c3630b25330dddd9c47b0d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 17 Apr 2026 20:19:40 -0700 Subject: net/sched: netem: fix queue limit check to include reordered packets The queue limit check in netem_enqueue() uses q->t_len which only counts packets in the internal tfifo. Packets placed in sch->q by the reorder path (__qdisc_enqueue_head) are not counted, allowing the total queue occupancy to exceed sch->limit under reordering. Include sch->q.qlen in the limit check. Fixes: f8d4bc455047 ("net/sched: netem: account for backlog updates from child qdisc") Signed-off-by: Stephen Hemminger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260418032027.900913-3-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 8ee72cac1faf..d400a730eadd 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -524,7 +524,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1 << get_random_u32_below(8); } - if (unlikely(q->t_len >= sch->limit)) { + if (unlikely(sch->q.qlen >= sch->limit)) { /* re-link segs, so that qdisc_drop_all() frees them all */ skb->next = segs; qdisc_drop_all(skb, sch, to_free); -- cgit v1.2.3 From 986afaf809940577224a99c3a08d97a15eb37e93 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 17 Apr 2026 20:19:41 -0700 Subject: net/sched: netem: only reseed PRNG when seed is explicitly provided netem_change() unconditionally reseeds the PRNG on every tc change command. If TCA_NETEM_PRNG_SEED is not specified, a new random seed is generated, destroying reproducibility for users who set a deterministic seed on a previous change. Move the initial random seed generation to netem_init() and only reseed in netem_change() when TCA_NETEM_PRNG_SEED is explicitly provided by the user. Fixes: 4072d97ddc44 ("netem: add prng attribute to netem_sched_data") Signed-off-by: Stephen Hemminger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260418032027.900913-4-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index d400a730eadd..556f9747f0e7 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1112,11 +1112,10 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, /* capping jitter to the range acceptable by tabledist() */ q->jitter = min_t(s64, abs(q->jitter), INT_MAX); - if (tb[TCA_NETEM_PRNG_SEED]) + if (tb[TCA_NETEM_PRNG_SEED]) { q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]); - else - q->prng.seed = get_random_u64(); - prandom_seed_state(&q->prng.prng_state, q->prng.seed); + prandom_seed_state(&q->prng.prng_state, q->prng.seed); + } unlock: sch_tree_unlock(sch); @@ -1139,6 +1138,9 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; q->loss_model = CLG_RANDOM; + q->prng.seed = get_random_u64(); + prandom_seed_state(&q->prng.prng_state, q->prng.seed); + ret = netem_change(sch, opt, extack); if (ret) pr_info("netem: change failed\n"); -- cgit v1.2.3 From 01801c359a74737b9b1aa28568b60374d857241a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 17 Apr 2026 20:19:42 -0700 Subject: net/sched: netem: validate slot configuration Reject slot configurations that have no defensible meaning: - negative min_delay or max_delay - min_delay greater than max_delay - negative dist_delay or dist_jitter - negative max_packets or max_bytes Negative or out-of-order delays underflow in get_slot_next(), producing garbage intervals. Negative limits trip the per-slot accounting (packets_left/bytes_left <= 0) on the first packet of every slot, defeating the rate-limiting half of the slot feature. Note that dist_jitter has been silently coerced to its absolute value by get_slot() since the feature was introduced; rejecting negatives here converts that silent coercion into -EINVAL. The abs() can be removed in a follow-up. Fixes: 836af83b54e3 ("netem: support delivering packets in delayed time slots") Signed-off-by: Stephen Hemminger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260418032027.900913-5-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 556f9747f0e7..640b51be807a 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -827,6 +827,29 @@ static int get_dist_table(struct disttable **tbl, const struct nlattr *attr) return 0; } +static int validate_slot(const struct nlattr *attr, struct netlink_ext_ack *extack) +{ + const struct tc_netem_slot *c = nla_data(attr); + + if (c->min_delay < 0 || c->max_delay < 0) { + NL_SET_ERR_MSG_ATTR(extack, attr, "negative slot delay"); + return -EINVAL; + } + if (c->min_delay > c->max_delay) { + NL_SET_ERR_MSG_ATTR(extack, attr, "slot min delay greater than max delay"); + return -EINVAL; + } + if (c->dist_delay < 0 || c->dist_jitter < 0) { + NL_SET_ERR_MSG_ATTR(extack, attr, "negative dist delay"); + return -EINVAL; + } + if (c->max_packets < 0 || c->max_bytes < 0) { + NL_SET_ERR_MSG_ATTR(extack, attr, "negative slot limit"); + return -EINVAL; + } + return 0; +} + static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) { const struct tc_netem_slot *c = nla_data(attr); @@ -1040,6 +1063,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, goto table_free; } + if (tb[TCA_NETEM_SLOT]) { + ret = validate_slot(tb[TCA_NETEM_SLOT], extack); + if (ret) + goto table_free; + } + sch_tree_lock(sch); /* backup q->clg and q->loss_model */ old_clg = q->clg; -- cgit v1.2.3 From 51e94e1e2fef351c74d69eb53666df808d26af95 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 17 Apr 2026 20:19:43 -0700 Subject: net/sched: netem: fix slot delay calculation overflow get_slot_next() computes a random delay between min_delay and max_delay using: get_random_u32() * (max_delay - min_delay) >> 32 This overflows signed 64-bit arithmetic when the delay range exceeds approximately 2.1 seconds (2^31 nanoseconds), producing a negative result that effectively disables slot-based pacing. This is a realistic configuration for WAN emulation (e.g., slot 1s 5s). Use mul_u64_u32_shr() which handles the widening multiply without overflow. Fixes: 0a9fe5c375b5 ("netem: slotting with non-uniform distribution") Signed-off-by: Stephen Hemminger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260418032027.900913-6-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 640b51be807a..475c14b3dbdb 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -659,9 +659,8 @@ static void get_slot_next(struct netem_sched_data *q, u64 now) if (!q->slot_dist) next_delay = q->slot_config.min_delay + - (get_random_u32() * - (q->slot_config.max_delay - - q->slot_config.min_delay) >> 32); + mul_u64_u32_shr(q->slot_config.max_delay - q->slot_config.min_delay, + get_random_u32(), 32); else next_delay = tabledist(q->slot_config.dist_delay, (s32)(q->slot_config.dist_jitter), -- cgit v1.2.3 From 90be9fedb218ee95a1cf59050d1306fbfb0e8b87 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 17 Apr 2026 20:19:44 -0700 Subject: net/sched: netem: check for negative latency and jitter Reject requests with negative latency or jitter. A negative value added to current timestamp (u64) wraps to an enormous time_to_send, disabling dequeue. The original UAPI used u32 for these values; the conversion to 64-bit time values via TCA_NETEM_LATENCY64 and TCA_NETEM_JITTER64 allowed signed values to reach the kernel without validation. Jitter is already silently clamped by an abs() in netem_change(); that abs() can be removed in a follow-up once this rejection is in place. Fixes: 99803171ef04 ("netem: add uapi to express delay and jitter in nanoseconds") Signed-off-by: Stephen Hemminger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260418032027.900913-7-stephen@networkplumber.org Signed-off-by: Jakub Kicinski --- net/sched/sch_netem.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 475c14b3dbdb..bc18e1976b6e 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -826,6 +826,16 @@ static int get_dist_table(struct disttable **tbl, const struct nlattr *attr) return 0; } +static int validate_time(const struct nlattr *attr, const char *name, + struct netlink_ext_ack *extack) +{ + if (nla_get_s64(attr) < 0) { + NL_SET_ERR_MSG_ATTR_FMT(extack, attr, "negative %s", name); + return -EINVAL; + } + return 0; +} + static int validate_slot(const struct nlattr *attr, struct netlink_ext_ack *extack) { const struct tc_netem_slot *c = nla_data(attr); @@ -1068,6 +1078,18 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, goto table_free; } + if (tb[TCA_NETEM_LATENCY64]) { + ret = validate_time(tb[TCA_NETEM_LATENCY64], "latency", extack); + if (ret) + goto table_free; + } + + if (tb[TCA_NETEM_JITTER64]) { + ret = validate_time(tb[TCA_NETEM_JITTER64], "jitter", extack); + if (ret) + goto table_free; + } + sch_tree_lock(sch); /* backup q->clg and q->loss_model */ old_clg = q->clg; -- cgit v1.2.3 From 2d9f5a118205da2683ffcec78b9347f1f01a820e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 21 Apr 2026 08:35:11 +0200 Subject: net: airoha: fix BQL imbalance in TX path Fix a possible BQL imbalance in airoha_dev_xmit(), where inflight packets are accounted only for the AIROHA_NUM_TX_RING netdev TX queues. The queue index is computed as: qid = skb_get_queue_mapping(skb) % ARRAY_SIZE(qdma->q_tx) txq = netdev_get_tx_queue(dev, qid); However, airoha_qdma_tx_napi_poll() accounts completions across all netdev TX queues (num_tx_queues), leading to inconsistent BQL accounting. Also reset all netdev TX queues in the ndo_stop callback. Fixes: 1d304174106c ("net: airoha: Implement BQL support") Fixes: c9f947769b77 ("net: airoha: Reset BQL stopping the netdevice") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260421-airoha-fix-bql-v1-1-f135afe4275b@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 2bb0a3ff9810..daae15aa078c 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -929,10 +929,9 @@ static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget) q->queued--; if (skb) { - u16 queue = skb_get_queue_mapping(skb); struct netdev_queue *txq; - txq = netdev_get_tx_queue(skb->dev, queue); + txq = skb_get_tx_queue(skb->dev, skb); netdev_tx_completed_queue(txq, 1, skb->len); dev_kfree_skb_any(skb); } @@ -1744,7 +1743,7 @@ static int airoha_dev_stop(struct net_device *dev) if (err) return err; - for (i = 0; i < ARRAY_SIZE(qdma->q_tx); i++) + for (i = 0; i < dev->num_tx_queues; i++) netdev_tx_reset_subqueue(dev, i); airoha_set_gdm_port_fwd_cfg(qdma->eth, REG_GDM_FWD_CFG(port->id), @@ -2039,7 +2038,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, spin_lock_bh(&q->lock); - txq = netdev_get_tx_queue(dev, qid); + txq = skb_get_tx_queue(dev, skb); nr_frags = 1 + skb_shinfo(skb)->nr_frags; if (q->queued + nr_frags >= q->ndesc) { -- cgit v1.2.3 From 3854de7b38be742cf7558476956d12414cb274f2 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 21 Apr 2026 08:43:07 +0200 Subject: net: airoha: stop net_device TX queue before updating CPU index Currently, airoha_eth driver updates the CPU index register prior of verifying whether the number of free descriptors has fallen below the threshold. Move net_device TX queue length check before updating the TX CPU index in order to update TX CPU index even if there are more packets to be transmitted but the net_device TX queue is going to be stopped accounting the inflight packets. Fixes: 1d304174106c ("net: airoha: Implement BQL support") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260421-airoha-xmit-stop-condition-v1-1-e670d6a48467@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index daae15aa078c..27c85cd95750 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2094,17 +2094,16 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, skb_tx_timestamp(skb); netdev_tx_sent_queue(txq, skb->len); + if (q->ndesc - q->queued < q->free_thr) { + netif_tx_stop_queue(txq); + q->txq_stopped = true; + } if (netif_xmit_stopped(txq) || !netdev_xmit_more()) airoha_qdma_rmw(qdma, REG_TX_CPU_IDX(qid), TX_RING_CPU_IDX_MASK, FIELD_PREP(TX_RING_CPU_IDX_MASK, index)); - if (q->ndesc - q->queued < q->free_thr) { - netif_tx_stop_queue(txq); - q->txq_stopped = true; - } - spin_unlock_bh(&q->lock); return NETDEV_TX_OK; -- cgit v1.2.3 From e070aac63b42bf81f4dc565f9f841ff47e6c992f Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 21 Apr 2026 10:53:33 +0200 Subject: net: airoha: Do not wake all netdev TX queues in airoha_qdma_wake_netdev_txqs() Do not wake every netdev TX queue across all ports sharing the QDMA running netif_tx_wake_all_queues routine in airoha_qdma_wake_netdev_txqs() but only the ones that are mapped the specific QDMA stopped hw TX queue. This patch can potentially avoid waking already stopped netdev TX queues that are mapped to a different QDMA hw TX queue. Introduce airoha_qdma_get_txq utility routine. Fixes: b94769eb2f30 ("net: airoha: Fix possible TX queue stall in airoha_qdma_tx_napi_poll()") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260421-airoha-wake_netdev_txqs-optmization-v1-1-e0be95115d53@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 19 +++++++++++++++---- drivers/net/ethernet/airoha/airoha_eth.h | 5 +++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 27c85cd95750..905bcbf90752 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -847,13 +847,24 @@ static void airoha_qdma_wake_netdev_txqs(struct airoha_queue *q) { struct airoha_qdma *qdma = q->qdma; struct airoha_eth *eth = qdma->eth; - int i; + int i, qid = q - &qdma->q_tx[0]; for (i = 0; i < ARRAY_SIZE(eth->ports); i++) { struct airoha_gdm_port *port = eth->ports[i]; + int j; + + if (!port) + continue; - if (port && port->qdma == qdma) - netif_tx_wake_all_queues(port->dev); + if (port->qdma != qdma) + continue; + + for (j = 0; j < port->dev->num_tx_queues; j++) { + if (airoha_qdma_get_txq(qdma, j) != qid) + continue; + + netif_wake_subqueue(port->dev, j); + } } q->txq_stopped = false; } @@ -2001,7 +2012,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, u16 index; u8 fport; - qid = skb_get_queue_mapping(skb) % ARRAY_SIZE(qdma->q_tx); + qid = airoha_qdma_get_txq(qdma, skb_get_queue_mapping(skb)); tag = airoha_get_dsa_tag(skb, dev); msg0 = FIELD_PREP(QDMA_ETH_TXMSG_CHAN_MASK, diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index e389d2fe3b86..4fad3acc3ccf 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -631,6 +631,11 @@ u32 airoha_rmw(void __iomem *base, u32 offset, u32 mask, u32 val); #define airoha_qdma_clear(qdma, offset, val) \ airoha_rmw((qdma)->regs, (offset), (val), 0) +static inline u16 airoha_qdma_get_txq(struct airoha_qdma *qdma, u16 qid) +{ + return qid % ARRAY_SIZE(qdma->q_tx); +} + static inline bool airoha_is_lan_gdm_port(struct airoha_gdm_port *port) { /* GDM1 port on EN7581 SoC is connected to the lan dsa switch. -- cgit v1.2.3 From bde34e84edc8b5571fbde7e941e175a4293ee1eb Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 24 Apr 2026 11:00:28 +0200 Subject: net: airoha: Do not read uninitialized fragment address in airoha_dev_xmit() The transmit loop in airoha_dev_xmit() reads fragment address and length during its final iteration, when the loop index equals skb_shinfo(skb)->nr_frags, at which point the fragment data is uninitialized. While these values are never consumed, the read itself is unsafe and may trigger a page fault. Fix this by avoiding the fragment read on the last iteration. Additionally, move the skb pointer from the first to the last used packet descriptor, so that airoha_qdma_tx_napi_poll() defers freeing the skb until the final descriptor is processed. Fixes: 23020f0493270 ("net: airoha: Introduce ethernet support for EN7581 SoC") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260424-airoha-xmit-fix-read-frag-v1-1-fdc0a83c79e8@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 905bcbf90752..5effb4a4ae84 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2007,8 +2007,8 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, struct netdev_queue *txq; struct airoha_queue *q; LIST_HEAD(tx_list); + int i = 0, qid; void *data; - int i, qid; u16 index; u8 fport; @@ -2067,7 +2067,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, list); index = e - q->entry; - for (i = 0; i < nr_frags; i++) { + while (true) { struct airoha_qdma_desc *desc = &q->desc[index]; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; dma_addr_t addr; @@ -2079,7 +2079,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, goto error_unmap; list_move_tail(&e->list, &tx_list); - e->skb = i ? NULL : skb; + e->skb = i == nr_frags - 1 ? skb : NULL; e->dma_addr = addr; e->dma_len = len; @@ -2098,6 +2098,9 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, WRITE_ONCE(desc->msg1, cpu_to_le32(msg1)); WRITE_ONCE(desc->msg2, cpu_to_le32(0xffff)); + if (++i == nr_frags) + break; + data = skb_frag_address(frag); len = skb_frag_size(frag); } -- cgit v1.2.3 From d3aeb889dcbd78e95f500d383799a23d949796e0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Apr 2026 06:28:39 +0000 Subject: net/sched: sch_choke: annotate data-races in choke_dump_stats() choke_dump_stats() only runs with RTNL held. It reads fields that can be changed in qdisc fast path. Add READ_ONCE()/WRITE_ONCE() annotations. Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump") Signed-off-by: Eric Dumazet Reviewed-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260423062839.2524324-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_choke.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 94df8e741a97..2875bcdb18a4 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -229,7 +229,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Draw a packet at random from queue and compare flow */ if (choke_match_random(q, skb, &idx)) { - q->stats.matched++; + WRITE_ONCE(q->stats.matched, q->stats.matched + 1); choke_drop_by_idx(sch, idx, to_free); goto congestion_drop; } @@ -241,11 +241,13 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, qdisc_qstats_overlimit(sch); if (use_harddrop(q) || !use_ecn(q) || !INET_ECN_set_ce(skb)) { - q->stats.forced_drop++; + WRITE_ONCE(q->stats.forced_drop, + q->stats.forced_drop + 1); goto congestion_drop; } - q->stats.forced_mark++; + WRITE_ONCE(q->stats.forced_mark, + q->stats.forced_mark + 1); } else if (++q->vars.qcount) { if (red_mark_probability(p, &q->vars, q->vars.qavg)) { q->vars.qcount = 0; @@ -253,11 +255,13 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, qdisc_qstats_overlimit(sch); if (!use_ecn(q) || !INET_ECN_set_ce(skb)) { - q->stats.prob_drop++; + WRITE_ONCE(q->stats.prob_drop, + q->stats.prob_drop + 1); goto congestion_drop; } - q->stats.prob_mark++; + WRITE_ONCE(q->stats.prob_mark, + q->stats.prob_mark + 1); } } else q->vars.qR = red_random(p); @@ -272,7 +276,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_SUCCESS; } - q->stats.pdrop++; + WRITE_ONCE(q->stats.pdrop, q->stats.pdrop + 1); return qdisc_drop(skb, sch, to_free); congestion_drop: @@ -461,10 +465,12 @@ static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct choke_sched_data *q = qdisc_priv(sch); struct tc_choke_xstats st = { - .early = q->stats.prob_drop + q->stats.forced_drop, - .marked = q->stats.prob_mark + q->stats.forced_mark, - .pdrop = q->stats.pdrop, - .matched = q->stats.matched, + .early = READ_ONCE(q->stats.prob_drop) + + READ_ONCE(q->stats.forced_drop), + .marked = READ_ONCE(q->stats.prob_mark) + + READ_ONCE(q->stats.forced_mark), + .pdrop = READ_ONCE(q->stats.pdrop), + .matched = READ_ONCE(q->stats.matched), }; return gnet_stats_copy_app(d, &st, sizeof(st)); -- cgit v1.2.3 From 59b145771c7982cfe9020d4e9e22da92d6b5ae31 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Apr 2026 06:35:27 +0000 Subject: net/sched: sch_fq_pie: annotate data-races in fq_pie_dump_stats() fq_codel_dump_stats() acquires the qdisc spinlock a bit too late. Move this acquisition before we fill tc_fq_pie_xstats with live data. Alternative would be to add READ_ONCE() and WRITE_ONCE() annotations, but the spinlock is needed anyway to scan q->new_flows and q->old_flows. Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler") Signed-off-by: Eric Dumazet Reviewed-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260423063527.2568262-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_fq_pie.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 154c70f489f2..7becbf5362b3 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -509,18 +509,19 @@ nla_put_failure: static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct fq_pie_sched_data *q = qdisc_priv(sch); - struct tc_fq_pie_xstats st = { - .packets_in = q->stats.packets_in, - .overlimit = q->stats.overlimit, - .overmemory = q->overmemory, - .dropped = q->stats.dropped, - .ecn_mark = q->stats.ecn_mark, - .new_flow_count = q->new_flow_count, - .memory_usage = q->memory_usage, - }; + struct tc_fq_pie_xstats st = { 0 }; struct list_head *pos; sch_tree_lock(sch); + + st.packets_in = q->stats.packets_in; + st.overlimit = q->stats.overlimit; + st.overmemory = q->overmemory; + st.dropped = q->stats.dropped; + st.ecn_mark = q->stats.ecn_mark; + st.new_flow_count = q->new_flow_count; + st.memory_usage = q->memory_usage; + list_for_each(pos, &q->new_flows) st.new_flows_len++; -- cgit v1.2.3 From 2674d603a9e6970463b2b9ebcf8e31e90beae169 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 23 Apr 2026 09:36:07 +0300 Subject: vrf: Fix a potential NPD when removing a port from a VRF RCU readers that identified a net device as a VRF port using netif_is_l3_slave() assume that a subsequent call to netdev_master_upper_dev_get_rcu() will return a VRF device. They then continue to dereference its l3mdev operations. This assumption is not always correct and can result in a NPD [1]. There is no RCU synchronization when removing a port from a VRF, so it is possible for an RCU reader to see a new master device (e.g., a bridge) that does not have l3mdev operations. Fix by adding RCU synchronization after clearing the IFF_L3MDEV_SLAVE flag. Skip this synchronization when a net device is removed from a VRF as part of its deletion and when the VRF device itself is deleted. In the latter case an RCU grace period will pass by the time RTNL is released. [1] BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] RIP: 0010:l3mdev_fib_table_rcu (net/l3mdev/l3mdev.c:181) [...] Call Trace: l3mdev_fib_table_by_index (net/l3mdev/l3mdev.c:201 net/l3mdev/l3mdev.c:189) __inet_bind (net/ipv4/af_inet.c:499 (discriminator 3)) inet_bind_sk (net/ipv4/af_inet.c:469) __sys_bind (./include/linux/file.h:62 (discriminator 1) ./include/linux/file.h:83 (discriminator 1) net/socket.c:1951 (discriminator 1)) __x64_sys_bind (net/socket.c:1969 (discriminator 1) net/socket.c:1967 (discriminator 1) net/socket.c:1967 (discriminator 1)) do_syscall_64 (arch/x86/entry/syscall_64.c:63 (discriminator 1) arch/x86/entry/syscall_64.c:94 (discriminator 1)) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Fixes: fdeea7be88b1 ("net: vrf: Set slave's private flag before linking") Reported-by: Haoze Xie Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Yuan Tan Closes: https://lore.kernel.org/netdev/20260419145332.3988923-1-n05ec@lzu.edu.cn/ Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/20260423063607.1208202-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 2cf2dbd1c12f..46209917ae4d 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1034,6 +1034,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev, err: port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; + synchronize_net(); return ret; } @@ -1053,10 +1054,16 @@ static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev, } /* inverse of do_vrf_add_slave */ -static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev) +static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev, + bool needs_sync) { netdev_upper_dev_unlink(port_dev, dev); port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; + /* Make sure that concurrent RCU readers that identified the device + * as a VRF port see a VRF master or no master at all. + */ + if (needs_sync) + synchronize_net(); cycle_netdev(port_dev, NULL); @@ -1065,7 +1072,7 @@ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev) static int vrf_del_slave(struct net_device *dev, struct net_device *port_dev) { - return do_vrf_del_slave(dev, port_dev); + return do_vrf_del_slave(dev, port_dev, true); } static void vrf_dev_uninit(struct net_device *dev) @@ -1619,7 +1626,7 @@ static void vrf_dellink(struct net_device *dev, struct list_head *head) struct list_head *iter; netdev_for_each_lower_dev(dev, port_dev, iter) - vrf_del_slave(dev, port_dev); + do_vrf_del_slave(dev, port_dev, false); vrf_map_unregister_dev(dev); @@ -1751,7 +1758,7 @@ static int vrf_device_event(struct notifier_block *unused, goto out; vrf_dev = netdev_master_upper_dev_get(dev); - vrf_del_slave(vrf_dev, dev); + do_vrf_del_slave(vrf_dev, dev, false); } out: return NOTIFY_DONE; -- cgit v1.2.3 From 9e6bf146b55999a095bb14f73a843942456d1adc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 21 Apr 2026 15:16:33 +0200 Subject: ipv6: rpl: reserve mac_len headroom when recompressed SRH grows ipv6_rpl_srh_rcv() decompresses an RFC 6554 Source Routing Header, swaps the next segment into ipv6_hdr->daddr, recompresses, then pulls the old header and pushes the new one plus the IPv6 header back. The recompressed header can be larger than the received one when the swap reduces the common-prefix length the segments share with daddr (CmprI=0, CmprE>0, seg[0][0] != daddr[0] gives the maximum +8 bytes). pskb_expand_head() was gated on segments_left == 0, so on earlier segments the push consumed unchecked headroom. Once skb_push() leaves fewer than skb->mac_len bytes in front of data, skb_mac_header_rebuild()'s call to: skb_set_mac_header(skb, -skb->mac_len); will store (data - head) - mac_len into the u16 mac_header field, which wraps to ~65530, and the following memmove() writes mac_len bytes ~64KiB past skb->head. A single AF_INET6/SOCK_RAW/IPV6_HDRINCL packet over lo with a two segment type-3 SRH (CmprI=0, CmprE=15) reaches headroom 8 after one pass; KASAN reports a 14-byte OOB write in ipv6_rthdr_rcv. Fix this by expanding the head whenever the remaining room is less than the push size plus mac_len, and request that much extra so the rebuilt MAC header fits afterwards. Fixes: 8610c7c6e3bd ("net: ipv6: add support for rpl sr exthdr") Cc: stable Reported-by: Anthropic Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026042133-gout-unvented-1bd9@gregkh Signed-off-by: Jakub Kicinski --- net/ipv6/exthdrs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 95558fd6f447..03cbce842c1a 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -491,6 +491,7 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct inet6_dev *idev; struct ipv6hdr *oldhdr; + unsigned int chdr_len; unsigned char *buf; int accept_rpl_seg; int i, err; @@ -592,8 +593,10 @@ looped_back: skb_pull(skb, ((hdr->hdrlen + 1) << 3)); skb_postpull_rcsum(skb, oldhdr, sizeof(struct ipv6hdr) + ((hdr->hdrlen + 1) << 3)); - if (unlikely(!hdr->segments_left)) { - if (pskb_expand_head(skb, sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3), 0, + chdr_len = sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3); + if (unlikely(!hdr->segments_left || + skb_headroom(skb) < chdr_len + skb->mac_len)) { + if (pskb_expand_head(skb, chdr_len + skb->mac_len, 0, GFP_ATOMIC)) { __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); @@ -603,7 +606,7 @@ looped_back: oldhdr = ipv6_hdr(skb); } - skb_push(skb, ((chdr->hdrlen + 1) << 3) + sizeof(struct ipv6hdr)); + skb_push(skb, chdr_len); skb_reset_network_header(skb); skb_mac_header_rebuild(skb); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); -- cgit v1.2.3 From 23f0e34c64acba15cad4d23e50f41f533da195fa Mon Sep 17 00:00:00 2001 From: Zhan Jun Date: Thu, 23 Apr 2026 08:49:12 +0800 Subject: net: usb: rtl8150: fix use-after-free in rtl8150_start_xmit() syzbot reported a KASAN slab-use-after-free read in rtl8150_start_xmit() when accessing skb->len for tx statistics after usb_submit_urb() has been called: BUG: KASAN: slab-use-after-free in rtl8150_start_xmit+0x71f/0x760 drivers/net/usb/rtl8150.c:712 Read of size 4 at addr ffff88810eb7a930 by task kworker/0:4/5226 The URB completion handler write_bulk_callback() frees the skb via dev_kfree_skb_irq(dev->tx_skb). The URB may complete on another CPU in softirq context before usb_submit_urb() returns in the submitter, so by the time the submitter reads skb->len the skb has already been queued to the per-CPU completion_queue and freed by net_tx_action(): CPU A (xmit) CPU B (USB completion softirq) ------------ ------------------------------ dev->tx_skb = skb; usb_submit_urb() --+ |-------> write_bulk_callback() | dev_kfree_skb_irq(dev->tx_skb) | net_tx_action() | napi_skb_cache_put() <-- free netdev->stats.tx_bytes | += skb->len; <-- UAF read Fix it by caching skb->len before submitting the URB and using the cached value when updating the tx_bytes counter. The pre-existing tx_bytes semantics are preserved: the counter tracks the original frame length (skb->len), not the ETH_ZLEN/USB-alignment padded "count" value that is handed to the device. Changing that would be a user-visible accounting change and is out of scope for this UAF fix. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+3f46c095ac0ca048cb71@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/69e69ee7.050a0220.24bfd3.002b.GAE@google.com/ Closes: https://syzkaller.appspot.com/bug?extid=3f46c095ac0ca048cb71 Reviewed-by: Andrew Lunn Signed-off-by: Zhan Jun Link: https://patch.msgid.link/809895186B866C10+20260423004913.136655-1-zhangdandan@uniontech.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/rtl8150.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 4cda0643afb6..1bbfdeab4d62 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -683,6 +683,7 @@ static netdev_tx_t rtl8150_start_xmit(struct sk_buff *skb, struct net_device *netdev) { rtl8150_t *dev = netdev_priv(netdev); + unsigned int skb_len; int count, res; /* pad the frame and ensure terminating USB packet, datasheet 9.2.3 */ @@ -694,6 +695,8 @@ static netdev_tx_t rtl8150_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } + skb_len = skb->len; + netif_stop_queue(netdev); dev->tx_skb = skb; usb_fill_bulk_urb(dev->tx_urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), @@ -709,7 +712,7 @@ static netdev_tx_t rtl8150_start_xmit(struct sk_buff *skb, } } else { netdev->stats.tx_packets++; - netdev->stats.tx_bytes += skb->len; + netdev->stats.tx_bytes += skb_len; netif_trans_update(netdev); } -- cgit v1.2.3 From adbe2cdf75461891e50dbe11896ac78e9af1f874 Mon Sep 17 00:00:00 2001 From: Morduan Zang Date: Fri, 24 Apr 2026 09:55:17 +0800 Subject: net: usb: rtl8150: free skb on usb_submit_urb() failure in xmit When rtl8150_start_xmit() fails to submit the tx URB, the URB is never handed to the USB core and write_bulk_callback() will not run. The driver returns NETDEV_TX_OK, which tells the networking stack that the skb has been consumed, but nothing actually frees the skb on this error path: dev->tx_skb = skb; ... if ((res = usb_submit_urb(dev->tx_urb, GFP_ATOMIC))) { ... /* no kfree_skb here */ } return NETDEV_TX_OK; This leaks the skb on every submit failure and also leaves dev->tx_skb pointing at memory that the driver itself may later free, which is fragile. Free the skb with dev_kfree_skb_any() in the error path and clear dev->tx_skb so no stale pointer is left behind. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reviewed-by: Andrew Lunn Signed-off-by: Morduan Zang Link: https://patch.msgid.link/E7D3E1C013C5A859+20260424015517.9574-1-zhangdandan@uniontech.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/rtl8150.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 1bbfdeab4d62..c880c95c41a5 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -710,6 +710,13 @@ static netdev_tx_t rtl8150_start_xmit(struct sk_buff *skb, netdev->stats.tx_errors++; netif_start_queue(netdev); } + /* + * The URB was not submitted, so write_bulk_callback() will + * never run to free dev->tx_skb. Drop the skb here and + * clear tx_skb to avoid leaving a stale pointer. + */ + dev->tx_skb = NULL; + dev_kfree_skb_any(skb); } else { netdev->stats.tx_packets++; netdev->stats.tx_bytes += skb_len; -- cgit v1.2.3 From 8d0189c1ea98b56481eb809e3d1bdbf85557e819 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 28 Apr 2026 01:42:00 +0800 Subject: spi: amlogic-spisg: initialize completion before requesting IRQ Move init_completion(&spisg->completion) to before devm_request_irq() to avoid a potential race condition where an interrupt could fire before the completion structure is initialized. Fixes: cef9991e04ae ("spi: Add Amlogic SPISG driver") Signed-off-by: Felix Gu Link: https://patch.msgid.link/20260428-amlogic-spisg-v1-1-8eecc3b446d6@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-amlogic-spisg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/spi/spi-amlogic-spisg.c b/drivers/spi/spi-amlogic-spisg.c index 19c5eba412ef..f9de2d2c9213 100644 --- a/drivers/spi/spi-amlogic-spisg.c +++ b/drivers/spi/spi-amlogic-spisg.c @@ -794,6 +794,7 @@ static int aml_spisg_probe(struct platform_device *pdev) dma_set_max_seg_size(&pdev->dev, SPISG_BLOCK_MAX); + init_completion(&spisg->completion); ret = devm_request_irq(&pdev->dev, irq, aml_spisg_irq, 0, NULL, spisg); if (ret) { dev_err(&pdev->dev, "irq request failed\n"); @@ -806,8 +807,6 @@ static int aml_spisg_probe(struct platform_device *pdev) goto out_clk; } - init_completion(&spisg->completion); - pm_runtime_put(&spisg->pdev->dev); return 0; -- cgit v1.2.3 From a9bc28aa4e64320668131349436a650bf42591a5 Mon Sep 17 00:00:00 2001 From: Paul Geurts Date: Wed, 22 Apr 2026 12:09:30 +0200 Subject: NFC: trf7970a: Ignore antenna noise when checking for RF field The main channel Received Signal Strength Indicator (RSSI) measurement is used to determine whether an RF field is present or not. RSSI != 0 is interpreted as an RF Field is present. This does not take RF noise and measurement inaccuracy into account, and results in false positives in the field. Define a noise level and make sure the RF field is only interpreted as present when the RSSI is above the noise level. Fixes: 851ee3cbf850 ("NFC: trf7970a: Don't turn on RF if there is already an RF field") Signed-off-by: Paul Geurts Reviewed-by: Krzysztof Kozlowski Reviewed-by: Mark Greer Link: https://patch.msgid.link/20260422100930.581237-1-paul.geurts@prodrive-technologies.com Signed-off-by: Jakub Kicinski --- drivers/nfc/trf7970a.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index d17c701c7888..08c27bb438b5 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -317,6 +317,7 @@ #define TRF7970A_RSSI_OSC_STATUS_RSSI_MASK (BIT(2) | BIT(1) | BIT(0)) #define TRF7970A_RSSI_OSC_STATUS_RSSI_X_MASK (BIT(5) | BIT(4) | BIT(3)) #define TRF7970A_RSSI_OSC_STATUS_RSSI_OSC_OK BIT(6) +#define TRF7970A_RSSI_OSC_STATUS_RSSI_NOISE_LEVEL 1 #define TRF7970A_SPECIAL_FCN_REG1_COL_7_6 BIT(0) #define TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL BIT(1) @@ -1300,7 +1301,7 @@ static int trf7970a_is_rf_field(struct trf7970a *trf, bool *is_rf_field) if (ret) return ret; - if (rssi & TRF7970A_RSSI_OSC_STATUS_RSSI_MASK) + if ((rssi & TRF7970A_RSSI_OSC_STATUS_RSSI_MASK) > TRF7970A_RSSI_OSC_STATUS_RSSI_NOISE_LEVEL) *is_rf_field = true; else *is_rf_field = false; -- cgit v1.2.3 From 3d07ca5c0fae311226f737963984bd94bb159a87 Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Thu, 23 Apr 2026 00:19:58 +0800 Subject: net/sched: taprio: fix NULL pointer dereference in class dump When a TAPRIO child qdisc is deleted via RTM_DELQDISC, taprio_graft() is called with new == NULL and stores NULL into q->qdiscs[cl - 1]. Subsequent RTM_GETTCLASS dump operations walk all classes via taprio_walk() and call taprio_dump_class(), which calls taprio_leaf() returning the NULL pointer, then dereferences it to read child->handle, causing a kernel NULL pointer dereference. The bug is reachable with namespace-scoped CAP_NET_ADMIN on any kernel with CONFIG_NET_SCH_TAPRIO enabled. On systems with unprivileged user namespaces enabled, an unprivileged local user can trigger a kernel panic by creating a taprio qdisc inside a new network namespace, grafting an explicit child qdisc, deleting it, and requesting a class dump. The RTM_GETTCLASS dump itself requires no capability. Oops: general protection fault, probably for non-canonical address 0xdffffc0000000007: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000038-0x000000000000003f] RIP: 0010:taprio_dump_class (net/sched/sch_taprio.c:2478) Call Trace: tc_fill_tclass (net/sched/sch_api.c:1966) qdisc_class_dump (net/sched/sch_api.c:2326) taprio_walk (net/sched/sch_taprio.c:2514) tc_dump_tclass_qdisc (net/sched/sch_api.c:2352) tc_dump_tclass_root (net/sched/sch_api.c:2370) tc_dump_tclass (net/sched/sch_api.c:2431) rtnl_dumpit (net/core/rtnetlink.c:6864) netlink_dump (net/netlink/af_netlink.c:2325) rtnetlink_rcv_msg (net/core/rtnetlink.c:6959) netlink_rcv_skb (net/netlink/af_netlink.c:2550) Fix this by substituting &noop_qdisc when new is NULL in taprio_graft(), a common pattern used by other qdiscs (e.g., multiq_graft()) to ensure the q->qdiscs[] slots are never NULL. This makes control-plane dump paths safe without requiring individual NULL checks. Since the data-plane paths (taprio_enqueue and taprio_dequeue_from_txq) previously had explicit NULL guards that would drop/skip the packet cleanly, update those checks to test for &noop_qdisc instead. Without this, packets would reach taprio_enqueue_one() which increments the root qdisc's qlen and backlog before calling the child's enqueue; noop_qdisc drops the packet but those counters are never rolled back, permanently inflating the root qdisc's statistics. After this change *old can be a valid qdisc, NULL, or &noop_qdisc. Only call qdisc_put(*old) in the first case to avoid decreasing noop_qdisc's refcount, which was never increased. Fixes: 665338b2a7a0 ("net/sched: taprio: dump class stats for the actual q->qdiscs[]") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Acked-by: Jamal Hadi Salim Tested-by: Weiming Shi Link: https://patch.msgid.link/20260422161958.2517539-3-bestswngs@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index a47a09d76400..45245157e00a 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -634,7 +634,7 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, queue = skb_get_queue_mapping(skb); child = q->qdiscs[queue]; - if (unlikely(!child)) + if (unlikely(child == &noop_qdisc)) return qdisc_drop(skb, sch, to_free); if (taprio_skb_exceeds_queue_max_sdu(sch, skb)) { @@ -717,7 +717,7 @@ static struct sk_buff *taprio_dequeue_from_txq(struct Qdisc *sch, int txq, int len; u8 tc; - if (unlikely(!child)) + if (unlikely(child == &noop_qdisc)) return NULL; if (TXTIME_ASSIST_IS_ENABLED(q->flags)) @@ -2184,6 +2184,9 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl, if (!dev_queue) return -EINVAL; + if (!new) + new = &noop_qdisc; + if (dev->flags & IFF_UP) dev_deactivate(dev, false); @@ -2197,14 +2200,14 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl, *old = q->qdiscs[cl - 1]; if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old); - if (new) + if (new != &noop_qdisc) qdisc_refcount_inc(new); - if (*old) + if (*old && *old != &noop_qdisc) qdisc_put(*old); } q->qdiscs[cl - 1] = new; - if (new) + if (new != &noop_qdisc) new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) -- cgit v1.2.3 From a469feed399da791f890b3448622121e97a07f3b Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Thu, 23 Apr 2026 00:19:59 +0800 Subject: selftests/tc-testing: add taprio test for class dump after child delete Add a regression test for the NULL pointer dereference fixed in the previous commit. Before the fix, taprio_graft() stored NULL into q->qdiscs[cl - 1] when an explicitly grafted child qdisc was deleted via RTM_DELQDISC; the next RTM_GETTCLASS dump then crashed the kernel in taprio_dump_class() while reading child->handle. The test installs a taprio root qdisc on a multi-queue netdevsim device, grafts a pfifo child onto class 8001:1, deletes that child, and then performs a class dump. On a fixed kernel the dump succeeds and all eight taprio classes are listed; on an unpatched kernel the class dump crashes, which surfaces as a test failure. Signed-off-by: Weiming Shi Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260422161958.2517539-4-bestswngs@gmail.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/qdiscs/taprio.json | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json index 557fb074acf0..cd19d05925e4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json @@ -302,5 +302,31 @@ "$TC qdisc del dev $ETH root", "echo \"1\" > /sys/bus/netdevsim/del_device" ] + }, + { + "id": "c7e1", + "name": "Class dump after graft and delete of explicit child qdisc", + "category": [ + "qdisc", + "taprio" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", + "$TC qdisc replace dev $ETH handle 8001: parent root taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI", + "$TC qdisc add dev $ETH parent 8001:1 handle 8002: pfifo", + "$TC qdisc del dev $ETH parent 8001:1 handle 8002:" + ], + "cmdUnderTest": "$TC class show dev $ETH", + "expExitCode": "0", + "verifyCmd": "$TC class show dev $ETH", + "matchPattern": "class taprio 8001:[0-9]+ root", + "matchCount": "8", + "teardown": [ + "$TC qdisc del dev $ETH root", + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] } ] -- cgit v1.2.3 From 5b0c911bcdbd982f7748d11c0b39ec5808eae2de Mon Sep 17 00:00:00 2001 From: Morduan Zang Date: Thu, 23 Apr 2026 09:05:57 +0800 Subject: net: phonet: do not BUG_ON() in pn_socket_autobind() on failed bind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syzbot reported a kernel BUG triggered from pn_socket_sendmsg() via pn_socket_autobind(): kernel BUG at net/phonet/socket.c:213! RIP: 0010:pn_socket_autobind net/phonet/socket.c:213 [inline] RIP: 0010:pn_socket_sendmsg+0x240/0x250 net/phonet/socket.c:421 Call Trace: sock_sendmsg_nosec+0x112/0x150 net/socket.c:797 __sock_sendmsg net/socket.c:812 [inline] __sys_sendto+0x402/0x590 net/socket.c:2280 ... pn_socket_autobind() calls pn_socket_bind() with port 0 and, on -EINVAL, assumes the socket was already bound and asserts that the port is non-zero: err = pn_socket_bind(sock, ..., sizeof(struct sockaddr_pn)); if (err != -EINVAL) return err; BUG_ON(!pn_port(pn_sk(sock->sk)->sobject)); return 0; /* socket was already bound */ However pn_socket_bind() also returns -EINVAL when sk->sk_state is not TCP_CLOSE, even when the socket has never been bound and pn_port() is still 0. In that case the BUG_ON() fires and panics the kernel from a user-triggerable path. Treat the "bind returned -EINVAL but pn_port() is still 0" case as a regular error and propagate -EINVAL to the caller instead of crashing. Existing callers already translate a non-zero return from pn_socket_autobind() into -ENOBUFS/-EAGAIN, so returning -EINVAL here only changes behaviour from panic to a normal errno. Fixes: ba113a94b750 ("Phonet: common socket glue") Reported-by: syzbot+706f5eb79044e686c794@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=706f5eb79044e686c794 Suggested-by: Remi Denis-Courmont Signed-off-by: Morduan Zang Signed-off-by: zhanjun Acked-by: Rémi Denis-Courmont Link: https://patch.msgid.link/87A8960A2045AF3C+20260423010557.138124-1-zhangdandan@uniontech.com Signed-off-by: Jakub Kicinski --- net/phonet/socket.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/phonet/socket.c b/net/phonet/socket.c index c4af26357144..631a99cdbd00 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -208,9 +208,15 @@ static int pn_socket_autobind(struct socket *sock) sa.spn_family = AF_PHONET; err = pn_socket_bind(sock, (struct sockaddr_unsized *)&sa, sizeof(struct sockaddr_pn)); - if (err != -EINVAL) + /* + * pn_socket_bind() also returns -EINVAL when sk_state != TCP_CLOSE + * without a prior bind, so -EINVAL alone is not sufficient to infer + * that the socket was already bound. Only treat it as "already + * bound" when the port is non-zero; otherwise propagate the error + * instead of crashing the kernel. + */ + if (err != -EINVAL || unlikely(!pn_port(pn_sk(sock->sk)->sobject))) return err; - BUG_ON(!pn_port(pn_sk(sock->sk)->sobject)); return 0; /* socket was already bound */ } -- cgit v1.2.3 From b3b6babf47517fde6b6de2493dea28e8831b9347 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 23 Apr 2026 05:34:54 +0000 Subject: ipmr: Free mr_table after RCU grace period. With CONFIG_IP_MROUTE_MULTIPLE_TABLES=n, ipmr_fib_lookup() does not check if net->ipv4.mrt is NULL. Since default_device_exit_batch() is called after ->exit_rtnl(), a device could receive IGMP packets and access net->ipv4.mrt during/after ipmr_rules_exit_rtnl(). If ipmr_rules_exit_rtnl() had already cleared it and freed the memory, the access would trigger null-ptr-deref or use-after-free. Let's fix it by using RCU helper and free mrt after RCU grace period. In addition, check_net(net) is added to mroute_clean_tables() and ipmr_cache_unresolved() to synchronise via mfc_unres_lock. This prevents ipmr_cache_unresolved() from putting skb into c->_c.mfc_un.unres.unresolved after mroute_clean_tables() purges it. For the same reason, timer_shutdown_sync() is moved after mroute_clean_tables(). Since rhltable_destroy() holds mutex internally, rcu_work is used, and it is placed as the first member because rcu_head must be placed within <4K offset. mr_table is alraedy 3864 bytes without rcu_work. Note that IP6MR is not yet converted to ->exit_rtnl(), so this change is not needed for now but will be. Fixes: b22b01867406 ("ipmr: Convert ipmr_net_exit_batch() to ->exit_rtnl().") Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260423053456.4097409-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/mroute_base.h | 3 ++ net/ipv4/ipmr.c | 108 ++++++++++++++++++++++++-------------------- net/ipv4/ipmr_base.c | 16 +++++++ 3 files changed, 77 insertions(+), 50 deletions(-) diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index cf3374580f74..5d75cc5b057e 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -226,6 +226,7 @@ struct mr_table_ops { /** * struct mr_table - a multicast routing table + * @work: used for table destruction * @list: entry within a list of multicast routing tables * @net: net where this table belongs * @ops: protocol specific operations @@ -243,6 +244,7 @@ struct mr_table_ops { * @mroute_reg_vif_num: PIM-device vif index */ struct mr_table { + struct rcu_work work; struct list_head list; possible_net_t net; struct mr_table_ops ops; @@ -274,6 +276,7 @@ void vif_device_init(struct vif_device *v, unsigned short flags, unsigned short get_iflink_mask); +void mr_table_free(struct mr_table *mrt); struct mr_table * mr_table_alloc(struct net *net, u32 id, struct mr_table_ops *ops, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8a08d09b4c30..2058ca860294 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -151,16 +151,6 @@ static struct mr_table *__ipmr_get_table(struct net *net, u32 id) return NULL; } -static struct mr_table *ipmr_get_table(struct net *net, u32 id) -{ - struct mr_table *mrt; - - rcu_read_lock(); - mrt = __ipmr_get_table(net, id); - rcu_read_unlock(); - return mrt; -} - static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { @@ -293,7 +283,7 @@ static void __net_exit ipmr_rules_exit_rtnl(struct net *net, struct mr_table *mrt, *next; list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { - list_del(&mrt->list); + list_del_rcu(&mrt->list); ipmr_free_table(mrt, dev_kill_list); } } @@ -315,28 +305,30 @@ bool ipmr_rule_default(const struct fib_rule *rule) } EXPORT_SYMBOL(ipmr_rule_default); #else -#define ipmr_for_each_table(mrt, net) \ - for (mrt = net->ipv4.mrt; mrt; mrt = NULL) - static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) { if (!mrt) - return net->ipv4.mrt; + return rcu_dereference(net->ipv4.mrt); return NULL; } -static struct mr_table *ipmr_get_table(struct net *net, u32 id) +static struct mr_table *__ipmr_get_table(struct net *net, u32 id) { - return net->ipv4.mrt; + return rcu_dereference_check(net->ipv4.mrt, + lockdep_rtnl_is_held() || + !rcu_access_pointer(net->ipv4.mrt)); } -#define __ipmr_get_table ipmr_get_table +#define ipmr_for_each_table(mrt, net) \ + for (mrt = __ipmr_get_table(net, 0); mrt; mrt = NULL) static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { - *mrt = net->ipv4.mrt; + *mrt = rcu_dereference(net->ipv4.mrt); + if (!*mrt) + return -EAGAIN; return 0; } @@ -347,7 +339,8 @@ static int __net_init ipmr_rules_init(struct net *net) mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); if (IS_ERR(mrt)) return PTR_ERR(mrt); - net->ipv4.mrt = mrt; + + rcu_assign_pointer(net->ipv4.mrt, mrt); return 0; } @@ -358,9 +351,10 @@ static void __net_exit ipmr_rules_exit(struct net *net) static void __net_exit ipmr_rules_exit_rtnl(struct net *net, struct list_head *dev_kill_list) { - ipmr_free_table(net->ipv4.mrt, dev_kill_list); + struct mr_table *mrt = rcu_dereference_protected(net->ipv4.mrt, 1); - net->ipv4.mrt = NULL; + RCU_INIT_POINTER(net->ipv4.mrt, NULL); + ipmr_free_table(mrt, dev_kill_list); } static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, @@ -381,6 +375,17 @@ bool ipmr_rule_default(const struct fib_rule *rule) EXPORT_SYMBOL(ipmr_rule_default); #endif +static struct mr_table *ipmr_get_table(struct net *net, u32 id) +{ + struct mr_table *mrt; + + rcu_read_lock(); + mrt = __ipmr_get_table(net, id); + rcu_read_unlock(); + + return mrt; +} + static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { @@ -441,12 +446,11 @@ static void ipmr_free_table(struct mr_table *mrt, struct list_head *dev_kill_lis WARN_ON_ONCE(!mr_can_free_table(net)); - timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC, &ipmr_dev_kill_list); - rhltable_destroy(&mrt->mfc_hash); - kfree(mrt); + timer_shutdown_sync(&mrt->ipmr_expire_timer); + mr_table_free(mrt); WARN_ON_ONCE(!net_initialized(net) && !list_empty(&ipmr_dev_kill_list)); list_splice(&ipmr_dev_kill_list, dev_kill_list); @@ -1135,12 +1139,19 @@ static int ipmr_cache_report(const struct mr_table *mrt, static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb, struct net_device *dev) { + struct net *net = read_pnet(&mrt->net); const struct iphdr *iph = ip_hdr(skb); - struct mfc_cache *c; + struct mfc_cache *c = NULL; bool found = false; int err; spin_lock_bh(&mfc_unres_lock); + + if (!check_net(net)) { + err = -EINVAL; + goto err; + } + list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { if (c->mfc_mcastgrp == iph->daddr && c->mfc_origin == iph->saddr) { @@ -1153,10 +1164,8 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, /* Create a new entry if allowable */ c = ipmr_cache_alloc_unres(); if (!c) { - spin_unlock_bh(&mfc_unres_lock); - - kfree_skb(skb); - return -ENOBUFS; + err = -ENOBUFS; + goto err; } /* Fill in the new cache entry */ @@ -1166,17 +1175,8 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, /* Reflect first query at mrouted. */ err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); - - if (err < 0) { - /* If the report failed throw the cache entry - out - Brad Parker - */ - spin_unlock_bh(&mfc_unres_lock); - - ipmr_cache_free(c); - kfree_skb(skb); - return err; - } + if (err < 0) + goto err; atomic_inc(&mrt->cache_resolve_queue_len); list_add(&c->_c.list, &mrt->mfc_unres_queue); @@ -1189,18 +1189,26 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, /* See if we can append the packet */ if (c->_c.mfc_un.unres.unresolved.qlen > 3) { - kfree_skb(skb); + c = NULL; err = -ENOBUFS; - } else { - if (dev) { - skb->dev = dev; - skb->skb_iif = dev->ifindex; - } - skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); - err = 0; + goto err; + } + + if (dev) { + skb->dev = dev; + skb->skb_iif = dev->ifindex; } + skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); + spin_unlock_bh(&mfc_unres_lock); + return 0; + +err: + spin_unlock_bh(&mfc_unres_lock); + if (c) + ipmr_cache_free(c); + kfree_skb(skb); return err; } @@ -1346,7 +1354,7 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags, } if (flags & MRT_FLUSH_MFC) { - if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { + if (atomic_read(&mrt->cache_resolve_queue_len) != 0 || !check_net(net)) { spin_lock_bh(&mfc_unres_lock); list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { list_del(&c->list); diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 37a3c144276c..3930d612c3de 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -28,6 +28,20 @@ void vif_device_init(struct vif_device *v, v->link = dev->ifindex; } +static void __mr_free_table(struct work_struct *work) +{ + struct mr_table *mrt = container_of(to_rcu_work(work), + struct mr_table, work); + + rhltable_destroy(&mrt->mfc_hash); + kfree(mrt); +} + +void mr_table_free(struct mr_table *mrt) +{ + queue_rcu_work(system_unbound_wq, &mrt->work); +} + struct mr_table * mr_table_alloc(struct net *net, u32 id, struct mr_table_ops *ops, @@ -50,6 +64,8 @@ mr_table_alloc(struct net *net, u32 id, kfree(mrt); return ERR_PTR(err); } + + INIT_RCU_WORK(&mrt->work, __mr_free_table); INIT_LIST_HEAD(&mrt->mfc_cache_list); INIT_LIST_HEAD(&mrt->mfc_unres_queue); -- cgit v1.2.3 From 4438113be604ee67a7bf4f81da6e1cca41332ce4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 24 Apr 2026 16:58:38 +0200 Subject: neigh: let neigh_xmit take skb ownership neigh_xmit always releases the skb, except when no neighbour table is found. But even the first added user of neigh_xmit (mpls) relied on neigh_xmit to release the skb (or queue it for tx). sashiko reported: If neigh_xmit() is called with an uninitialized neighbor table (for example, NEIGH_ND_TABLE when IPv6 is disabled), it returns -EAFNOSUPPORT and bypasses its internal out_kfree_skb error path. Because the return value of neigh_xmit() is ignored here, does this leak the SKB? Assume full ownership and remove the last code path that doesn't xmit or free skb. Fixes: 4fd3d7d9e868 ("neigh: Add helper function neigh_xmit") Signed-off-by: Florian Westphal Reviewed-by: Kuniyuki Iwashima Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260424145843.74055-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9e12524b67fa..5d9216016507 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3210,8 +3210,10 @@ int neigh_xmit(int index, struct net_device *dev, rcu_read_lock(); tbl = rcu_dereference(neigh_tables[index]); - if (!tbl) - goto out_unlock; + if (!tbl) { + rcu_read_unlock(); + goto out_kfree_skb; + } if (index == NEIGH_ARP_TABLE) { u32 key = *((u32 *)addr); @@ -3227,7 +3229,6 @@ int neigh_xmit(int index, struct net_device *dev, goto out_kfree_skb; } err = READ_ONCE(neigh->output)(neigh, skb); -out_unlock: rcu_read_unlock(); } else if (index == NEIGH_LINK_TABLE) { @@ -3237,11 +3238,10 @@ out_unlock: goto out_kfree_skb; err = dev_queue_xmit(skb); } -out: return err; out_kfree_skb: kfree_skb(skb); - goto out; + return err; } EXPORT_SYMBOL(neigh_xmit); -- cgit v1.2.3 From cc427d24ac6442ffdeafd157a63c7c5b73ed4de4 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Fri, 24 Apr 2026 09:29:17 -0700 Subject: ibmveth: Disable GSO for packets with small MSS Some physical adapters on Power systems do not support segmentation offload when the MSS is less than 224 bytes. Attempting to send such packets causes the adapter to freeze, stopping all traffic until manually reset. Implement ndo_features_check to disable GSO for packets with small MSS values. The network stack will perform software segmentation instead. The 224-byte minimum matches ibmvnic commit ("ibmvnic: Enforce stronger sanity checks on GSO packets") which uses the same physical adapters in SEA configurations. The issue occurs specifically when the hardware attempts to perform segmentation (gso_segs > 1) with a small MSS. Single-segment GSO packets (gso_segs == 1) do not trigger the problematic LSO code path and are transmitted normally without segmentation. Add an ndo_features_check callback to disable GSO when MSS < 224 bytes. Also call vlan_features_check() to ensure proper handling of VLAN packets, particularly QinQ (802.1ad) configurations where the hardware parser may not support certain offload features. Validated using iptables to force small MSS values. Without the fix, the adapter freezes. With the fix, packets are segmented in software and transmission succeeds. Comprehensive regression testing completedd (MSS tests, performance, stability). Fixes: 8641dd85799f ("ibmveth: Add support for TSO") Cc: stable@vger.kernel.org Reviewed-by: Brian King Tested-by: Shaik Abdulla Tested-by: Naveed Ahmed Signed-off-by: Mingming Cao Link: https://patch.msgid.link/20260424162917.65725-1-mmc@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmveth.c | 22 ++++++++++++++++++++++ drivers/net/ethernet/ibm/ibmveth.h | 1 + 2 files changed, 23 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 58cc3147afe2..73e051d26b9d 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1756,6 +1756,27 @@ static int ibmveth_set_mac_addr(struct net_device *dev, void *p) return 0; } +static netdev_features_t ibmveth_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + /* Some physical adapters do not support segmentation offload with + * MSS < 224. Disable GSO for such packets to avoid adapter freeze. + * Note: Single-segment packets (gso_segs == 1) don't need this check + * as they bypass the LSO path and are transmitted without segmentation. + */ + if (skb_is_gso(skb)) { + if (skb_shinfo(skb)->gso_size < IBMVETH_MIN_LSO_MSS) { + netdev_warn_once(dev, + "MSS %u too small for LSO, disabling GSO\n", + skb_shinfo(skb)->gso_size); + features &= ~NETIF_F_GSO_MASK; + } + } + + return vlan_features_check(skb, features); +} + static const struct net_device_ops ibmveth_netdev_ops = { .ndo_open = ibmveth_open, .ndo_stop = ibmveth_close, @@ -1767,6 +1788,7 @@ static const struct net_device_ops ibmveth_netdev_ops = { .ndo_set_features = ibmveth_set_features, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = ibmveth_set_mac_addr, + .ndo_features_check = ibmveth_features_check, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ibmveth_poll_controller, #endif diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 068f99df133e..d87713668ed3 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -37,6 +37,7 @@ #define IBMVETH_ILLAN_IPV4_TCP_CSUM 0x0000000000000002UL #define IBMVETH_ILLAN_ACTIVE_TRUNK 0x0000000000000001UL +#define IBMVETH_MIN_LSO_MSS 224 /* Minimum MSS for LSO */ /* hcall macros */ #define h_register_logical_lan(ua, buflst, rxq, fltlst, mac) \ plpar_hcall_norets(H_REGISTER_LOGICAL_LAN, ua, buflst, rxq, fltlst, mac) -- cgit v1.2.3 From 2b9f6f7065d4cfb65ba19126e0b35ac4544c3f3a Mon Sep 17 00:00:00 2001 From: Altan Hacigumus Date: Thu, 23 Apr 2026 18:46:38 -0700 Subject: tcp: make probe0 timer handle expired user timeout tcp_clamp_probe0_to_user_timeout() computes remaining time in jiffies using subtraction with an unsigned lvalue. If elapsed probing time exceeds the configured TCP_USER_TIMEOUT, the underflow yields a large value. This ends up re-arming the probe timer for a full backoff interval instead of expiring immediately, delaying connection teardown beyond the configured timeout. Fix this by preventing underflow so user-set timeout expiration is handled correctly without extending the probe timer. Fixes: 344db93ae3ee ("tcp: make TCP_USER_TIMEOUT accurate for zero window probes") Link: https://lore.kernel.org/r/20260414013634.43997-1-ahacigu.linux@gmail.com Signed-off-by: Altan Hacigumus Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260424014639.54110-1-ahacigu.linux@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_timer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8d791a954cd6..322db13333c7 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -50,7 +50,8 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) { const struct inet_connection_sock *icsk = inet_csk(sk); - u32 remaining, user_timeout; + u32 user_timeout; + s32 remaining; s32 elapsed; user_timeout = READ_ONCE(icsk->icsk_user_timeout); @@ -61,7 +62,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) if (unlikely(elapsed < 0)) elapsed = 0; remaining = msecs_to_jiffies(user_timeout) - elapsed; - remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN); + remaining = max_t(int, remaining, TCP_TIMEOUT_MIN); return min_t(u32, remaining, when); } -- cgit v1.2.3 From 3bc179bc7146c26c9dff75d2943d10528274e301 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 24 Apr 2026 08:31:16 -0700 Subject: netpoll: fix IPv6 local-address corruption netpoll_setup() decides whether to auto-populate the local source address by testing np->local_ip.ip, which only inspects the first 4 bytes of the union inet_addr storage. For an IPv6 netpoll whose caller-supplied local address has a zero high-32 bits (::1, ::, IPv4-mapped ::ffff:a.b.c.d, etc.), this misdetects the address as unset (which they are not, but the first 4 bytes are empty), calls netpoll_take_ipv6() and overwrites it with whatever matching link-local/global address the device happens to expose first. Introduce a helper netpoll_local_ip_unset() that picks the correct family-aware test (ipv6_addr_any() for IPv6, !.ip for IPv4) and use it from netpoll_setup(). Reproducer is something like: echo "::2" > local_ip echo 1 > enabled cat local_ip # before this fix: 2001:db8::1 (caller-supplied ::2 was clobbered) # after this fix: ::2 Fixes: b7394d2429c1 ("netpoll: prepare for ipv6") Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20260424-netpoll_fix-v1-1-3a55348c625f@debian.org Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index cd74beffd209..4381e0fc25bf 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -704,6 +704,23 @@ static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev) return 0; } +/* + * Test whether the caller left np->local_ip unset, so that + * netpoll_setup() should auto-populate it from the egress device. + * + * np->local_ip is a union of __be32 (IPv4) and struct in6_addr (IPv6), + * so an IPv6 address whose first 4 bytes are zero (e.g. ::1, ::2, + * IPv4-mapped ::ffff:a.b.c.d) must not be tested via the IPv4 arm — + * doing so would misclassify a caller-supplied address as unset and + * silently overwrite it with whatever address the device exposes. + */ +static bool netpoll_local_ip_unset(const struct netpoll *np) +{ + if (np->ipv6) + return ipv6_addr_any(&np->local_ip.in6); + return !np->local_ip.ip; +} + int netpoll_setup(struct netpoll *np) { struct net *net = current->nsproxy->net_ns; @@ -747,7 +764,7 @@ int netpoll_setup(struct netpoll *np) rtnl_lock(); } - if (!np->local_ip.ip) { + if (netpoll_local_ip_unset(np)) { if (!np->ipv6) { err = netpoll_take_ipv4(np, ndev); if (err) -- cgit v1.2.3 From 241ee17ecb6be210f7b231b2a81bfb68871950d0 Mon Sep 17 00:00:00 2001 From: wangdicheng Date: Tue, 28 Apr 2026 10:34:08 +0800 Subject: ASoC: aw88395: Fix kernel panic caused by invalid GPIO error pointer In aw88395_i2c_probe(), if `devm_gpiod_get_optional()` fails, it returns an ERR_PTR() error pointer. The current code only prints a message and continues execution, leaving `aw88395->reset_gpio` as an invalid pointer. Later, in `aw88395_hw_reset()`, this invalid pointer is passed to `gpiod_set_value_cansleep()`, which dereferences it and causes a kernel panic. For optional GPIOs, `devm_gpiod_get_optional()` returns NULL if the GPIO is not defined in the DT, which is safe. If it returns an ERR_PTR, it means a real error occurred (e.g., -EPROBE_DEFER) and the probe must be aborted. Also, since the GPIO is optional, remove the dev_err() log in aw88395_hw_reset() when the GPIO is missing to match the optional semantics. This also fixes a potential NULL pointer dereference as aw_pa is not initialized when aw88395_hw_reset() is called. Signed-off-by: wangdicheng Link: https://patch.msgid.link/20260428023408.46420-1-wangdich9700@163.com Signed-off-by: Mark Brown --- sound/soc/codecs/aw88395/aw88395.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/aw88395/aw88395.c b/sound/soc/codecs/aw88395/aw88395.c index 3602b5b9f7d7..dd09bac652f7 100644 --- a/sound/soc/codecs/aw88395/aw88395.c +++ b/sound/soc/codecs/aw88395/aw88395.c @@ -456,8 +456,6 @@ static void aw88395_hw_reset(struct aw88395 *aw88395) usleep_range(AW88395_1000_US, AW88395_1000_US + 10); gpiod_set_value_cansleep(aw88395->reset_gpio, 1); usleep_range(AW88395_1000_US, AW88395_1000_US + 10); - } else { - dev_err(aw88395->aw_pa->dev, "%s failed", __func__); } } @@ -522,9 +520,10 @@ static int aw88395_i2c_probe(struct i2c_client *i2c) i2c_set_clientdata(i2c, aw88395); aw88395->reset_gpio = devm_gpiod_get_optional(&i2c->dev, "reset", GPIOD_OUT_LOW); - if (IS_ERR(aw88395->reset_gpio)) - dev_info(&i2c->dev, "reset gpio not defined\n"); - + if (IS_ERR(aw88395->reset_gpio)) { + return dev_err_probe(&i2c->dev, PTR_ERR(aw88395->reset_gpio), + "failed to get reset gpio\n"); + } /* hardware reset */ aw88395_hw_reset(aw88395); -- cgit v1.2.3 From ada95e5e603bc6e353ee029f2ba7a7d9a42ad018 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Wed, 22 Apr 2026 17:06:46 +0300 Subject: tools/selftests: Use a sensible timeout value for iperf3 client The default timeout of cmd() is 5 seconds and Iperf3Runner requests the iperf3 client to run for 10 seconds, which clearly doesn't work since commit [1] enforced the timeout parameter. Use a value derived from duration as timeout (+5 seconds for startup/teardown/various other overhead). [1] commit f0bd19316663 ("selftests: net: fix timeout passed as positional argument to communicate()") Signed-off-by: Cosmin Ratiu Signed-off-by: Steffen Klassert --- tools/testing/selftests/drivers/net/lib/py/load.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index f181fa2d38fc..e24660e5c27f 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -48,7 +48,10 @@ class Iperf3Runner: Starts the iperf3 client with the configured options. """ cmdline = self._build_client(streams, duration, reverse) - return cmd(cmdline, background=background, host=self.env.remote) + kwargs = {"background": background, "host": self.env.remote} + if not background: + kwargs["timeout"] = duration + 5 + return cmd(cmdline, **kwargs) def measure_bandwidth(self, reverse=False): """ -- cgit v1.2.3 From e64e03b478e2da7093564819e903932fca2ddfa1 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Wed, 22 Apr 2026 17:06:47 +0300 Subject: tools/selftests: Add a VXLAN+IPsec traffic test There are VXLAN tests and IPsec tests, but there is no test that combines the two protocols and exercises the tunnel-over-ipsec code paths. Fix that by adding a traffic test with VXLAN and IPsec using crypto offload. This is runnable on HW which supports ESP offload (so no nsim unfortunately). Traffic is done with iperf3 and the test validates that there are no packet drops and iperf3 can get to at least 100 Mbps (a very conservative value on today's crypto offload HW, as it can typically reach multi-Gbps rates). Ran right now, the test fails due to a recently exposed bug in xfrm, which will be fixed in the next patch: # ./tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py TAP version 13 1..4 # Check| At ./tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py, # line 161, in test_vxlan_ipsec_crypto_offload: # Check| ksft_eq(drops_after - drops_before, 0, # Check failed 189 != 0 TX drops during VXLAN+IPsec # Check| At ./tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py, # line 163, in test_vxlan_ipsec_crypto_offload: # Check| ksft_ge(bw_gbps, 0.1, # Check failed 0.0015058278404812596 < 0.1 Minimum 100Mbps over # VXLAN+IPsec not ok 1 ipsec_vxlan.test_vxlan_ipsec_crypto_offload.outer_v4_inner_v4 ... Signed-off-by: Cosmin Ratiu Signed-off-by: Steffen Klassert --- tools/testing/selftests/drivers/net/hw/Makefile | 1 + tools/testing/selftests/drivers/net/hw/config | 5 + .../selftests/drivers/net/hw/ipsec_vxlan.py | 204 +++++++++++++++++++++ 3 files changed, 210 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 85ca4d1ecf9e..3b6ff4708005 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -30,6 +30,7 @@ TEST_PROGS = \ gro_hw.py \ hw_stats_l3.sh \ hw_stats_l3_gre.sh \ + ipsec_vxlan.py \ iou-zcrx.py \ irq.py \ loopback.sh \ diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config index dd50cb8a7911..ae0168c2bbe6 100644 --- a/tools/testing/selftests/drivers/net/hw/config +++ b/tools/testing/selftests/drivers/net/hw/config @@ -12,5 +12,10 @@ CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y CONFIG_NETKIT=y CONFIG_NET_SCH_INGRESS=y +CONFIG_INET6_ESP=y +CONFIG_INET6_ESP_OFFLOAD=y +CONFIG_INET_ESP=y +CONFIG_INET_ESP_OFFLOAD=y CONFIG_UDMABUF=y CONFIG_VXLAN=y +CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py b/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py new file mode 100755 index 000000000000..0740a4d85240 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +"""Traffic test for VXLAN + IPsec crypto-offload.""" + +import os + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge +from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx +from lib.py import CmdExitFailure, NetDrvEpEnv, cmd, defer, ethtool, ip +from lib.py import Iperf3Runner + +# Inner tunnel addresses - TEST-NET-2 (RFC 5737) / doc prefix (RFC 3849) +INNER_V4_LOCAL = "198.51.100.1" +INNER_V4_REMOTE = "198.51.100.2" +INNER_V6_LOCAL = "2001:db8:100::1" +INNER_V6_REMOTE = "2001:db8:100::2" + +# ESP parameters +SPI_OUT = "0x1000" +SPI_IN = "0x1001" +# 128-bit key + 32-bit salt = 20 bytes hex, 128-bit ICV +ESP_AEAD = "aead 'rfc4106(gcm(aes))' 0x" + "01" * 20 + " 128" + + +def xfrm(args, host=None): + """Runs 'ip xfrm' via shell to preserve parentheses in algo names.""" + cmd(f"ip xfrm {args}", shell=True, host=host) + + +def check_xfrm_offload_support(): + """Skips if iproute2 lacks xfrm offload support.""" + out = cmd("ip xfrm state help", fail=False) + if "offload" not in out.stdout + out.stderr: + raise KsftSkipEx("iproute2 too old, missing xfrm offload") + + +def check_esp_hw_offload(cfg): + """Skips if device lacks esp-hw-offload support.""" + check_xfrm_offload_support() + try: + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + except (CmdExitFailure, IndexError) as e: + raise KsftSkipEx(f"can't query features: {e}") from e + if not feat.get("esp-hw-offload", {}).get("active"): + raise KsftSkipEx("Device does not support esp-hw-offload") + + +def get_tx_drops(cfg): + """Returns TX dropped counter from the physical device.""" + stats = ip("-s -s link show dev " + cfg.ifname, json=True)[0] + return stats["stats64"]["tx"]["dropped"] + + +def setup_vxlan_ipsec(cfg, outer_ipver, inner_ipver): + """Sets up VXLAN tunnel with IPsec transport-mode crypto-offload.""" + vxlan_name = f"vx{os.getpid()}" + local_addr = cfg.addr_v[outer_ipver] + remote_addr = cfg.remote_addr_v[outer_ipver] + + if inner_ipver == "4": + inner_local = f"{INNER_V4_LOCAL}/24" + inner_remote = f"{INNER_V4_REMOTE}/24" + addr_extra = "" + else: + inner_local = f"{INNER_V6_LOCAL}/64" + inner_remote = f"{INNER_V6_REMOTE}/64" + addr_extra = " nodad" + + if outer_ipver == "6": + vxlan_opts = "udp6zerocsumtx udp6zerocsumrx" + else: + vxlan_opts = "noudpcsum" + + # VXLAN tunnel - local side + ip(f"link add {vxlan_name} type vxlan id 100 dstport 4789 {vxlan_opts} " + f"local {local_addr} remote {remote_addr} dev {cfg.ifname}") + defer(ip, f"link del {vxlan_name}") + ip(f"addr add {inner_local} dev {vxlan_name}{addr_extra}") + ip(f"link set {vxlan_name} up") + + # VXLAN tunnel - remote side + ip(f"link add {vxlan_name} type vxlan id 100 dstport 4789 {vxlan_opts} " + f"local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}", + host=cfg.remote) + defer(ip, f"link del {vxlan_name}", host=cfg.remote) + ip(f"addr add {inner_remote} dev {vxlan_name}{addr_extra}", + host=cfg.remote) + ip(f"link set {vxlan_name} up", host=cfg.remote) + + # xfrm state - local outbound SA + xfrm(f"state add src {local_addr} dst {remote_addr} " + f"proto esp spi {SPI_OUT} " + f"{ESP_AEAD} " + f"mode transport offload crypto dev {cfg.ifname} dir out") + defer(xfrm, f"state del src {local_addr} dst {remote_addr} " + f"proto esp spi {SPI_OUT}") + + # xfrm state - local inbound SA + xfrm(f"state add src {remote_addr} dst {local_addr} " + f"proto esp spi {SPI_IN} " + f"{ESP_AEAD} " + f"mode transport offload crypto dev {cfg.ifname} dir in") + defer(xfrm, f"state del src {remote_addr} dst {local_addr} " + f"proto esp spi {SPI_IN}") + + # xfrm state - remote outbound SA (mirror, software crypto) + xfrm(f"state add src {remote_addr} dst {local_addr} " + f"proto esp spi {SPI_IN} " + f"{ESP_AEAD} " + f"mode transport", + host=cfg.remote) + defer(xfrm, f"state del src {remote_addr} dst {local_addr} " + f"proto esp spi {SPI_IN}", host=cfg.remote) + + # xfrm state - remote inbound SA (mirror, software crypto) + xfrm(f"state add src {local_addr} dst {remote_addr} " + f"proto esp spi {SPI_OUT} " + f"{ESP_AEAD} " + f"mode transport", + host=cfg.remote) + defer(xfrm, f"state del src {local_addr} dst {remote_addr} " + f"proto esp spi {SPI_OUT}", host=cfg.remote) + + # xfrm policy - local out + xfrm(f"policy add src {local_addr} dst {remote_addr} " + f"proto udp dport 4789 dir out " + f"tmpl src {local_addr} dst {remote_addr} proto esp mode transport") + defer(xfrm, f"policy del src {local_addr} dst {remote_addr} " + f"proto udp dport 4789 dir out") + + # xfrm policy - local in + xfrm(f"policy add src {remote_addr} dst {local_addr} " + f"proto udp dport 4789 dir in " + f"tmpl src {remote_addr} dst {local_addr} proto esp mode transport") + defer(xfrm, f"policy del src {remote_addr} dst {local_addr} " + f"proto udp dport 4789 dir in") + + # xfrm policy - remote out + xfrm(f"policy add src {remote_addr} dst {local_addr} " + f"proto udp dport 4789 dir out " + f"tmpl src {remote_addr} dst {local_addr} proto esp mode transport", + host=cfg.remote) + defer(xfrm, f"policy del src {remote_addr} dst {local_addr} " + f"proto udp dport 4789 dir out", host=cfg.remote) + + # xfrm policy - remote in + xfrm(f"policy add src {local_addr} dst {remote_addr} " + f"proto udp dport 4789 dir in " + f"tmpl src {local_addr} dst {remote_addr} proto esp mode transport", + host=cfg.remote) + defer(xfrm, f"policy del src {local_addr} dst {remote_addr} " + f"proto udp dport 4789 dir in", host=cfg.remote) + + +def _vxlan_ipsec_variants(): + """Generates outer/inner IP version variants.""" + for outer in ["4", "6"]: + for inner in ["4", "6"]: + yield KsftNamedVariant(f"outer_v{outer}_inner_v{inner}", outer, inner) + + +@ksft_variants(_vxlan_ipsec_variants()) +def test_vxlan_ipsec_crypto_offload(cfg, outer_ipver, inner_ipver): + """Tests VXLAN+IPsec crypto-offload has no TX drops.""" + cfg.require_ipver(outer_ipver) + check_esp_hw_offload(cfg) + + setup_vxlan_ipsec(cfg, outer_ipver, inner_ipver) + + if inner_ipver == "4": + inner_local = INNER_V4_LOCAL + inner_remote = INNER_V4_REMOTE + ping = "ping" + else: + inner_local = INNER_V6_LOCAL + inner_remote = INNER_V6_REMOTE + ping = "ping -6" + + cmd(f"{ping} -c 1 -W 2 {inner_remote}") + + drops_before = get_tx_drops(cfg) + + runner = Iperf3Runner(cfg, server_ip=inner_local, + client_ip=inner_remote) + bw_gbps = runner.measure_bandwidth(reverse=True) + + cfg.wait_hw_stats_settle() + drops_after = get_tx_drops(cfg) + + ksft_eq(drops_after - drops_before, 0, + comment="TX drops during VXLAN+IPsec") + ksft_ge(bw_gbps, 0.1, + comment="Minimum 100Mbps over VXLAN+IPsec") + + +def main(): + """Runs VXLAN+IPsec crypto-offload GSO selftest.""" + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + ksft_run([test_vxlan_ipsec_crypto_offload], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() -- cgit v1.2.3 From fa90a3145c0340c3f624206a81637c542254ea1d Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Wed, 22 Apr 2026 17:06:48 +0300 Subject: xfrm: Don't clobber inner headers when already set On VXLAN over IPsec egress, xfrm{4,6}_transport_output() blindly overwrite inner_transport_header (== the inner TCP header saved in VXLAN iptunnel_handle_offloads() -> skb_reset_inner_headers()) with the current transport_header (== the VXLAN outer UDP header set by udp_tunnel_xmit_skb()). This was a latent bug, harmless until commit [1] added a doff validation check in qdisc_pkt_len_segs_init() for encapsulated GSO packets. With the wrong inner_transport_header set by xfrm, qdisc_pkt_len_segs_init() interprets inner_transport_header as a TCP header, reads doff=0 from the upper byte of the VNI and drops the packet with DROP_REASON_SKB_BAD_GSO. Besides the use in GSO to determine the header size of segmented packets, inner_transport_header might be used by drivers to set up inner checksum offloading by pointing the HW to the inner transport header. A quick browse through available drivers shows that mlx5 uses skb->csum_start specifically for this scenario, while others either don't support VXLAN over IPsec crypto offload (ixgbe) or the HW is capable of parsing the packets itself (nfp, Chelsio). But in all cases, it is more correct to let the inner_transport_header point to the innermost header instead of overwriting it in xfrm. So fix this by guarding all four inner header save sites in xfrm_output.c (xfrm{4,6}_transport_output, xfrm{4,6}_tunnel_encap_add) with a check for skb->inner_protocol. When inner_protocol is set, a tunnel layer (VXLAN, Geneve, GRE, etc.) has already saved the correct inner header offsets and they must not be overwritten. When inner_protocol is zero, no prior tunnel encapsulation exists and xfrm must save the inner headers itself. The tunnel mode checks are only added for completion, since they aren't strictly required, as xfrm_output() forces software GSO in tunnel mode before encap. This makes the previously added test pass: # ./tools/testing/selftests/drivers/net/hw/ipsec_vxlan.py TAP version 13 1..4 ok 1 ipsec_vxlan.test_vxlan_ipsec_crypto_offload.outer_v4_inner_v4 ok 2 ipsec_vxlan.test_vxlan_ipsec_crypto_offload.outer_v4_inner_v6 ok 3 ipsec_vxlan.test_vxlan_ipsec_crypto_offload.outer_v6_inner_v4 ok 4 ipsec_vxlan.test_vxlan_ipsec_crypto_offload.outer_v6_inner_v6 # Totals: pass:4 fail:0 xfail:0 xpass:0 skip:0 error:0 [1] commit 7fb4c1967011 ("net: pull headers in qdisc_pkt_len_segs_init()") Fixes: f1bd7d659ef0 ("xfrm: Add encapsulation header offsets while SKB is not encrypted") Signed-off-by: Cosmin Ratiu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_output.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index a9652b422f51..cc35c2fcbbe0 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -66,7 +66,9 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph = ip_hdr(skb); int ihl = iph->ihl * 4; - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) + skb_set_inner_transport_header(skb, + skb_transport_offset(skb)); skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + @@ -167,7 +169,9 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) int hdr_len; iph = ipv6_hdr(skb); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) + skb_set_inner_transport_header(skb, + skb_transport_offset(skb)); hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr); if (hdr_len < 0) @@ -276,8 +280,10 @@ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *top_iph; int flags; - skb_set_inner_network_header(skb, skb_network_offset(skb)); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) { + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + } skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + @@ -321,8 +327,10 @@ static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) struct ipv6hdr *top_iph; int dsfield; - skb_set_inner_network_header(skb, skb_network_offset(skb)); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + if (!skb->inner_protocol) { + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + } skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + -- cgit v1.2.3 From 0a7b5221b5b51cc798fcfc3be00d02eade149d69 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2026 14:37:53 +0200 Subject: ALSA: caiaq: Fix potentially leftover ep1_in_urb at error path The previous fix for handling the error from setup_card() missed that an internal URB cdev->ep1_in_urb might have been already submitted beforehand. In the normal case, this URB gets killed at the disconnection, but in the error path, we didn't do it, hence there can be a potential leak. Fix it in the error path for setup_card(), too. Fixes: 28abd224db4a ("ALSA: caiaq: Handle probe errors properly") Cc: Link: https://patch.msgid.link/20260427123819.890185-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/caiaq/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index ad9f744b496b..1afd91e27396 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -514,7 +514,7 @@ static int init_card(struct snd_usb_caiaqdev *cdev) err = setup_card(cdev); if (err < 0) - return err; + goto err_kill_urb; return 0; -- cgit v1.2.3 From b32ae47a2b0a1fb4bd4942242847966d9b178222 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2026 16:56:15 +0200 Subject: ALSA: caiaq: Don't abort when no input device is available The previous fix to handle the error from setup_card() caused a regression for the models that have no dedicated input device; snd_usb_caiaq_input_init() just returns -EINVAL, and we treat it as a fatal error although it should be ignored. As a regression fix, change the error code to -ENODEV, and ignore this error in the callee, to continue probing. Fixes: 28abd224db4a ("ALSA: caiaq: Handle probe errors properly") Cc: Link: https://bugzilla.kernel.org/show_bug.cgi?id=221423 Link: https://patch.msgid.link/20260427145642.6637-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/caiaq/device.c | 2 +- sound/usb/caiaq/input.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index 1afd91e27396..b20aae0caf60 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -366,7 +366,7 @@ static int setup_card(struct snd_usb_caiaqdev *cdev) #ifdef CONFIG_SND_USB_CAIAQ_INPUT ret = snd_usb_caiaq_input_init(cdev); - if (ret < 0) { + if (ret < 0 && ret != -ENODEV) { dev_err(dev, "Unable to set up input system (ret=%d)\n", ret); return ret; } diff --git a/sound/usb/caiaq/input.c b/sound/usb/caiaq/input.c index a9130891bb69..5c70fdf61cc1 100644 --- a/sound/usb/caiaq/input.c +++ b/sound/usb/caiaq/input.c @@ -804,7 +804,7 @@ int snd_usb_caiaq_input_init(struct snd_usb_caiaqdev *cdev) default: /* no input methods supported on this device */ - ret = -EINVAL; + ret = -ENODEV; goto exit_free_idev; } -- cgit v1.2.3 From c39f0bc03f84ba64c9144c95714df1dc36150f6d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2026 17:15:04 +0200 Subject: ALSA: usb-audio: Fix potential leak of pd at parsing UAC3 streams At parsing UAC3 streams, we allocate a PD object at each time, and either assign or free it. But there is a case where the PD object may be leaked; namely, in __snd_usb_parse_audio_interface() loop, when an audioformat shares the same endpoint with others, it's put to a link and returns from snd_usb_add_audio_stream(), but the PD is forgotten afterwards. Overall, the treatment of PD object in the parser code is a bit flaky, and we should be more careful about the object ownership. This patch tries to fix the above case and improve the code a bit. The pd object is now managed with the auto-cleanup in the loop, and the ownership is updated when the pd object gets assigned to the stream, which guarantees the release of the leftover object. Fixes: 7edf3b5e6a45 ("ALSA: usb-audio: AudioStreaming Power Domain parsing") Link: https://patch.msgid.link/20260427151508.12544-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 +- sound/usb/stream.c | 58 +++++++++++++++++++++--------------------------------- sound/usb/stream.h | 3 ++- 3 files changed, 25 insertions(+), 38 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 7b803ad58487..0b4ecc2c6bcc 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -125,7 +125,7 @@ static int add_audio_stream_from_fixed_fmt(struct snd_usb_audio *chip, snd_usb_audioformat_set_sync_ep(chip, fp); - err = snd_usb_add_audio_stream(chip, stream, fp); + err = snd_usb_add_audio_stream(chip, stream, fp, NULL); if (err < 0) return err; diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 6c51226f771b..f8f56ace5652 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -79,7 +79,7 @@ static void snd_usb_audio_pcm_free(struct snd_pcm *pcm) static void snd_usb_init_substream(struct snd_usb_stream *as, int stream, struct audioformat *fp, - struct snd_usb_power_domain *pd) + struct snd_usb_power_domain **pdptr) { struct snd_usb_substream *subs = &as->substream[stream]; @@ -105,10 +105,11 @@ static void snd_usb_init_substream(struct snd_usb_stream *as, if (fp->channels > subs->channels_max) subs->channels_max = fp->channels; - if (pd) { - subs->str_pd = pd; + if (pdptr && *pdptr) { + subs->str_pd = *pdptr; + *pdptr = NULL; /* assigned */ /* Initialize Power Domain to idle status D1 */ - snd_usb_power_domain_set(subs->stream->chip, pd, + snd_usb_power_domain_set(subs->stream->chip, subs->str_pd, UAC3_PD_STATE_D1); } @@ -492,11 +493,14 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor * if not, create a new pcm stream. note, fp is added to the substream * fmt_list and will be freed on the chip instance release. do not free * fp or do remove it from the substream fmt_list to avoid double-free. + * + * pdptr is optional and can be NULL. When it's non-NULL and the PD gets + * assigned to the stream, *pdptr is cleared to NULL upon return. */ -static int __snd_usb_add_audio_stream(struct snd_usb_audio *chip, - int stream, - struct audioformat *fp, - struct snd_usb_power_domain *pd) +int snd_usb_add_audio_stream(struct snd_usb_audio *chip, + int stream, + struct audioformat *fp, + struct snd_usb_power_domain **pdptr) { struct snd_usb_stream *as; @@ -529,7 +533,7 @@ static int __snd_usb_add_audio_stream(struct snd_usb_audio *chip, err = snd_pcm_new_stream(as->pcm, stream, 1); if (err < 0) return err; - snd_usb_init_substream(as, stream, fp, pd); + snd_usb_init_substream(as, stream, fp, pdptr); return add_chmap(as->pcm, stream, subs); } @@ -558,7 +562,7 @@ static int __snd_usb_add_audio_stream(struct snd_usb_audio *chip, else strscpy(pcm->name, "USB Audio"); - snd_usb_init_substream(as, stream, fp, pd); + snd_usb_init_substream(as, stream, fp, pdptr); /* * Keep using head insertion for M-Audio Audiophile USB (tm) which has a @@ -576,21 +580,6 @@ static int __snd_usb_add_audio_stream(struct snd_usb_audio *chip, return add_chmap(pcm, stream, &as->substream[stream]); } -int snd_usb_add_audio_stream(struct snd_usb_audio *chip, - int stream, - struct audioformat *fp) -{ - return __snd_usb_add_audio_stream(chip, stream, fp, NULL); -} - -static int snd_usb_add_audio_stream_v3(struct snd_usb_audio *chip, - int stream, - struct audioformat *fp, - struct snd_usb_power_domain *pd) -{ - return __snd_usb_add_audio_stream(chip, stream, fp, pd); -} - static int parse_uac_endpoint_attributes(struct snd_usb_audio *chip, struct usb_host_interface *alts, int protocol, int iface_no) @@ -1113,8 +1102,7 @@ found_clock: } } - if (pd) - *pd_out = pd; + *pd_out = pd; return fp; } @@ -1129,7 +1117,6 @@ static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip, struct usb_interface_descriptor *altsd; int i, altno, err, stream; struct audioformat *fp = NULL; - struct snd_usb_power_domain *pd = NULL; bool set_iface_first; int num, protocol; @@ -1171,6 +1158,12 @@ static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip, if (snd_usb_apply_interface_quirk(chip, iface_no, altno)) continue; + /* pd may be allocated at snd_usb_get_audioformat_uac3() and + * assigned at snd_usb_add_audio_stream(); otherwise it'll be + * freed automatically by cleanup at each loop. + */ + struct snd_usb_power_domain *pd __free(kfree) = NULL; + /* * Roland audio streaming interfaces are marked with protocols * 0/1/2, but are UAC 1 compatible. @@ -1226,23 +1219,16 @@ static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip, *has_non_pcm = true; if ((fp->fmt_type == UAC_FORMAT_TYPE_I) == non_pcm) { audioformat_free(fp); - kfree(pd); fp = NULL; - pd = NULL; continue; } snd_usb_audioformat_set_sync_ep(chip, fp); dev_dbg(&dev->dev, "%u:%d: add audio endpoint %#x\n", iface_no, altno, fp->endpoint); - if (protocol == UAC_VERSION_3) - err = snd_usb_add_audio_stream_v3(chip, stream, fp, pd); - else - err = snd_usb_add_audio_stream(chip, stream, fp); - + err = snd_usb_add_audio_stream(chip, stream, fp, &pd); if (err < 0) { audioformat_free(fp); - kfree(pd); return err; } diff --git a/sound/usb/stream.h b/sound/usb/stream.h index d92e18d5818f..61b9a133da01 100644 --- a/sound/usb/stream.h +++ b/sound/usb/stream.h @@ -7,7 +7,8 @@ int snd_usb_parse_audio_interface(struct snd_usb_audio *chip, int snd_usb_add_audio_stream(struct snd_usb_audio *chip, int stream, - struct audioformat *fp); + struct audioformat *fp, + struct snd_usb_power_domain **pdptr); #endif /* __USBAUDIO_STREAM_H */ -- cgit v1.2.3 From 6e7247d8f5fefeceb0bb9cc80a5388a636b219cd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 27 Apr 2026 17:22:15 +0200 Subject: ALSA: usb-audio: Avoid potential endless loop in convert_chmap_v3() The convert_chmap_v3() has a loop with its increment size of cs_desc->wLength, but we forgot to validate cs_desc->wLength itself, which may lead to potential endless loop by a malformed descriptor. Add a proper size check to abort the loop for plugging the hole. Fixes: ecfd41166b72 ("ALSA: usb-audio: Validate UAC3 cluster segment descriptors") Cc: Link: https://patch.msgid.link/20260427152224.15276-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/stream.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index f8f56ace5652..b2c5c8198281 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -353,6 +353,8 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor if (len < sizeof(*cs_desc)) break; cs_len = le16_to_cpu(cs_desc->wLength); + if (cs_len < sizeof(*cs_desc)) + break; if (len < cs_len) break; cs_type = cs_desc->bSegmentType; -- cgit v1.2.3 From c5cd6fd75b6a55761337c9e965dd5ad02485d00d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Apr 2026 13:22:22 +0200 Subject: sched/fair: Fix the negative lag increase fix Vincent reported that my rework of his original patch lost a little something. Specifically it got the return value wrong; it should not compare against the old se->vlag, but rather against the current value. Since the thing that matters is if the effective vruntime of an entity is affected and the thing needs repositioning or not. Fixes: 059258b0d424 ("sched/fair: Prevent negative lag increase during delayed dequeue") Reported-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Tested-by: Vincent Guittot Link: https://patch.msgid.link/20260423094107.GT3102624%40noisy.programming.kicks-ass.net --- kernel/sched/fair.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 69361c63353a..615861d96357 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -847,13 +847,19 @@ static s64 entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 avrunt * Similarly, check that the entity didn't gain positive lag when DELAY_ZERO * is set. * - * Return true if the lag has been adjusted. + * Return true if the vlag has been modified. Specifically: + * + * se->vlag != avg_vruntime() - se->vruntime + * + * This can be due to clamping in entity_lag() or clamping due to + * sched_delayed. Either way, when vlag is modified and the entity is + * retained, the tree needs to be adjusted. */ static __always_inline bool update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) { - s64 vlag = entity_lag(cfs_rq, se, avg_vruntime(cfs_rq)); - bool ret; + u64 avruntime = avg_vruntime(cfs_rq); + s64 vlag = entity_lag(cfs_rq, se, avruntime); WARN_ON_ONCE(!se->on_rq); @@ -863,10 +869,9 @@ bool update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) if (sched_feat(DELAY_ZERO)) vlag = min(vlag, 0); } - ret = (vlag == se->vlag); se->vlag = vlag; - return ret; + return avruntime - vlag != se->vruntime; } /* -- cgit v1.2.3 From ac8e69e693631689d74d8f1ebee6f84f737f797f Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 22 Apr 2026 11:34:00 +0200 Subject: sched/fair: Fix wakeup_preempt_fair() vs delayed dequeue Similar to how pick_next_entity() must dequeue delayed entities, so too must wakeup_preempt_fair(). Any delayed task being found means it is eligible and hence past the 0-lag point, ready for removal. Worse, by not removing delayed entities from consideration, it can skew the preemption decision, with the end result that a short slice wakeup will not result in a preemption. tip/sched/core tip/sched/core +this patch cyclictest slice (ms) (default)2.8 8 8 hackbench slice (ms) (default)2.8 20 20 Total Samples | 22559 22595 22683 Average (us) | 157 64( 59%) 59( 8%) Median (P50) (us) | 57 57( 0%) 58(- 2%) 90th Percentile (us) | 64 60( 6%) 60( 0%) 99th Percentile (us) | 2407 67( 97%) 67( 0%) 99.9th Percentile (us) | 3400 2288( 33%) 727( 68%) Maximum (us) | 5037 9252(-84%) 7461( 19%) Fixes: f12e148892ed ("sched/fair: Prepare pick_next_task() for delayed dequeue") Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260422093400.319251-1-vincent.guittot@linaro.org --- kernel/sched/fair.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 615861d96357..728965851842 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1104,7 +1104,7 @@ static inline void cancel_protect_slice(struct sched_entity *se) * * Which allows tree pruning through eligibility. */ -static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq, bool protect) +static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq, bool protect) { struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node; struct sched_entity *se = __pick_first_entity(cfs_rq); @@ -1175,11 +1175,6 @@ found: return best; } -static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) -{ - return __pick_eevdf(cfs_rq, true); -} - struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) { struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root); @@ -5754,11 +5749,11 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags); * 4) do not run the "skip" process, if something else is available */ static struct sched_entity * -pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq) +pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq, bool protect) { struct sched_entity *se; - se = pick_eevdf(cfs_rq); + se = pick_eevdf(cfs_rq, protect); if (se->sched_delayed) { dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED); /* @@ -9032,7 +9027,7 @@ static void wakeup_preempt_fair(struct rq *rq, struct task_struct *p, int wake_f { enum preempt_wakeup_action preempt_action = PREEMPT_WAKEUP_PICK; struct task_struct *donor = rq->donor; - struct sched_entity *se = &donor->se, *pse = &p->se; + struct sched_entity *nse, *se = &donor->se, *pse = &p->se; struct cfs_rq *cfs_rq = task_cfs_rq(donor); int cse_is_idle, pse_is_idle; @@ -9143,11 +9138,17 @@ static void wakeup_preempt_fair(struct rq *rq, struct task_struct *p, int wake_f } pick: + nse = pick_next_entity(rq, cfs_rq, preempt_action != PREEMPT_WAKEUP_SHORT); + /* If @p has become the most eligible task, force preemption */ + if (nse == pse) + goto preempt; + /* - * If @p has become the most eligible task, force preemption. + * Because p is enqueued, nse being null can only mean that we + * dequeued a delayed task. */ - if (__pick_eevdf(cfs_rq, preempt_action != PREEMPT_WAKEUP_SHORT) == pse) - goto preempt; + if (!nse) + goto pick; if (sched_feat(RUN_TO_PARITY)) update_protect_slice(cfs_rq, se); @@ -9184,7 +9185,7 @@ again: throttled |= check_cfs_rq_runtime(cfs_rq); - se = pick_next_entity(rq, cfs_rq); + se = pick_next_entity(rq, cfs_rq, true); if (!se) goto again; cfs_rq = group_cfs_rq(se); -- cgit v1.2.3 From 3da56dc063cd77b9c0b40add930767fab4e389f3 Mon Sep 17 00:00:00 2001 From: Zicheng Qu Date: Fri, 24 Apr 2026 07:11:13 +0000 Subject: sched/fair: Clear rel_deadline when initializing forked entities A yield-triggered crash can happen when a newly forked sched_entity enters the fair class with se->rel_deadline unexpectedly set. The failing sequence is: 1. A task is forked while se->rel_deadline is still set. 2. __sched_fork() initializes vruntime, vlag and other sched_entity state, but does not clear rel_deadline. 3. On the first enqueue, enqueue_entity() calls place_entity(). 4. Because se->rel_deadline is set, place_entity() treats se->deadline as a relative deadline and converts it to an absolute deadline by adding the current vruntime. 5. However, the forked entity's deadline is not a valid inherited relative deadline for this new scheduling instance, so the conversion produces an abnormally large deadline. 6. If the task later calls sched_yield(), yield_task_fair() advances se->vruntime to se->deadline. 7. The inflated vruntime is then used by the following enqueue path, where the vruntime-derived key can overflow when multiplied by the entity weight. 8. This corrupts cfs_rq->sum_w_vruntime, breaks EEVDF eligibility calculation, and can eventually make all entities appear ineligible. pick_next_entity() may then return NULL unexpectedly, leading to a later NULL dereference. A captured trace shows the effect clearly. Before yield, the entity's vruntime was around: 9834017729983308 After yield_task_fair() executed: se->vruntime = se->deadline the vruntime jumped to: 19668035460670230 and the deadline was later advanced further to: 19668035463470230 This shows that the deadline had already become abnormally large before yield_task_fair() copied it into vruntime. rel_deadline is only meaningful when se->deadline really carries a relative deadline that still needs to be placed against vruntime. A freshly forked sched_entity should not inherit or retain this state. Clear se->rel_deadline in __sched_fork(), together with the other sched_entity runtime state, so that the first enqueue does not interpret the new entity's deadline as a stale relative deadline. Fixes: 82e9d0456e06 ("sched/fair: Avoid re-setting virtual deadline on 'migrations'") Analyzed-by: Hui Tang Analyzed-by: Zhang Qiao Signed-off-by: Zicheng Qu Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260424071113.1199600-1-quzicheng@huawei.com --- kernel/sched/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index da20fb6ea25a..b8871449d3c6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4458,6 +4458,7 @@ static void __sched_fork(u64 clone_flags, struct task_struct *p) p->se.nr_migrations = 0; p->se.vruntime = 0; p->se.vlag = 0; + p->se.rel_deadline = 0; INIT_LIST_HEAD(&p->se.group_node); /* A delayed task cannot be in clone(). */ -- cgit v1.2.3 From db57a1aa54ff68669781976e4edb045e09e2b65b Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Thu, 23 Apr 2026 02:38:46 +0900 Subject: wifi: rsi: fix kthread lifetime race between self-exit and external-stop RSI driver use both self-exit(kthread_complete_and_exit) and external-stop (kthread_stop) when killing a kthread. Generally, kthread_stop() is called first, and in this case, no particular issues occur. However, in rare instances where kthread_complete_and_exit() is called first and then kthread_stop() is called, a UAF occurs because the kthread object, which has already exited and been freed, is accessed again. Therefore, to prevent this with minimal modification, you must remove kthread_stop() and change the code to wait until the self-exit operation is completed. Cc: Reported-by: syzbot+5de83f57cd8531f55596@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/69e5d03b.a00a0220.1bd0ca.0064.GAE@google.com/ Fixes: 4c62764d0fc2 ("rsi: improve kernel thread handling to fix kernel panic") Signed-off-by: Jeongjun Park Link: https://patch.msgid.link/20260422173846.37640-1-aha310510@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/rsi/rsi_common.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h index 591602beeec6..3cdf9ded876d 100644 --- a/drivers/net/wireless/rsi/rsi_common.h +++ b/drivers/net/wireless/rsi/rsi_common.h @@ -70,12 +70,11 @@ static inline int rsi_create_kthread(struct rsi_common *common, return 0; } -static inline int rsi_kill_thread(struct rsi_thread *handle) +static inline void rsi_kill_thread(struct rsi_thread *handle) { atomic_inc(&handle->thread_done); rsi_set_event(&handle->event); - - return kthread_stop(handle->task); + wait_for_completion(&handle->completion); } void rsi_mac80211_detach(struct rsi_hw *hw); -- cgit v1.2.3 From 13d30682e8dee191ac04e93642f0372a723e8b0c Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Mon, 27 Apr 2026 23:38:41 -0300 Subject: ASoC: Intel: bytcr_wm5102: Fix MCLK leak on platform_clock_control error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If byt_wm5102_prepare_and_enable_pll1() fails in the SND_SOC_DAPM_EVENT_ON() path, platform_clock_control() returns after clk_prepare_enable(priv->mclk) without disabling the clock again. This leaks an MCLK enable reference on failed power-up attempts. Add the missing clk_disable_unprepare() on the error path, matching the unwind used by the other Intel platform_clock_control() implementations. Fixes: 9a87fc1e0619 ("ASoC: Intel: bytcr_wm5102: Add machine driver for BYT/WM5102") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Reviewed-by: Cezary Rojewski Reviewed-by: Hans de Goede Link: https://patch.msgid.link/20260427-bytcr-wm5102-mclk-leak-v1-1-02b96d08e99c@gmail.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_wm5102.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/boards/bytcr_wm5102.c b/sound/soc/intel/boards/bytcr_wm5102.c index 4879f79aef29..4aa0cf49b033 100644 --- a/sound/soc/intel/boards/bytcr_wm5102.c +++ b/sound/soc/intel/boards/bytcr_wm5102.c @@ -170,6 +170,7 @@ static int platform_clock_control(struct snd_soc_dapm_widget *w, ret = byt_wm5102_prepare_and_enable_pll1(codec_dai, 48000); if (ret) { dev_err(card->dev, "Error setting codec sysclk: %d\n", ret); + clk_disable_unprepare(priv->mclk); return ret; } } else { -- cgit v1.2.3 From ac2c996675755c725a0065dbe3e2ebffded9080b Mon Sep 17 00:00:00 2001 From: Shixiong Ou Date: Fri, 24 Apr 2026 20:44:27 +0800 Subject: drm/udl: Increase GET_URB_TIMEOUT [WHY] A situation has occurred where udl_handle_damage() executed successfully and the kernel log appears normal, but the display fails to show any output. This is because the call to udl_get_urb() in udl_crtc_helper_atomic_enable() failed without generating any error message. [HOW] 1. Increase timeout of getting urb. 2. Add error messages when calling udl_get_urb() failed in udl_crtc_helper_atomic_enable(). Signed-off-by: Shixiong Ou Reviewed-by: Thomas Zimmermann Fixes: 5320918b9a87 ("drm/udl: initial UDL driver (v4)") Signed-off-by: Thomas Zimmermann Cc: # v3.4+ Link: https://patch.msgid.link/20260424124427.657-1-oushixiong1025@163.com --- drivers/gpu/drm/udl/udl_main.c | 3 +-- drivers/gpu/drm/udl/udl_modeset.c | 5 ++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index 08a0e9480d70..17950fe3a0ec 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -285,13 +285,12 @@ static struct urb *udl_get_urb_locked(struct udl_device *udl, long timeout) return unode->urb; } -#define GET_URB_TIMEOUT HZ struct urb *udl_get_urb(struct udl_device *udl) { struct urb *urb; spin_lock_irq(&udl->urbs.lock); - urb = udl_get_urb_locked(udl, GET_URB_TIMEOUT); + urb = udl_get_urb_locked(udl, HZ * 2); spin_unlock_irq(&udl->urbs.lock); return urb; } diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c index 231e829bd709..1ca073a4ecb2 100644 --- a/drivers/gpu/drm/udl/udl_modeset.c +++ b/drivers/gpu/drm/udl/udl_modeset.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -342,8 +343,10 @@ static void udl_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atom return; urb = udl_get_urb(udl); - if (!urb) + if (!urb) { + drm_err_ratelimited(dev, "get urb failed when enabling crtc\n"); goto out; + } buf = (char *)urb->transfer_buffer; buf = udl_vidreg_lock(buf); -- cgit v1.2.3 From f9c52a6ba9780bd27e0bf4c044fd91c13c778b6e Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Tue, 21 Apr 2026 11:47:35 +0200 Subject: net: ipv6: fix NOREF dst use in seg6 and rpl lwtunnels seg6_input_core() and rpl_input() call ip6_route_input() which sets a NOREF dst on the skb, then pass it to dst_cache_set_ip6() invoking dst_hold() unconditionally. On PREEMPT_RT, ksoftirqd is preemptible and a higher-priority task can release the underlying pcpu_rt between the lookup and the caching through a concurrent FIB lookup on a shared nexthop. Simplified race sequence: ksoftirqd/X higher-prio task (same CPU X) ----------- -------------------------------- seg6_input_core(,skb)/rpl_input(skb) dst_cache_get() -> miss ip6_route_input(skb) -> ip6_pol_route(,skb,flags) [RT6_LOOKUP_F_DST_NOREF in flags] -> FIB lookup resolves fib6_nh [nhid=N route] -> rt6_make_pcpu_route() [creates pcpu_rt, refcount=1] pcpu_rt->sernum = fib6_sernum [fib6_sernum=W] -> cmpxchg(fib6_nh.rt6i_pcpu, NULL, pcpu_rt) [slot was empty, store succeeds] -> skb_dst_set_noref(skb, dst) [dst is pcpu_rt, refcount still 1] rt_genid_bump_ipv6() -> bumps fib6_sernum [fib6_sernum from W to Z] ip6_route_output() -> ip6_pol_route() -> FIB lookup resolves fib6_nh [nhid=N] -> rt6_get_pcpu_route() pcpu_rt->sernum != fib6_sernum [W <> Z, stale] -> prev = xchg(rt6i_pcpu, NULL) -> dst_release(prev) [prev is pcpu_rt, refcount 1->0, dead] dst = skb_dst(skb) [dst is the dead pcpu_rt] dst_cache_set_ip6(dst) -> dst_hold() on dead dst -> WARN / use-after-free For the race to occur, ksoftirqd must be preemptible (PREEMPT_RT without PREEMPT_RT_NEEDS_BH_LOCK) and a concurrent task must be able to release the pcpu_rt. Shared nexthop objects provide such a path, as two routes pointing to the same nhid share the same fib6_nh and its rt6i_pcpu entry. Fix seg6_input_core() and rpl_input() by calling skb_dst_force() after ip6_route_input() to force the NOREF dst into a refcounted one before caching. The output path is not affected as ip6_route_output() already returns a refcounted dst. Fixes: af4a2209b134 ("ipv6: sr: use dst_cache in seg6_input") Fixes: a7a29f9c361f ("net: ipv6: add rpl sr tunnel") Cc: stable@vger.kernel.org Signed-off-by: Andrea Mayer Reviewed-by: Simon Horman Reviewed-by: Justin Iurman Link: https://patch.msgid.link/20260421094735.20997-1-andrea.mayer@uniroma2.it Signed-off-by: Paolo Abeni --- net/ipv6/rpl_iptunnel.c | 9 +++++++++ net/ipv6/seg6_iptunnel.c | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index c7942cf65567..4e10adcd70e8 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -287,7 +287,16 @@ static int rpl_input(struct sk_buff *skb) if (!dst) { ip6_route_input(skb); + + /* ip6_route_input() sets a NOREF dst; force a refcount on it + * before caching or further use. + */ + skb_dst_force(skb); dst = skb_dst(skb); + if (unlikely(!dst)) { + err = -ENETUNREACH; + goto drop; + } /* cache only if we don't create a dst reference loop */ if (!dst->error && lwtst != dst->lwtstate) { diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 9b64343ebad6..4c45c0a77d75 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -515,7 +515,16 @@ static int seg6_input_core(struct net *net, struct sock *sk, if (!dst) { ip6_route_input(skb); + + /* ip6_route_input() sets a NOREF dst; force a refcount on it + * before caching or further use. + */ + skb_dst_force(skb); dst = skb_dst(skb); + if (unlikely(!dst)) { + err = -ENETUNREACH; + goto drop; + } /* cache only if we don't create a dst reference loop */ if (!dst->error && lwtst != dst->lwtstate) { -- cgit v1.2.3 From d743c1ba6c66a7dcb2bfd93fd36b7185e8a4766b Mon Sep 17 00:00:00 2001 From: Alexander Koskovich Date: Thu, 23 Apr 2026 04:51:45 +0000 Subject: pinctrl: qcom: eliza: Fix QDSS trace clock/control pingroup names Fix a few typos for these in their respective pingroups, the groups already exist they just weren't referenced. Signed-off-by: Alexander Koskovich Fixes: 6f26989e15fb ("pinctrl: qcom: Add Eliza pinctrl driver") Reviewed-by: Konrad Dybcio Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-eliza.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-eliza.c b/drivers/pinctrl/qcom/pinctrl-eliza.c index c1f756cbcdeb..dd8c04046b18 100644 --- a/drivers/pinctrl/qcom/pinctrl-eliza.c +++ b/drivers/pinctrl/qcom/pinctrl-eliza.c @@ -1340,7 +1340,7 @@ static const struct msm_pingroup eliza_groups[] = { [51] = PINGROUP(51, _, _, _, _, _, _, _, _, _, _, _), [52] = PINGROUP(52, qup1_se2, pcie1_clk_req_n, qup1_se2, ddr_bist_complete, qdss_gpio_tracedata, _, vsense_trigger_mirnat, _, _, _, _), [53] = PINGROUP(53, qup1_se2, qup1_se2, gcc_gp1, ddr_bist_stop, _, qdss_gpio_tracedata, _, _, _, _, _), - [54] = PINGROUP(54, qup1_se2, qup1_se6, qdss_gpio_tracedata, gnss_adc1, atest_usb, ddr_pxi0, _, _, _, _, _), + [54] = PINGROUP(54, qup1_se2, qup1_se6, qdss_gpio_traceclk, gnss_adc1, atest_usb, ddr_pxi0, _, _, _, _, _), [55] = PINGROUP(55, qup1_se2, dp0_hot, qup1_se6, _, gnss_adc0, atest_usb, ddr_pxi0, _, _, _, _), [56] = PINGROUP(56, usb0_hs, tsense_pwm1, tsense_pwm2, tsense_pwm3, tsense_pwm4, _, _, _, _, _, _), [57] = PINGROUP(57, sd_write_protect, _, _, _, _, _, _, _, _, _, _), @@ -1358,7 +1358,7 @@ static const struct msm_pingroup eliza_groups[] = { [69] = PINGROUP(69, cam_mclk, audio_ext_mclk0, resout_gpio, prng_rosc1, _, _, _, _, _, _, _), [70] = PINGROUP(70, cci_i2c_sda, tmess_prng2, _, phase_flag, atest_char, _, _, _, _, _, _), [71] = PINGROUP(71, cci_i2c_scl, tmess_prng3, _, phase_flag, atest_char, _, _, _, _, _, _), - [72] = PINGROUP(72, cci_i2c_sda, tmess_prng1, qdss_gpio_tracedata, atest_char, _, _, _, _, _, _, _), + [72] = PINGROUP(72, cci_i2c_sda, tmess_prng1, qdss_gpio_tracectl, atest_char, _, _, _, _, _, _, _), [73] = PINGROUP(73, cci_i2c_scl, tmess_prng0, qdss_cti, atest_char, _, _, _, _, _, _, _), [74] = PINGROUP(74, cci_i2c_sda, prng_rosc3, qdss_cti, atest_char, _, _, _, _, _, _, _), [75] = PINGROUP(75, cci_i2c_scl, _, phase_flag, _, _, _, _, _, _, _, _), @@ -1430,10 +1430,10 @@ static const struct msm_pingroup eliza_groups[] = { [141] = PINGROUP(141, _, _, _, _, _, _, _, _, _, _, egpio), [142] = PINGROUP(142, _, _, _, _, _, _, _, _, _, _, egpio), [143] = PINGROUP(143, _, _, _, _, _, _, _, _, _, _, egpio), - [144] = PINGROUP(144, _, qdss_gpio_tracedata, _, _, _, _, _, _, _, _, egpio), + [144] = PINGROUP(144, _, qdss_gpio_tracectl, _, _, _, _, _, _, _, _, egpio), [145] = PINGROUP(145, qdss_gpio_tracedata, _, _, _, _, _, _, _, _, _, egpio), [146] = PINGROUP(146, _, qdss_gpio_tracedata, _, _, _, _, _, _, _, _, egpio), - [147] = PINGROUP(147, ddr_bist_fail, _, qdss_gpio_tracedata, _, _, _, _, _, _, _, egpio), + [147] = PINGROUP(147, ddr_bist_fail, _, qdss_gpio_traceclk, _, _, _, _, _, _, _, egpio), [148] = PINGROUP(148, _, _, _, _, _, _, _, _, _, _, egpio), [149] = PINGROUP(149, _, _, _, _, _, _, _, _, _, _, egpio), [150] = PINGROUP(150, _, _, _, _, _, _, _, _, _, _, egpio), -- cgit v1.2.3 From e72ce029810390eb987a036fb2c8a5da9a23b685 Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Wed, 22 Apr 2026 11:44:13 +0000 Subject: pinctrl: meson: amlogic-a4: fix deadlock issue Accessing the pinconf-pins sysfs node may deadlock. pinconf_pins_show() holds pctldev->mutex, and the platform driver calls pinctrl_find_gpio_range_from_pin(), which tries to acquire the same mutex again, leading to a deadlock. Use pinctrl_find_gpio_range_from_pin_nolock() to fix this issue. Fixes: 6e9be3abb78c ("pinctrl: Add driver support for Amlogic SoCs") Signed-off-by: Xianwei Zhao Reviewed-by: Neil Armstrong Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-amlogic-a4.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c index e2293a872dcb..35d27626a336 100644 --- a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c +++ b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c @@ -292,7 +292,7 @@ static int aml_calc_reg_and_bit(struct pinctrl_gpio_range *range, static int aml_pinconf_get_pull(struct aml_pinctrl *info, unsigned int pin) { struct pinctrl_gpio_range *range = - pinctrl_find_gpio_range_from_pin(info->pctl, pin); + pinctrl_find_gpio_range_from_pin_nolock(info->pctl, pin); struct aml_gpio_bank *bank = gpio_chip_to_bank(range->gc); unsigned int reg, bit, val; int ret, conf; @@ -326,7 +326,7 @@ static int aml_pinconf_get_drive_strength(struct aml_pinctrl *info, u16 *drive_strength_ua) { struct pinctrl_gpio_range *range = - pinctrl_find_gpio_range_from_pin(info->pctl, pin); + pinctrl_find_gpio_range_from_pin_nolock(info->pctl, pin); struct aml_gpio_bank *bank = gpio_chip_to_bank(range->gc); unsigned int reg, bit; unsigned int val; @@ -365,7 +365,7 @@ static int aml_pinconf_get_gpio_bit(struct aml_pinctrl *info, unsigned int reg_type) { struct pinctrl_gpio_range *range = - pinctrl_find_gpio_range_from_pin(info->pctl, pin); + pinctrl_find_gpio_range_from_pin_nolock(info->pctl, pin); struct aml_gpio_bank *bank = gpio_chip_to_bank(range->gc); unsigned int reg, bit, val; int ret; -- cgit v1.2.3 From 9d69033ad967b6e09b1e5b30d1a32c6c4876465d Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Thu, 23 Apr 2026 16:55:24 +0530 Subject: pinctrl: qcom: Fix GPIO to PDC wake irq map for qcs615 PDC interrupts 122-125 were meant for ibi_i3c wakeup but qcs615 do not support i3c. GPIOs 39,51,88 and 89 are also connected to different PDC pin to support non-ibi wakeup. Update the wakeirq map to reflect same. Fixes: b698f36a9d40 ("pinctrl: qcom: add the tlmm driver for QCS615 platform") Signed-off-by: Maulik Shah Signed-off-by: Navya Malempati Reviewed-by: Konrad Dybcio Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-qcs615.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs615.c b/drivers/pinctrl/qcom/pinctrl-qcs615.c index 0ed4332d989e..f066b3a576f7 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs615.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs615.c @@ -1040,11 +1040,11 @@ static const struct msm_pingroup qcs615_groups[] = { static const struct msm_gpio_wakeirq_map qcs615_pdc_map[] = { { 1, 45 }, { 3, 31 }, { 7, 55 }, { 9, 110 }, { 11, 34 }, { 13, 33 }, { 14, 35 }, { 17, 46 }, { 19, 48 }, { 21, 83 }, - { 22, 36 }, { 26, 38 }, { 35, 37 }, { 39, 125 }, { 41, 47 }, - { 47, 49 }, { 48, 51 }, { 50, 52 }, { 51, 123 }, { 55, 56 }, + { 22, 36 }, { 26, 38 }, { 35, 37 }, { 39, 118 }, { 41, 47 }, + { 47, 49 }, { 48, 51 }, { 50, 52 }, { 51, 116 }, { 55, 56 }, { 56, 57 }, { 57, 58 }, { 60, 60 }, { 71, 54 }, { 80, 73 }, { 81, 64 }, { 82, 50 }, { 83, 65 }, { 84, 92 }, { 85, 99 }, - { 86, 67 }, { 87, 84 }, { 88, 124 }, { 89, 122 }, { 90, 69 }, + { 86, 67 }, { 87, 84 }, { 88, 117 }, { 89, 115 }, { 90, 69 }, { 92, 88 }, { 93, 75 }, { 94, 91 }, { 95, 72 }, { 96, 82 }, { 97, 74 }, { 98, 95 }, { 99, 94 }, { 100, 100 }, { 101, 40 }, { 102, 93 }, { 103, 77 }, { 104, 78 }, { 105, 96 }, { 107, 97 }, -- cgit v1.2.3 From 0bb05e6adfa99a2ea1fee1125cc0953409f83ed8 Mon Sep 17 00:00:00 2001 From: Sam Edwards Date: Tue, 21 Apr 2026 21:45:03 -0700 Subject: net: stmmac: Prevent NULL deref when RX memory exhausted The CPU receives frames from the MAC through conventional DMA: the CPU allocates buffers for the MAC, then the MAC fills them and returns ownership to the CPU. For each hardware RX queue, the CPU and MAC coordinate through a shared ring array of DMA descriptors: one descriptor per DMA buffer. Each descriptor includes the buffer's physical address and a status flag ("OWN") indicating which side owns the buffer: OWN=0 for CPU, OWN=1 for MAC. The CPU is only allowed to set the flag and the MAC is only allowed to clear it, and both must move through the ring in sequence: thus the ring is used for both "submissions" and "completions." In the stmmac driver, stmmac_rx() bookmarks its position in the ring with the `cur_rx` index. The main receive loop in that function checks for rx_descs[cur_rx].own=0, gives the corresponding buffer to the network stack (NULLing the pointer), and increments `cur_rx` modulo the ring size. After the loop exits, stmmac_rx_refill(), which bookmarks its position with `dirty_rx`, allocates fresh buffers and rearms the descriptors (setting OWN=1). If it fails any allocation, it simply stops early (leaving OWN=0) and will retry where it left off when next called. This means descriptors have a three-stage lifecycle (terms my own): - `empty` (OWN=1, buffer valid) - `full` (OWN=0, buffer valid and populated) - `dirty` (OWN=0, buffer NULL) But because stmmac_rx() only checks OWN, it confuses `full`/`dirty`. In the past (see 'Fixes:'), there was a bug where the loop could cycle `cur_rx` all the way back to the first descriptor it dirtied, resulting in a NULL dereference when mistaken for `full`. The aforementioned commit resolved that *specific* failure by capping the loop's iteration limit at `dma_rx_size - 1`, but this is only a partial fix: if the previous stmmac_rx_refill() didn't complete, then there are leftover `dirty` descriptors that the loop might encounter without needing to cycle fully around. The current code therefore panics (see 'Closes:') when stmmac_rx_refill() is memory-starved long enough for `cur_rx` to catch up to `dirty_rx`. Fix this by explicitly checking, before advancing `cur_rx`, if the next entry is dirty; exit the loop if so. This prevents processing of the final, used descriptor until stmmac_rx_refill() succeeds, but fully prevents the `cur_rx == dirty_rx` ambiguity as the previous bugfix intended: so remove the clamp as well. Since stmmac_rx_zc() is a copy-paste-and-tweak of stmmac_rx() and the code structure is identical, any fix to stmmac_rx() will also need a corresponding fix for stmmac_rx_zc(). Therefore, apply the same check there. In stmmac_rx() (not stmmac_rx_zc()), a related bug remains: after the MAC sets OWN=0 on the final descriptor, it will be unable to send any further DMA-complete IRQs until it's given more `empty` descriptors. Currently, the driver simply *hopes* that the next stmmac_rx_refill() succeeds, risking an indefinite stall of the receive process if not. But this is not a regression, so it can be addressed in a future change. Fixes: b6cb4541853c7 ("net: stmmac: avoid rx queue overrun") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221010 Cc: stable@vger.kernel.org Suggested-by: Russell King Signed-off-by: Sam Edwards Link: https://patch.msgid.link/20260422044503.5349-1-CFSworks@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ca68248dbc78..3591755ea30b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5549,9 +5549,12 @@ read_again: break; /* Prefetch the next RX descriptor */ - rx_q->cur_rx = STMMAC_NEXT_ENTRY(rx_q->cur_rx, - priv->dma_conf.dma_rx_size); - next_entry = rx_q->cur_rx; + next_entry = STMMAC_NEXT_ENTRY(rx_q->cur_rx, + priv->dma_conf.dma_rx_size); + if (unlikely(next_entry == rx_q->dirty_rx)) + break; + + rx_q->cur_rx = next_entry; np = stmmac_get_rx_desc(priv, rx_q, next_entry); @@ -5686,7 +5689,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) dma_dir = page_pool_get_dma_dir(rx_q->page_pool); bufsz = DIV_ROUND_UP(priv->dma_conf.dma_buf_sz, PAGE_SIZE) * PAGE_SIZE; - limit = min(priv->dma_conf.dma_rx_size - 1, (unsigned int)limit); if (netif_msg_rx_status(priv)) { void *rx_head = stmmac_get_rx_desc(priv, rx_q, 0); @@ -5733,9 +5735,12 @@ read_again: if (unlikely(status & dma_own)) break; - rx_q->cur_rx = STMMAC_NEXT_ENTRY(rx_q->cur_rx, - priv->dma_conf.dma_rx_size); - next_entry = rx_q->cur_rx; + next_entry = STMMAC_NEXT_ENTRY(rx_q->cur_rx, + priv->dma_conf.dma_rx_size); + if (unlikely(next_entry == rx_q->dirty_rx)) + break; + + rx_q->cur_rx = next_entry; np = stmmac_get_rx_desc(priv, rx_q, next_entry); -- cgit v1.2.3 From a9e8765fd206388d5672db229784982bf559f097 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 22 Apr 2026 14:25:27 +0200 Subject: efivarfs: use QSTR() in efivarfs_alloc_dentry Use QSTR() and drop strlen() in efivarfs_alloc_dentry(). Signed-off-by: Thorsten Blum Signed-off-by: Ard Biesheuvel --- fs/efivarfs/super.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 1c5224cf183e..733c19571f1c 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -191,13 +191,10 @@ static const struct dentry_operations efivarfs_d_ops = { static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) { + struct qstr q = QSTR(name); struct dentry *d; - struct qstr q; int err; - q.name = name; - q.len = strlen(name); - err = efivarfs_d_hash(parent, &q); if (err) return ERR_PTR(err); -- cgit v1.2.3 From b336e40c62fbdc4b8a1f09a4ada31f4a90c69eb1 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 27 Apr 2026 17:56:30 +0200 Subject: efi: pstore: Drop efivar lock when efi_pstore_open() returns with an error If kzalloc fails, the function returns -ENOMEM without calling efivar_unlock(). Since open() returned an error, the calling site in pstore_get_backend_records() won't call the close() function, so the lock is never released. Thus drop the lock in case of errors here. Fixes: 859748255b434 ("efi: pstore: Omit efivars caching EFI varstore access layer") Assisted-by: Claude:claude-opus-4-6 Signed-off-by: Thomas Huth Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi-pstore.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index a253b6144945..a5db3534f0a6 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -60,8 +60,10 @@ static int efi_pstore_open(struct pstore_info *psi) return err; psi->data = kzalloc(record_size, GFP_KERNEL); - if (!psi->data) + if (!psi->data) { + efivar_unlock(); return -ENOMEM; + } return 0; } -- cgit v1.2.3 From 4ca07b9239bd0478ae586632a2ed72be37ed8407 Mon Sep 17 00:00:00 2001 From: "William A. Kennington III" Date: Thu, 23 Apr 2026 00:46:52 -0700 Subject: net: mctp i2c: check length before marking flow active Currently, mctp_i2c_get_tx_flow_state() is called before the packet length sanity check. This function marks a new flow as active in the MCTP core. If the sanity check fails, mctp_i2c_xmit() returns early without calling mctp_i2c_lock_nest(). This results in a mismatched locking state: the flow is active, but the I2C bus lock was never acquired for it. When the flow is later released, mctp_i2c_release_flow() will see the active state and queue an unlock marker. The TX thread will then decrement midev->i2c_lock_count from 0, causing it to underflow to -1. This underflow permanently breaks the driver's locking logic, allowing future transmissions to occur without holding the I2C bus lock, leading to bus collisions and potential hardware hangs. Move the mctp_i2c_get_tx_flow_state() call to after the length sanity check to ensure we only transition the flow state if we are actually going to proceed with the transmission and locking. Fixes: f5b8abf9fc3d ("mctp i2c: MCTP I2C binding driver") Signed-off-by: William A. Kennington III Acked-by: Jeremy Kerr Link: https://patch.msgid.link/20260423074741.201460-1-william@wkennington.com Signed-off-by: Paolo Abeni --- drivers/net/mctp/mctp-i2c.c | 4 ++-- net/sched/cls_flower.c | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index 15fe4d1163c1..ee2913758e54 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -496,8 +496,6 @@ static void mctp_i2c_xmit(struct mctp_i2c_dev *midev, struct sk_buff *skb) u8 *pecp; int rc; - fs = mctp_i2c_get_tx_flow_state(midev, skb); - hdr = (void *)skb_mac_header(skb); /* Sanity check that packet contents matches skb length, * and can't exceed MCTP_I2C_BUFSZ @@ -509,6 +507,8 @@ static void mctp_i2c_xmit(struct mctp_i2c_dev *midev, struct sk_buff *skb) return; } + fs = mctp_i2c_get_tx_flow_state(midev, skb); + if (skb_tailroom(skb) >= 1) { /* Linear case with space, we can just append the PEC */ skb_put(skb, 1); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 88f8a32fab2b..b9672ea05747 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -556,6 +556,7 @@ static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, struct netlink_ext_ack *extack) { struct cls_fl_head *head = fl_head_dereference(tp); + struct fl_flow_mask *mask; *last = false; @@ -572,11 +573,12 @@ static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, list_del_rcu(&f->list); spin_unlock(&tp->lock); - *last = fl_mask_put(head, f->mask); + mask = f->mask; if (!tc_skip_hw(f->flags)) fl_hw_destroy_filter(tp, f, rtnl_held, extack); tcf_unbind_filter(tp, &f->res); __fl_put(f); + *last = fl_mask_put(head, mask); return 0; } -- cgit v1.2.3 From f1fb23a0a0fcbdb66672da51d7d63a259f6396ca Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 27 Apr 2026 11:32:36 -0700 Subject: fbdev: ipu-v3: clean up kernel-doc warnings Correct all kernel-doc warnings: - fix a typedef kernel-doc comment - mark a list_head as private - use Returns: for function return values Warning: include/video/imx-ipu-image-convert.h:31 struct member 'list' not described in 'ipu_image_convert_run' Warning: include/video/imx-ipu-image-convert.h:40 function parameter 'ipu_image_convert_cb_t' not described in 'void' Warning: include/video/imx-ipu-image-convert.h:40 expecting prototype for ipu_image_convert_cb_t(). Prototype was for void() instead Warning: include/video/imx-ipu-image-convert.h:66 No description found for return value of 'ipu_image_convert_verify' Warning: include/video/imx-ipu-image-convert.h:90 No description found for return value of 'ipu_image_convert_prepare' Warning: include/video/imx-ipu-image-convert.h:125 No description found for return value of 'ipu_image_convert_queue' Warning: include/video/imx-ipu-image-convert.h:163 No description found for return value of 'ipu_image_convert' Signed-off-by: Randy Dunlap Reviewed-by: Philipp Zabel Signed-off-by: Helge Deller --- include/video/imx-ipu-image-convert.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/include/video/imx-ipu-image-convert.h b/include/video/imx-ipu-image-convert.h index 003b3927ede5..6b77968a6a15 100644 --- a/include/video/imx-ipu-image-convert.h +++ b/include/video/imx-ipu-image-convert.h @@ -27,12 +27,13 @@ struct ipu_image_convert_run { int status; + /* private: */ /* internal to image converter, callers don't touch */ struct list_head list; }; /** - * ipu_image_convert_cb_t - conversion callback function prototype + * typedef ipu_image_convert_cb_t - conversion callback function prototype * * @run: the completed conversion run pointer * @ctx: a private context pointer for the callback @@ -60,7 +61,7 @@ void ipu_image_convert_adjust(struct ipu_image *in, struct ipu_image *out, * @out: output image format * @rot_mode: rotation mode * - * Returns 0 if the formats and rotation mode meet IPU restrictions, + * Returns: 0 if the formats and rotation mode meet IPU restrictions, * -EINVAL otherwise. */ int ipu_image_convert_verify(struct ipu_image *in, struct ipu_image *out, @@ -77,11 +78,11 @@ int ipu_image_convert_verify(struct ipu_image *in, struct ipu_image *out, * @complete: run completion callback * @complete_context: a context pointer for the completion callback * - * Returns an opaque conversion context pointer on success, error pointer + * In V4L2, drivers should call ipu_image_convert_prepare() at streamon. + * + * Returns: an opaque conversion context pointer on success, error pointer * on failure. The input/output formats and rotation mode must already meet * IPU retrictions. - * - * In V4L2, drivers should call ipu_image_convert_prepare() at streamon. */ struct ipu_image_convert_ctx * ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task, @@ -122,6 +123,8 @@ void ipu_image_convert_unprepare(struct ipu_image_convert_ctx *ctx); * In V4L2, drivers should call ipu_image_convert_queue() while * streaming to queue the conversion of a received input buffer. * For example mem2mem devices this would be called in .device_run. + * + * Returns: 0 on success or -errno on error. */ int ipu_image_convert_queue(struct ipu_image_convert_run *run); @@ -155,6 +158,9 @@ void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx); * On successful return the caller can queue more run requests if needed, using * the prepared context in run->ctx. The caller is responsible for unpreparing * the context when no more conversion requests are needed. + * + * Returns: pointer to the created &struct ipu_image_convert_run that has + * been queued on success; an ERR_PTR(errno) on error. */ struct ipu_image_convert_run * ipu_image_convert(struct ipu_soc *ipu, enum ipu_ic_task ic_task, -- cgit v1.2.3 From 0b996ae54d876b41c52dd7cfc512eb008a47d781 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 28 Apr 2026 11:17:25 +0800 Subject: fbdev: defio: Remove duplicate include of linux/module.h Remove duplicate inclusion of linux/module.h in fb_defio.c to clean up redundant code. Signed-off-by: Chen Ni Signed-off-by: Helge Deller --- drivers/video/fbdev/core/fb_defio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index a12dd25ab697..fd00b86e1ae6 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From d237f719b2726c0e6d62bfa1543f53b624471929 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 28 Apr 2026 10:28:43 +0200 Subject: lib/fonts: Fix bit position when rotating by 180 degrees MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the horizontal bit position when rotating a glyph by 180°. The original code in rotate_ud() rounded the value in width up to a multiple of 8, aka the bit pitch, and calculated the rotated pixel from that value. The new code stores the glyph's pitch in bit_pitch, but fails to update the rotated pixel's output accordingly. Simply replacing the variable does this. The bug can be reproduced by setting a font with an unaligned width, such as sun12x22, like this: setfont sun12x22 echo 2 > /sys/class/graphics/fbcon/rotate Without the fix, the font looks distorted. Fixes: a30e9e6b018f ("lib/fonts: Refactor glyph-rotation helpers") Closes: https://lore.gitlab.freedesktop.org/drm-ai-reviews/review-patch7-20260407092555.58816-8-tzimmermann@suse.de/ Cc: dri-devel@lists.freedesktop.org Cc: linux-fbdev@vger.kernel.org Signed-off-by: Thomas Zimmermann Signed-off-by: Helge Deller --- lib/fonts/font_rotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/fonts/font_rotate.c b/lib/fonts/font_rotate.c index 065e0fc0667b..275406008823 100644 --- a/lib/fonts/font_rotate.c +++ b/lib/fonts/font_rotate.c @@ -106,7 +106,7 @@ static void __font_glyph_rotate_180(const unsigned char *glyph, for (y = 0; y < height; y++) { for (x = 0; x < width; x++) { if (font_glyph_test_bit(glyph, x, y, bit_pitch)) { - font_glyph_set_bit(out, width - (1 + x + shift), height - (1 + y), + font_glyph_set_bit(out, bit_pitch - 1 - x - shift, height - 1 - y, bit_pitch); } } -- cgit v1.2.3 From 418b3e64e4459feb3f75979de9ec89e085745343 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Wed, 8 Apr 2026 00:35:48 -0400 Subject: md/raid5: Fix UAF on IO across the reshape position If make_stripe_request() returns STRIPE_WAIT_RESHAPE, raid5_make_request() will free the cloned bio. But raid5_make_request() can call make_stripe_request() multiple times, writing to the various stripes. If that bio got added to the toread or towrite lists of a stripe disk in an earlier call to make_stripe_request(), then it's not safe to just free the bio if a later part of it is found to cross the reshape position. Doing so can lead to a UAF error, when bio_endio() is called on the bio for the earlier stripes. Instead, raid5_make_request() needs to wait until all parts of the bio have called bio_endio(). To do this, bios that cross the reshape position while the reshape can't make progress are flagged as needing to wait for all parts to complete. When raid5_make_request() has a bio that failed make_stripe_request() with STRIPE_WAIT_RESHAPE, it sets bi->bi_private to a completion struct and waits for completion after ending the bio. When the bio_endio() is called for the last time on a clone bio with bi->bi_private set, it wakes up the waiter. This guarantees that raid5_make_request() doesn't return until the cloned bio needing a retry for io across the reshape boundary is safely cleaned up. There is a simple reproducer available at [1]. Compile the kernel with KASAN for more useful reporting when the error is triggered (this is not necessary to see the bug). [1] https://gist.github.com/bmarzins/e48598824305cf2171289e47d7241fa5 Signed-off-by: Benjamin Marzinski Reviewed-by: Xiao Ni Link: https://lore.kernel.org/r/20260408043548.1695157-1-bmarzins@redhat.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 31 ++++++++----------------------- drivers/md/md.h | 1 - drivers/md/raid5.c | 7 ++++++- 3 files changed, 14 insertions(+), 25 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 5fb5ae8368ba..8656f2eff40d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9341,9 +9341,11 @@ static void md_bitmap_end(struct mddev *mddev, struct md_io_clone *md_io_clone) static void md_end_clone_io(struct bio *bio) { - struct md_io_clone *md_io_clone = bio->bi_private; + struct md_io_clone *md_io_clone = container_of(bio, struct md_io_clone, + bio_clone); struct bio *orig_bio = md_io_clone->orig_bio; struct mddev *mddev = md_io_clone->mddev; + struct completion *reshape_completion = bio->bi_private; if (bio_data_dir(orig_bio) == WRITE && md_bitmap_enabled(mddev, false)) md_bitmap_end(mddev, md_io_clone); @@ -9355,7 +9357,10 @@ static void md_end_clone_io(struct bio *bio) bio_end_io_acct(orig_bio, md_io_clone->start_time); bio_put(bio); - bio_endio(orig_bio); + if (unlikely(reshape_completion)) + complete(reshape_completion); + else + bio_endio(orig_bio); percpu_ref_put(&mddev->active_io); } @@ -9380,7 +9385,7 @@ static void md_clone_bio(struct mddev *mddev, struct bio **bio) } clone->bi_end_io = md_end_clone_io; - clone->bi_private = md_io_clone; + clone->bi_private = NULL; *bio = clone; } @@ -9391,26 +9396,6 @@ void md_account_bio(struct mddev *mddev, struct bio **bio) } EXPORT_SYMBOL_GPL(md_account_bio); -void md_free_cloned_bio(struct bio *bio) -{ - struct md_io_clone *md_io_clone = bio->bi_private; - struct bio *orig_bio = md_io_clone->orig_bio; - struct mddev *mddev = md_io_clone->mddev; - - if (bio_data_dir(orig_bio) == WRITE && md_bitmap_enabled(mddev, false)) - md_bitmap_end(mddev, md_io_clone); - - if (bio->bi_status && !orig_bio->bi_status) - orig_bio->bi_status = bio->bi_status; - - if (md_io_clone->start_time) - bio_end_io_acct(orig_bio, md_io_clone->start_time); - - bio_put(bio); - percpu_ref_put(&mddev->active_io); -} -EXPORT_SYMBOL_GPL(md_free_cloned_bio); - /* md_allow_write(mddev) * Calling this ensures that the array is marked 'active' so that writes * may proceed without blocking. It is important to call this before diff --git a/drivers/md/md.h b/drivers/md/md.h index d6f5482e2479..12d86aa7e6f9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -920,7 +920,6 @@ extern void md_finish_reshape(struct mddev *mddev); void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, struct bio *bio, sector_t start, sector_t size); void md_account_bio(struct mddev *mddev, struct bio **bio); -void md_free_cloned_bio(struct bio *bio); extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio); void md_write_metadata(struct mddev *mddev, struct md_rdev *rdev, diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6e79829c5acb..0d76e82f4506 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6217,7 +6217,12 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) mempool_free(ctx, conf->ctx_pool); if (res == STRIPE_WAIT_RESHAPE) { - md_free_cloned_bio(bi); + DECLARE_COMPLETION_ONSTACK(done); + WRITE_ONCE(bi->bi_private, &done); + + bio_endio(bi); + + wait_for_completion(&done); return false; } -- cgit v1.2.3 From 45f96d758d0b34bcabc22ce781eaf4103d286cd0 Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Tue, 14 Apr 2026 10:29:56 +0800 Subject: MAINTAINERS: Add Xiao Ni as md/raid reviewer I've been actively involved in the md subsystem, contributing bug fixes, performance improvements, and participating in code reviews. I will help improve patch review coverage and response time. Signed-off-by: Xiao Ni Link: https://lore.kernel.org/r/20260414022956.48271-1-xiaoraid25@gmail.com Signed-off-by: Yu Kuai --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2fb1c75afd16..06c00e40999f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24764,6 +24764,7 @@ SOFTWARE RAID (Multiple Disks) SUPPORT M: Song Liu M: Yu Kuai R: Li Nan +R: Xiao Ni L: linux-raid@vger.kernel.org S: Supported Q: https://patchwork.kernel.org/project/linux-raid/list/ -- cgit v1.2.3 From f7b24c7b41f23b5f9caa8b913afe79cd4c397d39 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 16 Apr 2026 07:03:45 -0700 Subject: md/raid1,raid10: don't fail devices for invalid IO errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BLK_STS_INVAL indicates the IO request itself was invalid, not that the device has failed. When raid1 treats this as a device error, it retries on alternate mirrors which fail the same way, eventually exceeding the read error threshold and removing the device from the array. This happens when stacking configurations bypass bio_split_to_limits() in the IO path: dm-raid calls md_handle_request() directly without going through md_submit_bio(), skipping the alignment validation that would otherwise reject invalid bios early. The invalid bio reaches the lower block layers, which fail the bio with BLK_STS_INVAL, and raid1 wrongly interprets this as a device failure. Add BLK_STS_INVAL to raid1_should_handle_error() so that invalid IO errors are propagated back to the caller rather than triggering device removal. This is consistent with the previous kernel behavior when alignment checks were done earlier in the direct-io path. Fixes: 5ff3f74e145adc7 ("block: simplify direct io validity check") Reported-by: Tomáš Trnka Closes: https://lore.kernel.org/linux-block/2982107.4sosBPzcNG@electra/ Signed-off-by: Keith Busch Tested-by: Tomáš Trnka Link: https://lore.kernel.org/r/20260416140345.3872265-1-kbusch@meta.com Signed-off-by: Yu Kuai --- drivers/md/raid1-10.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index c33099925f23..56a56a4da4f8 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -293,8 +293,13 @@ static inline bool raid1_should_read_first(struct mddev *mddev, * bio with REQ_RAHEAD or REQ_NOWAIT can fail at anytime, before such IO is * submitted to the underlying disks, hence don't record badblocks or retry * in this case. + * + * BLK_STS_INVAL means the bio was not valid for the underlying device. This + * is a user error, not a device failure, so retrying or recording bad blocks + * would be wrong. */ static inline bool raid1_should_handle_error(struct bio *bio) { - return !(bio->bi_opf & (REQ_RAHEAD | REQ_NOWAIT)); + return !(bio->bi_opf & (REQ_RAHEAD | REQ_NOWAIT)) && + bio->bi_status != BLK_STS_INVAL; } -- cgit v1.2.3 From 9aa6d860b0930e2f72795665c42c44252a558a0c Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Thu, 16 Apr 2026 11:39:56 +0800 Subject: md/raid10: fix divide-by-zero in setup_geo() with zero far_copies setup_geo() extracts near_copies (nc) and far_copies (fc) from the user-provided layout parameter without checking for zero. When fc=0 with the "improved" far set layout selected, 'geo->far_set_size = disks / fc' triggers a divide-by-zero. Validate nc and fc immediately after extraction, returning -1 if either is zero. Fixes: 475901aff158 ("MD RAID10: Improve redundancy for 'far' and 'offset' algorithms (part 1)") Cc: stable@vger.kernel.org Signed-off-by: Junrui Luo Link: https://lore.kernel.org/linux-raid/SYBPR01MB7881A5E2556806CC1D318582AF232@SYBPR01MB7881.ausprd01.prod.outlook.com Signed-off-by: Yu Kuai --- drivers/md/raid10.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 4901ebe45c87..39085e7dd6d2 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3791,6 +3791,8 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) nc = layout & 255; fc = (layout >> 8) & 255; fo = layout & (1<<16); + if (!nc || !fc) + return -1; geo->raid_disks = disks; geo->near_copies = nc; geo->far_copies = fc; -- cgit v1.2.3 From 8e8278ac702e2d5d44211b4b10599aa3b2cb0555 Mon Sep 17 00:00:00 2001 From: Abd-Alrhman Masalkhi Date: Wed, 15 Apr 2026 16:03:18 +0200 Subject: md: replace wait loop with wait_event() in md_handle_request() The wait loop is equivalent to wait_event() and can be simplified by usaing it for improving readability. Signed-off-by: Abd-Alrhman Masalkhi Link: https://lore.kernel.org/r/20260415140319.376578-2-abd.masalkhi@gmail.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 8656f2eff40d..ebaf47fb9de6 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -396,20 +396,12 @@ bool md_handle_request(struct mddev *mddev, struct bio *bio) { check_suspended: if (is_suspended(mddev, bio)) { - DEFINE_WAIT(__wait); /* Bail out if REQ_NOWAIT is set for the bio */ if (bio->bi_opf & REQ_NOWAIT) { bio_wouldblock_error(bio); return true; } - for (;;) { - prepare_to_wait(&mddev->sb_wait, &__wait, - TASK_UNINTERRUPTIBLE); - if (!is_suspended(mddev, bio)) - break; - schedule(); - } - finish_wait(&mddev->sb_wait, &__wait); + wait_event(mddev->sb_wait, !is_suspended(mddev, bio)); } if (!percpu_ref_tryget_live(&mddev->active_io)) goto check_suspended; -- cgit v1.2.3 From 4d8c53c130f17902a7e76326cf867721cf768590 Mon Sep 17 00:00:00 2001 From: Abd-Alrhman Masalkhi Date: Wed, 15 Apr 2026 16:03:19 +0200 Subject: md: use mddev_lock_nointr() in mddev_suspend_and_lock_nointr() This keeps mddev locking consistent and ensures that any future changes to locking behavior are done through the wrapper. Signed-off-by: Abd-Alrhman Masalkhi Link: https://lore.kernel.org/r/20260415140319.376578-3-abd.masalkhi@gmail.com Signed-off-by: Yu Kuai --- drivers/md/md.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.h b/drivers/md/md.h index 12d86aa7e6f9..d3d4e2150dc8 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -1014,7 +1014,7 @@ static inline int mddev_suspend_and_lock(struct mddev *mddev) static inline void mddev_suspend_and_lock_nointr(struct mddev *mddev) { mddev_suspend(mddev, false); - mutex_lock(&mddev->reconfig_mutex); + mddev_lock_nointr(mddev); } static inline void mddev_unlock_and_resume(struct mddev *mddev) -- cgit v1.2.3 From 8776d342cf8fa0b98ca5e6fb2d956966fb5ca364 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sat, 25 Apr 2026 10:46:13 +0800 Subject: md: factor bitmap creation away from sysfs handling Factor bitmap creation and destruction into helpers that do not touch bitmap sysfs registration. This prepares the bitmap sysfs rework so callers such as the sysfs bitmap location path can create or destroy a bitmap backend without coupling that to sysfs group lifetime management. Reviewed-by: Su Yue Link: https://lore.kernel.org/r/20260425024615.1696892-2-yukuai@fnnas.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 78 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 29 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index ebaf47fb9de6..99aa1367c991 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -679,7 +679,25 @@ static void active_io_release(struct percpu_ref *ref) static void no_op(struct percpu_ref *r) {} -static bool mddev_set_bitmap_ops(struct mddev *mddev) +static void md_bitmap_sysfs_add(struct mddev *mddev) +{ + if (sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group)) + pr_warn("md: cannot register extra bitmap attributes for %s\n", + mdname(mddev)); + else + /* + * Inform user with KOBJ_CHANGE about new bitmap + * attributes. + */ + kobject_uevent(&mddev->kobj, KOBJ_CHANGE); +} + +static void md_bitmap_sysfs_del(struct mddev *mddev) +{ + sysfs_remove_group(&mddev->kobj, mddev->bitmap_ops->group); +} + +static bool mddev_set_bitmap_ops_nosysfs(struct mddev *mddev) { struct bitmap_operations *old = mddev->bitmap_ops; struct md_submodule_head *head; @@ -703,18 +721,6 @@ static bool mddev_set_bitmap_ops(struct mddev *mddev) mddev->bitmap_ops = (void *)head; xa_unlock(&md_submodule); - - if (!mddev_is_dm(mddev) && mddev->bitmap_ops->group) { - if (sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group)) - pr_warn("md: cannot register extra bitmap attributes for %s\n", - mdname(mddev)); - else - /* - * Inform user with KOBJ_CHANGE about new bitmap - * attributes. - */ - kobject_uevent(&mddev->kobj, KOBJ_CHANGE); - } return true; err: @@ -722,15 +728,6 @@ err: return false; } -static void mddev_clear_bitmap_ops(struct mddev *mddev) -{ - if (!mddev_is_dm(mddev) && mddev->bitmap_ops && - mddev->bitmap_ops->group) - sysfs_remove_group(&mddev->kobj, mddev->bitmap_ops->group); - - mddev->bitmap_ops = NULL; -} - int mddev_init(struct mddev *mddev) { int err = 0; @@ -6531,7 +6528,7 @@ out: return id; } -static int md_bitmap_create(struct mddev *mddev) +static int md_bitmap_create_nosysfs(struct mddev *mddev) { enum md_submodule_id orig_id = mddev->bitmap_id; enum md_submodule_id sb_id; @@ -6540,7 +6537,7 @@ static int md_bitmap_create(struct mddev *mddev) if (mddev->bitmap_id == ID_BITMAP_NONE) return -EINVAL; - if (!mddev_set_bitmap_ops(mddev)) + if (!mddev_set_bitmap_ops_nosysfs(mddev)) return -ENOENT; err = mddev->bitmap_ops->create(mddev); @@ -6552,7 +6549,7 @@ static int md_bitmap_create(struct mddev *mddev) * doesn't match, and mdadm is not the latest version to set * bitmap_type, set bitmap_ops based on the disk version. */ - mddev_clear_bitmap_ops(mddev); + mddev->bitmap_ops = NULL; sb_id = md_bitmap_get_id_from_sb(mddev); if (sb_id == ID_BITMAP_NONE || sb_id == orig_id) @@ -6562,27 +6559,50 @@ static int md_bitmap_create(struct mddev *mddev) mdname(mddev), orig_id, sb_id); mddev->bitmap_id = sb_id; - if (!mddev_set_bitmap_ops(mddev)) { + if (!mddev_set_bitmap_ops_nosysfs(mddev)) { mddev->bitmap_id = orig_id; return -ENOENT; } err = mddev->bitmap_ops->create(mddev); if (err) { - mddev_clear_bitmap_ops(mddev); + mddev->bitmap_ops = NULL; mddev->bitmap_id = orig_id; } return err; } -static void md_bitmap_destroy(struct mddev *mddev) +static int md_bitmap_create(struct mddev *mddev) +{ + int err; + + err = md_bitmap_create_nosysfs(mddev); + if (err) + return err; + + if (!mddev_is_dm(mddev) && mddev->bitmap_ops->group) + md_bitmap_sysfs_add(mddev); + + return 0; +} + +static void md_bitmap_destroy_nosysfs(struct mddev *mddev) { if (!md_bitmap_registered(mddev)) return; mddev->bitmap_ops->destroy(mddev); - mddev_clear_bitmap_ops(mddev); + mddev->bitmap_ops = NULL; +} + +static void md_bitmap_destroy(struct mddev *mddev) +{ + if (!mddev_is_dm(mddev) && mddev->bitmap_ops && + mddev->bitmap_ops->group) + md_bitmap_sysfs_del(mddev); + + md_bitmap_destroy_nosysfs(mddev); } int md_run(struct mddev *mddev) -- cgit v1.2.3 From aba3d6d6cb55c6e1116d1215140559dd7ecdf9a9 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sat, 25 Apr 2026 10:46:14 +0800 Subject: md/md-bitmap: split bitmap sysfs groups Split the classic bitmap sysfs files into a common bitmap group with the location attribute and a separate internal bitmap group for the remaining files. At the same time, convert bitmap operations from a single sysfs group to a sysfs group array so backends can share part of their sysfs layout while adding backend-specific attributes separately. Switch the bitmap sysfs helpers to use sysfs_update_groups() for the add and update path, and remove groups in reverse order so shared named groups are unmerged before the last group removes the directory. Also make bitmap operation lookup depend only on the currently selected bitmap id matching the installed backend. This prepares the lookup path for a later registered none backend. Reviewed-by: Su Yue Link: https://lore.kernel.org/r/20260425024615.1696892-3-yukuai@fnnas.com Signed-off-by: Yu Kuai --- drivers/md/md-bitmap.c | 23 +++++++++++++++++++---- drivers/md/md-bitmap.h | 2 +- drivers/md/md-llbitmap.c | 7 ++++++- drivers/md/md.c | 21 ++++++++++++++------- 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 83378c033c72..eba649703a1c 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -2955,8 +2955,12 @@ static struct md_sysfs_entry max_backlog_used = __ATTR(max_backlog_used, S_IRUGO | S_IWUSR, behind_writes_used_show, behind_writes_used_reset); -static struct attribute *md_bitmap_attrs[] = { +static struct attribute *md_bitmap_common_attrs[] = { &bitmap_location.attr, + NULL +}; + +static struct attribute *md_bitmap_internal_attrs[] = { &bitmap_space.attr, &bitmap_timeout.attr, &bitmap_backlog.attr, @@ -2967,9 +2971,20 @@ static struct attribute *md_bitmap_attrs[] = { NULL }; -static struct attribute_group md_bitmap_group = { +static struct attribute_group md_bitmap_common_group = { + .name = "bitmap", + .attrs = md_bitmap_common_attrs, +}; + +static struct attribute_group md_bitmap_internal_group = { .name = "bitmap", - .attrs = md_bitmap_attrs, + .attrs = md_bitmap_internal_attrs, +}; + +static const struct attribute_group *bitmap_groups[] = { + &md_bitmap_common_group, + &md_bitmap_internal_group, + NULL, }; static struct bitmap_operations bitmap_ops = { @@ -3013,7 +3028,7 @@ static struct bitmap_operations bitmap_ops = { .set_pages = bitmap_set_pages, .free = md_bitmap_free, - .group = &md_bitmap_group, + .groups = bitmap_groups, }; int md_bitmap_init(void) diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h index b42a28fa83a0..214f623c7e79 100644 --- a/drivers/md/md-bitmap.h +++ b/drivers/md/md-bitmap.h @@ -125,7 +125,7 @@ struct bitmap_operations { void (*set_pages)(void *data, unsigned long pages); void (*free)(void *data); - struct attribute_group *group; + const struct attribute_group **groups; }; /* the bitmap API */ diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c index 9e7e6b1a6f15..1adc5b117821 100644 --- a/drivers/md/md-llbitmap.c +++ b/drivers/md/md-llbitmap.c @@ -1738,6 +1738,11 @@ static struct attribute_group md_llbitmap_group = { .attrs = md_llbitmap_attrs, }; +static const struct attribute_group *md_llbitmap_groups[] = { + &md_llbitmap_group, + NULL, +}; + static struct bitmap_operations llbitmap_ops = { .head = { .type = MD_BITMAP, @@ -1774,7 +1779,7 @@ static struct bitmap_operations llbitmap_ops = { .dirty_bits = llbitmap_dirty_bits, .write_all = llbitmap_write_all, - .group = &md_llbitmap_group, + .groups = md_llbitmap_groups, }; int md_llbitmap_init(void) diff --git a/drivers/md/md.c b/drivers/md/md.c index 99aa1367c991..0ef81d116191 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -681,7 +681,7 @@ static void no_op(struct percpu_ref *r) {} static void md_bitmap_sysfs_add(struct mddev *mddev) { - if (sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group)) + if (sysfs_update_groups(&mddev->kobj, mddev->bitmap_ops->groups)) pr_warn("md: cannot register extra bitmap attributes for %s\n", mdname(mddev)); else @@ -694,16 +694,23 @@ static void md_bitmap_sysfs_add(struct mddev *mddev) static void md_bitmap_sysfs_del(struct mddev *mddev) { - sysfs_remove_group(&mddev->kobj, mddev->bitmap_ops->group); + int nr_groups = 0; + + for (nr_groups = 0; mddev->bitmap_ops->groups[nr_groups]; nr_groups++) + ; + + while (--nr_groups >= 1) + sysfs_unmerge_group(&mddev->kobj, + mddev->bitmap_ops->groups[nr_groups]); + sysfs_remove_group(&mddev->kobj, mddev->bitmap_ops->groups[0]); } static bool mddev_set_bitmap_ops_nosysfs(struct mddev *mddev) { - struct bitmap_operations *old = mddev->bitmap_ops; struct md_submodule_head *head; - if (mddev->bitmap_id == ID_BITMAP_NONE || - (old && old->head.id == mddev->bitmap_id)) + if (mddev->bitmap_ops && + mddev->bitmap_ops->head.id == mddev->bitmap_id) return true; xa_lock(&md_submodule); @@ -6581,7 +6588,7 @@ static int md_bitmap_create(struct mddev *mddev) if (err) return err; - if (!mddev_is_dm(mddev) && mddev->bitmap_ops->group) + if (!mddev_is_dm(mddev) && mddev->bitmap_ops->groups) md_bitmap_sysfs_add(mddev); return 0; @@ -6599,7 +6606,7 @@ static void md_bitmap_destroy_nosysfs(struct mddev *mddev) static void md_bitmap_destroy(struct mddev *mddev) { if (!mddev_is_dm(mddev) && mddev->bitmap_ops && - mddev->bitmap_ops->group) + mddev->bitmap_ops->groups) md_bitmap_sysfs_del(mddev); md_bitmap_destroy_nosysfs(mddev); -- cgit v1.2.3 From f2926a533d03fe70d753b512b713e06a2aa174af Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sat, 25 Apr 2026 10:46:15 +0800 Subject: md/md-bitmap: add a none backend for bitmap grow Add a real none bitmap backend that exposes the common bitmap sysfs group and use it to keep bitmap/location available when an array has no bitmap. Then switch the bitmap location sysfs path to move only between none and the classic bitmap backend, using the no-sysfs bitmap helpers while merging or unmerging the internal bitmap sysfs group. This restores mdadm --grow bitmap addition through bitmap/location. Fixes: fb8cc3b0d9db ("md/md-bitmap: delay registration of bitmap_ops until creating bitmap") Reviewed-by: Su Yue Link: https://lore.kernel.org/r/20260425024615.1696892-4-yukuai@fnnas.com Signed-off-by: Yu Kuai --- drivers/md/md-bitmap.c | 108 +++++++++++++++++++++++++++++++++++++++++++++---- drivers/md/md.c | 42 +++++++++++++++---- drivers/md/md.h | 3 ++ 3 files changed, 137 insertions(+), 16 deletions(-) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index eba649703a1c..028b9ca8ce52 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -216,6 +216,7 @@ struct bitmap { }; static struct workqueue_struct *md_bitmap_wq; +static struct attribute_group md_bitmap_internal_group; static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, int chunksize, bool init); @@ -2580,6 +2581,30 @@ static int bitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize) return __bitmap_resize(bitmap, blocks, chunksize, false); } +static bool bitmap_none_enabled(void *data, bool flush) +{ + return false; +} + +static int bitmap_none_create(struct mddev *mddev) +{ + return 0; +} + +static int bitmap_none_load(struct mddev *mddev) +{ + return 0; +} + +static void bitmap_none_destroy(struct mddev *mddev) +{ +} + +static int bitmap_none_get_stats(void *data, struct md_bitmap_stats *stats) +{ + return -ENOENT; +} + static ssize_t location_show(struct mddev *mddev, char *page) { @@ -2618,7 +2643,11 @@ location_store(struct mddev *mddev, const char *buf, size_t len) goto out; } - bitmap_destroy(mddev); + sysfs_unmerge_group(&mddev->kobj, &md_bitmap_internal_group); + md_bitmap_destroy_nosysfs(mddev); + mddev->bitmap_id = ID_BITMAP_NONE; + if (!mddev_set_bitmap_ops_nosysfs(mddev)) + goto none_err; mddev->bitmap_info.offset = 0; if (mddev->bitmap_info.file) { struct file *f = mddev->bitmap_info.file; @@ -2654,16 +2683,25 @@ location_store(struct mddev *mddev, const char *buf, size_t len) } mddev->bitmap_info.offset = offset; - rv = bitmap_create(mddev); + md_bitmap_destroy_nosysfs(mddev); + mddev->bitmap_id = ID_BITMAP; + if (!mddev_set_bitmap_ops_nosysfs(mddev)) + goto bitmap_err; + + rv = md_bitmap_create_nosysfs(mddev); if (rv) - goto out; + goto create_err; - rv = bitmap_load(mddev); + rv = mddev->bitmap_ops->load(mddev); if (rv) { mddev->bitmap_info.offset = 0; - bitmap_destroy(mddev); - goto out; + goto load_err; } + + rv = sysfs_merge_group(&mddev->kobj, + &md_bitmap_internal_group); + if (rv) + goto merge_err; } } if (!mddev->external) { @@ -2679,6 +2717,22 @@ out: if (rv) return rv; return len; + +merge_err: + mddev->bitmap_info.offset = 0; +load_err: + md_bitmap_destroy_nosysfs(mddev); +create_err: + mddev->bitmap_info.offset = 0; + mddev->bitmap_id = ID_BITMAP_NONE; + if (!mddev_set_bitmap_ops_nosysfs(mddev)) + rv = -ENOENT; + goto out; +bitmap_err: + rv = -ENOENT; +none_err: + mddev->bitmap_info.offset = 0; + goto out; } static struct md_sysfs_entry bitmap_location = @@ -2987,6 +3041,27 @@ static const struct attribute_group *bitmap_groups[] = { NULL, }; +static const struct attribute_group *bitmap_none_groups[] = { + &md_bitmap_common_group, + NULL, +}; + +static struct bitmap_operations bitmap_none_ops = { + .head = { + .type = MD_BITMAP, + .id = ID_BITMAP_NONE, + .name = "none", + }, + + .enabled = bitmap_none_enabled, + .create = bitmap_none_create, + .load = bitmap_none_load, + .destroy = bitmap_none_destroy, + .get_stats = bitmap_none_get_stats, + + .groups = bitmap_none_groups, +}; + static struct bitmap_operations bitmap_ops = { .head = { .type = MD_BITMAP, @@ -3033,16 +3108,33 @@ static struct bitmap_operations bitmap_ops = { int md_bitmap_init(void) { + int err; + md_bitmap_wq = alloc_workqueue("md_bitmap", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); if (!md_bitmap_wq) return -ENOMEM; - return register_md_submodule(&bitmap_ops.head); + err = register_md_submodule(&bitmap_none_ops.head); + if (err) + goto err_wq; + + err = register_md_submodule(&bitmap_ops.head); + if (err) + goto err_none; + + return 0; + +err_none: + unregister_md_submodule(&bitmap_none_ops.head); +err_wq: + destroy_workqueue(md_bitmap_wq); + return err; } void md_bitmap_exit(void) { - destroy_workqueue(md_bitmap_wq); unregister_md_submodule(&bitmap_ops.head); + unregister_md_submodule(&bitmap_none_ops.head); + destroy_workqueue(md_bitmap_wq); } diff --git a/drivers/md/md.c b/drivers/md/md.c index 0ef81d116191..7937b927d923 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -705,7 +705,7 @@ static void md_bitmap_sysfs_del(struct mddev *mddev) sysfs_remove_group(&mddev->kobj, mddev->bitmap_ops->groups[0]); } -static bool mddev_set_bitmap_ops_nosysfs(struct mddev *mddev) +bool mddev_set_bitmap_ops_nosysfs(struct mddev *mddev) { struct md_submodule_head *head; @@ -4275,7 +4275,7 @@ bitmap_type_show(struct mddev *mddev, char *page) xa_lock(&md_submodule); xa_for_each(&md_submodule, i, head) { - if (head->type != MD_BITMAP) + if (head->type != MD_BITMAP || head->id == ID_BITMAP_NONE) continue; if (mddev->bitmap_id == head->id) @@ -6535,7 +6535,7 @@ out: return id; } -static int md_bitmap_create_nosysfs(struct mddev *mddev) +int md_bitmap_create_nosysfs(struct mddev *mddev) { enum md_submodule_id orig_id = mddev->bitmap_id; enum md_submodule_id sb_id; @@ -6544,8 +6544,10 @@ static int md_bitmap_create_nosysfs(struct mddev *mddev) if (mddev->bitmap_id == ID_BITMAP_NONE) return -EINVAL; - if (!mddev_set_bitmap_ops_nosysfs(mddev)) + if (!mddev_set_bitmap_ops_nosysfs(mddev)) { + mddev->bitmap_id = orig_id; return -ENOENT; + } err = mddev->bitmap_ops->create(mddev); if (!err) @@ -6559,8 +6561,10 @@ static int md_bitmap_create_nosysfs(struct mddev *mddev) mddev->bitmap_ops = NULL; sb_id = md_bitmap_get_id_from_sb(mddev); - if (sb_id == ID_BITMAP_NONE || sb_id == orig_id) + if (sb_id == ID_BITMAP_NONE || sb_id == orig_id) { + mddev->bitmap_id = orig_id; return err; + } pr_info("md: %s: bitmap version mismatch, switching from %d to %d\n", mdname(mddev), orig_id, sb_id); @@ -6594,7 +6598,7 @@ static int md_bitmap_create(struct mddev *mddev) return 0; } -static void md_bitmap_destroy_nosysfs(struct mddev *mddev) +void md_bitmap_destroy_nosysfs(struct mddev *mddev) { if (!md_bitmap_registered(mddev)) return; @@ -6612,6 +6616,16 @@ static void md_bitmap_destroy(struct mddev *mddev) md_bitmap_destroy_nosysfs(mddev); } +static void md_bitmap_set_none(struct mddev *mddev) +{ + mddev->bitmap_id = ID_BITMAP_NONE; + if (!mddev_set_bitmap_ops_nosysfs(mddev)) + return; + + if (!mddev_is_dm(mddev) && mddev->bitmap_ops->groups) + md_bitmap_sysfs_add(mddev); +} + int md_run(struct mddev *mddev) { int err; @@ -6821,6 +6835,10 @@ int md_run(struct mddev *mddev) if (mddev->sb_flags) md_update_sb(mddev, 0); + if (IS_ENABLED(CONFIG_MD_BITMAP) && !mddev->bitmap_info.file && + !mddev->bitmap_info.offset) + md_bitmap_set_none(mddev); + md_new_event(); return 0; @@ -7766,7 +7784,8 @@ static int set_bitmap_file(struct mddev *mddev, int fd) { int err = 0; - if (!md_bitmap_registered(mddev)) + if (!md_bitmap_registered(mddev) || + mddev->bitmap_id == ID_BITMAP_NONE) return -EINVAL; if (mddev->pers) { @@ -7831,10 +7850,12 @@ static int set_bitmap_file(struct mddev *mddev, int fd) if (err) { md_bitmap_destroy(mddev); + md_bitmap_set_none(mddev); fd = -1; } } else if (fd < 0) { md_bitmap_destroy(mddev); + md_bitmap_set_none(mddev); } } @@ -8141,12 +8162,16 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->bitmap_info.default_offset; mddev->bitmap_info.space = mddev->bitmap_info.default_space; + mddev->bitmap_id = ID_BITMAP; rv = md_bitmap_create(mddev); if (!rv) rv = mddev->bitmap_ops->load(mddev); - if (rv) + if (rv) { md_bitmap_destroy(mddev); + mddev->bitmap_info.offset = 0; + md_bitmap_set_none(mddev); + } } else { struct md_bitmap_stats stats; @@ -8174,6 +8199,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) } md_bitmap_destroy(mddev); mddev->bitmap_info.offset = 0; + md_bitmap_set_none(mddev); } } md_update_sb(mddev, 1); diff --git a/drivers/md/md.h b/drivers/md/md.h index d3d4e2150dc8..52c378086046 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -934,6 +934,9 @@ extern void md_allow_write(struct mddev *mddev); extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev); extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors); extern int md_check_no_bitmap(struct mddev *mddev); +bool mddev_set_bitmap_ops_nosysfs(struct mddev *mddev); +int md_bitmap_create_nosysfs(struct mddev *mddev); +void md_bitmap_destroy_nosysfs(struct mddev *mddev); extern int md_integrity_register(struct mddev *mddev); extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); -- cgit v1.2.3 From c1a3cdb0b49fff28d90e2c33114a7c0058261f60 Mon Sep 17 00:00:00 2001 From: Abd-Alrhman Masalkhi Date: Thu, 23 Apr 2026 12:13:01 +0200 Subject: md/raid1: replace wait loop with wait_event_idle() in raid1_write_request() The wait loop is equivalent to wait_event_idle(); use it to improve readability. Signed-off-by: Abd-Alrhman Masalkhi Link: https://lore.kernel.org/linux-raid/20260423101303.48196-2-abd.masalkhi@gmail.com Signed-off-by: Yu Kuai --- drivers/md/raid1.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ba91f7e61920..64d970e2ef50 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1510,21 +1510,14 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, mddev->cluster_ops->area_resyncing(mddev, WRITE, bio->bi_iter.bi_sector, bio_end_sector(bio))) { - DEFINE_WAIT(w); if (bio->bi_opf & REQ_NOWAIT) { bio_wouldblock_error(bio); return; } - for (;;) { - prepare_to_wait(&conf->wait_barrier, - &w, TASK_IDLE); - if (!mddev->cluster_ops->area_resyncing(mddev, WRITE, - bio->bi_iter.bi_sector, - bio_end_sector(bio))) - break; - schedule(); - } - finish_wait(&conf->wait_barrier, &w); + wait_event_idle(conf->wait_barrier, + !mddev->cluster_ops->area_resyncing(mddev, WRITE, + bio->bi_iter.bi_sector, + bio_end_sector(bio))); } /* -- cgit v1.2.3 From 408434a3245cc6ea981df4edd7fbf0be49856727 Mon Sep 17 00:00:00 2001 From: Abd-Alrhman Masalkhi Date: Thu, 23 Apr 2026 12:13:02 +0200 Subject: md: use mddev_is_dm() instead of open-coding gendisk checks Replace direct checks on mddev->gendisk with mddev_is_dm() in md_handle_request() and md_run(). Signed-off-by: Abd-Alrhman Masalkhi Link: https://lore.kernel.org/linux-raid/20260423101303.48196-3-abd.masalkhi@gmail.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 7937b927d923..a4b1048adbba 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -408,7 +408,7 @@ check_suspended: if (!mddev->pers->make_request(mddev, bio)) { percpu_ref_put(&mddev->active_io); - if (!mddev->gendisk && mddev->pers->prepare_suspend) + if (mddev_is_dm(mddev) && mddev->pers->prepare_suspend) return false; goto check_suspended; } @@ -6746,7 +6746,7 @@ int md_run(struct mddev *mddev) } /* dm-raid expect sync_thread to be frozen until resume */ - if (mddev->gendisk) + if (!mddev_is_dm(mddev)) mddev->recovery = 0; /* may be over-ridden by personality */ -- cgit v1.2.3 From 3b2f70eab5a2cd15e27b1447e66e45302b28ff2c Mon Sep 17 00:00:00 2001 From: Abd-Alrhman Masalkhi Date: Thu, 23 Apr 2026 12:13:03 +0200 Subject: md: use ATTRIBUTE_GROUPS() for md default sysfs attributes Replace the md_default_group and md_attr_groups with ATTRIBUTE_GROUPS(). Signed-off-by: Abd-Alrhman Masalkhi Link: https://lore.kernel.org/linux-raid/20260423101303.48196-4-abd.masalkhi@gmail.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index a4b1048adbba..8b568eee8743 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6055,10 +6055,7 @@ static struct attribute *md_default_attrs[] = { &md_logical_block_size.attr, NULL, }; - -static const struct attribute_group md_default_group = { - .attrs = md_default_attrs, -}; +ATTRIBUTE_GROUPS(md_default); static struct attribute *md_redundancy_attrs[] = { &md_scan_mode.attr, @@ -6083,11 +6080,6 @@ static const struct attribute_group md_redundancy_group = { .attrs = md_redundancy_attrs, }; -static const struct attribute_group *md_attr_groups[] = { - &md_default_group, - NULL, -}; - static ssize_t md_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { @@ -6170,7 +6162,7 @@ static const struct sysfs_ops md_sysfs_ops = { static const struct kobj_type md_ktype = { .release = md_kobj_release, .sysfs_ops = &md_sysfs_ops, - .default_groups = md_attr_groups, + .default_groups = md_default_groups, }; int mdp_major = 0; -- cgit v1.2.3 From c366a7b5ed7564e41345c380285bd3f6cb98971b Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Fri, 17 Apr 2026 15:35:30 +0800 Subject: s390/debug: Reject zero-length input before trimming a newline debug_get_user_string() duplicates the userspace buffer with memdup_user_nul() and then unconditionally looks at buffer[user_len - 1] to strip a trailing newline. A zero-length write reaches this helper unchanged, so the newline trim reads before the start of the allocated buffer. Reject empty writes before accessing the last input byte. Fixes: 66a464dbc8e0 ("[PATCH] s390: debug feature changes") Cc: stable@vger.kernel.org Signed-off-by: Pengpeng Hou Reviewed-by: Benjamin Block Reviewed-by: Vasily Gorbik Tested-by: Vasily Gorbik Link: https://lore.kernel.org/r/20260417073530.96002-1-pengpeng@iscas.ac.cn Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/kernel/debug.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 31430e9bcfdd..2612f634e826 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1414,6 +1414,9 @@ static inline char *debug_get_user_string(const char __user *user_buf, { char *buffer; + if (!user_len) + return ERR_PTR(-EINVAL); + buffer = memdup_user_nul(user_buf, user_len); if (IS_ERR(buffer)) return buffer; -- cgit v1.2.3 From e14622a7584f9608927c59a7d6ae4a0999dc545e Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 17 Apr 2026 14:33:43 +0200 Subject: s390/debug: Reject zero-length input in debug_input_flush_fn() debug_input_flush_fn() always copies one byte from the userspace buffer with copy_from_user() regardless of the supplied write length. A zero-length write therefore reads one byte beyond the caller's buffer. If the stale byte happens to be '-' or a digit the debug log is silently flushed. With an unmapped buffer the call returns -EFAULT. Reject zero-length writes before copying from userspace. Cc: stable@vger.kernel.org # v5.10+ Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/kernel/debug.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 2612f634e826..7650f2adb5cf 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1587,6 +1587,11 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, char input_buf[1]; int rc = user_len; + if (!user_len) { + rc = -EINVAL; + goto out; + } + if (user_len > 0x10000) user_len = 0x10000; if (*offset != 0) { -- cgit v1.2.3 From 77aba6accd9e26f069ab81bdcb941681d5f7a0a7 Mon Sep 17 00:00:00 2001 From: Gerd Bayer Date: Mon, 9 Mar 2026 11:12:30 +0100 Subject: MAINTAINERS: Replace one of the maintainers for s390/pci Add myself as co-maintainer for s390/pci, replacing Gerald Schaefer who has moved his focus to s390/mm. Thank you Gerald! Signed-off-by: Gerd Bayer Acked-by: Niklas Schnelle Acked-by: Gerald Schaefer Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2fb1c75afd16..b778c584bea5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23521,7 +23521,7 @@ F: drivers/s390/net/ S390 PCI SUBSYSTEM M: Niklas Schnelle -M: Gerald Schaefer +M: Gerd Bayer L: linux-s390@vger.kernel.org S: Supported F: Documentation/arch/s390/pci.rst -- cgit v1.2.3 From 8587af9cff43aa114ee69b401b8ac3e2c5aea4d3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Apr 2026 16:19:42 +0200 Subject: s390/sclp: Remove SCLP_OFB Kconfig option Remove the SCLP_OFB Kconfig option and enable the guarded code unconditionally. This guards only a few lines of code, so the impact is very low while at the same time this reduces the large number of Kconfig options. Acked-by: Christian Borntraeger Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- drivers/s390/char/Kconfig | 8 -------- drivers/s390/char/sclp_config.c | 6 ------ 2 files changed, 14 deletions(-) diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index 4d8f09910a46..7416f941e5b6 100644 --- a/drivers/s390/char/Kconfig +++ b/drivers/s390/char/Kconfig @@ -85,14 +85,6 @@ config HMC_DRV transfer cache size from its default value 0.5MB to N bytes. If N is zero, then no caching is performed. -config SCLP_OFB - def_bool n - prompt "Support for Open-for-Business SCLP Event" - depends on S390 - help - This option enables the Open-for-Business interface to the s390 - Service Element. - config S390_UV_UAPI def_tristate m prompt "Ultravisor userspace API" diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 9cfbe3fc3dca..8c77e8c44fc2 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -80,14 +80,11 @@ static void sclp_conf_receiver_fn(struct evbuf_header *evbuf) static struct sclp_register sclp_conf_register = { -#ifdef CONFIG_SCLP_OFB .send_mask = EVTYP_CONFMGMDATA_MASK, -#endif .receive_mask = EVTYP_CONFMGMDATA_MASK, .receiver_fn = sclp_conf_receiver_fn, }; -#ifdef CONFIG_SCLP_OFB static int sclp_ofb_send_req(char *ev_data, size_t len) { static DEFINE_MUTEX(send_mutex); @@ -143,11 +140,9 @@ static const struct bin_attribute ofb_bin_attr = { }, .write = sysfs_ofb_data_write, }; -#endif static int __init sclp_ofb_setup(void) { -#ifdef CONFIG_SCLP_OFB struct kset *ofb_kset; int rc; @@ -159,7 +154,6 @@ static int __init sclp_ofb_setup(void) kset_unregister(ofb_kset); return rc; } -#endif return 0; } -- cgit v1.2.3 From b95e0e792822bad8fc9eb33ea3a90005e29e75e9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 21 Apr 2026 07:52:44 +0200 Subject: s390/mm: Fix phys_to_folio() usage in do_secure_storage_access() In case of a Secure-Storage-Access exception the effective aka virtual address which caused the exception is contained within the TEID. do_secure_storage_access() incorrectly uses phys_to_folio() instead of virt_to_folio() to translate the virtual address to the corresponding folio. Fix this by using virt_to_folio() instead of phys_to_folio(). Fixes: 084ea4d611a3 ("s390/mm: add (non)secure page access exceptions handlers") Reviewed-by: Christian Borntraeger Reviewed-by: Claudio Imbrenda Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 191cc53caead..028aeb9c48d6 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -438,7 +438,7 @@ void do_secure_storage_access(struct pt_regs *regs) panic("Unexpected PGM 0x3d with TEID bit 61=0"); } if (is_kernel_fault(regs)) { - folio = phys_to_folio(addr); + folio = virt_to_folio((void *)addr); if (unlikely(!folio_try_get(folio))) return; rc = uv_convert_from_secure(folio_to_phys(folio)); -- cgit v1.2.3 From d986ba0329dcca102e227995371135c9bbcefb6b Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 28 Apr 2026 21:59:30 +0900 Subject: ntfs: fix invalid PTR_ERR() usage in __ntfs_bitmap_set_bits_in_run() The Smatch reported a warning in __ntfs_bitmap_set_bits_in_run(): "warn: passing a valid pointer to 'PTR_ERR'" This occurs because the 'folio' variable might contain a valid pointer when jumping to the 'rollback' label, specifically when 'cnt <= 0' is detected during the subsequent page mapping loop. In such cases, calling PTR_ERR(folio) is incorrect as it does not contain an error code. Fix this by introducing an explicit 'err' variable to track the error status. This ensures that the rollback logic and the return value consistently use a proper error code regardless of the state of the folio pointer. Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon --- fs/ntfs/bitmap.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/ntfs/bitmap.c b/fs/ntfs/bitmap.c index 656d802333e3..b1436b3151b9 100644 --- a/fs/ntfs/bitmap.c +++ b/fs/ntfs/bitmap.c @@ -125,7 +125,7 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit, struct address_space *mapping; struct folio *folio; u8 *kaddr; - int pos, len; + int pos, len, err; u8 bit; struct ntfs_inode *ni = NTFS_I(vi); struct ntfs_volume *vol = ni->vol; @@ -201,8 +201,10 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit, /* If we are not in the last page, deal with all subsequent pages. */ while (index < end_index) { - if (cnt <= 0) + if (cnt <= 0) { + err = -EIO; goto rollback; + } /* Update @index and get the next folio. */ folio_mark_dirty(folio); @@ -214,6 +216,7 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit, ntfs_error(vi->i_sb, "Failed to map subsequent page (error %li), aborting.", PTR_ERR(folio)); + err = PTR_ERR(folio); goto rollback; } @@ -265,7 +268,7 @@ rollback: * - @count - @cnt is the number of bits that have been modified */ if (is_rollback) - return PTR_ERR(folio); + return err; if (count != cnt) pos = __ntfs_bitmap_set_bits_in_run(vi, start_bit, count - cnt, value ? 0 : 1, true); @@ -274,14 +277,14 @@ rollback: if (!pos) { /* Rollback was successful. */ ntfs_error(vi->i_sb, - "Failed to map subsequent page (error %li), aborting.", - PTR_ERR(folio)); + "Failed to map subsequent page (error %i), aborting.", + err); } else { /* Rollback failed. */ ntfs_error(vi->i_sb, - "Failed to map subsequent page (error %li) and rollback failed (error %i). Aborting and leaving inconsistent metadata. Unmount and run chkdsk.", - PTR_ERR(folio), pos); + "Failed to map subsequent page (error %i) and rollback failed (error %i). Aborting and leaving inconsistent metadata. Unmount and run chkdsk.", + err, pos); NVolSetErrors(NTFS_SB(vi->i_sb)); } - return PTR_ERR(folio); + return err; } -- cgit v1.2.3 From a6715d7ec472a476db17787697a4abda62962284 Mon Sep 17 00:00:00 2001 From: Evangelos Petrongonas Date: Fri, 10 Apr 2026 01:16:05 +0000 Subject: kho: skip KHO for crash kernel kho_fill_kimage() unconditionally populates the kimage with KHO metadata for every kexec image type. When the image is a crash kernel, this can be problematic as the crash kernel can run in a small reserved region and the KHO scratch areas can sit outside it. The crash kernel then faults during kho_memory_init() when it tries phys_to_virt() on the KHO FDT address: Unable to handle kernel paging request at virtual address xxxxxxxx ... fdt_offset_ptr+... fdt_check_node_offset_+... fdt_first_property_offset+... fdt_get_property_namelen_+... fdt_getprop+... kho_memory_init+... mm_core_init+... start_kernel+... kho_locate_mem_hole() already skips KHO logic for KEXEC_TYPE_CRASH images, but kho_fill_kimage() was missing the same guard. As kho_fill_kimage() is the single point that populates image->kho.fdt and image->kho.scratch, fixing it here is sufficient for both arm64 and x86 as the FDT and boot_params path are bailing out when these fields are unset. Fixes: d7255959b69a ("kho: allow kexec load before KHO finalization") Signed-off-by: Evangelos Petrongonas Reviewed-by: Mike Rapoport (Microsoft) Link: https://patch.msgid.link/20260410011609.1103-1-epetron@amazon.de Signed-off-by: Mike Rapoport (Microsoft) --- kernel/liveupdate/kexec_handover.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index 94762de1fe5f..4fde8325c49f 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -1702,7 +1702,7 @@ int kho_fill_kimage(struct kimage *image) int err = 0; struct kexec_buf scratch; - if (!kho_enable) + if (!kho_enable || image->type == KEXEC_TYPE_CRASH) return 0; image->kho.fdt = virt_to_phys(kho_out.fdt); -- cgit v1.2.3 From 0fb1daf0b78d0e23b63b6b65de56d4a3fd83bc14 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Wed, 15 Apr 2026 06:23:00 +0100 Subject: mm/memfd_luo: report error when restoring a folio fails mid-loop memfd_luo_retrieve_folios() initialises err to -EIO, but the per-iteration calls to mem_cgroup_charge(), shmem_add_to_page_cache() and shmem_inode_acct_blocks() reuse and overwrite err. Once any iteration completes successfully, err becomes zero. If a later iteration's kho_restore_folio() returns NULL, the failure path jumps to put_folios without resetting err, so the function returns 0. The caller memfd_luo_retrieve() then takes the success path, sets args->file and reports the restore as successful, leaving userspace with a partially populated memfd and no indication that anything went wrong. Set err to -EIO in the kho_restore_folio() failure branch so the error is propagated to the caller. Signed-off-by: David Carlier Reviewed-by: Pratyush Yadav Fixes: b3749f174d68 ("mm: memfd_luo: allow preserving memfd") Link: https://patch.msgid.link/20260415052300.362539-1-devnexen@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- mm/memfd_luo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c index b02b503c750d..35d1247281e0 100644 --- a/mm/memfd_luo.c +++ b/mm/memfd_luo.c @@ -427,6 +427,7 @@ static int memfd_luo_retrieve_folios(struct file *file, if (!folio) { pr_err("Unable to restore folio at physical address: %llx\n", phys); + err = -EIO; goto put_folios; } index = pfolio->index; -- cgit v1.2.3 From 46f74a3f7d57d9cc0110b09cbc8163fa0a01afa2 Mon Sep 17 00:00:00 2001 From: Heiko Schocher Date: Sat, 25 Apr 2026 05:13:39 +0200 Subject: net: phy: dp83869: fix setting CLK_O_SEL field. Table 7-121 in datasheet says we have to set register 0xc6 to value 0x10 before CLK_O_SEL can be modified. No more infos about this field found in datasheet. With this fix, setting of CLK_O_SEL field in IO_MUX_CFG register worked through dts property "ti,clk-output-sel" on a DP83869HMRGZR. Signed-off-by: Heiko Schocher Reviewed-by: Simon Horman Fixes: 01db923e8377 ("net: phy: dp83869: Add TI dp83869 phy") Link: https://patch.msgid.link/20260425031339.3318-1-hs@nabladev.com Signed-off-by: Paolo Abeni --- drivers/net/phy/dp83869.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index 1f381d7b13ff..96a7d255f50f 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -31,6 +31,7 @@ #define DP83869_RGMIICTL 0x0032 #define DP83869_STRAP_STS1 0x006e #define DP83869_RGMIIDCTL 0x0086 +#define DP83869_ANA_PLL_PROG_PI 0x00c6 #define DP83869_RXFCFG 0x0134 #define DP83869_RXFPMD1 0x0136 #define DP83869_RXFPMD2 0x0137 @@ -826,12 +827,22 @@ static int dp83869_config_init(struct phy_device *phydev) dp83869_config_port_mirroring(phydev); /* Clock output selection if muxing property is set */ - if (dp83869->clk_output_sel != DP83869_CLK_O_SEL_REF_CLK) + if (dp83869->clk_output_sel != DP83869_CLK_O_SEL_REF_CLK) { + /* + * Table 7-121 in datasheet says we have to set register 0xc6 + * to value 0x10 before CLK_O_SEL can be modified. + */ + ret = phy_write_mmd(phydev, DP83869_DEVADDR, + DP83869_ANA_PLL_PROG_PI, 0x10); + if (ret) + return ret; + ret = phy_modify_mmd(phydev, DP83869_DEVADDR, DP83869_IO_MUX_CFG, DP83869_IO_MUX_CFG_CLK_O_SEL_MASK, dp83869->clk_output_sel << DP83869_IO_MUX_CFG_CLK_O_SEL_SHIFT); + } if (phy_interface_is_rgmii(phydev)) { ret = phy_write_mmd(phydev, DP83869_DEVADDR, DP83869_RGMIIDCTL, -- cgit v1.2.3 From 76b48a70b16b4036814964b039cde413e0164416 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 6 Feb 2026 00:08:36 -0500 Subject: IB/hfi1: Fix potential use-after-free in PIO and SDMA map teardown The current teardown logic for dd->pio_map and dd->sdma_map frees the structures while they might still be accessed by RCU readers. Although the pointer is nulled under a spinlock, the memory is reclaimed before waiting for the grace period to end. This patch fixes the sequence by: 1. Extracting the pointer under the lock. 2. Clearing the RCU-protected pointer. 3. Waiting for readers to finish with synchronize_rcu(). 4. Finally freeing the memory. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Link: https://patch.msgid.link/r/20260206050836.5890-1-lirongqing@baidu.com Signed-off-by: Li RongQing Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/pio.c | 5 ++++- drivers/infiniband/hw/hfi1/sdma.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 51afaac88c72..9121d83bf88a 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1942,13 +1942,16 @@ bail: void free_pio_map(struct hfi1_devdata *dd) { + struct pio_vl_map *map; + /* Free PIO map if allocated */ if (rcu_access_pointer(dd->pio_map)) { spin_lock_irq(&dd->pio_map_lock); - pio_map_free(rcu_access_pointer(dd->pio_map)); + map = rcu_access_pointer(dd->pio_map); RCU_INIT_POINTER(dd->pio_map, NULL); spin_unlock_irq(&dd->pio_map_lock); synchronize_rcu(); + pio_map_free(map); } kfree(dd->kernel_send_context); dd->kernel_send_context = NULL; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index e5f442938177..cfd9dd0f7e81 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1255,6 +1255,7 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) { size_t i; struct sdma_engine *sde; + struct sdma_vl_map *map; if (dd->sdma_pad_dma) { dma_free_coherent(&dd->pcidev->dev, SDMA_PAD, @@ -1291,10 +1292,11 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) } if (rcu_access_pointer(dd->sdma_map)) { spin_lock_irq(&dd->sde_map_lock); - sdma_map_free(rcu_access_pointer(dd->sdma_map)); + map = rcu_access_pointer(dd->sdma_map); RCU_INIT_POINTER(dd->sdma_map, NULL); spin_unlock_irq(&dd->sde_map_lock); synchronize_rcu(); + sdma_map_free(map); } kfree(dd->per_sdma); dd->per_sdma = NULL; -- cgit v1.2.3 From 4c6f86d85d03cdb33addce86aa69aa795ca6c47a Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Tue, 14 Apr 2026 07:15:55 -0400 Subject: RDMA/rxe: Reject unknown opcodes before ICRC processing Even after applying commit 7244491dab34 ("RDMA/rxe: Validate pad and ICRC before payload_size() in rxe_rcv"), a single unauthenticated UDP packet can still trigger panic. That patch handled payload_size() underflow only for valid opcodes with short packets, not for packets carrying an unknown opcode. The unknown-opcode OOB read described below predates that commit and reaches back to the initial Soft RoCE driver. The check added there reads pkt->paylen < header_size(pkt) + bth_pad(pkt) + RXE_ICRC_SIZE where header_size(pkt) expands to rxe_opcode[pkt->opcode].length. The rxe_opcode[] array has 256 entries but is only populated for defined IB opcodes; any other entry (for example opcode 0xff) is zero-initialized, so length == 0 and the check degenerates to pkt->paylen < 0 + bth_pad(pkt) + RXE_ICRC_SIZE which does not constrain pkt->paylen enough. rxe_icrc_hdr() then computes rxe_opcode[pkt->opcode].length - RXE_BTH_BYTES which underflows when length == 0 and passes a huge value to rxe_crc32(), causing an out-of-bounds read of the skb payload. Reproduced on v7.0-rc7 with that fix applied, QEMU/KVM with CONFIG_RDMA_RXE=y and CONFIG_KASAN=y, after rdma link add rxe0 type rxe netdev eth0 A single 48-byte UDP packet to port 4791 with BTH opcode=0xff and QPN=IB_MULTICAST_QPN triggers: BUG: KASAN: slab-out-of-bounds in crc32_le+0x115/0x170 Read of size 1 at addr ... The buggy address is located 0 bytes to the right of allocated 704-byte region Call Trace: crc32_le+0x115/0x170 rxe_icrc_hdr.isra.0+0x226/0x300 rxe_icrc_check+0x13f/0x3a0 rxe_rcv+0x6e1/0x16e0 rxe_udp_encap_recv+0x20a/0x320 udp_queue_rcv_one_skb+0x7ed/0x12c0 Subsequent packets with the same shape fault on unmapped memory and panic the kernel. The trigger requires only module load and "rdma link add"; no QP, no connection, and no authentication. Fix this by rejecting packets whose opcode has no rxe_opcode[] entry, detected via the zero mask or zero length, before any length arithmetic runs. Cc: stable@vger.kernel.org Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://patch.msgid.link/r/20260414111555.3386793-1-michael.bommarito@gmail.com Assisted-by: Claude:claude-opus-4-6 Signed-off-by: Michael Bommarito Reviewed-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_recv.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index f79214738c2b..2d5e701ff961 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -330,6 +330,17 @@ void rxe_rcv(struct sk_buff *skb) pkt->qp = NULL; pkt->mask |= rxe_opcode[pkt->opcode].mask; + /* + * Unknown opcodes have a zero-initialized rxe_opcode[] entry, so + * both mask and length are 0. Reject them before any length math: + * rxe_icrc_hdr() would otherwise compute length - RXE_BTH_BYTES + * and pass the underflowed value to rxe_crc32(), producing an + * out-of-bounds read. + */ + if (unlikely(!rxe_opcode[pkt->opcode].mask || + !rxe_opcode[pkt->opcode].length)) + goto drop; + if (unlikely(pkt->paylen < header_size(pkt) + bth_pad(pkt) + RXE_ICRC_SIZE)) goto drop; -- cgit v1.2.3 From 1114c87aa6f195cf07da55a27b2122ae26557b26 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sat, 18 Apr 2026 12:21:41 -0400 Subject: RDMA/rxe: Reject non-8-byte ATOMIC_WRITE payloads atomic_write_reply() at drivers/infiniband/sw/rxe/rxe_resp.c unconditionally dereferences 8 bytes at payload_addr(pkt): value = *(u64 *)payload_addr(pkt); check_rkey() previously accepted an ATOMIC_WRITE request with pktlen == resid == 0 because the length validation only compared pktlen against resid. A remote initiator that sets the RETH length to 0 therefore reaches atomic_write_reply() with a zero-byte logical payload, and the responder reads sizeof(u64) bytes from past the logical end of the packet into skb->head tailroom, then writes those 8 bytes into the attacker's MR via rxe_mr_do_atomic_write(). That is a remote disclosure of 4 bytes of kernel tailroom per probe (the other 4 bytes are the packet's own trailing ICRC). IBA oA19-28 defines ATOMIC_WRITE as exactly 8 bytes. Anything else is protocol-invalid. Hoist a strict length check into check_rkey() so the responder never reaches the unchecked dereference, and keep the existing WRITE-family length logic for the normal RDMA WRITE path. Reproduced on mainline with an unmodified rxe driver: a sustained zero-length ATOMIC_WRITE probe repeatedly leaks adjacent skb head-buffer bytes into the attacker's MR, including recognisable kernel strings and partial kernel-direct-map pointer words. With this patch applied the responder rejects the PDU and the MR stays all-zero. Cc: stable@vger.kernel.org Fixes: 034e285f8b99 ("RDMA/rxe: Make responder support atomic write on RC service") Link: https://patch.msgid.link/r/20260418162141.3610201-1-michael.bommarito@gmail.com Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Reviewed-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 9faf8c09aa8e..9cb2f6fbf2dd 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -540,7 +540,19 @@ static enum resp_states check_rkey(struct rxe_qp *qp, } skip_check_range: - if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { + if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { + /* IBA oA19-28: ATOMIC_WRITE payload is exactly 8 bytes. + * Reject any other length before the responder reads + * sizeof(u64) bytes from payload_addr(pkt); a shorter + * payload would read past the logical end of the packet + * into skb->head tailroom. + */ + if (resid != sizeof(u64) || pktlen != sizeof(u64) || + bth_pad(pkt)) { + state = RESPST_ERR_LENGTH; + goto err; + } + } else if (pkt->mask & RXE_WRITE_MASK) { if (resid > mtu) { if (pktlen != mtu || bth_pad(pkt)) { state = RESPST_ERR_LENGTH; -- cgit v1.2.3 From c488df06bd552bb8b6e14fa0cfd5ad986c6e9525 Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Fri, 24 Apr 2026 13:51:02 +0800 Subject: RDMA/mlx5: Fix error path fall-through in mlx5_ib_dev_res_srq_init() mlx5_ib_dev_res_srq_init() allocates two SRQs, s0 and s1. When ib_create_srq() fails for s1, the error branch destroys s0 but falls through and unconditionally assigns the freed s0 and the ERR_PTR s1 to devr->s0 and devr->s1. This leads to several problems: the lock-free fast path checks "if (devr->s1) return 0;" and treats the ERR_PTR as already initialised; users in mlx5_ib_create_qp() dereference the freed SRQ or ERR_PTR via to_msrq(devr->s0)->msrq.srqn; and mlx5_ib_dev_res_cleanup() dereferences the ERR_PTR and double-frees s0 on teardown. Fix by adding the same `goto unlock` in the s1 failure path. Cc: stable@vger.kernel.org Fixes: 5895e70f2e6e ("IB/mlx5: Allocate resources just before first QP/SRQ is created") Link: https://patch.msgid.link/r/SYBPR01MB7881E1E0970268BD69C0BA75AF2B2@SYBPR01MB7881.ausprd01.prod.outlook.com Reported-by: Yuhao Jiang Signed-off-by: Junrui Luo Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 109661c2ac12..8115ae869ef2 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3392,6 +3392,7 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev) "Couldn't create SRQ 1 for res init, err=%pe\n", s1); ib_destroy_srq(s0); + goto unlock; } devr->s0 = s0; -- cgit v1.2.3 From 8135b89a376ab2c20db3921c9c38cf83994b1f1c Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 23 Apr 2026 09:10:52 +0200 Subject: Revert "parisc: led: fix reference leak on failed device registration" This reverts commit 707610bcccbd0327530938e33f3f33211a640a4e. platform_device_register() is going to be fixed instead. Signed-off-by: Helge Deller --- drivers/parisc/led.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index b299fcc48b08..016c9d5a60a8 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -543,10 +543,8 @@ static void __init register_led_regions(void) static int __init startup_leds(void) { - if (platform_device_register(&platform_leds)) { - pr_info("LED: failed to register LEDs\n"); - platform_device_put(&platform_leds); - } + if (platform_device_register(&platform_leds)) + printk(KERN_INFO "LED: failed to register LEDs\n"); register_led_regions(); return 0; } -- cgit v1.2.3 From b8425ceefee5df2b8490fa70a07e4e402c752492 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Apr 2026 12:32:11 +0200 Subject: parisc: drivers: switch to dynamic root device Driver core expects devices to be dynamically allocated and will, for example, complain loudly if a device that lacks a release function is ever freed. Use root_device_register() to allocate and register the root device instead of open coding using a static device. While at it, drop the redundant additional reference taken at init. Signed-off-by: Johan Hovold Signed-off-by: Helge Deller --- arch/parisc/kernel/drivers.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index bc47bbe3026e..b52ad704ec8a 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -41,9 +41,7 @@ const struct dma_map_ops *hppa_dma_ops __ro_after_init; EXPORT_SYMBOL(hppa_dma_ops); -static struct device root = { - .init_name = "parisc", -}; +static struct device *root; static inline int check_dev(struct device *dev) { @@ -89,7 +87,7 @@ static int for_each_padev(int (*fn)(struct device *, void *), void * data) .obj = data, .fn = fn, }; - return device_for_each_child(&root, &recurse_data, descend_children); + return device_for_each_child(root, &recurse_data, descend_children); } /** @@ -290,7 +288,7 @@ const struct parisc_device * find_pa_parent_type(const struct parisc_device *padev, int type) { const struct device *dev = &padev->dev; - while (dev != &root) { + while (dev != root) { struct parisc_device *candidate = to_parisc_device(dev); if (candidate->id.hw_type == type) return candidate; @@ -319,7 +317,7 @@ static void get_node_path(struct device *dev, struct hardware_path *path) dev = dev->parent; } - while (dev != &root) { + while (dev != root) { if (dev_is_pci(dev)) { unsigned int devfn = to_pci_dev(dev)->devfn; path->bc[i--] = PCI_SLOT(devfn) | (PCI_FUNC(devfn)<< 5); @@ -482,7 +480,7 @@ static struct parisc_device * __init alloc_tree_node( static struct parisc_device *create_parisc_device(struct hardware_path *modpath) { int i; - struct device *parent = &root; + struct device *parent = root; for (i = 0; i < 6; i++) { if (modpath->bc[i] == -1) continue; @@ -755,7 +753,7 @@ parse_tree_node(struct device *parent, int index, struct hardware_path *modpath) struct device *hwpath_to_device(struct hardware_path *modpath) { int i; - struct device *parent = &root; + struct device *parent = root; for (i = 0; i < 6; i++) { if (modpath->bc[i] == -1) continue; @@ -880,7 +878,7 @@ void __init walk_central_bus(void) { walk_native_bus(CENTRAL_BUS_ADDR, CENTRAL_BUS_ADDR + (MAX_NATIVE_DEVICES * NATIVE_DEVICE_OFFSET), - &root); + root); } static __init void print_parisc_device(struct parisc_device *dev) @@ -907,9 +905,10 @@ void __init init_parisc_bus(void) { if (bus_register(&parisc_bus_type)) panic("Could not register PA-RISC bus type\n"); - if (device_register(&root)) + + root = root_device_register("parisc"); + if (IS_ERR(root)) panic("Could not register PA-RISC root device\n"); - get_device(&root); } static __init void qemu_header(void) -- cgit v1.2.3 From 852534744c2d35626a604f128ff0b8ec12805591 Mon Sep 17 00:00:00 2001 From: Christofer Jonason Date: Wed, 4 Mar 2026 10:07:27 +0100 Subject: iio: adc: xilinx-xadc: Fix sequencer mode in postdisable for dual mux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xadc_postdisable() unconditionally sets the sequencer to continuous mode. For dual external multiplexer configurations this is incorrect: simultaneous sampling mode is required so that ADC-A samples through the mux on VAUX[0-7] while ADC-B simultaneously samples through the mux on VAUX[8-15]. In continuous mode only ADC-A is active, so VAUX[8-15] channels return incorrect data. Since postdisable is also called from xadc_probe() to set the initial idle state, the wrong sequencer mode is active from the moment the driver loads. The preenable path already uses xadc_get_seq_mode() which returns SIMULTANEOUS for dual mux. Fix postdisable to do the same. Fixes: bdc8cda1d010 ("iio:adc: Add Xilinx XADC driver") Cc: stable@vger.kernel.org Signed-off-by: Christofer Jonason Reviewed-by: Andy Shevchenko Reviewed-by: Nuno Sá Reviewed-by: Salih Erim Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-xadc-core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index e257c1b94a5f..3980dfacbcd7 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -817,6 +817,7 @@ static int xadc_postdisable(struct iio_dev *indio_dev) { struct xadc *xadc = iio_priv(indio_dev); unsigned long scan_mask; + int seq_mode; int ret; int i; @@ -824,6 +825,12 @@ static int xadc_postdisable(struct iio_dev *indio_dev) for (i = 0; i < indio_dev->num_channels; i++) scan_mask |= BIT(indio_dev->channels[i].scan_index); + /* + * Use the correct sequencer mode for the idle state: simultaneous + * mode for dual external mux configurations, continuous otherwise. + */ + seq_mode = xadc_get_seq_mode(xadc, scan_mask); + /* Enable all channels and calibration */ ret = xadc_write_adc_reg(xadc, XADC_REG_SEQ(0), scan_mask & 0xffff); if (ret) @@ -834,11 +841,11 @@ static int xadc_postdisable(struct iio_dev *indio_dev) return ret; ret = xadc_update_adc_reg(xadc, XADC_REG_CONF1, XADC_CONF1_SEQ_MASK, - XADC_CONF1_SEQ_CONTINUOUS); + seq_mode); if (ret) return ret; - return xadc_power_adc_b(xadc, XADC_CONF1_SEQ_CONTINUOUS); + return xadc_power_adc_b(xadc, seq_mode); } static int xadc_preenable(struct iio_dev *indio_dev) -- cgit v1.2.3 From 673478bc29cf72010faaf293c1c8c667393335a0 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Thu, 2 Apr 2026 13:40:15 +0800 Subject: iio: chemical: mhz19b: reject oversized serial replies mhz19b_receive_buf() appends each serdev chunk into the fixed MHZ19B_CMD_SIZE receive buffer and advances buf_idx by len without checking that the chunk fits in the remaining space. A large callback can therefore overflow st->buf before the command path validates the reply. Reset the reply state before each command and reject oversized serial replies before copying them into the fixed buffer. When an oversized reply is detected, wake the waiter and report -EMSGSIZE instead of overwriting st->buf. Fixes: 4572a70b3681 ("iio: chemical: Add support for Winsen MHZ19B CO2 sensor") Cc: stable@vger.kernel.org Signed-off-by: Pengpeng Hou Acked-by: Gyeyoung Baek Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/mhz19b.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/iio/chemical/mhz19b.c b/drivers/iio/chemical/mhz19b.c index 3c64154918b1..9d4cf432919e 100644 --- a/drivers/iio/chemical/mhz19b.c +++ b/drivers/iio/chemical/mhz19b.c @@ -52,6 +52,8 @@ struct mhz19b_state { struct completion buf_ready; u8 buf_idx; + bool buf_overflow; + /* * Serdev receive buffer. * When data is received from the MH-Z19B, @@ -106,6 +108,10 @@ static int mhz19b_serdev_cmd(struct iio_dev *indio_dev, int cmd, u16 arg) cmd_buf[8] = mhz19b_get_checksum(cmd_buf); /* Write buf to uart ctrl synchronously */ + st->buf_idx = 0; + st->buf_overflow = false; + reinit_completion(&st->buf_ready); + ret = serdev_device_write(serdev, cmd_buf, MHZ19B_CMD_SIZE, 0); if (ret < 0) return ret; @@ -121,6 +127,9 @@ static int mhz19b_serdev_cmd(struct iio_dev *indio_dev, int cmd, u16 arg) if (!ret) return -ETIMEDOUT; + if (st->buf_overflow) + return -EMSGSIZE; + if (st->buf[8] != mhz19b_get_checksum(st->buf)) { dev_err(dev, "checksum err"); return -EINVAL; @@ -240,6 +249,14 @@ static size_t mhz19b_receive_buf(struct serdev_device *serdev, { struct iio_dev *indio_dev = dev_get_drvdata(&serdev->dev); struct mhz19b_state *st = iio_priv(indio_dev); + size_t remaining = MHZ19B_CMD_SIZE - st->buf_idx; + + if (len > remaining) { + st->buf_idx = 0; + st->buf_overflow = true; + complete(&st->buf_ready); + return len; + } memcpy(st->buf + st->buf_idx, data, len); st->buf_idx += len; -- cgit v1.2.3 From b66f922f6a4fa92840f662fbcfeb4f8a0f774bcc Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Fri, 27 Mar 2026 20:27:54 +0800 Subject: iio: light: veml6070: Fix resource leak in probe error path The driver calls i2c_new_dummy_device() to create a dummy device, then calls i2c_smbus_write_byte(). If i2c_smbus_write_byte() fails and returns, the cleanup via devm_add_action_or_reset() was never registered, so the dummy device leaks. Switch to devm_i2c_new_dummy_device() which registers cleanup atomically with device creation, eliminating the error-path window. Fixes: 7501bff87c3e ("iio: light: veml6070: add action for i2c_unregister_device") Reviewed-by: Andy Shevchenko Signed-off-by: Felix Gu Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/veml6070.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/iio/light/veml6070.c b/drivers/iio/light/veml6070.c index 74d7246e5225..4bbd86d0cb46 100644 --- a/drivers/iio/light/veml6070.c +++ b/drivers/iio/light/veml6070.c @@ -245,13 +245,6 @@ static const struct iio_info veml6070_info = { .write_raw = veml6070_write_raw, }; -static void veml6070_i2c_unreg(void *p) -{ - struct veml6070_data *data = p; - - i2c_unregister_device(data->client2); -} - static int veml6070_probe(struct i2c_client *client) { struct veml6070_data *data; @@ -281,7 +274,8 @@ static int veml6070_probe(struct i2c_client *client) if (ret < 0) return ret; - data->client2 = i2c_new_dummy_device(client->adapter, VEML6070_ADDR_DATA_LSB); + data->client2 = devm_i2c_new_dummy_device(&client->dev, client->adapter, + VEML6070_ADDR_DATA_LSB); if (IS_ERR(data->client2)) return dev_err_probe(&client->dev, PTR_ERR(data->client2), "i2c device for second chip address failed\n"); @@ -292,10 +286,6 @@ static int veml6070_probe(struct i2c_client *client) if (ret < 0) return ret; - ret = devm_add_action_or_reset(&client->dev, veml6070_i2c_unreg, data); - if (ret < 0) - return ret; - return devm_iio_device_register(&client->dev, indio_dev); } -- cgit v1.2.3 From 1a772719318c11e146f6fbe621fffd230a6f456a Mon Sep 17 00:00:00 2001 From: Radu Sabau Date: Wed, 8 Apr 2026 13:32:13 +0300 Subject: iio: adc: ad4695: Fix call ordering in offload buffer postenable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ad4695_enter_advanced_sequencer_mode() was called after spi_offload_trigger_enable(). That is wrong because ad4695_enter_advanced_sequencer_mode() issues regular SPI transfers to put the ADC into advanced sequencer mode, and not all SPI offload capable controllers support regular SPI transfers while offloading is enabled. Fix this by calling ad4695_enter_advanced_sequencer_mode() before spi_offload_trigger_enable(), so the ADC is fully configured before the first CNV pulse can occur. This is consistent with the same constraint that already applies to the BUSY_GP_EN write above it. Update the error unwind labels accordingly: add err_exit_conversion_mode so that a failure of spi_offload_trigger_enable() correctly exits conversion mode before clearing BUSY_GP_EN. Fixes: f09f140e3ea8 ("iio: adc: ad4695: Add support for SPI offload") Reviewed-by: Nuno Sá Reviewed-by: David Lechner Signed-off-by: Radu Sabau Cc: Stable@vger.kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4695.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/iio/adc/ad4695.c b/drivers/iio/adc/ad4695.c index cda419638d9a..53642de7330d 100644 --- a/drivers/iio/adc/ad4695.c +++ b/drivers/iio/adc/ad4695.c @@ -876,14 +876,14 @@ static int ad4695_offload_buffer_postenable(struct iio_dev *indio_dev) if (ret) goto err_unoptimize_message; - ret = spi_offload_trigger_enable(st->offload, st->offload_trigger, - &config); + ret = ad4695_enter_advanced_sequencer_mode(st, num_slots); if (ret) goto err_disable_busy_output; - ret = ad4695_enter_advanced_sequencer_mode(st, num_slots); + ret = spi_offload_trigger_enable(st->offload, st->offload_trigger, + &config); if (ret) - goto err_offload_trigger_disable; + goto err_exit_conversion_mode; mutex_lock(&st->cnv_pwm_lock); pwm_get_state(st->cnv_pwm, &state); @@ -895,23 +895,16 @@ static int ad4695_offload_buffer_postenable(struct iio_dev *indio_dev) ret = pwm_apply_might_sleep(st->cnv_pwm, &state); mutex_unlock(&st->cnv_pwm_lock); if (ret) - goto err_offload_exit_conversion_mode; + goto err_offload_trigger_disable; return 0; -err_offload_exit_conversion_mode: - /* - * We have to unwind in a different order to avoid triggering offload. - * ad4695_exit_conversion_mode() triggers a conversion, so it has to be - * done after spi_offload_trigger_disable(). - */ - spi_offload_trigger_disable(st->offload, st->offload_trigger); - ad4695_exit_conversion_mode(st); - goto err_disable_busy_output; - err_offload_trigger_disable: spi_offload_trigger_disable(st->offload, st->offload_trigger); +err_exit_conversion_mode: + ad4695_exit_conversion_mode(st); + err_disable_busy_output: regmap_clear_bits(st->regmap, AD4695_REG_GP_MODE, AD4695_REG_GP_MODE_BUSY_GP_EN); -- cgit v1.2.3 From 761e8b489e6cf166c574034b70637f8a7eadd0ee Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Tue, 31 Mar 2026 13:13:00 +0300 Subject: iio: gyro: adis16260: fix division by zero in write_raw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a validation check for the sampling frequency value before using it as a divisor. A user writing zero to the sampling_frequency sysfs attribute triggers a division by zero in the kernel. Fixes: 089a41985c6c ("staging: iio: adis16260 digital gyro driver") Signed-off-by: Antoniu Miclaus Reviewed-by: Nuno Sá Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/adis16260.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/gyro/adis16260.c b/drivers/iio/gyro/adis16260.c index 586e6cfa14a9..91b9c5f18ec4 100644 --- a/drivers/iio/gyro/adis16260.c +++ b/drivers/iio/gyro/adis16260.c @@ -287,6 +287,9 @@ static int adis16260_write_raw(struct iio_dev *indio_dev, addr = adis16260_addresses[chan->scan_index][1]; return adis_write_reg_16(adis, addr, val); case IIO_CHAN_INFO_SAMP_FREQ: + if (val <= 0) + return -EINVAL; + if (spi_get_device_id(adis->spi)->driver_data) t = 256 / val; else -- cgit v1.2.3 From bb21ee31f5753a7972148798fd7dfb841dd33bdb Mon Sep 17 00:00:00 2001 From: Svyatoslav Ryhel Date: Thu, 16 Apr 2026 14:14:42 +0300 Subject: iio: Fix iio_multiply_value use in iio_read_channel_processed_scale The function iio_multiply_value returns IIO_VAL_INT (1) on success or a negative error number on failure, while iio_read_channel_processed_scale should return an error code or 0. This creates a situation where the expected result is treated as an error. Fix this by checking the iio_multiply_value result separately, instead of passing it as a return value. Fixes: 05f958d003c9 ("iio: Improve iio_read_channel_processed_scale() precision") Signed-off-by: Svyatoslav Ryhel Reviewed-by: Hans de Goede Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/inkern.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 0df0ab3de270..9ce20cb05a9b 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -738,7 +738,11 @@ int iio_read_channel_processed_scale(struct iio_channel *chan, int *val, if (ret < 0) return ret; - return iio_multiply_value(val, scale, ret, pval, pval2); + ret = iio_multiply_value(val, scale, ret, pval, pval2); + if (ret < 0) + return ret; + + return 0; } else { ret = iio_channel_read(chan, val, NULL, IIO_CHAN_INFO_RAW); if (ret < 0) -- cgit v1.2.3 From 7e5c0f97c66ad538b87c04a640573371fb434b4f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 16 Apr 2026 11:01:22 +0200 Subject: iio: adc: nxp-sar-adc: Avoid division by zero When Common Clock Framework is disabled, clk_get_rate() returns 0. This is used as part of the divisor to perform nanosecond delays with help of ndelay(). When the above condition occurs the compiler, due to unspecified behaviour, is free to do what it wants to. Here it saturates the value, which is logical from mathematics point of view. However, the ndelay() implementation has set a reasonable upper threshold and refuses to provide anything for such a long delay. That's why code may not be linked under these circumstances. To solve the issue, provide a wrapper that calls ndelay() when the value is known not to be zero. Fixes: 4434072a893e ("iio: adc: Add the NXP SAR ADC support for the s32g2/3 platforms") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202603311958.ly6uROit-lkp@intel.com/ Signed-off-by: Andy Shevchenko Acked-by: Daniel Lezcano Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/nxp-sar-adc.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/nxp-sar-adc.c b/drivers/iio/adc/nxp-sar-adc.c index 9d9f2c76bed4..705dd7da1bd2 100644 --- a/drivers/iio/adc/nxp-sar-adc.c +++ b/drivers/iio/adc/nxp-sar-adc.c @@ -198,6 +198,15 @@ static void nxp_sar_adc_irq_cfg(struct nxp_sar_adc *info, bool enable) writel(0, NXP_SAR_ADC_IMR(info->regs)); } +static void nxp_sar_adc_wait_for(struct nxp_sar_adc *info, unsigned int cycles) +{ + u64 rate; + + rate = clk_get_rate(info->clk); + if (rate) + ndelay(div64_u64(NSEC_PER_SEC, rate * cycles)); +} + static bool nxp_sar_adc_set_enabled(struct nxp_sar_adc *info, bool enable) { u32 mcr; @@ -221,7 +230,7 @@ static bool nxp_sar_adc_set_enabled(struct nxp_sar_adc *info, bool enable) * configuration of NCMR and the setting of NSTART. */ if (enable) - ndelay(div64_u64(NSEC_PER_SEC, clk_get_rate(info->clk) * 3)); + nxp_sar_adc_wait_for(info, 3); return pwdn; } @@ -469,7 +478,7 @@ static void nxp_sar_adc_stop_conversion(struct nxp_sar_adc *info) * only when the capture finishes. The delay will be very * short, usec-ish, which is acceptable in the atomic context. */ - ndelay(div64_u64(NSEC_PER_SEC, clk_get_rate(info->clk)) * 80); + nxp_sar_adc_wait_for(info, 80); } static int nxp_sar_adc_start_conversion(struct nxp_sar_adc *info, bool raw) -- cgit v1.2.3 From 0d42e2c0bd6ceb89e44c6e065f9bdf9b1df3ef0c Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 14 Apr 2026 13:30:06 +0100 Subject: iio: adc: npcm: fix unbalanced clk_disable_unprepare() The driver acquired the ADC clock with devm_clk_get() and read its rate, but never called clk_prepare_enable(). The probe error path and npcm_adc_remove() both called clk_disable_unprepare() unconditionally, causing the clk framework's enable/prepare counts to underflow on probe failure or module unbind. The issue went unnoticed because NPCM BMC firmware leaves the ADC clock enabled at boot, so the driver happened to work in practice. Switch to devm_clk_get_enabled() so the clock is properly enabled during probe and automatically released by the device-managed cleanup, and drop the now-redundant clk_disable_unprepare() from both the probe error path and remove(). While at it, drop the duplicate error message on devm_request_irq() failure since the IRQ core already logs it. Fixes: 9bf85fbc9d8f ("iio: adc: add NPCM ADC driver") Signed-off-by: David Carlier Reviewed-by: Andy Shevchenko Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/npcm_adc.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/iio/adc/npcm_adc.c b/drivers/iio/adc/npcm_adc.c index ddabb9600d46..61c8b825bda1 100644 --- a/drivers/iio/adc/npcm_adc.c +++ b/drivers/iio/adc/npcm_adc.c @@ -231,7 +231,7 @@ static int npcm_adc_probe(struct platform_device *pdev) if (IS_ERR(info->reset)) return PTR_ERR(info->reset); - info->adc_clk = devm_clk_get(&pdev->dev, NULL); + info->adc_clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(info->adc_clk)) { dev_warn(&pdev->dev, "ADC clock failed: can't read clk\n"); return PTR_ERR(info->adc_clk); @@ -244,17 +244,13 @@ static int npcm_adc_probe(struct platform_device *pdev) info->adc_sample_hz = clk_get_rate(info->adc_clk) / ((div + 1) * 2); irq = platform_get_irq(pdev, 0); - if (irq < 0) { - ret = irq; - goto err_disable_clk; - } + if (irq < 0) + return irq; ret = devm_request_irq(&pdev->dev, irq, npcm_adc_isr, 0, "NPCM_ADC", indio_dev); - if (ret < 0) { - dev_err(dev, "failed requesting interrupt\n"); - goto err_disable_clk; - } + if (ret < 0) + return ret; reg_con = ioread32(info->regs + NPCM_ADCCON); info->vref = devm_regulator_get_optional(&pdev->dev, "vref"); @@ -262,7 +258,7 @@ static int npcm_adc_probe(struct platform_device *pdev) ret = regulator_enable(info->vref); if (ret) { dev_err(&pdev->dev, "Can't enable ADC reference voltage\n"); - goto err_disable_clk; + return ret; } iowrite32(reg_con & ~NPCM_ADCCON_REFSEL, @@ -272,10 +268,8 @@ static int npcm_adc_probe(struct platform_device *pdev) * Any error which is not ENODEV indicates the regulator * has been specified and so is a failure case. */ - if (PTR_ERR(info->vref) != -ENODEV) { - ret = PTR_ERR(info->vref); - goto err_disable_clk; - } + if (PTR_ERR(info->vref) != -ENODEV) + return PTR_ERR(info->vref); /* Use internal reference */ iowrite32(reg_con | NPCM_ADCCON_REFSEL, @@ -314,8 +308,6 @@ err_iio_register: iowrite32(reg_con & ~NPCM_ADCCON_ADC_EN, info->regs + NPCM_ADCCON); if (!IS_ERR(info->vref)) regulator_disable(info->vref); -err_disable_clk: - clk_disable_unprepare(info->adc_clk); return ret; } @@ -332,7 +324,6 @@ static void npcm_adc_remove(struct platform_device *pdev) iowrite32(regtemp & ~NPCM_ADCCON_ADC_EN, info->regs + NPCM_ADCCON); if (!IS_ERR(info->vref)) regulator_disable(info->vref); - clk_disable_unprepare(info->adc_clk); } static struct platform_driver npcm_adc_driver = { -- cgit v1.2.3 From 5aba4f94b225617a55fed442a70329b2ee19c0a5 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Wed, 1 Apr 2026 14:08:29 +0300 Subject: iio: chemical: scd30: fix division by zero in write_raw Add a zero check for val2 before using it as a divisor when setting the sampling frequency. A user writing a zero fractional part to the sampling_frequency sysfs attribute triggers a division by zero in the kernel. Fixes: 64b3d8b1b0f5 ("iio: chemical: scd30: add core driver") Signed-off-by: Antoniu Miclaus Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/scd30_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/chemical/scd30_core.c b/drivers/iio/chemical/scd30_core.c index a665fcb78806..11d6bc1b63e6 100644 --- a/drivers/iio/chemical/scd30_core.c +++ b/drivers/iio/chemical/scd30_core.c @@ -256,7 +256,7 @@ static int scd30_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const guard(mutex)(&state->lock); switch (mask) { case IIO_CHAN_INFO_SAMP_FREQ: - if (val) + if (val || !val2) return -EINVAL; val = 1000000000 / val2; -- cgit v1.2.3 From 0de4cb473aed57ee4ba7e0551ad27bddc19fc519 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 28 Apr 2026 08:10:43 -0700 Subject: workqueue: fix devm_alloc_workqueue() va_list misuse devm_alloc_workqueue() built a va_list and passed it as a single positional argument to the variadic alloc_workqueue() macro: va_start(args, max_active); wq = alloc_workqueue(fmt, flags, max_active, args); va_end(args); C does not allow forwarding a va_list through a ... parameter. alloc_workqueue() expands to alloc_workqueue_noprof(), which runs its own va_start() over its ... params, so the inner vsnprintf(wq->name, sizeof(wq->name), fmt, args) in __alloc_workqueue() received the outer va_list object as the first variadic slot rather than the caller's actual format arguments. Add a new static helper alloc_workqueue_va() that wraps __alloc_workqueue() and runs wq_init_lockdep() on success, and fold both alloc_workqueue_noprof() and devm_alloc_workqueue_noprof() onto it as suggested by Tejun. The wq_init_lockdep() step is required on the devm path too, otherwise __flush_workqueue()'s on-stack COMPLETION_INITIALIZER_ONSTACK_MAP would NULL-deref wq->lockdep_map. No caller changes are required. devm_alloc_ordered_workqueue() is a macro forwarding to devm_alloc_workqueue() and inherits the fix. Two in-tree callers actively trigger the broken path on every probe: drivers/power/supply/mt6370-charger.c:889 drivers/power/supply/max77705_charger.c:649 both of which use devm_alloc_ordered_workqueue(dev, "%s", 0, dev_name(dev)). A standalone reproducer module is available at[1]. Link: https://github.com/leitao/debug/blob/main/workqueue/valist/wq_va_test.c [1] Fixes: 1dfc9d60a69e ("workqueue: devres: Add device-managed allocate workqueue") Signed-off-by: Breno Leitao Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 6 ++++-- kernel/workqueue.c | 28 +++++++++++++++++++--------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ab6cb70ca1a5..6177624539b3 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -534,8 +534,10 @@ alloc_workqueue_noprof(const char *fmt, unsigned int flags, int max_active, ...) * Pointer to the allocated workqueue on success, %NULL on failure. */ __printf(2, 5) struct workqueue_struct * -devm_alloc_workqueue(struct device *dev, const char *fmt, unsigned int flags, - int max_active, ...); +devm_alloc_workqueue_noprof(struct device *dev, const char *fmt, + unsigned int flags, int max_active, ...); +#define devm_alloc_workqueue(...) \ + alloc_hooks(devm_alloc_workqueue_noprof(__VA_ARGS__)) #ifdef CONFIG_LOCKDEP /** diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5f747f241a5f..24d0265191d4 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5906,6 +5906,20 @@ err_destroy: return NULL; } +static struct workqueue_struct *alloc_workqueue_va(const char *fmt, + unsigned int flags, + int max_active, + va_list args) +{ + struct workqueue_struct *wq; + + wq = __alloc_workqueue(fmt, flags, max_active, args); + if (wq) + wq_init_lockdep(wq); + + return wq; +} + __printf(1, 4) struct workqueue_struct *alloc_workqueue_noprof(const char *fmt, unsigned int flags, @@ -5915,12 +5929,8 @@ struct workqueue_struct *alloc_workqueue_noprof(const char *fmt, va_list args; va_start(args, max_active); - wq = __alloc_workqueue(fmt, flags, max_active, args); + wq = alloc_workqueue_va(fmt, flags, max_active, args); va_end(args); - if (!wq) - return NULL; - - wq_init_lockdep(wq); return wq; } @@ -5932,15 +5942,15 @@ static void devm_workqueue_release(void *res) } __printf(2, 5) struct workqueue_struct * -devm_alloc_workqueue(struct device *dev, const char *fmt, unsigned int flags, - int max_active, ...) +devm_alloc_workqueue_noprof(struct device *dev, const char *fmt, + unsigned int flags, int max_active, ...) { struct workqueue_struct *wq; va_list args; int ret; va_start(args, max_active); - wq = alloc_workqueue(fmt, flags, max_active, args); + wq = alloc_workqueue_va(fmt, flags, max_active, args); va_end(args); if (!wq) return NULL; @@ -5951,7 +5961,7 @@ devm_alloc_workqueue(struct device *dev, const char *fmt, unsigned int flags, return wq; } -EXPORT_SYMBOL_GPL(devm_alloc_workqueue); +EXPORT_SYMBOL_GPL(devm_alloc_workqueue_noprof); #ifdef CONFIG_LOCKDEP __printf(1, 5) -- cgit v1.2.3 From 278dd0487907112de8e34e1a97ac6145a8081523 Mon Sep 17 00:00:00 2001 From: Rosalie Wanders Date: Fri, 10 Apr 2026 21:53:54 +0200 Subject: HID: sony: fix incorrect force-feedback check in sony_suspend() This commit fixes the incorrect force-feedback check in sony_suspend(), without this the check will always be true due to checking a constant define that is never 0. Signed-off-by: Rosalie Wanders Signed-off-by: Jiri Kosina --- drivers/hid/hid-sony.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index b5e724676c1d..23db406092ef 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -2456,11 +2456,10 @@ static void sony_remove(struct hid_device *hdev) static int sony_suspend(struct hid_device *hdev, pm_message_t message) { #ifdef CONFIG_SONY_FF + struct sony_sc *sc = hid_get_drvdata(hdev); /* On suspend stop any running force-feedback events */ - if (SONY_FF_SUPPORT) { - struct sony_sc *sc = hid_get_drvdata(hdev); - + if (sc->quirks & SONY_FF_SUPPORT) { sc->left = sc->right = 0; sony_send_output_report(sc); } -- cgit v1.2.3 From 80c4bbb2b38513e9c3d84805fa61a0ee16d79c45 Mon Sep 17 00:00:00 2001 From: Michael Zaidman Date: Sat, 11 Apr 2026 09:24:37 +0300 Subject: HID: ft260: validate i2c input report length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two checks to ft260_raw_event() to prevent out-of-bounds reads from malicious or malfunctioning devices: First, reject reports shorter than the 2-byte header (report ID + length fields). Without this, even accessing xfer->length on a 1-byte report is an OOB read. Second, validate xfer->length against the actual data capacity of the received HID report. Each I2C data report ID (0xD0 through 0xDE) defines a different report size in the HID descriptor, so the available payload varies per report. A corrupted length field could cause memcpy to read beyond the report buffer. Reported-by: Sebastián Josué Alba Vives Signed-off-by: Michael Zaidman Signed-off-by: Jiri Kosina --- drivers/hid/hid-ft260.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c index 333341e80b0e..70e2eedb465a 100644 --- a/drivers/hid/hid-ft260.c +++ b/drivers/hid/hid-ft260.c @@ -1068,10 +1068,22 @@ static int ft260_raw_event(struct hid_device *hdev, struct hid_report *report, struct ft260_device *dev = hid_get_drvdata(hdev); struct ft260_i2c_input_report *xfer = (void *)data; + if (size < offsetof(struct ft260_i2c_input_report, data)) { + hid_err(hdev, "short report %d\n", size); + return -1; + } + if (xfer->report >= FT260_I2C_REPORT_MIN && xfer->report <= FT260_I2C_REPORT_MAX) { - ft260_dbg("i2c resp: rep %#02x len %d\n", xfer->report, - xfer->length); + ft260_dbg("i2c resp: rep %#02x len %d size %d\n", + xfer->report, xfer->length, size); + + if (xfer->length > size - + offsetof(struct ft260_i2c_input_report, data)) { + hid_err(hdev, "report %#02x: length %d exceeds HID report size\n", + xfer->report, xfer->length); + return -1; + } if ((dev->read_buf == NULL) || (xfer->length > dev->read_len - dev->read_idx)) { -- cgit v1.2.3 From 0f2b8466fb744a8b3313a9c1e2008f8cd53b2db7 Mon Sep 17 00:00:00 2001 From: Rosalie Wanders Date: Sat, 11 Apr 2026 17:32:48 +0200 Subject: HID: sony: remove unneeded WARN_ON() in sony_leds_init() This commit removes the unneeded WARN_ON() macro usage in sony_leds_init(), this is unneeded because the sony_leds_init() function call is already gated behind a SONY_LED_SUPPORT check in sony_input_configured() Signed-off-by: Rosalie Wanders Signed-off-by: Jiri Kosina --- drivers/hid/hid-sony.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 23db406092ef..6a860b9ef677 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -1640,9 +1640,6 @@ static int sony_leds_init(struct sony_sc *sc) u8 max_brightness[MAX_LEDS] = { [0 ... (MAX_LEDS - 1)] = 1 }; u8 use_hw_blink[MAX_LEDS] = { 0 }; - if (WARN_ON(!(sc->quirks & SONY_LED_SUPPORT))) - return -EINVAL; - if (sc->quirks & BUZZ_CONTROLLER) { sc->led_count = 4; use_color_names = 0; -- cgit v1.2.3 From a4170b63eda999d20ad6dc39ddc3ce5c1ac619e6 Mon Sep 17 00:00:00 2001 From: Rosalie Wanders Date: Sun, 12 Apr 2026 03:08:06 +0200 Subject: HID: sony: add missing size validation for SMK-Link remotes This commit adds the missing size validation for SMK-Link remotes in sony_raw_event(), this prevents a malicious device from allowing hid-sony to read out of bounds of the provided buffer. I do not own these devices so the size check only forces that the buffer is large enough for nsg_mrxu_parse_report(). Signed-off-by: Rosalie Wanders Signed-off-by: Jiri Kosina --- drivers/hid/hid-sony.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 6a860b9ef677..13fe7a3e57d7 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -1169,10 +1169,9 @@ static int sony_raw_event(struct hid_device *hdev, struct hid_report *report, sixaxis_parse_report(sc, rd, size); } else if ((sc->quirks & MOTION_CONTROLLER_BT) && rd[0] == 0x01 && size == 49) { sixaxis_parse_report(sc, rd, size); - } else if ((sc->quirks & NAVIGATION_CONTROLLER) && rd[0] == 0x01 && - size == 49) { + } else if ((sc->quirks & NAVIGATION_CONTROLLER) && rd[0] == 0x01 && size == 49) { sixaxis_parse_report(sc, rd, size); - } else if ((sc->quirks & NSG_MRXU_REMOTE) && rd[0] == 0x02) { + } else if ((sc->quirks & NSG_MRXU_REMOTE) && rd[0] == 0x02 && size >= 12) { nsg_mrxu_parse_report(sc, rd, size); return 1; } else if ((sc->quirks & RB4_GUITAR_PS4_USB) && rd[0] == 0x01 && size == 64) { -- cgit v1.2.3 From 12bd440b66ed8968afffc46928233967b5b79b98 Mon Sep 17 00:00:00 2001 From: Rosalie Wanders Date: Sun, 12 Apr 2026 03:12:03 +0200 Subject: HID: sony: add missing size validation for Rock Band 3 Pro instruments This commit adds the missing size validation for Rock Band 3 PS3 Pro instruments in sony_raw_event(), this prevents a malicious device from allowing hid-sony to read out of bounds of the provided buffer. Signed-off-by: Rosalie Wanders Signed-off-by: Jiri Kosina --- drivers/hid/hid-sony.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 13fe7a3e57d7..315343415e8f 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -1188,7 +1188,7 @@ static int sony_raw_event(struct hid_device *hdev, struct hid_report *report, /* Rock Band 3 PS3 Pro instruments set rd[24] to 0xE0 when they're * sending full reports, and 0x02 when only sending navigation. */ - if ((sc->quirks & RB3_PRO_INSTRUMENT) && rd[24] == 0x02) { + if ((sc->quirks & RB3_PRO_INSTRUMENT) && size >= 25 && rd[24] == 0x02) { /* Only attempt to enable full report every 8 seconds */ if (time_after(jiffies, sc->rb3_pro_poke_jiffies)) { sc->rb3_pro_poke_jiffies = jiffies + secs_to_jiffies(8); -- cgit v1.2.3 From 163f8b7f9a84086c67c76aeadc04e6d43e32df6e Mon Sep 17 00:00:00 2001 From: Kuba Piecuch Date: Tue, 28 Apr 2026 12:46:01 +0000 Subject: sched_ext: Call wakeup_preempt() in local_dsq_post_enq() There are several edge cases (see linked thread) where an IMMED task can be left lingering on a local DSQ if an RT task swoops in at the wrong time. All of these edge cases are due to rq->next_class being idle even after dispatching a task to rq's local DSQ. We should bump rq->next_class to &ext_sched_class as soon as we've inserted a task into the local DSQ. To optimize the common case of rq->next_class == &ext_sched_class, only call wakeup_preempt() if rq->next_class is below EXT. If next_class is EXT or above, wakeup_preempt() is a no-op anyway. This lets us also simplify the preempt_curr() logic a bit since wakeup_preempt() will call preempt_curr() for us if next_class is below EXT. Link: https://lore.kernel.org/all/DHZPHUFXB4N3.2RY28MUEWBNYK@google.com/ Signed-off-by: Kuba Piecuch Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 9eda20e5fdb8..cac0b18239fe 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1402,14 +1402,51 @@ static void local_dsq_post_enq(struct scx_sched *sch, struct scx_dispatch_q *dsq struct task_struct *p, u64 enq_flags) { struct rq *rq = container_of(dsq, struct rq, scx.local_dsq); - bool preempt = false; call_task_dequeue(sch, rq, p, 0); + /* + * Note that @rq's lock may be dropped between this enqueue and @p + * actually getting on CPU. This gives higher-class tasks (e.g. RT) + * an opportunity to wake up on @rq and prevent @p from running. + * Here are some concrete examples: + * + * Example 1: + * + * We dispatch two tasks from a single ops.dispatch(): + * - First, a local task to this CPU's local DSQ; + * - Second, a local/remote task to a remote CPU's local DSQ. + * We must drop the local rq lock in order to finish the second + * dispatch. In that time, an RT task can wake up on the local rq. + * + * Example 2: + * + * We dispatch a local/remote task to a remote CPU's local DSQ. + * We must drop the remote rq lock before the dispatched task can run, + * which gives an RT task an opportunity to wake up on the remote rq. + * + * Both examples work the same if we replace dispatching with moving + * the tasks from a user-created DSQ. + * + * We must detect these wakeups so that we can re-enqueue IMMED tasks + * from @rq's local DSQ. scx_wakeup_preempt() serves exactly this + * purpose, but for it to be invoked, we must ensure that we bump + * @rq->next_class to &ext_sched_class if it's currently idle. + * + * wakeup_preempt() does the bumping, and since we only invoke it if + * @rq->next_class is below &ext_sched_class, it will also + * resched_curr(rq). + */ + if (sched_class_above(p->sched_class, rq->next_class)) + wakeup_preempt(rq, p, 0); + /* * If @rq is in balance, the CPU is already vacant and looking for the * next task to run. No need to preempt or trigger resched after moving * @p into its local DSQ. + * Note that the wakeup_preempt() above may have already triggered + * a resched if @rq->next_class was idle. It's harmless, since + * need_resched is cleared immediately after task pick. */ if (rq->scx.flags & SCX_RQ_IN_BALANCE) return; @@ -1417,11 +1454,8 @@ static void local_dsq_post_enq(struct scx_sched *sch, struct scx_dispatch_q *dsq if ((enq_flags & SCX_ENQ_PREEMPT) && p != rq->curr && rq->curr->sched_class == &ext_sched_class) { rq->curr->scx.slice = 0; - preempt = true; - } - - if (preempt || sched_class_above(&ext_sched_class, rq->curr->sched_class)) resched_curr(rq); + } } static void dispatch_enqueue(struct scx_sched *sch, struct rq *rq, -- cgit v1.2.3 From 55ce1858848132ed074fe907f00b5ce1ccab0ce1 Mon Sep 17 00:00:00 2001 From: Damien Dejean Date: Tue, 14 Apr 2026 13:38:58 +0000 Subject: HID: elan: Add support for ELAN SB974D touchpad MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Elan SB974D touchpad uses ELAN_MT_I2C format to send HID reports. Add an entry to match for the device and parse its vendor specific format. Signed-off-by: Damien Dejean Signed-off-by: Kornel Dulęba Signed-off-by: Jiri Kosina --- drivers/hid/hid-elan.c | 1 + drivers/hid/hid-ids.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-elan.c b/drivers/hid/hid-elan.c index 76d93fc48f6a..0190ad567ce4 100644 --- a/drivers/hid/hid-elan.c +++ b/drivers/hid/hid-elan.c @@ -513,6 +513,7 @@ static const struct hid_device_id elan_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_HP_X2_10_COVER), .driver_data = ELAN_HAS_LED }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_TOSHIBA_CLICK_L9W) }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_SB974D) }, { } }; MODULE_DEVICE_TABLE(hid, elan_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 0cf63742315b..8cfec7dced66 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -455,6 +455,7 @@ #define USB_DEVICE_ID_EDIFIER_QR30 0xa101 /* EDIFIER Hal0 2.0 SE */ #define USB_VENDOR_ID_ELAN 0x04f3 +#define USB_DEVICE_ID_SB974D 0x0400 #define USB_DEVICE_ID_TOSHIBA_CLICK_L9W 0x0401 #define USB_DEVICE_ID_HP_X2 0x074d #define USB_DEVICE_ID_HP_X2_10_COVER 0x0755 -- cgit v1.2.3 From 3524900cc571bd922a1a6b6a0eb0c2705cdb3559 Mon Sep 17 00:00:00 2001 From: Matthew Schwartz Date: Mon, 20 Apr 2026 11:15:22 -0700 Subject: HID: hid-lenovo-go-s: restore OS_TYPE after resume from s2idle The controller MCU does not persist OS_TYPE across power cycles. During s2idle resume, the USB device may be power-cycled, causing the OS_TYPE setting to revert to the default Windows value. Add a reset_resume callback so that this is correctly restored after resume. Fixes: a23f3497bf208c59ad ("HID: hid-lenovo-go-s: Add Lenovo Legion Go S Series HID Driver") Reviewed-by: Derek J. Clark Signed-off-by: Matthew Schwartz Signed-off-by: Jiri Kosina --- drivers/hid/hid-lenovo-go-s.c | 44 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/drivers/hid/hid-lenovo-go-s.c b/drivers/hid/hid-lenovo-go-s.c index 01c7bdd4fbe0..ff1782a75191 100644 --- a/drivers/hid/hid-lenovo-go-s.c +++ b/drivers/hid/hid-lenovo-go-s.c @@ -1369,6 +1369,14 @@ static void cfg_setup(struct work_struct *work) "Failed to retrieve IMU Manufacturer: %i\n", ret); return; } + + ret = mcu_property_out(drvdata.hdev, GET_GAMEPAD_CFG, FEATURE_OS_MODE, + NULL, 0); + if (ret) { + dev_err(&drvdata.hdev->dev, + "Failed to retrieve OS Mode: %i\n", ret); + return; + } } static int hid_gos_cfg_probe(struct hid_device *hdev, @@ -1427,6 +1435,27 @@ static void hid_gos_cfg_remove(struct hid_device *hdev) hid_set_drvdata(hdev, NULL); } +static int hid_gos_cfg_reset_resume(struct hid_device *hdev) +{ + u8 os_mode = drvdata.os_mode; + int ret; + + ret = mcu_property_out(drvdata.hdev, SET_GAMEPAD_CFG, + FEATURE_OS_MODE, &os_mode, 1); + if (ret < 0) + return ret; + + ret = mcu_property_out(drvdata.hdev, GET_GAMEPAD_CFG, + FEATURE_OS_MODE, NULL, 0); + if (ret < 0) + return ret; + + if (drvdata.os_mode != os_mode) + return -ENODEV; + + return 0; +} + static int hid_gos_probe(struct hid_device *hdev, const struct hid_device_id *id) { @@ -1481,6 +1510,20 @@ static void hid_gos_remove(struct hid_device *hdev) } } +static int hid_gos_reset_resume(struct hid_device *hdev) +{ + int ep = get_endpoint_address(hdev); + + switch (ep) { + case GO_S_CFG_INTF_IN: + return hid_gos_cfg_reset_resume(hdev); + default: + break; + } + + return 0; +} + static const struct hid_device_id hid_gos_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_QHE, USB_DEVICE_ID_LENOVO_LEGION_GO_S_XINPUT) }, @@ -1496,6 +1539,7 @@ static struct hid_driver hid_lenovo_go_s = { .probe = hid_gos_probe, .remove = hid_gos_remove, .raw_event = hid_gos_raw_event, + .reset_resume = hid_gos_reset_resume, }; module_hid_driver(hid_lenovo_go_s); -- cgit v1.2.3 From ae4ac077332ea3341a0f4c0973556c6b7ac5b7a1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Apr 2026 10:10:02 +0300 Subject: HID: intel-thc-hid: Intel-quickspi: Fix some error codes If we have a partial read that is supposed to be treated as failure but in this code we forgot to set the error code. Return -EINVAL. Fixes: 9d8d51735a3a ("HID: intel-thc-hid: intel-quickspi: Add HIDSPI protocol implementation") Signed-off-by: Dan Carpenter Reviewed-by: Even Xu Reviewed-by: Mark Pearson Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c index 16f780bc879b..cb19057f1191 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c @@ -94,7 +94,7 @@ static int quickspi_get_device_descriptor(struct quickspi_device *qsdev) dev_err_once(qsdev->dev, "Read DEVICE_DESCRIPTOR failed, ret = %d\n", ret); dev_err_once(qsdev->dev, "DEVICE_DESCRIPTOR expected len = %u, actual read = %u\n", input_len, read_len); - return ret; + return ret ?: -EINVAL; } input_rep_type = ((struct input_report_body_header *)read_buf)->input_report_type; @@ -318,7 +318,7 @@ int reset_tic(struct quickspi_device *qsdev) dev_err_once(qsdev->dev, "Read RESET_RESPONSE body failed, ret = %d\n", ret); dev_err_once(qsdev->dev, "RESET_RESPONSE body expected len = %u, actual = %u\n", read_len, actual_read_len); - return ret; + return ret ?: -EINVAL; } input_rep_type = FIELD_GET(HIDSPI_IN_REP_BDY_HDR_REP_TYPE, reset_response); -- cgit v1.2.3 From 487359284509a6745e14b8c0518768bc277809b0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Apr 2026 10:33:16 +0200 Subject: HID: uclogic: Fix regression of input name assignment The previous fix for adding the devm_kasprintf() return check in the commit bd07f751208b ("HID: uclogic: Add NULL check in uclogic_input_configured()") changed the condition of hi->input->name assignment, and it resulted in missing the proper input device name when no custom suffix is defined. Restore the conditional to the original content to address the regression. Fixes: bd07f751208b ("HID: uclogic: Add NULL check in uclogic_input_configured()") Signed-off-by: Takashi Iwai Signed-off-by: Jiri Kosina --- drivers/hid/hid-uclogic-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index bd7f93e96e4e..b73f09d26688 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -184,7 +184,9 @@ static int uclogic_input_configured(struct hid_device *hdev, suffix = "System Control"; break; } - } else { + } + + if (suffix) { hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, suffix); if (!hi->input->name) -- cgit v1.2.3 From d99f7a32f09dccbe396187370ec1a74a31b73d7e Mon Sep 17 00:00:00 2001 From: Cheng-Yang Chou Date: Wed, 29 Apr 2026 01:36:12 +0800 Subject: sched_ext: Fix scx_flush_disable_work() UAF race scx_flush_disable_work() calls irq_work_sync() followed by kthread_flush_work() to ensure that the disable kthread work has fully completed before bpf_scx_unreg() frees the SCX scheduler. However, a concurrent scx_vexit() (e.g., triggered by a watchdog stall) creates a race window between scx_claim_exit() and irq_work_queue(): CPU A (scx_vexit (watchdog)) CPU B (bpf_scx_unreg) ---- ---- scx_claim_exit() atomic_try_cmpxchg(NONE->kind) stack_trace_save() vscnprintf() scx_disable() scx_claim_exit() -> FAIL scx_flush_disable_work() irq_work_sync() // no-op: not queued yet kthread_flush_work() // no-op: not queued yet kobject_put(&sch->kobj) -> free %sch irq_work_queue() -> UAF on %sch scx_disable_irq_workfn() kthread_queue_work() -> UAF The root cause is that CPU B's scx_flush_disable_work() returns after syncing an irq_work that has not yet been queued, while CPU A is still executing the code between scx_claim_exit() and irq_work_queue(). Loop until exit_kind reaches SCX_EXIT_DONE or SCX_EXIT_NONE, draining disable_irq_work and disable_work in each pass. This ensures that any work queued after the previous check is caught, while also correctly handling cases where no disable was triggered (e.g., the scx_sub_enable_workfn() abort path). Fixes: 510a27055446 ("sched_ext: sync disable_irq_work in bpf_scx_unreg()") Reported-by: https://sashiko.dev/#/patchset/20260424100221.32407-1-icheng%40nvidia.com Suggested-by: Tejun Heo Signed-off-by: Cheng-Yang Chou Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index cac0b18239fe..9483be03a4ca 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6039,8 +6039,13 @@ static void scx_disable(struct scx_sched *sch, enum scx_exit_kind kind) */ static void scx_flush_disable_work(struct scx_sched *sch) { - irq_work_sync(&sch->disable_irq_work); - kthread_flush_work(&sch->disable_work); + int kind; + + do { + irq_work_sync(&sch->disable_irq_work); + kthread_flush_work(&sch->disable_work); + kind = atomic_read(&sch->exit_kind); + } while (kind != SCX_EXIT_NONE && kind != SCX_EXIT_DONE); } static void dump_newline(struct seq_buf *s) -- cgit v1.2.3 From c4cca236968683eb0d59abfb12d5c7e4d8514227 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 20 Apr 2026 12:39:51 -0500 Subject: ipmi: Add limits to event and receive message requests The driver would just fetch events and receive messages until the BMC said it was done. To avoid issues with BMCs that never say they are done, add a limit of 10 fetches at a time. In addition, an si interface has an attn state it can return from the hardware which is supposed to cause a flag fetch to see if the driver needs to fetch events or message or a few other things. If the attn bit gets stuck, it's a similar problem. So allow messages in between flag fetches so the driver itself doesn't get stuck. This is a more general fix than the previous fix for the specific bad BMC, but should fix the more general issue of a BMC that won't stop saying it has data. This has been there from the beginning of the driver. It's not a bug per-se, but it is accounting for bugs in BMCs. Reported-by: Matt Fleming Closes: https://lore.kernel.org/lkml/20260415115930.3428942-1-matt@readmodwrite.com/ Fixes: <1da177e4c3f4> ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 54 ++++++++++++++++++++++++++++++++-------- drivers/char/ipmi/ipmi_ssif.c | 23 +++++++++++++++-- 2 files changed, 64 insertions(+), 13 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 08c208cc64c5..7c3c463e08da 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -168,6 +168,10 @@ struct smi_info { OEM2_DATA_AVAIL) unsigned char msg_flags; + /* When requesting events and messages, don't do it forever. */ + unsigned int num_requests_in_a_row; + bool last_was_flag_fetch; + /* Does the BMC have an event buffer? */ bool has_event_buffer; @@ -410,7 +414,10 @@ static void start_getting_msg_queue(struct smi_info *smi_info) start_new_msg(smi_info, smi_info->curr_msg->data, smi_info->curr_msg->data_size); - smi_info->si_state = SI_GETTING_MESSAGES; + if (smi_info->si_state != SI_GETTING_MESSAGES) { + smi_info->num_requests_in_a_row = 0; + smi_info->si_state = SI_GETTING_MESSAGES; + } } static void start_getting_events(struct smi_info *smi_info) @@ -421,7 +428,10 @@ static void start_getting_events(struct smi_info *smi_info) start_new_msg(smi_info, smi_info->curr_msg->data, smi_info->curr_msg->data_size); - smi_info->si_state = SI_GETTING_EVENTS; + if (smi_info->si_state != SI_GETTING_EVENTS) { + smi_info->num_requests_in_a_row = 0; + smi_info->si_state = SI_GETTING_EVENTS; + } } /* @@ -595,6 +605,7 @@ static void handle_transaction_done(struct smi_info *smi_info) smi_info->si_state = SI_NORMAL; } else { smi_info->msg_flags = msg[3]; + smi_info->last_was_flag_fetch = true; handle_flags(smi_info); } break; @@ -646,6 +657,11 @@ static void handle_transaction_done(struct smi_info *smi_info) } else { smi_inc_stat(smi_info, events); + smi_info->num_requests_in_a_row++; + if (smi_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. */ + smi_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL; + /* * Do this before we deliver the message * because delivering the message releases the @@ -684,6 +700,11 @@ static void handle_transaction_done(struct smi_info *smi_info) } else { smi_inc_stat(smi_info, incoming_messages); + smi_info->num_requests_in_a_row++; + if (smi_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. */ + smi_info->msg_flags &= ~RECEIVE_MSG_AVAIL; + /* * Do this before we deliver the message * because delivering the message releases the @@ -825,6 +846,26 @@ restart: goto out; } + /* + * If we are currently idle, or if the last thing that was + * done was a flag fetch and there is a message pending, try + * to start the next message. + * + * We do the waiting message check to avoid a stuck flag + * completely wedging the driver. Let a message through + * in between flag operations if that happens. + */ + if (si_sm_result == SI_SM_IDLE || + (si_sm_result == SI_SM_ATTN && smi_info->waiting_msg && + smi_info->last_was_flag_fetch)) { + smi_info->last_was_flag_fetch = false; + smi_inc_stat(smi_info, idles); + + si_sm_result = start_next_msg(smi_info); + if (si_sm_result != SI_SM_IDLE) + goto restart; + } + /* * We prefer handling attn over new messages. But don't do * this if there is not yet an upper layer to handle anything. @@ -852,15 +893,6 @@ restart: } } - /* If we are currently idle, try to start the next message. */ - if (si_sm_result == SI_SM_IDLE) { - smi_inc_stat(smi_info, idles); - - si_sm_result = start_next_msg(smi_info); - if (si_sm_result != SI_SM_IDLE) - goto restart; - } - if ((si_sm_result == SI_SM_IDLE) && (atomic_read(&smi_info->req_events))) { /* diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index b49500a1bd36..f3798f4e6a63 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -225,6 +225,9 @@ struct ssif_info { bool has_event_buffer; bool supports_alert; + /* When requesting events and messages, don't do it forever. */ + unsigned int num_requests_in_a_row; + /* * Used to tell what we should do with alerts. If we are * waiting on a response, read the data immediately. @@ -413,7 +416,10 @@ static void start_event_fetch(struct ssif_info *ssif_info, unsigned long *flags) } ssif_info->curr_msg = msg; - ssif_info->ssif_state = SSIF_GETTING_EVENTS; + if (ssif_info->ssif_state != SSIF_GETTING_EVENTS) { + ssif_info->num_requests_in_a_row = 0; + ssif_info->ssif_state = SSIF_GETTING_EVENTS; + } ipmi_ssif_unlock_cond(ssif_info, flags); msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2); @@ -436,7 +442,10 @@ static void start_recv_msg_fetch(struct ssif_info *ssif_info, } ssif_info->curr_msg = msg; - ssif_info->ssif_state = SSIF_GETTING_MESSAGES; + if (ssif_info->ssif_state != SSIF_GETTING_MESSAGES) { + ssif_info->num_requests_in_a_row = 0; + ssif_info->ssif_state = SSIF_GETTING_MESSAGES; + } ipmi_ssif_unlock_cond(ssif_info, flags); msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2); @@ -843,6 +852,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, ssif_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL; handle_flags(ssif_info, flags); } else { + ssif_info->num_requests_in_a_row++; + if (ssif_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. */ + ssif_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL; + handle_flags(ssif_info, flags); ssif_inc_stat(ssif_info, events); deliver_recv_msg(ssif_info, msg); @@ -876,6 +890,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, ssif_info->msg_flags &= ~RECEIVE_MSG_AVAIL; handle_flags(ssif_info, flags); } else { + ssif_info->num_requests_in_a_row++; + if (ssif_info->num_requests_in_a_row > 10) + /* Stop if we do this too many times. */ + ssif_info->msg_flags &= ~RECEIVE_MSG_AVAIL; + ssif_inc_stat(ssif_info, incoming_messages); handle_flags(ssif_info, flags); deliver_recv_msg(ssif_info, msg); -- cgit v1.2.3 From 09dd798270ff582d7309f285d4aaf5dbebae01cb Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 20 Apr 2026 13:02:18 -0500 Subject: ipmi:si: Return state to normal if message allocation fails There were places where nothing would get started if a message allocation failed, so the driver needs to return to normal state. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 7c3c463e08da..9a9d12be9bf7 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -497,15 +497,19 @@ retry: } else if (smi_info->msg_flags & RECEIVE_MSG_AVAIL) { /* Messages available. */ smi_info->curr_msg = alloc_msg_handle_irq(smi_info); - if (!smi_info->curr_msg) + if (!smi_info->curr_msg) { + smi_info->si_state = SI_NORMAL; return; + } start_getting_msg_queue(smi_info); } else if (smi_info->msg_flags & EVENT_MSG_BUFFER_FULL) { /* Events available. */ smi_info->curr_msg = alloc_msg_handle_irq(smi_info); - if (!smi_info->curr_msg) + if (!smi_info->curr_msg) { + smi_info->si_state = SI_NORMAL; return; + } start_getting_events(smi_info); } else if (smi_info->msg_flags & OEM_DATA_AVAIL && -- cgit v1.2.3 From a8aebe93a4938c0ca1941eeaae821738f869be3d Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 21 Apr 2026 06:50:22 -0500 Subject: ipmi:ssif: NULL thread on error Cleanup code was checking the thread for NULL, but it was possibly a PTR_ERR() in one spot. Spotted with static analysis. Link: https://sourceforge.net/p/openipmi/mailman/message/59324676/ Fixes: 75c486cb1bca ("ipmi:ssif: Clean up kthread on errors") Cc: # 91eb7ec72612: ipmi:ssif: Remove unnecessary indention Cc: stable@vger.kernel.org Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_ssif.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index f3798f4e6a63..f419b46bf002 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -1905,6 +1905,7 @@ static int ssif_probe(struct i2c_client *client) "kssif%4.4x", thread_num); if (IS_ERR(ssif_info->thread)) { rv = PTR_ERR(ssif_info->thread); + ssif_info->thread = NULL; dev_notice(&ssif_info->client->dev, "Could not start kernel thread: error %d\n", rv); -- cgit v1.2.3 From 3b75dd76e64a04771861bb5647951c264919e563 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 20 Apr 2026 06:25:09 -0700 Subject: tracing: branch: Fix inverted check on stat tracer registration init_annotated_branch_stats() and all_annotated_branch_stats() check the return value of register_stat_tracer() with "if (!ret)", but register_stat_tracer() returns 0 on success and a negative errno on failure. The inverted check causes the warning to be printed on every successful registration, e.g.: Warning: could not register annotated branches stats while leaving real failures silent. The initcall also returned a hard-coded 1 instead of the actual error. Invert the check and propagate ret so that the warning fires on real errors and the initcall reports the correct status. Cc: Mathieu Desnoyers Cc: Ingo Molnar Cc: Frederic Weisbecker Link: https://patch.msgid.link/20260420-tracing-v1-1-d8f4cd0d6af1@debian.org Fixes: 002bb86d8d42 ("tracing/ftrace: separate events tracing and stats tracing engine") Signed-off-by: Breno Leitao Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt --- kernel/trace/trace_branch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 6809b370e991..d1564db95a8f 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -373,10 +373,10 @@ __init static int init_annotated_branch_stats(void) int ret; ret = register_stat_tracer(&annotated_branch_stats); - if (!ret) { + if (ret) { printk(KERN_WARNING "Warning: could not register " "annotated branches stats\n"); - return 1; + return ret; } return 0; } @@ -438,10 +438,10 @@ __init static int all_annotated_branch_stats(void) int ret; ret = register_stat_tracer(&all_branch_stats); - if (!ret) { + if (ret) { printk(KERN_WARNING "Warning: could not register " "all branches stats\n"); - return 1; + return ret; } return 0; } -- cgit v1.2.3 From 5ec07d5204b4544271f32f6261ee097fe53cb081 Mon Sep 17 00:00:00 2001 From: Sheng Che Peng Date: Wed, 22 Apr 2026 10:18:19 +0800 Subject: tracepoint: Fix typo in tracepoint.h comment Change "my" to "may" in the description of subsystem configurations. Link: https://patch.msgid.link/20260422021819.1788091-1-synte4028@gmail.com Signed-off-by: Sheng Che Peng Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 578e520b6ee6..763eea4d80d8 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -202,7 +202,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define TP_CONDITION(args...) args /* - * Individual subsystem my have a separate configuration to + * Individual subsystem may have a separate configuration to * enable their tracepoints. By default, this file will create * the tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem * wants to be able to disable its tracepoints from being created -- cgit v1.2.3 From 927011b65a875302d08709bbe82eaf4d0d96c5d5 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 27 Apr 2026 22:49:41 -0400 Subject: drm/amdgpu: fix build for CONFIG_DRM_FBDEV_EMULATION=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The merge-commit 02e778f12359 ("Merge tag 'amd-drm-next-7.1-2026-03-12' of https://gitlab.freedesktop.org/agd5f/linux into drm-next") removes the stub for drm_fb_helper_gem_is_fb(), so the buld gets broken if DRM_FBDEV_EMULATION is not set. ‘drm_fb_helper_gem_is_fb’; did you mean ‘drm_fb_helper_from_client’? [-Wimplicit-function-declaration] 1777 | if (!drm_fb_helper_gem_is_fb(dev->fb_helper, fb->obj[0])) { | ^~~~~~~~~~~~~~~~~~~~~~~ | drm_fb_helper_from_client Restore it. Fixes: 02e778f12359 ("Merge tag 'amd-drm-next-7.1-2026-03-12' of https://gitlab.freedesktop.org/agd5f/linux into drm-next") Reviewed-by: Thomas Zimmermann Signed-off-by: Yury Norov Signed-off-by: Alex Deucher (cherry picked from commit 7b81bc38e92c2522484c42671401eaa023ae8831) --- include/drm/drm_fb_helper.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index bf391903443d..0c5e5ed7b5e7 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -273,6 +273,12 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper); int drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper); bool drm_fb_helper_gem_is_fb(const struct drm_fb_helper *fb_helper, const struct drm_gem_object *obj); +#else +static inline bool drm_fb_helper_gem_is_fb(const struct drm_fb_helper *fb_helper, + const struct drm_gem_object *obj) +{ + return false; +} #endif #endif -- cgit v1.2.3 From d2f272a36e1b4b857165021cfb2689a92efff2f5 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 20 Apr 2026 16:08:35 +0200 Subject: drm/amdgpu: rework userq fence signal processing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move more code into a common userq function. Signed-off-by: Christian König Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher (cherry picked from commit 12f52fab11500d0dce7d23c71909eaf0cf9aa701) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 13 +++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 10 +--------- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 10 +--------- drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 11 +---------- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 11 +---------- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 11 +---------- 7 files changed, 19 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index a15ca3e35344..d9b9c03267c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -205,6 +205,19 @@ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue) msecs_to_jiffies(timeout_ms)); } +void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell) +{ + struct xarray *xa = &adev->userq_doorbell_xa; + struct amdgpu_usermode_queue *queue; + unsigned long flags; + + xa_lock_irqsave(xa, flags); + queue = xa_load(xa, doorbell); + if (queue) + amdgpu_userq_fence_driver_process(queue->fence_drv); + xa_unlock_irqrestore(xa, flags); +} + static void amdgpu_userq_init_hang_detect_work(struct amdgpu_usermode_queue *queue) { INIT_DELAYED_WORK(&queue->hang_detect_work, amdgpu_userq_hang_detect_work); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 675fe6395ac8..8b8f345b60b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -156,6 +156,7 @@ void amdgpu_userq_reset_work(struct work_struct *work); void amdgpu_userq_pre_reset(struct amdgpu_device *adev); int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost); void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue); +void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell); int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 8c82e90f871b..d40ab1e95480 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6523,15 +6523,7 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: CP EOP\n"); if (adev->enable_mes && doorbell_offset) { - struct amdgpu_usermode_queue *queue; - struct xarray *xa = &adev->userq_doorbell_xa; - unsigned long flags; - - xa_lock_irqsave(xa, flags); - queue = xa_load(xa, doorbell_offset); - if (queue) - amdgpu_userq_fence_driver_process(queue->fence_drv); - xa_unlock_irqrestore(xa, flags); + amdgpu_userq_process_fence_irq(adev, doorbell_offset); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 65c33823a688..0e0b1e5b88fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -4854,15 +4854,7 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: CP EOP\n"); if (adev->enable_mes && doorbell_offset) { - struct xarray *xa = &adev->userq_doorbell_xa; - struct amdgpu_usermode_queue *queue; - unsigned long flags; - - xa_lock_irqsave(xa, flags); - queue = xa_load(xa, doorbell_offset); - if (queue) - amdgpu_userq_fence_driver_process(queue->fence_drv); - xa_unlock_irqrestore(xa, flags); + amdgpu_userq_process_fence_irq(adev, doorbell_offset); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c index 68fd3c04134d..68db1bc73bc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c @@ -3643,16 +3643,7 @@ static int gfx_v12_1_eop_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: CP EOP\n"); if (adev->enable_mes && doorbell_offset) { - struct xarray *xa = &adev->userq_doorbell_xa; - struct amdgpu_usermode_queue *queue; - unsigned long flags; - - xa_lock_irqsave(xa, flags); - queue = xa_load(xa, doorbell_offset); - if (queue) - amdgpu_userq_fence_driver_process(queue->fence_drv); - - xa_unlock_irqrestore(xa, flags); + amdgpu_userq_process_fence_irq(adev, doorbell_offset); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 0f530bb8a9a3..8ca46e1e474e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1662,17 +1662,8 @@ static int sdma_v6_0_process_fence_irq(struct amdgpu_device *adev, u32 doorbell_offset = entry->src_data[0]; if (adev->enable_mes && doorbell_offset) { - struct amdgpu_usermode_queue *queue; - struct xarray *xa = &adev->userq_doorbell_xa; - unsigned long flags; - doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; - - xa_lock_irqsave(xa, flags); - queue = xa_load(xa, doorbell_offset); - if (queue) - amdgpu_userq_fence_driver_process(queue->fence_drv); - xa_unlock_irqrestore(xa, flags); + amdgpu_userq_process_fence_irq(adev, doorbell_offset); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 9ed817b69a3b..37191e2918d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1594,17 +1594,8 @@ static int sdma_v7_0_process_fence_irq(struct amdgpu_device *adev, u32 doorbell_offset = entry->src_data[0]; if (adev->enable_mes && doorbell_offset) { - struct xarray *xa = &adev->userq_doorbell_xa; - struct amdgpu_usermode_queue *queue; - unsigned long flags; - doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; - - xa_lock_irqsave(xa, flags); - queue = xa_load(xa, doorbell_offset); - if (queue) - amdgpu_userq_fence_driver_process(queue->fence_drv); - xa_unlock_irqrestore(xa, flags); + amdgpu_userq_process_fence_irq(adev, doorbell_offset); } return 0; -- cgit v1.2.3 From ec3e3976f626d9845a228d78d8a371ddc18edec8 Mon Sep 17 00:00:00 2001 From: Gaghik Khachatrian Date: Mon, 13 Apr 2026 11:11:52 -0400 Subject: drm/amd/display: Update MCIF_ADDR macro to address IGT DWB regression [Why] A previous warning-fix commit updated type casts in the DCN3 mmhubbub code but missed updating the MCIF_ADDR macro to the correct, fully parenthesized and casted version. This caused a regression during DWB tests, where address values could be misinterpreted, potentially leading to incorrect hardware programming. [How] Updated the MCIF_ADDR macro in dcn30_mmhubbub.c to use the proper parenthesization and type casting, ensuring correct address handling. Removed redundant casts from REG_UPDATE calls for improved clarity and consistency with current coding standards. Fixes: f4cdbb5d5405 ("drm/amd/display: Fix implicit narrowing conversion warnings") Reviewed-by: Clayton King Signed-off-by: Gaghik Khachatrian Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 4f251a5e9f2297023b00b7cab606de111931cfa3) --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c index 6f2a0d5d963b..62fe5c3b18dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c @@ -40,8 +40,8 @@ #define FN(reg_name, field_name) \ mcif_wb30->mcif_wb_shift->field_name, mcif_wb30->mcif_wb_mask->field_name -#define MCIF_ADDR(addr) (((unsigned long long)addr & 0xffffffffff) + 0xFE) >> 8 -#define MCIF_ADDR_HIGH(addr) (unsigned long long)addr >> 40 +#define MCIF_ADDR(addr) ((uint32_t)((((unsigned long long)(addr) & 0xffffffffffULL) + 0xFEULL) >> 8)) +#define MCIF_ADDR_HIGH(addr) ((uint32_t)(((unsigned long long)(addr)) >> 40)) /* wbif programming guide: * 1. set up wbif parameter: -- cgit v1.2.3 From d6b99885b122528651d554a7bd907211a81579c2 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 27 Apr 2026 17:17:41 +0530 Subject: drm/amd/pm: Update emit clock logic If only one level is enabled in clock table, there is no need to follow the fine grained clock logic which expects a minimum of two levels (min/max). Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher (cherry picked from commit 7f19097af1496dd908a044ca95862f32d05f02df) --- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 3d49e58794d2..90c7127beabf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1370,7 +1370,7 @@ int smu_cmn_print_dpm_clk_levels(struct smu_context *smu, level_index = 1; } - if (!is_fine_grained) { + if (!is_fine_grained || count == 1) { for (i = 0; i < count; i++) { freq_match = !is_deep_sleep && smu_cmn_freqs_match( -- cgit v1.2.3 From 31bc64e87f5f3d9ccbb7e625d570cfd8f52c77fc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 23 Apr 2026 12:29:03 -0400 Subject: drm/amd/display: properly handle family setting for early GC 11.5.4 Early variants need an override. Fixes: 57d00816c6a9 ("drm/amdgpu: set family for GC 11.5.4") Cc: Pratik Vishwakarma Cc: Roman Li Cc: Mario Limonciello Reviewed-by: Mario Limonciello (AMD) Tested-by: Mario Limonciello (AMD) Signed-off-by: Alex Deucher (cherry picked from commit 922fccc2d3f8186008c19ba08a49ae8a9463cb50) --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 4 +--- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 +++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index fcad7daaa41b..8d99bfaa498f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -3090,10 +3090,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): - adev->family = AMDGPU_FAMILY_GC_11_5_0; - break; case IP_VERSION(11, 5, 4): - adev->family = AMDGPU_FAMILY_GC_11_5_4; + adev->family = AMDGPU_FAMILY_GC_11_5_0; break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e96a12ff2d31..f8f9953565f6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1903,7 +1903,11 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) goto error; } - init_data.asic_id.chip_family = adev->family; + /* special handling for early revisions of GC 11.5.4 */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 4)) + init_data.asic_id.chip_family = AMDGPU_FAMILY_GC_11_5_4; + else + init_data.asic_id.chip_family = adev->family; init_data.asic_id.pci_revision_id = adev->pdev->revision; init_data.asic_id.hw_internal_rev = adev->external_rev_id; -- cgit v1.2.3 From 8d80b293b41fcb5e9396db93e788b0f4ebcbafb7 Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:45:35 -0400 Subject: drm/amdgpu/vcn: set no_user_fence for VCN v2.0 enc/dec rings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VCN encoder and decoder rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: 1b61de45dfaf ("drm/amdgpu: add initial VCN2.0 support (v2)") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit e2b5499fca55f1a32960a311bbb62e35891eaf73) --- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index e35fae9cdaf6..0442bfcfd384 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -2113,6 +2113,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, + .no_user_fence = true, .secure_submission_supported = true, .get_rptr = vcn_v2_0_dec_ring_get_rptr, .get_wptr = vcn_v2_0_dec_ring_get_wptr, @@ -2145,6 +2146,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v2_0_enc_ring_get_rptr, .get_wptr = vcn_v2_0_enc_ring_get_wptr, .set_wptr = vcn_v2_0_enc_ring_set_wptr, -- cgit v1.2.3 From 4f317863a3ab212a027d8c8c3cc3af4e3fb95704 Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:45:35 -0400 Subject: drm/amdgpu/vcn: set no_user_fence for VCN v2.5 enc/dec rings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VCN encoder and decoder rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: 28c17d72072b ("drm/amdgpu: add VCN2.5 basic supports") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit efc9dd5590894109bce9a0bfe1fa5592dd6b20b1) --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 006a15451197..8b8184fe6764 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -1778,6 +1778,7 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, + .no_user_fence = true, .secure_submission_supported = true, .get_rptr = vcn_v2_5_dec_ring_get_rptr, .get_wptr = vcn_v2_5_dec_ring_get_wptr, @@ -1879,6 +1880,7 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v2_5_enc_ring_get_rptr, .get_wptr = vcn_v2_5_enc_ring_get_wptr, .set_wptr = vcn_v2_5_enc_ring_set_wptr, -- cgit v1.2.3 From f1e5a6660d7cbf006079126d9babbf0ccf538c6b Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:45:35 -0400 Subject: drm/amdgpu/vcn: set no_user_fence for VCN v3.0 enc/dec rings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VCN encoder and decoder rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: cf14826cdfb5 ("drm/amdgpu: add VCN3.0 support for Sienna_Cichlid") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit 663bed3c7b8b9a7624b0d95d300ddae034ad0614) --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 6fb4fcdbba4f..4924da5af5e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1856,6 +1856,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0x3f, .nop = VCN_DEC_SW_CMD_NO_OP, + .no_user_fence = true, .secure_submission_supported = true, .get_rptr = vcn_v3_0_dec_ring_get_rptr, .get_wptr = vcn_v3_0_dec_ring_get_wptr, @@ -2036,6 +2037,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, + .no_user_fence = true, .secure_submission_supported = true, .get_rptr = vcn_v3_0_dec_ring_get_rptr, .get_wptr = vcn_v3_0_dec_ring_get_wptr, @@ -2138,6 +2140,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v3_0_enc_ring_get_rptr, .get_wptr = vcn_v3_0_enc_ring_get_wptr, .set_wptr = vcn_v3_0_enc_ring_set_wptr, -- cgit v1.2.3 From 51f694221047c84fa185be98210eb2c354ffb8c6 Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:45:36 -0400 Subject: drm/amdgpu/vcn: set no_user_fence for VCN v4.0 enc ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VCN encoder and decoder rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: 8da1170a16e4 ("drm/amdgpu: add VCN4 ip block support") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit fd852c048b46f9825e904a4f3f4538fe9d8827d9) --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 5dec92691f73..bbdd017cbafb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1994,6 +1994,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .extra_bytes = sizeof(struct amdgpu_vcn_rb_metadata), .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, -- cgit v1.2.3 From 4532b52b34e4e4310386e6fdf6a643368599f522 Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:45:36 -0400 Subject: drm/amdgpu/vcn: set no_user_fence for VCN v4.0.3 enc ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VCN encoder and decoder rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: b889ef4ac988 ("drm/amdgpu/vcn: add vcn support for VCN4_0_3") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit ff1a5a125c5a70c328806b9bc01d7d942cf3f9aa) --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index ff3013b97abd..10e8fc2821f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1775,6 +1775,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v4_0_3_unified_ring_get_rptr, .get_wptr = vcn_v4_0_3_unified_ring_get_wptr, .set_wptr = vcn_v4_0_3_unified_ring_set_wptr, -- cgit v1.2.3 From 589a254bf3e88204c8402b9cbccd5e23a0af990f Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:45:36 -0400 Subject: drm/amdgpu/vcn: set no_user_fence for VCN v4.0.5 enc ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VCN encoder and decoder rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: 547aad32edac ("drm/amdgpu: add VCN4 ip block support") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit 084d94ac93707bdda07efb5cee786f632de4219b) --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 1f6a22983c0d..1571cc5a148c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1483,6 +1483,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v4_0_5_unified_ring_get_rptr, .get_wptr = vcn_v4_0_5_unified_ring_get_wptr, .set_wptr = vcn_v4_0_5_unified_ring_set_wptr, -- cgit v1.2.3 From 8cae0ce77de492d7c31c1532a2e80c0c6e7e58cb Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:45:36 -0400 Subject: drm/amdgpu/vcn: set no_user_fence for VCN v5.0.0 enc ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VCN encoder and decoder rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: b6d1a0632051 ("drm/amdgpu: add VCN_5_0_0 IP block support") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit 49b1fbbb5a071197ee71e2d70959b1cb29bdc317) --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 6109124f852e..d5f49fa33bee 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -1207,6 +1207,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v5_0_0_unified_ring_get_rptr, .get_wptr = vcn_v5_0_0_unified_ring_get_wptr, .set_wptr = vcn_v5_0_0_unified_ring_set_wptr, -- cgit v1.2.3 From 8f4954722eab88e10c4ea0c0d3b1269c31421d3a Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:45:36 -0400 Subject: drm/amdgpu/vcn: set no_user_fence for VCN v5.0.1 enc ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VCN encoder and decoder rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: 346492f30ce3 ("drm/amdgpu: Add VCN_5_0_1 support") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit e16be95a2c3ee712b142cb27d2dca0b461181359) --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index c28c6aff17aa..54fbf8d73ca6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -1419,6 +1419,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v5_0_1_unified_ring_get_rptr, .get_wptr = vcn_v5_0_1_unified_ring_get_wptr, .set_wptr = vcn_v5_0_1_unified_ring_set_wptr, -- cgit v1.2.3 From ed9d2832b09eecfe6055833c925d586ce0dda70a Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:45:36 -0400 Subject: drm/amdgpu/vcn: set no_user_fence for VCN v5.0.2 enc ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VCN encoder and decoder rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: 8433398c789c ("drm/amdgpu: Add VCN v5_0_2") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit 48fc78c31ea7fec63100a772f863cf51b2f8cd0a) --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c index c3d3cc023058..bbc172db91a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c @@ -994,6 +994,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_2_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v5_0_2_unified_ring_get_rptr, .get_wptr = vcn_v5_0_2_unified_ring_get_wptr, .set_wptr = vcn_v5_0_2_unified_ring_set_wptr, -- cgit v1.2.3 From e5f612dc91650561fe2b5b76dd6d2898ec9ad480 Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:46:10 -0400 Subject: drm/amdgpu/jpeg: set no_user_fence for JPEG v2.0 ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JPEG rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: 6ac27241106b ("drm/amdgpu: add JPEG v2.0 function supports") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit 96179da0c6b059eb31706a0abe8dd6381c533143) --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 9fe8d10ab270..cffb1e6bab35 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -802,6 +802,7 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v2_0_dec_ring_get_rptr, .get_wptr = jpeg_v2_0_dec_ring_get_wptr, .set_wptr = jpeg_v2_0_dec_ring_set_wptr, -- cgit v1.2.3 From 79405e774ede411c6b47ed41c651e40b92de64a2 Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:46:10 -0400 Subject: drm/amdgpu/jpeg: set no_user_fence for JPEG v2.5 ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JPEG rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: 14f43e8f88c5 ("drm/amdgpu: move JPEG2.5 out from VCN2.5") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit 3216a7f4e2642bda5fd14f57586e835ae9202587) --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 20983f126b49..13a6e24c624a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -693,6 +693,7 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, @@ -724,6 +725,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, -- cgit v1.2.3 From a2baf12eec41f246689e6a3f8619af1200031576 Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:46:10 -0400 Subject: drm/amdgpu/jpeg: set no_user_fence for JPEG v3.0 ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JPEG rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: dfd57dbf44dd ("drm/amdgpu: add JPEG3.0 support for Sienna_Cichlid") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit 4d7d774f100efb5089c86a1fb8c5bf47c63fc9ef) --- drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 98f5e0622bc5..d0445df39d2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -594,6 +594,7 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v3_0_dec_ring_get_rptr, .get_wptr = jpeg_v3_0_dec_ring_get_wptr, .set_wptr = jpeg_v3_0_dec_ring_set_wptr, -- cgit v1.2.3 From e7e90b5839aeb8805ec83bb4da610b8dab8e184d Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:46:11 -0400 Subject: drm/amdgpu/jpeg: set no_user_fence for JPEG v4.0 ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JPEG rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: b13111de32a9 ("drm/amdgpu/jpeg: add jpeg support for VCN4_0_0") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit 8d0cac9478a3f046279c657d6a2545de49ae675a) --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 0bd83820dd20..6fd4238a8471 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -759,6 +759,7 @@ static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v4_0_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_dec_ring_set_wptr, -- cgit v1.2.3 From 83e37c0987ca92f9e87789b46dd311dcf5a4a6c8 Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:46:11 -0400 Subject: drm/amdgpu/jpeg: set no_user_fence for JPEG v4.0.3 ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JPEG rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: e684e654eba9 ("drm/amdgpu/jpeg: add jpeg support for VCN4_0_3") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit 2f6afc97d259d530f4f86c7743efbc573a8da927) --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 82abe181c730..0c746580de11 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -1219,6 +1219,7 @@ static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, -- cgit v1.2.3 From b65b7f3f3c18f797f81a2af7c97e2079900ad6db Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:46:11 -0400 Subject: drm/amdgpu/jpeg: set no_user_fence for JPEG v4.0.5 ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JPEG rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: 8f98a715da8e ("drm/amdgpu/jpeg: add jpeg support for VCN4_0_5") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit f05d0a4f21fc720116d6e238f23308b199891058) --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 54fd9c800c40..a43582b9c876 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -804,6 +804,7 @@ static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v4_0_5_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_5_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_5_dec_ring_set_wptr, -- cgit v1.2.3 From ea7c61c5f895e8f9ea0ffffa180498ef9c740152 Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:46:11 -0400 Subject: drm/amdgpu/jpeg: set no_user_fence for JPEG v5.0.0 ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JPEG rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: dfad65c65728 ("drm/amdgpu: Add JPEG5 support") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit 0f43893d3cd478fa57836697525b338817c9c23d) --- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index 46bf15dce2bd..72a4b2d0676f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -680,6 +680,7 @@ static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, -- cgit v1.2.3 From 2f8e3da71a1b469b6e157aa3972f1448b3157840 Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:46:11 -0400 Subject: drm/amdgpu/jpeg: set no_user_fence for JPEG v5.0.1 ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JPEG rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: b8f57b69942b ("drm/amdgpu: Add JPEG5_0_1 support") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit 742a98e2e81702df8fe1b1eccee5223220a03dc2) --- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index edecbfe66c79..250316704dfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -884,6 +884,7 @@ static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v5_0_1_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_1_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_1_dec_ring_set_wptr, -- cgit v1.2.3 From 8068519c7e78819f88e1c08fe027efd5e468609d Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:46:11 -0400 Subject: drm/amdgpu/jpeg: set no_user_fence for JPEG v5.0.2 ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JPEG rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: 855e3e19f69c ("drm/amdgpu: Add JPEG_v5_0_2 IP block") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit 4ec1c402fb0fb39511136c5fc874788542c476bc) --- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c index 285c459379c4..7a4ecea6b39a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c @@ -703,6 +703,7 @@ static const struct amd_ip_funcs jpeg_v5_0_2_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v5_0_2_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v5_0_2_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_2_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_2_dec_ring_set_wptr, -- cgit v1.2.3 From 3b0ea2021351b6b813b34fac940957f1f4fad85b Mon Sep 17 00:00:00 2001 From: Yinjie Yao Date: Mon, 27 Apr 2026 11:46:11 -0400 Subject: drm/amdgpu/jpeg: set no_user_fence for JPEG v5.3.0 ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JPEG rings do not support 64-bit user fence writes, reject CS submissions with user fences. Fixes: 4aeaf3cbfa9f ("drm/amdgpu/jpeg: Add jpeg 5.3.0 support") Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Yinjie Yao Signed-off-by: Alex Deucher (cherry picked from commit 86ac011ae234c03fb872f4945913391ea1d8862e) --- drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c index 1821dced936f..e7546816baba 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c @@ -661,6 +661,7 @@ static const struct amd_ip_funcs jpeg_v5_3_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v5_3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v5_3_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_3_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_3_0_dec_ring_set_wptr, -- cgit v1.2.3 From 8f935acbc18ff7ad09cb812528b28c59c78f10f9 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Mon, 27 Apr 2026 20:06:57 +0800 Subject: drm/amdgpu: clean up the userq unmap error handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu_userq_unmap_helper() already handles the unmap error case. Signed-off-by: Prike Liang Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 66cb6579990b633ccc7300c27011d837b9a58da0) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index d9b9c03267c0..de140a8ed135 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -656,12 +656,6 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que #endif amdgpu_userq_detect_and_reset_queues(uq_mgr); r = amdgpu_userq_unmap_helper(queue); - /*TODO: It requires a reset for userq hw unmap error*/ - if (r) { - drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n"); - queue->state = AMDGPU_USERQ_STATE_HUNG; - } - atomic_dec(&uq_mgr->userq_count[queue->queue_type]); amdgpu_userq_cleanup(queue); mutex_unlock(&uq_mgr->userq_mutex); -- cgit v1.2.3 From 47a5dfc8add4e60ff1ddc312f79998e70cbb0c09 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 27 Apr 2026 12:53:30 +0530 Subject: drm/amd/pm: Add fine grained flag to SMU v13.0.6 Gfx clock is fine grained on SMU v13.0.6/12 SOCs. Add the flag to report clock frequencies correctly. Fixes: 7380228401c4 ("drm/amd/pm: Use generic dpm table for SMUv13 SOCs") Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher (cherry picked from commit d4871d837bbf70173f63426a84fa80b39e408b9e) --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index cd0a23f432ff..0df8c05a7fce 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -1129,6 +1129,7 @@ static int smu_v13_0_6_set_default_dpm_table(struct smu_context *smu) /* gfxclk dpm table setup */ dpm_table = &dpm_context->dpm_tables.gfx_table; dpm_table->clk_type = SMU_GFXCLK; + dpm_table->flags = SMU_DPM_TABLE_FINE_GRAINED; if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT)) { /* In the case of gfxclk, only fine-grained dpm is honored. * Get min/max values from FW. -- cgit v1.2.3 From e6e9faba8100628990cccd13f0f044a648c303cf Mon Sep 17 00:00:00 2001 From: Benjamin Cheng Date: Mon, 13 Apr 2026 09:22:15 -0400 Subject: drm/amdgpu/vcn3: Avoid overflow on msg bound check As pointed out by SDL, the previous condition may be vulnerable to overflow. Fixes: b193019860d6 ("drm/amdgpu/vcn3: Prevent OOB reads when parsing dec msg") Cc: SDL Signed-off-by: Benjamin Cheng Reviewed-by: Ruijing Dong Signed-off-by: Alex Deucher (cherry picked from commit db00257ac9e4a51eb2515aaea161a019f7125e10) --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 4924da5af5e7..81bba3ec2a93 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1973,6 +1973,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { uint32_t offset, size, *create; + uint64_t buf_end; if (msg[0] != RDECODE_MESSAGE_CREATE) continue; @@ -1980,7 +1981,8 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, offset = msg[1]; size = msg[2]; - if (size < 4 || offset + size > end - addr) { + if (size < 4 || check_add_overflow(offset, size, &buf_end) || + buf_end > end - addr) { DRM_ERROR("VCN message buffer exceeds BO bounds!\n"); r = -EINVAL; goto out; -- cgit v1.2.3 From 65bce27ea6192320448c30267ffc17ffa094e713 Mon Sep 17 00:00:00 2001 From: Benjamin Cheng Date: Mon, 13 Apr 2026 09:22:15 -0400 Subject: drm/amdgpu/vcn4: Avoid overflow on msg bound check As pointed out by SDL, the previous condition may be vulnerable to overflow. Fixes: 0a78f2bac142 ("drm/amdgpu/vcn4: Prevent OOB reads when parsing dec msg") Cc: SDL Signed-off-by: Benjamin Cheng Reviewed-by: Ruijing Dong Signed-off-by: Alex Deucher (cherry picked from commit 3c5367d950140d4ec7af830b2268a5a6fdaa3885) --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index bbdd017cbafb..ff7269bafae8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1889,6 +1889,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { uint32_t offset, size, *create; + uint64_t buf_end; if (msg[0] != RDECODE_MESSAGE_CREATE) continue; @@ -1896,7 +1897,8 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, offset = msg[1]; size = msg[2]; - if (size < 4 || offset + size > end - addr) { + if (size < 4 || check_add_overflow(offset, size, &buf_end) || + buf_end > end - addr) { DRM_ERROR("VCN message buffer exceeds BO bounds!\n"); r = -EINVAL; goto out; -- cgit v1.2.3 From 55ea968389172306341a6600a2acb759862d366a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 24 Apr 2026 07:30:56 -0600 Subject: io_uring/kbuf: kill dead struct io_buffer_list 'nr_entries' member This is only ever assigned, never used. The only used part is the calculated mask, which is used for indexing. Kill 'nr_entries'. Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 1 - io_uring/kbuf.h | 1 - 2 files changed, 2 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 8da2ff798170..43e4f8615fe8 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -680,7 +680,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) } #endif - bl->nr_entries = reg.ring_entries; bl->mask = reg.ring_entries - 1; bl->flags |= IOBL_BUF_RING; bl->buf_ring = br; diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index bf15e26520d3..abf7052b556e 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -27,7 +27,6 @@ struct io_buffer_list { __u16 bgid; /* below is for ring provided buffers */ - __u16 nr_entries; __u16 head; __u16 mask; -- cgit v1.2.3 From 7deba791ad495ce1d7921683f4f7d1190fa210d1 Mon Sep 17 00:00:00 2001 From: Martin Michaelis Date: Thu, 23 Apr 2026 15:54:11 -0600 Subject: io_uring/kbuf: support min length left for incremental buffers Incrementally consumed buffer rings are generally fully consumed, but it's quite possible that the application has a minimum size it needs to meet to avoid truncation. Currently that minimum limit is 1 byte, but this should be a setting that is the hands of the application. For recvmsg multishot, a prime use case for incrementally consumed buffers, the application may get spurious -EFAULT returned at the end of an incrementally consumed buffer, as less space is available than the headers need. Grab a u32 field in struct io_uring_buf_reg, which the application can use to inform the kernel of the minimum size that should be available in an incrementally consumed buffer. If less than that is available, the current buffer is fully processed and the next one will be picked. Cc: stable@vger.kernel.org Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption") Link: https://github.com/axboe/liburing/issues/1433 Signed-off-by: Martin Michaelis [axboe: write commit message, change io_buffer_list member name] Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 ++- io_uring/kbuf.c | 8 +++++++- io_uring/kbuf.h | 7 +++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 17ac1b785440..909fb7aea638 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -905,7 +905,8 @@ struct io_uring_buf_reg { __u32 ring_entries; __u16 bgid; __u16 flags; - __u64 resv[3]; + __u32 min_left; + __u32 resv[5]; }; /* argument for IORING_REGISTER_PBUF_STATUS */ diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 43e4f8615fe8..63061aa1cab9 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -47,7 +47,7 @@ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len) this_len = min_t(u32, len, buf_len); buf_len -= this_len; /* Stop looping for invalid buffer length of 0 */ - if (buf_len || !this_len) { + if (buf_len > bl->min_left_sub_one || !this_len) { WRITE_ONCE(buf->addr, READ_ONCE(buf->addr) + this_len); WRITE_ONCE(buf->len, buf_len); return false; @@ -637,6 +637,10 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (reg.ring_entries >= 65536) return -EINVAL; + /* minimum left byte count is a property of incremental buffers */ + if (!(reg.flags & IOU_PBUF_RING_INC) && reg.min_left) + return -EINVAL; + bl = io_buffer_get_list(ctx, reg.bgid); if (bl) { /* if mapped buffer ring OR classic exists, don't allow */ @@ -683,6 +687,8 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) bl->mask = reg.ring_entries - 1; bl->flags |= IOBL_BUF_RING; bl->buf_ring = br; + if (reg.min_left) + bl->min_left_sub_one = reg.min_left - 1; if (reg.flags & IOU_PBUF_RING_INC) bl->flags |= IOBL_INC; ret = io_buffer_add_list(ctx, bl, reg.bgid); diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index abf7052b556e..401773e1ef80 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -32,6 +32,13 @@ struct io_buffer_list { __u16 flags; + /* + * minimum required amount to be left to reuse an incrementally + * consumed buffer. If less than this is left at consumption time, + * buffer is done and head is incremented to the next buffer. + */ + __u32 min_left_sub_one; + struct io_mapped_region region; }; -- cgit v1.2.3 From df8599ee18c0e5fe343ffe0b4c379636b8bb839a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Apr 2026 14:42:18 -0600 Subject: io_uring/napi: cap busy_poll_to 10 msec Currently there's no cap on the maximum amount of time that napi is allowed to poll if no events are found, which can lead to kernel complaints on a task being stuck as there's no conditional rescheduling done within that loop. Just cap it to 10 msec in total, that's already way above any kind of sane value that will reap any benefits, yet low enough that it's nowhere near being able to trigger preemption complaints. Fixes: 8d0c12a80cde ("io-uring: add napi busy poll support") Signed-off-by: Jens Axboe --- io_uring/napi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/io_uring/napi.c b/io_uring/napi.c index 4a10de03e426..8d68366a4b90 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -276,6 +276,8 @@ static int io_napi_register_napi(struct io_ring_ctx *ctx, /* clean the napi list for new settings */ io_napi_free(ctx); WRITE_ONCE(ctx->napi_track_mode, napi->op_param); + /* cap NAPI at 10 msec of spin time */ + napi->busy_poll_to = min(10000, napi->busy_poll_to); WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC); WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll); return 0; -- cgit v1.2.3 From f92d542577db878acfd21cc18dab23d03023b217 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 10 Apr 2026 15:29:50 -0400 Subject: selinux: fix avdcache auditing The per-task avdcache was incorrectly saving and reusing the audited vector computed by avc_audit_required() rather than recomputing based on the currently requested permissions and distinguishing the denied versus allowed cases. As a result, some permission checks were not being audited, e.g. directory write checks after a previously cached directory search check. Cc: stable@vger.kernel.org Fixes: dde3a5d0f4dce ("selinux: move avdcache to per-task security struct") Signed-off-by: Stephen Smalley [PM: line wrap tweaks] Signed-off-by: Paul Moore --- security/selinux/hooks.c | 31 +++++++++++++------------------ security/selinux/include/objsec.h | 4 +--- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 59942d39ada7..0f704380a8c8 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3209,15 +3209,13 @@ static inline int task_avdcache_search(struct task_security_struct *tsec, * @tsec: the task's security state * @isec: the inode associated with the cache entry * @avd: the AVD to cache - * @audited: the permission audit bitmask to cache * - * Update the AVD cache in @tsec with the @avdc and @audited info associated + * Update the AVD cache in @tsec with the @avd info associated * with @isec. */ static inline void task_avdcache_update(struct task_security_struct *tsec, struct inode_security_struct *isec, - struct av_decision *avd, - u32 audited) + struct av_decision *avd) { int spot; @@ -3229,9 +3227,7 @@ static inline void task_avdcache_update(struct task_security_struct *tsec, spot = (tsec->avdcache.dir_spot + 1) & (TSEC_AVDC_DIR_SIZE - 1); tsec->avdcache.dir_spot = spot; tsec->avdcache.dir[spot].isid = isec->sid; - tsec->avdcache.dir[spot].audited = audited; - tsec->avdcache.dir[spot].allowed = avd->allowed; - tsec->avdcache.dir[spot].permissive = avd->flags & AVD_FLAGS_PERMISSIVE; + tsec->avdcache.dir[spot].avd = *avd; tsec->avdcache.permissive_neveraudit = (avd->flags == (AVD_FLAGS_PERMISSIVE|AVD_FLAGS_NEVERAUDIT)); } @@ -3252,6 +3248,7 @@ static int selinux_inode_permission(struct inode *inode, int requested) struct task_security_struct *tsec; struct inode_security_struct *isec; struct avdc_entry *avdc; + struct av_decision avd, *avdp = &avd; int rc, rc2; u32 audited, denied; @@ -3273,23 +3270,21 @@ static int selinux_inode_permission(struct inode *inode, int requested) rc = task_avdcache_search(tsec, isec, &avdc); if (likely(!rc)) { /* Cache hit. */ - audited = perms & avdc->audited; - denied = perms & ~avdc->allowed; - if (unlikely(denied && enforcing_enabled() && - !avdc->permissive)) + avdp = &avdc->avd; + denied = perms & ~avdp->allowed; + if (unlikely(denied) && enforcing_enabled() && + !(avdp->flags & AVD_FLAGS_PERMISSIVE)) rc = -EACCES; } else { - struct av_decision avd; - /* Cache miss. */ rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, - perms, 0, &avd); - audited = avc_audit_required(perms, &avd, rc, - (requested & MAY_ACCESS) ? FILE__AUDIT_ACCESS : 0, - &denied); - task_avdcache_update(tsec, isec, &avd, audited); + perms, 0, avdp); + task_avdcache_update(tsec, isec, avdp); } + audited = avc_audit_required(perms, avdp, rc, + (requested & MAY_ACCESS) ? + FILE__AUDIT_ACCESS : 0, &denied); if (likely(!audited)) return rc; diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index b19e5d978e82..3c0a16ec978b 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -32,9 +32,7 @@ struct avdc_entry { u32 isid; /* inode SID */ - u32 allowed; /* allowed permission bitmask */ - u32 audited; /* audited permission bitmask */ - bool permissive; /* AVC permissive flag */ + struct av_decision avd; /* av decision */ }; struct cred_security_struct { -- cgit v1.2.3 From be102efb832ef7e30e4cd4c2edf22bbf64ddf35a Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 28 Apr 2026 12:52:28 +0100 Subject: ASoC: cs35l56: Fix illegal writes to OTP_MEM registers Mark the OTP_MEM registers as volatile so that regcache_sync() will not attempt to write to them. These registers hold a constant, and originally they were marked as readable non-volatile so that this value would be read into the regmap cache. The problem with this is regcache_sync() issues a write for any cached register that does not have a reg_default. Though these registers are constants and writing them in normal use cannot change OTP, it is illegal for the host to write to them. Fixes: e1830f66f6c6 ("ASoC: cs35l56: Add helper functions for amp calibration") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260428115228.158252-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-shared.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index e05d975ba794..033e56d5e9db 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -108,8 +108,6 @@ int cs35l56_set_patch(struct cs35l56_base *cs35l56_base) EXPORT_SYMBOL_NS_GPL(cs35l56_set_patch, "SND_SOC_CS35L56_SHARED"); static const struct reg_default cs35l56_reg_defaults[] = { - /* no defaults for OTP_MEM - first read populates cache */ - { CS35L56_ASP1_ENABLES1, 0x00000000 }, { CS35L56_ASP1_CONTROL1, 0x00000028 }, { CS35L56_ASP1_CONTROL2, 0x18180200 }, @@ -138,8 +136,6 @@ static const struct reg_default cs35l56_reg_defaults[] = { }; static const struct reg_default cs35l63_reg_defaults[] = { - /* no defaults for OTP_MEM - first read populates cache */ - { CS35L56_ASP1_ENABLES1, 0x00000000 }, { CS35L56_ASP1_CONTROL1, 0x00000028 }, { CS35L56_ASP1_CONTROL2, 0x18180200 }, @@ -282,6 +278,9 @@ static bool cs35l56_common_volatile_reg(unsigned int reg) case CS35L56_GLOBAL_ENABLES: /* owned by firmware */ case CS35L56_BLOCK_ENABLES: /* owned by firmware */ case CS35L56_BLOCK_ENABLES2: /* owned by firmware */ + case CS35L56_OTP_MEM_53: + case CS35L56_OTP_MEM_54: + case CS35L56_OTP_MEM_55: case CS35L56_SYNC_GPIO1_CFG ... CS35L56_ASP2_DIO_GPIO13_CFG: case CS35L56_UPDATE_REGS: case CS35L56_REFCLK_INPUT: /* owned by firmware */ -- cgit v1.2.3 From b89769f936a8fa9e66de72ddc1b71a9745a488e6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 27 Apr 2026 12:06:06 -0700 Subject: net: psp: check for device unregister when creating assoc psp_assoc_device_get_locked() obtains a psp_dev reference via psp_dev_get_for_sock() (which uses psp_dev_tryget() under RCU); it then acquires psd->lock and drops the reference. Before the lock is taken, psp_dev_unregister() can run to completion: take psd->lock, clear out state, unlock, drop the registration reference. The expectation is that the lock prevents device unregistration, but much like with netdevs special care has to be taken when "upgrading" a reference to a locked device. Add the missing check if device is still alive. psp_dev_is_registered() exists already but had no callers, which makes me wonder if I either forgot to add this or lost the check during refactoring... Reported-by: Yiming Qian Fixes: 6b46ca260e22 ("net: psp: add socket security association code") Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260427190606.366101-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/psp/psp_nl.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/psp/psp_nl.c b/net/psp/psp_nl.c index 6afd7707ec12..0cc744a6e1c9 100644 --- a/net/psp/psp_nl.c +++ b/net/psp/psp_nl.c @@ -305,8 +305,13 @@ int psp_assoc_device_get_locked(const struct genl_split_ops *ops, psd = psp_dev_get_for_sock(socket->sk); if (psd) { - err = psp_dev_check_access(psd, genl_info_net(info)); - if (err) { + /* Extra care needed here, psp_dev_get_for_sock() only gives + * us access to struct psp_dev's memory, which is quite weak. + */ + mutex_lock(&psd->lock); + if (!psp_dev_is_registered(psd) || + psp_dev_check_access(psd, genl_info_net(info))) { + mutex_unlock(&psd->lock); psp_dev_put(psd); psd = NULL; } @@ -319,7 +324,6 @@ int psp_assoc_device_get_locked(const struct genl_split_ops *ops, id = info->attrs[PSP_A_ASSOC_DEV_ID]; if (psd) { - mutex_lock(&psd->lock); if (id && psd->id != nla_get_u32(id)) { mutex_unlock(&psd->lock); NL_SET_ERR_MSG_ATTR(info->extack, id, -- cgit v1.2.3 From b718342a7fbaa2dff5fefc31988c07af8c6cbc21 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 27 Apr 2026 12:58:56 -0700 Subject: net: psp: require admin permission for dev-set and key-rotate The dev-set and key-rotate netlink operations modify shared device state (PSP version configuration and cryptographic key material, respectively) but do not require CAP_NET_ADMIN. The only access control is psp_dev_check_access() which merely verifies netns membership. Fixes: 00c94ca2b99e ("psp: base PSP device support") Reviewed-by: Daniel Zahka Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260427195856.401223-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/psp.yaml | 2 ++ net/psp/psp-nl-gen.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/netlink/specs/psp.yaml b/Documentation/netlink/specs/psp.yaml index 100c36cda8e5..bfcd6e4ecb85 100644 --- a/Documentation/netlink/specs/psp.yaml +++ b/Documentation/netlink/specs/psp.yaml @@ -188,6 +188,7 @@ operations: name: dev-set doc: Set the configuration of a PSP device. attribute-set: dev + flags: [admin-perm] do: request: attributes: @@ -207,6 +208,7 @@ operations: name: key-rotate doc: Rotate the device key. attribute-set: dev + flags: [admin-perm] do: request: attributes: diff --git a/net/psp/psp-nl-gen.c b/net/psp/psp-nl-gen.c index 22a48d0fa378..953309952cef 100644 --- a/net/psp/psp-nl-gen.c +++ b/net/psp/psp-nl-gen.c @@ -76,7 +76,7 @@ static const struct genl_split_ops psp_nl_ops[] = { .post_doit = psp_device_unlock, .policy = psp_dev_set_nl_policy, .maxattr = PSP_A_DEV_PSP_VERSIONS_ENA, - .flags = GENL_CMD_CAP_DO, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { .cmd = PSP_CMD_KEY_ROTATE, @@ -85,7 +85,7 @@ static const struct genl_split_ops psp_nl_ops[] = { .post_doit = psp_device_unlock, .policy = psp_key_rotate_nl_policy, .maxattr = PSP_A_DEV_ID, - .flags = GENL_CMD_CAP_DO, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { .cmd = PSP_CMD_RX_ASSOC, -- cgit v1.2.3 From a201aef1a88b675e9eb8487e27d14e2eef3cef80 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Tue, 28 Apr 2026 21:22:49 +0200 Subject: ASoC: codecs: ab8500: Fix casting of private data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ab8500_filter_controls[i].private_value is initialized using .private_value = (unsigned long)&(struct filter_control) {.count = xcount, .min = xmin, .max = xmax} thus it's a pointer to a struct filter_control casted to unsigned long. So to get back that pointer .private_data must be cast back, not its address. Fixes: 679d7abdc754 ("ASoC: codecs: Add AB8500 codec-driver") Signed-off-by: Christian A. Ehrhardt Signed-off-by: Uwe Kleine-König (The Capable Hub) Link: https://patch.msgid.link/20260428192255.2294705-2-u.kleine-koenig@baylibre.com Signed-off-by: Mark Brown --- sound/soc/codecs/ab8500-codec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/ab8500-codec.c b/sound/soc/codecs/ab8500-codec.c index fdda1b747bf7..8ab2e60f80b4 100644 --- a/sound/soc/codecs/ab8500-codec.c +++ b/sound/soc/codecs/ab8500-codec.c @@ -2496,13 +2496,13 @@ static int ab8500_codec_probe(struct snd_soc_component *component) return status; } fc = (struct filter_control *) - &ab8500_filter_controls[AB8500_FILTER_ANC_FIR].private_value; + ab8500_filter_controls[AB8500_FILTER_ANC_FIR].private_value; drvdata->anc_fir_values = (long *)fc->value; fc = (struct filter_control *) - &ab8500_filter_controls[AB8500_FILTER_ANC_IIR].private_value; + ab8500_filter_controls[AB8500_FILTER_ANC_IIR].private_value; drvdata->anc_iir_values = (long *)fc->value; fc = (struct filter_control *) - &ab8500_filter_controls[AB8500_FILTER_SID_FIR].private_value; + ab8500_filter_controls[AB8500_FILTER_SID_FIR].private_value; drvdata->sid_fir_values = (long *)fc->value; snd_soc_dapm_disable_pin(dapm, "ANC Configure Input"); -- cgit v1.2.3 From 576a5d2bad4814c881a829576b1261b9b8159d2b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 26 Apr 2026 10:46:40 -0400 Subject: netfilter: skip recording stale or retransmitted INIT An INIT whose init_tag matches the peer's vtag does not provide new state information. It indicates either: - a stale INIT (after INIT-ACK has already been seen on the same side), or - a retransmitted INIT (after INIT has already been recorded on the same side). In both cases, the INIT must not update ct->proto.sctp.init[] state, since it does not advance the handshake tracking and may otherwise corrupt INIT/INIT-ACK validation logic. Allow INIT processing only when the conntrack entry is newly created (SCTP_CONNTRACK_NONE), or when the init_tag differs from the stored peer vtag. Note it skips the check for the ct with old_state SCTP_CONNTRACK_NONE in nf_conntrack_sctp_packet(), as it is just created in sctp_new() where it set ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag. Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.") Signed-off-by: Xin Long Reviewed-by: Marcelo Ricardo Leitner Acked-by: Florian Westphal Link: https://patch.msgid.link/ee56c3e416452b2a40589a2a85245ac2ad5e9f4b.1777214801.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/netfilter/nf_conntrack_proto_sctp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 645d2c43ebf7..7e10fa65cbdd 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -466,9 +466,13 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, if (!ih) goto out_unlock; - if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]) - ct->proto.sctp.init[!dir] = 0; - ct->proto.sctp.init[dir] = 1; + /* Do not record INIT matching peer vtag (stale or retransmitted INIT). */ + if (old_state == SCTP_CONNTRACK_NONE || + ct->proto.sctp.vtag[!dir] != ih->init_tag) { + if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]) + ct->proto.sctp.init[!dir] = 0; + ct->proto.sctp.init[dir] = 1; + } pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; -- cgit v1.2.3 From 8a92cb475ca90d84db769e4d4383e631ace0d6e5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 26 Apr 2026 10:46:41 -0400 Subject: sctp: discard stale INIT after handshake completion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After an association reaches ESTABLISHED, the peer’s init_tag is already known from the handshake. Any subsequent INIT with the same init_tag is not a valid restart, but a delayed or duplicate INIT. Drop such INIT chunks in sctp_sf_do_unexpected_init() instead of processing them as new association attempts. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Link: https://patch.msgid.link/5788c76c1ee122a3ed00189e88dcf9df1fba226c.1777214801.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7b823d759141..8e89a870780c 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1556,6 +1556,12 @@ static enum sctp_disposition sctp_sf_do_unexpected_init( /* Tag the variable length parameters. */ chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr)); + if (asoc->state >= SCTP_STATE_ESTABLISHED) { + /* Discard INIT matching peer vtag after handshake completion (stale INIT). */ + if (ntohl(chunk->subh.init_hdr->init_tag) == asoc->peer.i.init_tag) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + } + /* Verify the INIT chunk before processing it. */ err_chunk = NULL; if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, -- cgit v1.2.3 From aa6c6d9ee064aabfede4402fd1283424e649ca19 Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Sun, 26 Apr 2026 09:53:51 -0700 Subject: bareudp: fix NULL pointer dereference in bareudp_fill_metadata_dst() bareudp_fill_metadata_dst() passes bareudp->sock to udp_tunnel6_dst_lookup() in the IPv6 path without a NULL check. The socket is only created in bareudp_open() and NULLed in bareudp_stop(), so calling this function while the device is down triggers a NULL dereference via sock->sk. BUG: kernel NULL pointer dereference, address: 0000000000000018 RIP: 0010:udp_tunnel6_dst_lookup (net/ipv6/ip6_udp_tunnel.c:160) Call Trace: bareudp_fill_metadata_dst (drivers/net/bareudp.c:532) do_execute_actions (net/openvswitch/actions.c:901) ovs_execute_actions (net/openvswitch/actions.c:1589) ovs_packet_cmd_execute (net/openvswitch/datapath.c:700) genl_family_rcv_msg_doit (net/netlink/genetlink.c:1114) genl_rcv_msg (net/netlink/genetlink.c:1209) netlink_rcv_skb (net/netlink/af_netlink.c:2550) Add a NULL check returning -ESHUTDOWN, consistent with the xmit paths in the same driver. Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260426165350.1663137-2-bestswngs@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 0df3208783ad..da5866ba0699 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -529,6 +529,9 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, struct in6_addr saddr; struct socket *sock = rcu_dereference(bareudp->sock); + if (!sock) + return -ESHUTDOWN; + dst = udp_tunnel6_dst_lookup(skb, dev, bareudp->net, sock, 0, &saddr, &info->key, sport, bareudp->port, info->key.tos, -- cgit v1.2.3 From 44967ac3785ebef6442377708925181d4a0eb1c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Apr 2026 08:36:02 +0000 Subject: net/sched: sch_cake: annotate data-races in cake_dump_stats() (I) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cake_dump_stats() runs without qdisc spinlock being held. In this first patch, I add READ_ONCE()/WRITE_ONCE() annotations for the following fields: - way_hits - way_misses - way_collisions - sparse_flow_count - decaying_flow_count Other annotations are added in following patches, to ease code review. Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: Eric Dumazet Acked-by: "Toke Høiland-Jørgensen" Link: https://patch.msgid.link/20260427083606.459355-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 02e1fa4577ae..bcc601fc486b 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -813,7 +813,7 @@ skip_hash: i++, k = (k + 1) % CAKE_SET_WAYS) { if (q->tags[outer_hash + k] == flow_hash) { if (i) - q->way_hits++; + WRITE_ONCE(q->way_hits, q->way_hits + 1); if (!q->flows[outer_hash + k].set) { /* need to increment host refcnts */ @@ -831,7 +831,7 @@ skip_hash: for (i = 0; i < CAKE_SET_WAYS; i++, k = (k + 1) % CAKE_SET_WAYS) { if (!q->flows[outer_hash + k].set) { - q->way_misses++; + WRITE_ONCE(q->way_misses, q->way_misses + 1); allocate_src = cake_dsrc(flow_mode); allocate_dst = cake_ddst(flow_mode); goto found; @@ -841,7 +841,7 @@ skip_hash: /* With no empty queues, default to the original * queue, accept the collision, update the host tags. */ - q->way_collisions++; + WRITE_ONCE(q->way_collisions, q->way_collisions + 1); allocate_src = cake_dsrc(flow_mode); allocate_dst = cake_ddst(flow_mode); @@ -1917,11 +1917,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (!flow->set) { list_add_tail(&flow->flowchain, &b->new_flows); } else { - b->decaying_flow_count--; + WRITE_ONCE(b->decaying_flow_count, b->decaying_flow_count - 1); list_move_tail(&flow->flowchain, &b->new_flows); } flow->set = CAKE_SET_SPARSE; - b->sparse_flow_count++; + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count + 1); flow->deficit = cake_get_flow_quantum(b, flow, q->config->flow_mode); } else if (flow->set == CAKE_SET_SPARSE_WAIT) { @@ -1929,7 +1929,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, * in the bulk rotation. */ flow->set = CAKE_SET_BULK; - b->sparse_flow_count--; + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); b->bulk_flow_count++; cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode); @@ -2149,7 +2149,7 @@ retry: */ if (flow->set == CAKE_SET_SPARSE) { if (flow->head) { - b->sparse_flow_count--; + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); b->bulk_flow_count++; cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode); @@ -2192,27 +2192,27 @@ retry: cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode); cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode); - b->decaying_flow_count++; + WRITE_ONCE(b->decaying_flow_count, b->decaying_flow_count + 1); } else if (flow->set == CAKE_SET_SPARSE || flow->set == CAKE_SET_SPARSE_WAIT) { - b->sparse_flow_count--; - b->decaying_flow_count++; + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); + WRITE_ONCE(b->decaying_flow_count, b->decaying_flow_count + 1); } flow->set = CAKE_SET_DECAYING; } else { /* remove empty queue from the flowchain */ list_del_init(&flow->flowchain); if (flow->set == CAKE_SET_SPARSE || - flow->set == CAKE_SET_SPARSE_WAIT) - b->sparse_flow_count--; - else if (flow->set == CAKE_SET_BULK) { + flow->set == CAKE_SET_SPARSE_WAIT) { + WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); + } else if (flow->set == CAKE_SET_BULK) { b->bulk_flow_count--; cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode); cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode); - } else - b->decaying_flow_count--; - + } else { + WRITE_ONCE(b->decaying_flow_count, b->decaying_flow_count - 1); + } flow->set = CAKE_SET_NONE; } goto begin; @@ -3050,12 +3050,12 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) PUT_TSTAT_U32(BASE_DELAY_US, ktime_to_us(ns_to_ktime(b->base_delay))); - PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits); - PUT_TSTAT_U32(WAY_MISSES, b->way_misses); - PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions); + PUT_TSTAT_U32(WAY_INDIRECT_HITS, READ_ONCE(b->way_hits)); + PUT_TSTAT_U32(WAY_MISSES, READ_ONCE(b->way_misses)); + PUT_TSTAT_U32(WAY_COLLISIONS, READ_ONCE(b->way_collisions)); - PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count + - b->decaying_flow_count); + PUT_TSTAT_U32(SPARSE_FLOWS, READ_ONCE(b->sparse_flow_count) + + READ_ONCE(b->decaying_flow_count)); PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count); PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count); PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen); -- cgit v1.2.3 From 91a96427b93b9ba27413077b7e825d2fefbfa134 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Apr 2026 08:36:03 +0000 Subject: net/sched: sch_cake: annotate data-races in cake_dump_stats() (II) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cake_dump_stats() runs without qdisc spinlock being held. In this second patch, I add READ_ONCE()/WRITE_ONCE() annotations for the following fields: - bulk_flow_count - unresponsive_flow_count - max_skblen - flow_quantum Other annotations are added in following patches, to ease code review. Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: Eric Dumazet Acked-by: "Toke Høiland-Jørgensen" Link: https://patch.msgid.link/20260427083606.459355-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index bcc601fc486b..d7465ee4c550 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1590,7 +1590,8 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) } if (cobalt_queue_full(&flow->cvars, &b->cparams, now)) - b->unresponsive_flow_count++; + WRITE_ONCE(b->unresponsive_flow_count, + b->unresponsive_flow_count + 1); len = qdisc_pkt_len(skb); q->buffer_used -= skb->truesize; @@ -1795,7 +1796,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } if (unlikely(len > b->max_skblen)) - b->max_skblen = len; + WRITE_ONCE(b->max_skblen, len); if (qdisc_pkt_segs(skb) > 1 && q->config->rate_flags & CAKE_FLAG_SPLIT_GSO) { struct sk_buff *segs, *nskb; @@ -1930,7 +1931,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, */ flow->set = CAKE_SET_BULK; WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); - b->bulk_flow_count++; + WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count + 1); cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode); cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode); @@ -2150,7 +2151,7 @@ retry: if (flow->set == CAKE_SET_SPARSE) { if (flow->head) { WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); - b->bulk_flow_count++; + WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count + 1); cake_inc_srchost_bulk_flow_count(b, flow, q->config->flow_mode); cake_inc_dsthost_bulk_flow_count(b, flow, q->config->flow_mode); @@ -2177,7 +2178,8 @@ retry: if (!skb) { /* this queue was actually empty */ if (cobalt_queue_empty(&flow->cvars, &b->cparams, now)) - b->unresponsive_flow_count--; + WRITE_ONCE(b->unresponsive_flow_count, + b->unresponsive_flow_count - 1); if (flow->cvars.p_drop || flow->cvars.count || ktime_before(now, flow->cvars.drop_next)) { @@ -2187,7 +2189,7 @@ retry: list_move_tail(&flow->flowchain, &b->decaying_flows); if (flow->set == CAKE_SET_BULK) { - b->bulk_flow_count--; + WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count - 1); cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode); cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode); @@ -2206,7 +2208,7 @@ retry: flow->set == CAKE_SET_SPARSE_WAIT) { WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count - 1); } else if (flow->set == CAKE_SET_BULK) { - b->bulk_flow_count--; + WRITE_ONCE(b->bulk_flow_count, b->bulk_flow_count - 1); cake_dec_srchost_bulk_flow_count(b, flow, q->config->flow_mode); cake_dec_dsthost_bulk_flow_count(b, flow, q->config->flow_mode); @@ -2329,9 +2331,9 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, u8 rate_shft = 0; u64 rate_ns = 0; - b->flow_quantum = 1514; if (rate) { - b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL); + WRITE_ONCE(b->flow_quantum, + max(min(rate >> 12, 1514ULL), 300ULL)); rate_shft = 34; rate_ns = ((u64)NSEC_PER_SEC) << rate_shft; rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate)); @@ -2339,8 +2341,10 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, rate_ns >>= 1; rate_shft--; } - } /* else unlimited, ie. zero delay */ - + } else { + /* else unlimited, ie. zero delay */ + WRITE_ONCE(b->flow_quantum, 1514); + } b->tin_rate_bps = rate; b->tin_rate_ns = rate_ns; b->tin_rate_shft = rate_shft; @@ -3056,11 +3060,11 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) PUT_TSTAT_U32(SPARSE_FLOWS, READ_ONCE(b->sparse_flow_count) + READ_ONCE(b->decaying_flow_count)); - PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count); - PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count); - PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen); + PUT_TSTAT_U32(BULK_FLOWS, READ_ONCE(b->bulk_flow_count)); + PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, READ_ONCE(b->unresponsive_flow_count)); + PUT_TSTAT_U32(MAX_SKBLEN, READ_ONCE(b->max_skblen)); - PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum); + PUT_TSTAT_U32(FLOW_QUANTUM, READ_ONCE(b->flow_quantum)); nla_nest_end(d->skb, ts); } -- cgit v1.2.3 From 276a98a434964088fccd4745db5b34d6e831e358 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Apr 2026 08:36:04 +0000 Subject: net/sched: sch_cake: annotate data-races in cake_dump_stats() (III) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cake_dump_stats() runs without qdisc spinlock being held. In this third patch, I add READ_ONCE()/WRITE_ONCE() annotations for the following fields: - packets - tin_dropped - tin_ecn_mark - ack_drops - peak_delay - avge_delay - base_delay Other annotations are added in following patches, to ease code review. Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: Eric Dumazet Acked-by: "Toke Høiland-Jørgensen" Link: https://patch.msgid.link/20260427083606.459355-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index d7465ee4c550..c5aae31565e9 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1600,7 +1600,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) sch->qstats.backlog -= len; flow->dropped++; - b->tin_dropped++; + WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1); if (q->config->rate_flags & CAKE_FLAG_INGRESS) cake_advance_shaper(q, b, skb, now, true); @@ -1820,7 +1820,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, numsegs++; slen += segs->len; q->buffer_used += segs->truesize; - b->packets++; + WRITE_ONCE(b->packets, b->packets + 1); } /* stats */ @@ -1844,7 +1844,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, ack = cake_ack_filter(q, flow); if (ack) { - b->ack_drops++; + WRITE_ONCE(b->ack_drops, b->ack_drops + 1); sch->qstats.drops++; ack_pkt_len = qdisc_pkt_len(ack); b->bytes += ack_pkt_len; @@ -1860,7 +1860,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } /* stats */ - b->packets++; + WRITE_ONCE(b->packets, b->packets + 1); b->bytes += len - ack_pkt_len; b->backlogs[idx] += len - ack_pkt_len; b->tin_backlog += len - ack_pkt_len; @@ -2236,7 +2236,7 @@ retry: b->tin_deficit -= len; } flow->dropped++; - b->tin_dropped++; + WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1); qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_qstats_drop(sch); qdisc_dequeue_drop(sch, skb, reason); @@ -2244,17 +2244,19 @@ retry: goto retry; } - b->tin_ecn_mark += !!flow->cvars.ecn_marked; + WRITE_ONCE(b->tin_ecn_mark, b->tin_ecn_mark + !!flow->cvars.ecn_marked); qdisc_bstats_update(sch, skb); WRITE_ONCE(q->last_active, now); /* collect delay stats */ delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb))); - b->avge_delay = cake_ewma(b->avge_delay, delay, 8); - b->peak_delay = cake_ewma(b->peak_delay, delay, - delay > b->peak_delay ? 2 : 8); - b->base_delay = cake_ewma(b->base_delay, delay, - delay < b->base_delay ? 2 : 8); + WRITE_ONCE(b->avge_delay, cake_ewma(b->avge_delay, delay, 8)); + WRITE_ONCE(b->peak_delay, + cake_ewma(b->peak_delay, delay, + delay > b->peak_delay ? 2 : 8)); + WRITE_ONCE(b->base_delay, + cake_ewma(b->base_delay, delay, + delay < b->base_delay ? 2 : 8)); len = cake_advance_shaper(q, b, skb, now, false); flow->deficit -= len; @@ -3042,17 +3044,17 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) PUT_TSTAT_U32(INTERVAL_US, ktime_to_us(ns_to_ktime(b->cparams.interval))); - PUT_TSTAT_U32(SENT_PACKETS, b->packets); - PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped); - PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark); - PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops); + PUT_TSTAT_U32(SENT_PACKETS, READ_ONCE(b->packets)); + PUT_TSTAT_U32(DROPPED_PACKETS, READ_ONCE(b->tin_dropped)); + PUT_TSTAT_U32(ECN_MARKED_PACKETS, READ_ONCE(b->tin_ecn_mark)); + PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, READ_ONCE(b->ack_drops)); PUT_TSTAT_U32(PEAK_DELAY_US, - ktime_to_us(ns_to_ktime(b->peak_delay))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->peak_delay)))); PUT_TSTAT_U32(AVG_DELAY_US, - ktime_to_us(ns_to_ktime(b->avge_delay))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->avge_delay)))); PUT_TSTAT_U32(BASE_DELAY_US, - ktime_to_us(ns_to_ktime(b->base_delay))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->base_delay)))); PUT_TSTAT_U32(WAY_INDIRECT_HITS, READ_ONCE(b->way_hits)); PUT_TSTAT_U32(WAY_MISSES, READ_ONCE(b->way_misses)); -- cgit v1.2.3 From 8fab48d87745a6ab1cec594b8d5865d9ae2db879 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Apr 2026 08:36:05 +0000 Subject: net/sched: sch_cake: annotate data-races in cake_dump_stats() (IV) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cake_dump_stats() runs without qdisc spinlock being held. In this fourth patch, I add READ_ONCE()/WRITE_ONCE() annotations for the following fields: - avg_peak_bandwidth - buffer_limit - buffer_max_used - avg_netoff - max_netlen - max_adjlen - min_netlen - min_adjlen - active_queues - tin_rate_bps - bytes - tin_backlog Other annotations are added in following patch, to ease code review. Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: Eric Dumazet Acked-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20260427083606.459355-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 90 +++++++++++++++++++++++++++------------------------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index c5aae31565e9..975f5d6d6982 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1379,9 +1379,9 @@ static u32 cake_calc_overhead(struct cake_sched_data *qd, u32 len, u32 off) len -= off; if (qd->max_netlen < len) - qd->max_netlen = len; + WRITE_ONCE(qd->max_netlen, len); if (qd->min_netlen > len) - qd->min_netlen = len; + WRITE_ONCE(qd->min_netlen, len); len += q->rate_overhead; @@ -1401,9 +1401,9 @@ static u32 cake_calc_overhead(struct cake_sched_data *qd, u32 len, u32 off) } if (qd->max_adjlen < len) - qd->max_adjlen = len; + WRITE_ONCE(qd->max_adjlen, len); if (qd->min_adjlen > len) - qd->min_adjlen = len; + WRITE_ONCE(qd->min_adjlen, len); return len; } @@ -1416,7 +1416,7 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) u16 segs = qdisc_pkt_segs(skb); u32 len = qdisc_pkt_len(skb); - q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8); + WRITE_ONCE(q->avg_netoff, cake_ewma(q->avg_netoff, off << 16, 8)); if (segs == 1) return cake_calc_overhead(q, len, off); @@ -1596,7 +1596,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) len = qdisc_pkt_len(skb); q->buffer_used -= skb->truesize; b->backlogs[idx] -= len; - b->tin_backlog -= len; + WRITE_ONCE(b->tin_backlog, b->tin_backlog - len); sch->qstats.backlog -= len; flow->dropped++; @@ -1824,11 +1824,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } /* stats */ - b->bytes += slen; b->backlogs[idx] += slen; - b->tin_backlog += slen; sch->qstats.backlog += slen; q->avg_window_bytes += slen; + WRITE_ONCE(b->bytes, b->bytes + slen); + WRITE_ONCE(b->tin_backlog, b->tin_backlog + slen); qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen); consume_skb(skb); @@ -1847,7 +1847,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, WRITE_ONCE(b->ack_drops, b->ack_drops + 1); sch->qstats.drops++; ack_pkt_len = qdisc_pkt_len(ack); - b->bytes += ack_pkt_len; + WRITE_ONCE(b->bytes, b->bytes + ack_pkt_len); q->buffer_used += skb->truesize - ack->truesize; if (q->config->rate_flags & CAKE_FLAG_INGRESS) cake_advance_shaper(q, b, ack, now, true); @@ -1861,11 +1861,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* stats */ WRITE_ONCE(b->packets, b->packets + 1); - b->bytes += len - ack_pkt_len; b->backlogs[idx] += len - ack_pkt_len; - b->tin_backlog += len - ack_pkt_len; sch->qstats.backlog += len - ack_pkt_len; q->avg_window_bytes += len - ack_pkt_len; + WRITE_ONCE(b->bytes, b->bytes + len - ack_pkt_len); + WRITE_ONCE(b->tin_backlog, b->tin_backlog + len - ack_pkt_len); } if (q->overflow_timeout) @@ -1895,9 +1895,9 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC; b = div64_u64(b, window_interval); - q->avg_peak_bandwidth = - cake_ewma(q->avg_peak_bandwidth, b, - b > q->avg_peak_bandwidth ? 2 : 8); + WRITE_ONCE(q->avg_peak_bandwidth, + cake_ewma(q->avg_peak_bandwidth, b, + b > q->avg_peak_bandwidth ? 2 : 8)); q->avg_window_bytes = 0; q->avg_window_begin = now; @@ -1938,7 +1938,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } if (q->buffer_used > q->buffer_max_used) - q->buffer_max_used = q->buffer_used; + WRITE_ONCE(q->buffer_max_used, q->buffer_used); if (q->buffer_used <= q->buffer_limit) return NET_XMIT_SUCCESS; @@ -1978,7 +1978,7 @@ static struct sk_buff *cake_dequeue_one(struct Qdisc *sch) skb = dequeue_head(flow); len = qdisc_pkt_len(skb); b->backlogs[q->cur_flow] -= len; - b->tin_backlog -= len; + WRITE_ONCE(b->tin_backlog, b->tin_backlog - len); sch->qstats.backlog -= len; q->buffer_used -= skb->truesize; sch->q.qlen--; @@ -2043,7 +2043,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) cake_configure_rates(sch, new_rate, true); q->last_checked_active = now; - q->active_queues = num_active_qs; + WRITE_ONCE(q->active_queues, num_active_qs); } begin: @@ -2347,7 +2347,7 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, /* else unlimited, ie. zero delay */ WRITE_ONCE(b->flow_quantum, 1514); } - b->tin_rate_bps = rate; + WRITE_ONCE(b->tin_rate_bps, rate); b->tin_rate_ns = rate_ns; b->tin_rate_shft = rate_shft; @@ -2617,25 +2617,27 @@ static void cake_reconfigure(struct Qdisc *sch) { struct cake_sched_data *qd = qdisc_priv(sch); struct cake_sched_config *q = qd->config; + u32 buffer_limit; cake_configure_rates(sch, qd->config->rate_bps, false); if (q->buffer_config_limit) { - qd->buffer_limit = q->buffer_config_limit; + buffer_limit = q->buffer_config_limit; } else if (q->rate_bps) { u64 t = q->rate_bps * q->interval; do_div(t, USEC_PER_SEC / 4); - qd->buffer_limit = max_t(u32, t, 4U << 20); + buffer_limit = max_t(u32, t, 4U << 20); } else { - qd->buffer_limit = ~0; + buffer_limit = ~0; } sch->flags &= ~TCQ_F_CAN_BYPASS; - qd->buffer_limit = min(qd->buffer_limit, - max(sch->limit * psched_mtu(qdisc_dev(sch)), - q->buffer_config_limit)); + WRITE_ONCE(qd->buffer_limit, + min(buffer_limit, + max(sch->limit * psched_mtu(qdisc_dev(sch)), + q->buffer_config_limit))); } static int cake_config_change(struct cake_sched_config *q, struct nlattr *opt, @@ -2780,10 +2782,10 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, return ret; if (overhead_changed) { - qd->max_netlen = 0; - qd->max_adjlen = 0; - qd->min_netlen = ~0; - qd->min_adjlen = ~0; + WRITE_ONCE(qd->max_netlen, 0); + WRITE_ONCE(qd->max_adjlen, 0); + WRITE_ONCE(qd->min_netlen, ~0); + WRITE_ONCE(qd->min_adjlen, ~0); } if (qd->tins) { @@ -3001,15 +3003,15 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) goto nla_put_failure; \ } while (0) - PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth); - PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit); - PUT_STAT_U32(MEMORY_USED, q->buffer_max_used); - PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16)); - PUT_STAT_U32(MAX_NETLEN, q->max_netlen); - PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen); - PUT_STAT_U32(MIN_NETLEN, q->min_netlen); - PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen); - PUT_STAT_U32(ACTIVE_QUEUES, q->active_queues); + PUT_STAT_U64(CAPACITY_ESTIMATE64, READ_ONCE(q->avg_peak_bandwidth)); + PUT_STAT_U32(MEMORY_LIMIT, READ_ONCE(q->buffer_limit)); + PUT_STAT_U32(MEMORY_USED, READ_ONCE(q->buffer_max_used)); + PUT_STAT_U32(AVG_NETOFF, ((READ_ONCE(q->avg_netoff) + 0x8000) >> 16)); + PUT_STAT_U32(MAX_NETLEN, READ_ONCE(q->max_netlen)); + PUT_STAT_U32(MAX_ADJLEN, READ_ONCE(q->max_adjlen)); + PUT_STAT_U32(MIN_NETLEN, READ_ONCE(q->min_netlen)); + PUT_STAT_U32(MIN_ADJLEN, READ_ONCE(q->min_adjlen)); + PUT_STAT_U32(ACTIVE_QUEUES, READ_ONCE(q->active_queues)); #undef PUT_STAT_U32 #undef PUT_STAT_U64 @@ -3035,9 +3037,9 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) if (!ts) goto nla_put_failure; - PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps); - PUT_TSTAT_U64(SENT_BYTES64, b->bytes); - PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog); + PUT_TSTAT_U64(THRESHOLD_RATE64, READ_ONCE(b->tin_rate_bps)); + PUT_TSTAT_U64(SENT_BYTES64, READ_ONCE(b->bytes)); + PUT_TSTAT_U32(BACKLOG_BYTES, READ_ONCE(b->tin_backlog)); PUT_TSTAT_U32(TARGET_US, ktime_to_us(ns_to_ktime(b->cparams.target))); @@ -3304,10 +3306,10 @@ static int cake_mq_change(struct Qdisc *sch, struct nlattr *opt, struct cake_sched_data *qd = qdisc_priv(chld); if (overhead_changed) { - qd->max_netlen = 0; - qd->max_adjlen = 0; - qd->min_netlen = ~0; - qd->min_adjlen = ~0; + WRITE_ONCE(qd->max_netlen, 0); + WRITE_ONCE(qd->max_adjlen, 0); + WRITE_ONCE(qd->min_netlen, ~0); + WRITE_ONCE(qd->min_adjlen, ~0); } if (qd->tins) { -- cgit v1.2.3 From a6c95b833dc17e84d16a8ac0f40fd0931616a52d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Apr 2026 08:36:06 +0000 Subject: net/sched: sch_cake: annotate data-races in cake_dump_stats() (V) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cake_dump_stats() runs without qdisc spinlock being held. In this final patch, I add READ_ONCE()/WRITE_ONCE() annotations for cparams.target and cparams.interval. Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: Eric Dumazet Acked-by: "Toke Høiland-Jørgensen" Link: https://patch.msgid.link/20260427083606.459355-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 975f5d6d6982..13c6d1869a14 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2356,10 +2356,11 @@ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, byte_target_ns = (byte_target * rate_ns) >> rate_shft; - b->cparams.target = max((byte_target_ns * 3) / 2, target_ns); - b->cparams.interval = max(rtt_est_ns + - b->cparams.target - target_ns, - b->cparams.target * 2); + WRITE_ONCE(b->cparams.target, + max((byte_target_ns * 3) / 2, target_ns)); + WRITE_ONCE(b->cparams.interval, + max(rtt_est_ns + b->cparams.target - target_ns, + b->cparams.target * 2)); b->cparams.mtu_time = byte_target_ns; b->cparams.p_inc = 1 << 24; /* 1/256 */ b->cparams.p_dec = 1 << 20; /* 1/4096 */ @@ -3042,9 +3043,9 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) PUT_TSTAT_U32(BACKLOG_BYTES, READ_ONCE(b->tin_backlog)); PUT_TSTAT_U32(TARGET_US, - ktime_to_us(ns_to_ktime(b->cparams.target))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->cparams.target)))); PUT_TSTAT_U32(INTERVAL_US, - ktime_to_us(ns_to_ktime(b->cparams.interval))); + ktime_to_us(ns_to_ktime(READ_ONCE(b->cparams.interval)))); PUT_TSTAT_U32(SENT_PACKETS, READ_ONCE(b->packets)); PUT_TSTAT_U32(DROPPED_PACKETS, READ_ONCE(b->tin_dropped)); -- cgit v1.2.3 From d62c6f2df5c0e1390b9a1f45b1b52689e3f234f0 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 27 Apr 2026 07:30:35 -0700 Subject: netconsole: return count instead of strnlen(buf, count) from store callbacks Several configfs store callbacks in netconsole end with: ret = strnlen(buf, count); This under-reports the number of bytes consumed when the input contains an embedded NUL within count, telling the VFS that fewer bytes were written than userspace actually handed in. A conformant partial-write loop would then retry the trailing bytes against a callback that has already accepted them. Every other configfs driver in the tree returns count directly from its store callbacks once parsing has succeeded, including drivers/nvme/target/configfs.c, drivers/gpio/gpio-sim.c, drivers/most/configfs.c, drivers/block/null_blk/main.c, drivers/pci/endpoint/pci-ep-cfs.c, and the rest of the configfs users. netconsole was the outlier (along with drivers/infiniband/core/cma_configfs.c, which has the same latent issue). Align netconsole with the rest of the configfs ecosystem: return count once the parser/validator has accepted the input. The numeric and boolean parsers (kstrtobool, kstrtou16, mac_pton, netpoll_parse_ip_addr) have already validated the meaningful prefix; any trailing bytes are padding and should simply be reported as consumed. Fixes: 0bcc1816188e ("[NET] netconsole: Support dynamic reconfiguration using configfs") Reviewed-by: Simon Horman Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20260427-netconsole_ai_fixes-v2-1-59965f29d9cc@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 205384dab89a..76d7fbf9e188 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -752,7 +752,7 @@ static ssize_t enabled_store(struct config_item *item, unregister_netcons_consoles(); } - ret = strnlen(buf, count); + ret = count; /* Deferred cleanup */ netconsole_process_cleanups(); out_unlock: @@ -781,7 +781,7 @@ static ssize_t release_store(struct config_item *item, const char *buf, nt->release = release; - ret = strnlen(buf, count); + ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); return ret; @@ -807,7 +807,7 @@ static ssize_t extended_store(struct config_item *item, const char *buf, goto out_unlock; nt->extended = extended; - ret = strnlen(buf, count); + ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); return ret; @@ -830,7 +830,7 @@ static ssize_t dev_name_store(struct config_item *item, const char *buf, trim_newline(nt->np.dev_name, IFNAMSIZ); dynamic_netconsole_mutex_unlock(); - return strnlen(buf, count); + return count; } static ssize_t local_port_store(struct config_item *item, const char *buf, @@ -849,7 +849,7 @@ static ssize_t local_port_store(struct config_item *item, const char *buf, ret = kstrtou16(buf, 10, &nt->np.local_port); if (ret < 0) goto out_unlock; - ret = strnlen(buf, count); + ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); return ret; @@ -871,7 +871,7 @@ static ssize_t remote_port_store(struct config_item *item, ret = kstrtou16(buf, 10, &nt->np.remote_port); if (ret < 0) goto out_unlock; - ret = strnlen(buf, count); + ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); return ret; @@ -896,7 +896,7 @@ static ssize_t local_ip_store(struct config_item *item, const char *buf, goto out_unlock; nt->np.ipv6 = !!ipv6; - ret = strnlen(buf, count); + ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); return ret; @@ -921,7 +921,7 @@ static ssize_t remote_ip_store(struct config_item *item, const char *buf, goto out_unlock; nt->np.ipv6 = !!ipv6; - ret = strnlen(buf, count); + ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); return ret; @@ -957,7 +957,7 @@ static ssize_t remote_mac_store(struct config_item *item, const char *buf, goto out_unlock; memcpy(nt->np.remote_mac, remote_mac, ETH_ALEN); - ret = strnlen(buf, count); + ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); return ret; @@ -1133,7 +1133,7 @@ static ssize_t sysdata_msgid_enabled_store(struct config_item *item, disable_sysdata_feature(nt, SYSDATA_MSGID); unlock_ok: - ret = strnlen(buf, count); + ret = count; dynamic_netconsole_mutex_unlock(); mutex_unlock(&netconsole_subsys.su_mutex); return ret; @@ -1162,7 +1162,7 @@ static ssize_t sysdata_release_enabled_store(struct config_item *item, disable_sysdata_feature(nt, SYSDATA_RELEASE); unlock_ok: - ret = strnlen(buf, count); + ret = count; dynamic_netconsole_mutex_unlock(); mutex_unlock(&netconsole_subsys.su_mutex); return ret; @@ -1191,7 +1191,7 @@ static ssize_t sysdata_taskname_enabled_store(struct config_item *item, disable_sysdata_feature(nt, SYSDATA_TASKNAME); unlock_ok: - ret = strnlen(buf, count); + ret = count; dynamic_netconsole_mutex_unlock(); mutex_unlock(&netconsole_subsys.su_mutex); return ret; @@ -1225,7 +1225,7 @@ static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item, disable_sysdata_feature(nt, SYSDATA_CPU_NR); unlock_ok: - ret = strnlen(buf, count); + ret = count; dynamic_netconsole_mutex_unlock(); mutex_unlock(&netconsole_subsys.su_mutex); return ret; -- cgit v1.2.3 From e6dd94252b0fa7b4fcc00577c6898432c5d97a08 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 27 Apr 2026 07:30:36 -0700 Subject: netconsole: avoid clobbering userdatum value on truncated write userdatum_value_store() bounds count by MAX_EXTRADATA_VALUE_LEN (200) and then copies straight into udm->value, which is itself 200 bytes: if (count > MAX_EXTRADATA_VALUE_LEN) return -EMSGSIZE; ... ret = strscpy(udm->value, buf, sizeof(udm->value)); if (ret < 0) goto out_unlock; If userspace writes exactly MAX_EXTRADATA_VALUE_LEN bytes with no NUL within them, strscpy() copies 199 bytes plus a NUL into udm->value and returns -E2BIG. The function jumps to out_unlock and reports the error to userspace, but udm->value has already been overwritten with the truncated string and update_userdata() is skipped, so the corruption is not yet visible on the wire. The next successful write to any userdatum entry under the same target calls update_userdata(), which packs udm->value into the active netconsole payload. From that point on, every netconsole message carries the silently truncated value, and userspace has no indication that a previous, error-returning write left state behind. Tighten the entry check from "count > MAX_EXTRADATA_VALUE_LEN" to "count >= MAX_EXTRADATA_VALUE_LEN". With count strictly less than sizeof(udm->value), strscpy() can no longer return -E2BIG here, so the corrupting truncation path is removed entirely. Fixes: 8a6d5fec6c7f ("net: netconsole: add a userdata config_group member to netconsole_target") Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20260427-netconsole_ai_fixes-v2-2-59965f29d9cc@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 76d7fbf9e188..595e09bd1ccf 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -1076,15 +1076,13 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, struct userdata *ud; ssize_t ret; - if (count > MAX_EXTRADATA_VALUE_LEN) + if (count >= MAX_EXTRADATA_VALUE_LEN) return -EMSGSIZE; mutex_lock(&netconsole_subsys.su_mutex); dynamic_netconsole_mutex_lock(); - - ret = strscpy(udm->value, buf, sizeof(udm->value)); - if (ret < 0) - goto out_unlock; + /* count is bounded above, so strscpy() cannot truncate here */ + strscpy(udm->value, buf, sizeof(udm->value)); trim_newline(udm->value, sizeof(udm->value)); ud = to_userdata(item->ci_parent); -- cgit v1.2.3 From 92ceb7bff62c2606f664c204750eca0b85d44112 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 27 Apr 2026 07:30:37 -0700 Subject: netconsole: propagate device name truncation in dev_name_store() dev_name_store() calls strscpy(nt->np.dev_name, buf, IFNAMSIZ) without checking the return value. If userspace writes an interface name longer than IFNAMSIZ - 1, strscpy() silently truncates and returns -E2BIG, but the function ignores it and reports a fully successful write back to userspace. If a real interface happens to match the truncated name, netconsole will bind to the wrong device on the next enable, sending kernel logs and panic output to an unintended network segment with no indication to userspace that anything was rewritten. Reject writes whose length cannot fit in nt->np.dev_name up front: if (count >= IFNAMSIZ) return -ENAMETOOLONG; This is not a big deal of a problem, but, it is still the correct approach. Fixes: 0bcc1816188e57 ("[NET] netconsole: Support dynamic reconfiguration using configfs") Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20260427-netconsole_ai_fixes-v2-3-59965f29d9cc@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 595e09bd1ccf..b3b36e3ddd03 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -817,6 +817,13 @@ static ssize_t dev_name_store(struct config_item *item, const char *buf, size_t count) { struct netconsole_target *nt = to_target(item); + size_t len = count; + + /* Account for a trailing newline appended by tools like echo */ + if (len && buf[len - 1] == '\n') + len--; + if (len >= IFNAMSIZ) + return -ENAMETOOLONG; dynamic_netconsole_mutex_lock(); if (nt->state == STATE_ENABLED) { -- cgit v1.2.3 From 869cd6490fafe09c89a15d01610e8a03932d79f0 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 27 Apr 2026 07:30:38 -0700 Subject: netconsole: restore userdatum value on update_userdata() failure userdatum_value_store() updates udm->value first and only then calls update_userdata() to rebuild the on-the-wire payload. If update_userdata() fails (e.g. -ENOMEM from kmalloc), the function returns the error to userspace, but udm->value already holds the new string while the live nt->userdata buffer still reflects the old one. The next successful write to any sibling userdatum on the same target will call update_userdata() again, which walks every entry and packs the now-stale udm->value into the payload. The failed write is thus silently activated later, with no indication to userspace that the value it tried to set was rejected. Snapshot the previous value before overwriting udm->value and restore it if update_userdata() fails so the visible state and the active payload stay consistent. Fixes: eb83801af2dc ("netconsole: Dynamic allocation of userdata buffer") Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20260427-netconsole_ai_fixes-v2-4-59965f29d9cc@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index b3b36e3ddd03..57dd6821a8aa 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -1079,6 +1079,7 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, size_t count) { struct userdatum *udm = to_userdatum(item); + char old_value[MAX_EXTRADATA_VALUE_LEN]; struct netconsole_target *nt; struct userdata *ud; ssize_t ret; @@ -1088,6 +1089,8 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, mutex_lock(&netconsole_subsys.su_mutex); dynamic_netconsole_mutex_lock(); + /* Snapshot for rollback if update_userdata() fails below */ + strscpy(old_value, udm->value, sizeof(old_value)); /* count is bounded above, so strscpy() cannot truncate here */ strscpy(udm->value, buf, sizeof(udm->value)); trim_newline(udm->value, sizeof(udm->value)); @@ -1095,8 +1098,11 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, ud = to_userdata(item->ci_parent); nt = userdata_to_target(ud); ret = update_userdata(nt); - if (ret < 0) + if (ret < 0) { + /* Restore the previous value so it matches the live payload */ + strscpy(udm->value, old_value, sizeof(udm->value)); goto out_unlock; + } ret = count; out_unlock: dynamic_netconsole_mutex_unlock(); -- cgit v1.2.3 From 5f95c21fc23a7ef22b4d27d1ed9bb55557ffb926 Mon Sep 17 00:00:00 2001 From: Gang Yan Date: Mon, 27 Apr 2026 21:54:33 +0200 Subject: mptcp: sockopt: set timestamp flags on subflow socket, not msk Both mptcp_setsockopt_sol_socket_tstamp() and mptcp_setsockopt_sol_socket_timestamping() iterate over subflows, acquire the subflow socket lock, but then erroneously pass the MPTCP msk socket to sock_set_timestamp() / sock_set_timestamping() instead of the subflow ssk. As a result, the timestamp flags are set on the wrong socket and have no effect on the actual subflows. Pass ssk instead of sk to both helpers. Fixes: 9061f24bf82e ("mptcp: sockopt: propagate timestamp request to subflows") Cc: stable@vger.kernel.org Signed-off-by: Gang Yan Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-1-7432b7f279fa@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/sockopt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index de90a2897d2d..79db15903e7a 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -161,7 +161,7 @@ static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optnam struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); - sock_set_timestamp(sk, optname, !!val); + sock_set_timestamp(ssk, optname, !!val); unlock_sock_fast(ssk, slow); } @@ -237,7 +237,7 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); - sock_set_timestamping(sk, optname, timestamping); + sock_set_timestamping(ssk, optname, timestamping); unlock_sock_fast(ssk, slow); } -- cgit v1.2.3 From b5c52908d52c6c8eb8933264aa6087a0600fd892 Mon Sep 17 00:00:00 2001 From: Gang Yan Date: Mon, 27 Apr 2026 21:54:34 +0200 Subject: mptcp: fix scheduling with atomic in timestamp sockopt Using lock_sock_fast() (atomic context) around sock_set_timestamp() and sock_set_timestamping() is unsafe, as both helpers can sleep. Replace lock_sock_fast() with sleepable lock_sock()/release_sock() to avoid scheduling while atomic panic. Fixes: 9061f24bf82e ("mptcp: sockopt: propagate timestamp request to subflows") Cc: stable@vger.kernel.org Reported-by: Sashiko Closes: https://sashiko.dev/#/patchset/20260420093343.16443-1-gang.yan@linux.dev Signed-off-by: Gang Yan Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-2-7432b7f279fa@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/sockopt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 79db15903e7a..0efe40be2fde 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -159,10 +159,10 @@ static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optnam lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow = lock_sock_fast(ssk); + lock_sock(ssk); sock_set_timestamp(ssk, optname, !!val); - unlock_sock_fast(ssk, slow); + release_sock(ssk); } release_sock(sk); @@ -235,10 +235,10 @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow = lock_sock_fast(ssk); + lock_sock(ssk); sock_set_timestamping(ssk, optname, timestamping); - unlock_sock_fast(ssk, slow); + release_sock(ssk); } release_sock(sk); -- cgit v1.2.3 From f14d6e9c3678a067f304abba561e0c5446c7e845 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 27 Apr 2026 21:54:35 +0200 Subject: mptcp: fastclose msk when linger time is 0 The SO_LINGER socket option has been supported for a while with MPTCP sockets [1], but it didn't cause the equivalent of a TCP reset as expected when enabled and its time was set to 0. This was causing some behavioural differences with TCP where some connections were not promptly stopped as expected. To fix that, an extra condition is checked at close() time before sending an MP_FASTCLOSE, the MPTCP equivalent of a TCP reset. Note that backporting up to [1] will be difficult as more changes are needed to be able to send MP_FASTCLOSE. It seems better to stop at [2], which was supposed to already imitate TCP. Validated with MPTCP packetdrill tests [3]. Fixes: 268b12387460 ("mptcp: setsockopt: support SO_LINGER") [1] Fixes: d21f83485518 ("mptcp: use fastclose on more edge scenarios") [2] Cc: stable@vger.kernel.org Reported-by: Lance Tuller Closes: https://github.com/lance0/xfr/pull/67 Link: https://github.com/multipath-tcp/packetdrill/pull/196 [3] Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-3-7432b7f279fa@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 718e910ff23f..4546a8b09884 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3302,7 +3302,8 @@ bool __mptcp_close(struct sock *sk, long timeout) goto cleanup; } - if (mptcp_data_avail(msk) || timeout < 0) { + if (mptcp_data_avail(msk) || timeout < 0 || + (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) { /* If the msk has read data, or the caller explicitly ask it, * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose */ -- cgit v1.2.3 From 1774d3cf3cf17baaf30c095606cda496268283b3 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 27 Apr 2026 21:54:36 +0200 Subject: mptcp: pm: kernel: reset fullmesh counter after flush This variable counts how many MPTCP endpoints have a 'fullmesh' flag set. After having flushed all MPTCP endpoints, it is then needed to reset this counter. Without this reset, this counter exposed to the userspace is wrong, but also non-fullmesh endpoints added after the flush will not be taken into account to create subflows in reaction to ADD_ADDRs. Fixes: f88191c7f361 ("mptcp: pm: in-kernel: record fullmesh endp nb") Cc: stable@vger.kernel.org Reported-by: Sashiko Closes: https://sashiko.dev/#/patchset/20260422-mptcp-inc-limits-v6-0-903181771530%40kernel.org?part=15 Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260427-net-mptcp-misc-fixes-7-1-rc2-v1-4-7432b7f279fa@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_kernel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index 0ebf43be9939..c9f1e5af3cd3 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -1278,6 +1278,7 @@ static void __reset_counters(struct pm_nl_pernet *pernet) WRITE_ONCE(pernet->endp_signal_max, 0); WRITE_ONCE(pernet->endp_subflow_max, 0); WRITE_ONCE(pernet->endp_laminar_max, 0); + WRITE_ONCE(pernet->endp_fullmesh_max, 0); pernet->endpoints = 0; } -- cgit v1.2.3 From 3e75021f615ceee8562e6455c335936b39929ffb Mon Sep 17 00:00:00 2001 From: Troy Mitchell Date: Fri, 24 Apr 2026 16:20:32 +0800 Subject: clk: spacemit: k3: mark top_dclk as CLK_IS_CRITICAL top_dclk is the DDR bus clock. If it is gated by clk_disable_unused, all memory-mapped bus transactions cease to function, causing DMA engines to hang and general system instability. Mark it CLK_IS_CRITICAL so the CCF never gates it during the unused clock sweep. Fixes: e371a77255b8 ("clk: spacemit: k3: add the clock tree") Reviewed-by: Brian Masney Signed-off-by: Troy Mitchell Signed-off-by: Stephen Boyd --- drivers/clk/spacemit/ccu-k3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/spacemit/ccu-k3.c b/drivers/clk/spacemit/ccu-k3.c index e98afd59f05c..bb8b75bdbdb3 100644 --- a/drivers/clk/spacemit/ccu-k3.c +++ b/drivers/clk/spacemit/ccu-k3.c @@ -846,7 +846,7 @@ static const struct clk_parent_data top_parents[] = { CCU_PARENT_HW(pll6_d3), }; CCU_MUX_DIV_GATE_FC_DEFINE(top_dclk, top_parents, APMU_TOP_DCLK_CTRL, 5, 3, - BIT(8), 2, 3, BIT(1), 0); + BIT(8), 2, 3, BIT(1), CLK_IS_CRITICAL); static const struct clk_parent_data ucie_parents[] = { CCU_PARENT_HW(pll1_d8_307p2), -- cgit v1.2.3 From 79a1886be1564a009cd2a003bada15ed6153f819 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 25 Feb 2026 17:55:05 +0100 Subject: clk: eyeq: use the auxiliary device creation helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The auxiliary device creation of this driver is simple enough to use the available auxiliary device creation helper. Use it and remove some boilerplate code. Tested-by: Théo Lebrun # On Mobileye EyeQ5 Signed-off-by: Jerome Brunet Reviewed-by: Luca Ceresoli Signed-off-by: Théo Lebrun Signed-off-by: Stephen Boyd --- drivers/clk/clk-eyeq.c | 57 +++++++++++--------------------------------------- 1 file changed, 12 insertions(+), 45 deletions(-) diff --git a/drivers/clk/clk-eyeq.c b/drivers/clk/clk-eyeq.c index c1dccedf8d5b..ae1aaf274336 100644 --- a/drivers/clk/clk-eyeq.c +++ b/drivers/clk/clk-eyeq.c @@ -321,38 +321,18 @@ static void eqc_probe_init_fixed_factors(struct device *dev, } } -static void eqc_auxdev_release(struct device *dev) -{ - struct auxiliary_device *adev = to_auxiliary_dev(dev); - - kfree(adev); -} - -static int eqc_auxdev_create(struct device *dev, void __iomem *base, - const char *name, u32 id) +static void eqc_auxdev_create_optional(struct device *dev, void __iomem *base, + const char *name) { struct auxiliary_device *adev; - int ret; - - adev = kzalloc_obj(*adev); - if (!adev) - return -ENOMEM; - - adev->name = name; - adev->dev.parent = dev; - adev->dev.platform_data = (void __force *)base; - adev->dev.release = eqc_auxdev_release; - adev->id = id; - ret = auxiliary_device_init(adev); - if (ret) - return ret; - - ret = auxiliary_device_add(adev); - if (ret) - auxiliary_device_uninit(adev); - - return ret; + if (name) { + adev = devm_auxiliary_device_create(dev, name, + (void __force *)base); + if (!adev) + dev_warn(dev, "failed creating auxiliary device %s.%s\n", + KBUILD_MODNAME, name); + } } static int eqc_probe(struct platform_device *pdev) @@ -364,7 +344,6 @@ static int eqc_probe(struct platform_device *pdev) unsigned int i, clk_count; struct resource *res; void __iomem *base; - int ret; data = device_get_match_data(dev); if (!data) @@ -378,21 +357,9 @@ static int eqc_probe(struct platform_device *pdev) if (!base) return -ENOMEM; - /* Init optional reset auxiliary device. */ - if (data->reset_auxdev_name) { - ret = eqc_auxdev_create(dev, base, data->reset_auxdev_name, 0); - if (ret) - dev_warn(dev, "failed creating auxiliary device %s.%s: %d\n", - KBUILD_MODNAME, data->reset_auxdev_name, ret); - } - - /* Init optional pinctrl auxiliary device. */ - if (data->pinctrl_auxdev_name) { - ret = eqc_auxdev_create(dev, base, data->pinctrl_auxdev_name, 0); - if (ret) - dev_warn(dev, "failed creating auxiliary device %s.%s: %d\n", - KBUILD_MODNAME, data->pinctrl_auxdev_name, ret); - } + /* Init optional auxiliary devices. */ + eqc_auxdev_create_optional(dev, base, data->reset_auxdev_name); + eqc_auxdev_create_optional(dev, base, data->pinctrl_auxdev_name); if (data->pll_count + data->div_count + data->fixed_factor_count == 0) return 0; /* Zero clocks, we are done. */ -- cgit v1.2.3 From a25ab518f355e1f0dcbea24ee26418dfcd6944b5 Mon Sep 17 00:00:00 2001 From: Théo Lebrun Date: Wed, 25 Feb 2026 17:55:06 +0100 Subject: clk: eyeq: add EyeQ5 children auxiliary device for generic PHYs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Grow our clk-eyeq family; it knows how to spawn reset provider and pin controller children. Expand with a generic PHY driver on EyeQ5. Reviewed-by: Luca Ceresoli Signed-off-by: Théo Lebrun Signed-off-by: Stephen Boyd --- drivers/clk/clk-eyeq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clk/clk-eyeq.c b/drivers/clk/clk-eyeq.c index ae1aaf274336..d9303c2c7aa5 100644 --- a/drivers/clk/clk-eyeq.c +++ b/drivers/clk/clk-eyeq.c @@ -110,6 +110,7 @@ struct eqc_match_data { const char *reset_auxdev_name; const char *pinctrl_auxdev_name; + const char *eth_phy_auxdev_name; unsigned int early_clk_count; }; @@ -360,6 +361,7 @@ static int eqc_probe(struct platform_device *pdev) /* Init optional auxiliary devices. */ eqc_auxdev_create_optional(dev, base, data->reset_auxdev_name); eqc_auxdev_create_optional(dev, base, data->pinctrl_auxdev_name); + eqc_auxdev_create_optional(dev, base, data->eth_phy_auxdev_name); if (data->pll_count + data->div_count + data->fixed_factor_count == 0) return 0; /* Zero clocks, we are done. */ @@ -520,6 +522,7 @@ static const struct eqc_match_data eqc_eyeq5_match_data = { .reset_auxdev_name = "reset", .pinctrl_auxdev_name = "pinctrl", + .eth_phy_auxdev_name = "phy", .early_clk_count = ARRAY_SIZE(eqc_eyeq5_early_plls) + ARRAY_SIZE(eqc_eyeq5_early_fixed_factors), -- cgit v1.2.3 From 4ac170432cf74b753cf59bcd0d449dced48585da Mon Sep 17 00:00:00 2001 From: Théo Lebrun Date: Wed, 25 Feb 2026 17:55:07 +0100 Subject: reset: eyeq: drop device_set_of_node_from_dev() done by parent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our parent driver (clk-eyeq) now does the device_set_of_node_from_dev(dev, dev->parent) call through the newly introduced devm_auxiliary_device_create() helper. Doing it again in the reset-eyeq probe would be redundant. Drop both the WARN_ON() and the device_set_of_node_from_dev() call. Also fix the following comment that talks about "our newfound OF node". Signed-off-by: Jerome Brunet Reviewed-by: Philipp Zabel Acked-by: Philipp Zabel Signed-off-by: Théo Lebrun Signed-off-by: Stephen Boyd --- drivers/reset/reset-eyeq.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/reset/reset-eyeq.c b/drivers/reset/reset-eyeq.c index 791b7283111e..1a3857983897 100644 --- a/drivers/reset/reset-eyeq.c +++ b/drivers/reset/reset-eyeq.c @@ -422,13 +422,6 @@ static int eqr_of_xlate_twocells(struct reset_controller_dev *rcdev, return eqr_of_xlate_internal(rcdev, reset_spec->args[0], reset_spec->args[1]); } -static void eqr_of_node_put(void *_dev) -{ - struct device *dev = _dev; - - of_node_put(dev->of_node); -} - static int eqr_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { @@ -439,21 +432,8 @@ static int eqr_probe(struct auxiliary_device *adev, int ret; /* - * We are an auxiliary device of clk-eyeq. We do not have an OF node by - * default; let's reuse our parent's OF node. - */ - WARN_ON(dev->of_node); - device_set_of_node_from_dev(dev, dev->parent); - if (!dev->of_node) - return -ENODEV; - - ret = devm_add_action_or_reset(dev, eqr_of_node_put, dev); - if (ret) - return ret; - - /* - * Using our newfound OF node, we can get match data. We cannot use - * device_get_match_data() because it does not match reused OF nodes. + * Get match data. We cannot use device_get_match_data() because it does + * not accept reused OF nodes; see device_set_of_node_from_dev(). */ match = of_match_node(dev->driver->of_match_table, dev->of_node); if (!match || !match->data) -- cgit v1.2.3 From 6b4afbaaa342eaa52172e0be5ef8d1fcbf9ff460 Mon Sep 17 00:00:00 2001 From: Troy Mitchell Date: Wed, 29 Apr 2026 09:38:47 +0800 Subject: ASoC: spacemit: move hw constraints from hw_params to startup Hardware constraints should be applied in the startup callback rather than hw_params, as hw_params may be called too late for the constraints to take effect properly. Move the channel count and format constraints for I2S and DSP_A/DSP_B modes into a new startup callback. This also tightens the I2S mode channel constraint from 1-2 to exactly 2, matching the actual hardware behavior. Signed-off-by: Troy Mitchell Link: https://patch.msgid.link/20260429-k3-i2s-v1-2-2fe99db11ecb@linux.spacemit.com Signed-off-by: Mark Brown --- sound/soc/spacemit/k1_i2s.c | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/sound/soc/spacemit/k1_i2s.c b/sound/soc/spacemit/k1_i2s.c index 1cb99f1abc7c..bb73d32a1b09 100644 --- a/sound/soc/spacemit/k1_i2s.c +++ b/sound/soc/spacemit/k1_i2s.c @@ -106,6 +106,37 @@ static void spacemit_i2s_init(struct spacemit_i2s_dev *i2s) writel(0, i2s->base + SSINTEN); } +static int spacemit_i2s_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct spacemit_i2s_dev *i2s = snd_soc_dai_get_drvdata(dai); + + switch (i2s->dai_fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_CHANNELS, + 2, 2); + snd_pcm_hw_constraint_mask64(substream->runtime, + SNDRV_PCM_HW_PARAM_FORMAT, + SNDRV_PCM_FMTBIT_S16_LE); + break; + case SND_SOC_DAIFMT_DSP_A: + case SND_SOC_DAIFMT_DSP_B: + snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_CHANNELS, + 1, 1); + snd_pcm_hw_constraint_mask64(substream->runtime, + SNDRV_PCM_HW_PARAM_FORMAT, + SNDRV_PCM_FMTBIT_S32_LE); + break; + default: + dev_dbg(i2s->dev, "unexpected format type"); + return -EINVAL; + } + + return 0; +} + static int spacemit_i2s_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) @@ -157,22 +188,9 @@ static int spacemit_i2s_hw_params(struct snd_pcm_substream *substream, dma_data->maxburst = 32; dma_data->addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; } - - snd_pcm_hw_constraint_minmax(substream->runtime, - SNDRV_PCM_HW_PARAM_CHANNELS, - 1, 2); - snd_pcm_hw_constraint_mask64(substream->runtime, - SNDRV_PCM_HW_PARAM_FORMAT, - SNDRV_PCM_FMTBIT_S16_LE); break; case SND_SOC_DAIFMT_DSP_A: case SND_SOC_DAIFMT_DSP_B: - snd_pcm_hw_constraint_minmax(substream->runtime, - SNDRV_PCM_HW_PARAM_CHANNELS, - 1, 1); - snd_pcm_hw_constraint_mask64(substream->runtime, - SNDRV_PCM_HW_PARAM_FORMAT, - SNDRV_PCM_FMTBIT_S32_LE); break; default: dev_dbg(i2s->dev, "unexpected format type"); @@ -303,6 +321,7 @@ static int spacemit_i2s_dai_remove(struct snd_soc_dai *dai) static const struct snd_soc_dai_ops spacemit_i2s_dai_ops = { .probe = spacemit_i2s_dai_probe, .remove = spacemit_i2s_dai_remove, + .startup = spacemit_i2s_startup, .hw_params = spacemit_i2s_hw_params, .set_sysclk = spacemit_i2s_set_sysclk, .set_fmt = spacemit_i2s_set_fmt, -- cgit v1.2.3 From 03dcb5b68a96b51157ec2d17042fa2f0106828ae Mon Sep 17 00:00:00 2001 From: Troy Mitchell Date: Wed, 29 Apr 2026 09:38:48 +0800 Subject: ASoC: spacemit: adjust FIFO trigger threshold to half FIFO size Set both TX and RX FIFO trigger thresholds (TFT/RFT) to 0xF (half of the 32-entry FIFO) instead of 5. This provides better DMA efficiency by allowing more data to accumulate before triggering a DMA request, reducing the number of DMA transactions needed. Signed-off-by: Troy Mitchell Link: https://patch.msgid.link/20260429-k3-i2s-v1-3-2fe99db11ecb@linux.spacemit.com Signed-off-by: Mark Brown --- sound/soc/spacemit/k1_i2s.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/spacemit/k1_i2s.c b/sound/soc/spacemit/k1_i2s.c index bb73d32a1b09..43481f387c44 100644 --- a/sound/soc/spacemit/k1_i2s.c +++ b/sound/soc/spacemit/k1_i2s.c @@ -93,8 +93,8 @@ static void spacemit_i2s_init(struct spacemit_i2s_dev *i2s) u32 sscr_val, sspsp_val, ssfcr_val, ssrwt_val; sscr_val = SSCR_TRAIL | SSCR_FRF_PSP; - ssfcr_val = FIELD_PREP(SSFCR_FIELD_TFT, 5) | - FIELD_PREP(SSFCR_FIELD_RFT, 5) | + ssfcr_val = FIELD_PREP(SSFCR_FIELD_TFT, 0xF) | + FIELD_PREP(SSFCR_FIELD_RFT, 0xF) | SSFCR_RSRE | SSFCR_TSRE; ssrwt_val = SSRWT_RWOT; sspsp_val = SSPSP_SFRMP; -- cgit v1.2.3 From 69056753231f483cc1e40db52228aac42fd4c93d Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Wed, 15 Apr 2026 16:30:49 -0400 Subject: MAINTAINERS: add myself as a reviewer for the clk subsystem I've reviewed a lot clk patches for parts of the subsystem that typically doesn't get much review. Add myself as a reviewer so that I don't miss anything. Link: https://lore.kernel.org/linux-clk/?q=f%3Abmasney%40redhat.com Signed-off-by: Brian Masney Signed-off-by: Stephen Boyd --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2fb1c75afd16..384ce5ee7323 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6358,6 +6358,7 @@ F: include/uapi/linux/comedi.h COMMON CLK FRAMEWORK M: Michael Turquette M: Stephen Boyd +R: Brian Masney L: linux-clk@vger.kernel.org S: Maintained Q: http://patchwork.kernel.org/project/linux-clk/list/ -- cgit v1.2.3 From de019f203b0d472c98ead4081ad4f05d92c9b826 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Apr 2026 11:50:27 +0200 Subject: clk: rk808: fix OF node reference imbalance The driver reuses the OF node of the parent multi-function device but fails to take another reference to balance the one dropped by the platform bus code when unbinding the MFD and deregistering the child devices. Fix this by using the intended helper for reusing OF nodes. Fixes: 2dc51ca822e4 ("clk: RK808: Reduce 'struct rk808' usage") Cc: stable@vger.kernel.org # 6.5 Cc: Sebastian Reichel Signed-off-by: Johan Hovold Reviewed-by: Sebastian Reichel Reviewed-by: Brian Masney Reviewed-by: Heiko Stuebner Signed-off-by: Stephen Boyd --- drivers/clk/clk-rk808.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-rk808.c b/drivers/clk/clk-rk808.c index f7412b137e5e..5a75b5c91555 100644 --- a/drivers/clk/clk-rk808.c +++ b/drivers/clk/clk-rk808.c @@ -153,7 +153,7 @@ static int rk808_clkout_probe(struct platform_device *pdev) struct rk808_clkout *rk808_clkout; int ret; - dev->of_node = pdev->dev.parent->of_node; + device_set_of_node_from_dev(dev, dev->parent); rk808_clkout = devm_kzalloc(dev, sizeof(*rk808_clkout), GFP_KERNEL); -- cgit v1.2.3 From 2d80392a97cf205a766d75539b4c814a4f5e7490 Mon Sep 17 00:00:00 2001 From: Abhinav Mahadevan Date: Tue, 28 Apr 2026 21:20:00 +0530 Subject: ALSA: usb-audio: Fix quirk entry placement for PreSonus AudioBox USB The quirk entry for PreSonus AudioBox USB was mistakenly placed inside a disabled #if 0 block. Move it to the correct position after the Fixes: 34fe4a9df247 ("ALSA: usb-audio: Add quirk for PreSonus AudioBox USB") Signed-off-by: Abhinav Mahadevan Link: https://patch.msgid.link/20260428155117.5170-1-abhi220204@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 803e03d4d77b..4e9cfff4047f 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2652,6 +2652,9 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, + +#endif /* disabled */ + { /* * The AudioBox USB advertises S24_3LE as the only supported format @@ -2700,7 +2703,6 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, -#endif /* disabled */ { /* -- cgit v1.2.3 From 077c593dacf7ee33511468e4f29417d795cf07a4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Apr 2026 08:17:56 +0200 Subject: ALSA: hda: Avoid WARN_ON() for HDMI chmap slot checks At parsing the channel mapping for HDMI, the current code may spew WARN_ON() unnecessarily for the case where only invalid (zero) channel maps are given from the hardware. Drop WARN_ON() and reorganize the code a bit for avoiding the hdmi_slot over the array size. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221390 Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260428061800.80527-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/hda/core/hdmi_chmap.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sound/hda/core/hdmi_chmap.c b/sound/hda/core/hdmi_chmap.c index 7b276047f85a..c897fc443467 100644 --- a/sound/hda/core/hdmi_chmap.c +++ b/sound/hda/core/hdmi_chmap.c @@ -353,13 +353,16 @@ static void hdmi_std_setup_channel_mapping(struct hdac_chmap *chmap, if (hdmi_channel_mapping[ca][1] == 0) { int hdmi_slot = 0; /* fill actual channel mappings in ALSA channel (i) order */ - for (i = 0; i < ch_alloc->channels; i++) { - while (!WARN_ON(hdmi_slot >= 8) && - !ch_alloc->speakers[7 - hdmi_slot]) - hdmi_slot++; /* skip zero slots */ + for (i = 0; i < ch_alloc->channels && hdmi_slot < 8; i++) { + while (!ch_alloc->speakers[7 - hdmi_slot]) { + /* skip zero slots */ + if (++hdmi_slot >= 8) + goto out; + } hdmi_channel_mapping[ca][i] = (i << 4) | hdmi_slot++; } + out: /* fill the rest of the slots with ALSA channel 0xf */ for (hdmi_slot = 0; hdmi_slot < 8; hdmi_slot++) if (!ch_alloc->speakers[7 - hdmi_slot]) -- cgit v1.2.3 From b0e2333a231107adedd38c6fcfe1adc6162716fc Mon Sep 17 00:00:00 2001 From: wangdicheng Date: Tue, 28 Apr 2026 16:04:50 +0800 Subject: ALSA: hda/conexant: Fix missing error check for jack detection In cx_probe(), the return value of snd_hda_jack_detect_enable_callback() is ignored. This function returns a pointer, and if it fails (e.g., due to memory allocation failure), it returns an error pointer which must be checked using IS_ERR(). If the registration fails, the driver continues to probe, but the jack detection callback will not be registered. This can lead to a kernel crash later when the driver attempts to handle jack events or accesses the uninitialized structure. Check the return value using IS_ERR() and propagate the error via PTR_ERR() to the probe caller. Fixes: 7aeb25908648 ("ALSA: hda/conexant: Fix headset auto detect fail in cx8070 and SN6140") Signed-off-by: wangdicheng Link: https://patch.msgid.link/20260428080450.108801-1-wangdich9700@163.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/conexant.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/hda/codecs/conexant.c b/sound/hda/codecs/conexant.c index 3a9717df39b4..e3b6aaabe3a9 100644 --- a/sound/hda/codecs/conexant.c +++ b/sound/hda/codecs/conexant.c @@ -1175,6 +1175,7 @@ static void add_cx5051_fake_mutes(struct hda_codec *codec) static int cx_probe(struct hda_codec *codec, const struct hda_device_id *id) { struct conexant_spec *spec; + struct hda_jack_callback *callback; int err; codec_info(codec, "%s: BIOS auto-probing.\n", codec->core.chip_name); @@ -1190,7 +1191,12 @@ static int cx_probe(struct hda_codec *codec, const struct hda_device_id *id) case 0x14f11f86: case 0x14f11f87: spec->is_cx11880_sn6140 = true; - snd_hda_jack_detect_enable_callback(codec, 0x19, cx_update_headset_mic_vref); + callback = snd_hda_jack_detect_enable_callback(codec, 0x19, + cx_update_headset_mic_vref); + if (IS_ERR(callback)) { + err = PTR_ERR(callback); + goto error; + } break; } -- cgit v1.2.3 From 90df4957a3271adf391b3432cd76a40887cf3273 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 28 Apr 2026 14:05:31 +0100 Subject: ALSA: hda: cs35l56: Fix uninitialized value in cs35l56_hda_read_acpi() Eliminate the uninitialized 'nval' in cs35l56_hda_read_acpi() if a system-specific quirk overrides processing of the dev-index property. The value is now stored in a new 'num_amps' member of struct cs35l56_hda so that the quirk handler can set the value. The quirk for the Lenovo Yoga Book 9i GenX replaces the values from the dev-index property with hardcoded indexes. So cs35l56_hda_read_acpi() would then skip reading the property. But this left the 'nval' local variable uninitialized when it is later passed to cirrus_scodec_get_speaker_id(). Fixes: 40b1c2f9b299 ("ALSA: hda/cs35l56: Workaround bad dev-index on Lenovo Yoga Book 9i GenX") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-sound/aenFesLAStjrVNy8@stanley.mountain/T/#u Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260428130531.169600-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/cs35l56_hda.c | 12 +++++++----- sound/hda/codecs/side-codecs/cs35l56_hda.h | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/sound/hda/codecs/side-codecs/cs35l56_hda.c b/sound/hda/codecs/side-codecs/cs35l56_hda.c index dc25960a4f23..4c8d01799931 100644 --- a/sound/hda/codecs/side-codecs/cs35l56_hda.c +++ b/sound/hda/codecs/side-codecs/cs35l56_hda.c @@ -976,6 +976,7 @@ static int cs35l56_hda_system_resume(struct device *dev) static int cs35l56_hda_fixup_yoga9(struct cs35l56_hda *cs35l56, int *bus_addr) { /* The cirrus,dev-index property has the wrong values */ + cs35l56->num_amps = 2; switch (*bus_addr) { case 0x30: cs35l56->index = 1; @@ -1025,7 +1026,6 @@ static int cs35l56_hda_read_acpi(struct cs35l56_hda *cs35l56, int hid, int id) char hid_string[8]; struct acpi_device *adev; const char *property, *sub; - size_t nval; int i, ret; /* @@ -1061,13 +1061,14 @@ static int cs35l56_hda_read_acpi(struct cs35l56_hda *cs35l56, int hid, int id) ret = -EINVAL; goto err; } - nval = ret; + cs35l56->num_amps = ret; - ret = device_property_read_u32_array(cs35l56->base.dev, property, values, nval); + ret = device_property_read_u32_array(cs35l56->base.dev, property, values, + cs35l56->num_amps); if (ret) goto err; - for (i = 0; i < nval; i++) { + for (i = 0; i < cs35l56->num_amps; i++) { if (values[i] == id) { cs35l56->index = i; break; @@ -1090,7 +1091,8 @@ static int cs35l56_hda_read_acpi(struct cs35l56_hda *cs35l56, int hid, int id) "Read ACPI _SUB failed(%ld): fallback to generic firmware\n", PTR_ERR(sub)); } else { - ret = cirrus_scodec_get_speaker_id(cs35l56->base.dev, cs35l56->index, nval, -1); + ret = cirrus_scodec_get_speaker_id(cs35l56->base.dev, cs35l56->index, + cs35l56->num_amps, -1); if (ret == -ENOENT) { cs35l56->system_name = sub; } else if (ret >= 0) { diff --git a/sound/hda/codecs/side-codecs/cs35l56_hda.h b/sound/hda/codecs/side-codecs/cs35l56_hda.h index cb4b5e7356a3..3705af7c186b 100644 --- a/sound/hda/codecs/side-codecs/cs35l56_hda.h +++ b/sound/hda/codecs/side-codecs/cs35l56_hda.h @@ -26,6 +26,7 @@ struct cs35l56_hda { struct work_struct dsp_work; int index; + int num_amps; const char *system_name; const char *amp_name; -- cgit v1.2.3 From e052a1f7199260eda4d6ca08a59c3b98738f8491 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Wed, 29 Apr 2026 13:42:06 +0800 Subject: ALSA: hda/tas2781: Fix incorrect bit update for non-book-zero or book 0 pages >1 In TAS2781 SPI mode, when accessing non-book-zero or page numbers greater than 1 in book 0, an additional byte must be read. The first byte in such cases is a dummy byte and should be ignored. Fixes: 9fa6a693ad8d ("ALSA: hda/tas2781: Remove tas2781_spi_fwlib.c and leverage SND_SOC_TAS2781_FMWLIB") Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20260429054206.429-1-shenghao-ding@ti.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/tas2781_hda_spi.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sound/hda/codecs/side-codecs/tas2781_hda_spi.c b/sound/hda/codecs/side-codecs/tas2781_hda_spi.c index 560f2385212d..0e4f3553f273 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda_spi.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda_spi.c @@ -132,10 +132,18 @@ static int tasdevice_spi_dev_update_bits(struct tasdevice_priv *tas_priv, int ret, val; /* - * In our TAS2781 SPI mode, read/write was masked in last bit of - * address, it cause regmap_update_bits() not work as expected. + * In TAS2781 SPI mode, when accessing non-book-zero or page numbers + * greater than 1 in book 0, an additional byte must be read. The + * first byte in such cases is a dummy byte and should be ignored. */ - ret = tasdevice_dev_read(tas_priv, chn, reg, &val); + if ((TASDEVICE_BOOK_ID(reg) > 0) || (TASDEVICE_PAGE_ID(reg) > 1)) { + unsigned char buf[2]; + + ret = tasdevice_dev_bulk_read(tas_priv, chn, reg, buf, 2); + val = buf[1]; + } else { + ret = tasdevice_dev_read(tas_priv, chn, reg, &val); + } if (ret < 0) { dev_err(tas_priv->dev, "%s, E=%d\n", __func__, ret); return ret; -- cgit v1.2.3 From 883a32793c86091ea37bb84f88cc697d019e7a5d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 28 Apr 2026 12:38:47 +0200 Subject: efi/libstub: Move efi_relocate_kernel() into its only remaining user LoongArch is the only arch that still uses efi_relocate_kernel(), so before making changes to it that LoongArch needs, turn it into a private function. Move efi_low_alloc_above() into mem.c while at it, and drop the relocate.c source file altogether. Tested-by: WANG Rui Reviewed-by: Thomas Huth Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/Makefile | 2 +- drivers/firmware/efi/libstub/efistub.h | 7 -- drivers/firmware/efi/libstub/loongarch-stub.c | 79 ++++++++++++ drivers/firmware/efi/libstub/mem.c | 82 +++++++++++++ drivers/firmware/efi/libstub/relocate.c | 166 -------------------------- 5 files changed, 162 insertions(+), 174 deletions(-) delete mode 100644 drivers/firmware/efi/libstub/relocate.c diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 983a438e35f3..cfedb3025c26 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -66,7 +66,7 @@ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ lib-y := efi-stub-helper.o gop.o secureboot.o tpm.o \ file.o mem.o random.o randomalloc.o pci.o \ skip_spaces.o lib-cmdline.o lib-ctype.o \ - alignedmem.o relocate.o printk.o vsprintf.o + alignedmem.o printk.o vsprintf.o # include the stub's libfdt dependencies from lib/ when needed libfdt-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c \ diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 979a21818cc1..fd91fc15ec81 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -1104,13 +1104,6 @@ efi_status_t efi_allocate_pages_aligned(unsigned long size, unsigned long *addr, efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, unsigned long *addr, unsigned long min); -efi_status_t efi_relocate_kernel(unsigned long *image_addr, - unsigned long image_size, - unsigned long alloc_size, - unsigned long preferred_addr, - unsigned long alignment, - unsigned long min_addr); - efi_status_t efi_parse_options(char const *cmdline); void efi_parse_option_graphics(char *option); diff --git a/drivers/firmware/efi/libstub/loongarch-stub.c b/drivers/firmware/efi/libstub/loongarch-stub.c index 736b6aae323d..8c538a5243d9 100644 --- a/drivers/firmware/efi/libstub/loongarch-stub.c +++ b/drivers/firmware/efi/libstub/loongarch-stub.c @@ -14,6 +14,85 @@ extern int kernel_asize; extern int kernel_fsize; extern int kernel_entry; +/** + * efi_relocate_kernel() - copy memory area + * @image_addr: pointer to address of memory area to copy + * @image_size: size of memory area to copy + * @alloc_size: minimum size of memory to allocate, must be greater or + * equal to image_size + * @preferred_addr: preferred target address + * @alignment: minimum alignment of the allocated memory area. It + * should be a power of two. + * @min_addr: minimum target address + * + * Copy a memory area to a newly allocated memory area aligned according + * to @alignment but at least EFI_ALLOC_ALIGN. If the preferred address + * is not available, the allocated address will not be below @min_addr. + * On exit, @image_addr is updated to the target copy address that was used. + * + * This function is used to copy the Linux kernel verbatim. It does not apply + * any relocation changes. + * + * Return: status code + */ +static +efi_status_t efi_relocate_kernel(unsigned long *image_addr, + unsigned long image_size, + unsigned long alloc_size, + unsigned long preferred_addr, + unsigned long alignment, + unsigned long min_addr) +{ + unsigned long cur_image_addr; + unsigned long new_addr = 0; + efi_status_t status; + unsigned long nr_pages; + efi_physical_addr_t efi_addr = preferred_addr; + + if (!image_addr || !image_size || !alloc_size) + return EFI_INVALID_PARAMETER; + if (alloc_size < image_size) + return EFI_INVALID_PARAMETER; + + cur_image_addr = *image_addr; + + /* + * The EFI firmware loader could have placed the kernel image + * anywhere in memory, but the kernel has restrictions on the + * max physical address it can run at. Some architectures + * also have a preferred address, so first try to relocate + * to the preferred address. If that fails, allocate as low + * as possible while respecting the required alignment. + */ + nr_pages = round_up(alloc_size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, nr_pages, &efi_addr); + new_addr = efi_addr; + /* + * If preferred address allocation failed allocate as low as + * possible. + */ + if (status != EFI_SUCCESS) { + status = efi_low_alloc_above(alloc_size, alignment, &new_addr, + min_addr); + } + if (status != EFI_SUCCESS) { + efi_err("Failed to allocate usable memory for kernel.\n"); + return status; + } + + /* + * We know source/dest won't overlap since both memory ranges + * have been allocated by UEFI, so we can safely use memcpy. + */ + memcpy((void *)new_addr, (void *)cur_image_addr, image_size); + + /* Return the new address of the relocated image. */ + *image_addr = new_addr; + + return status; +} + efi_status_t handle_kernel_image(unsigned long *image_addr, unsigned long *image_size, unsigned long *reserve_addr, diff --git a/drivers/firmware/efi/libstub/mem.c b/drivers/firmware/efi/libstub/mem.c index 9c82259eea81..59f3f83de50c 100644 --- a/drivers/firmware/efi/libstub/mem.c +++ b/drivers/firmware/efi/libstub/mem.c @@ -124,3 +124,85 @@ void efi_free(unsigned long size, unsigned long addr) nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; efi_bs_call(free_pages, addr, nr_pages); } + +/** + * efi_low_alloc_above() - allocate pages at or above given address + * @size: size of the memory area to allocate + * @align: minimum alignment of the allocated memory area. It should + * a power of two. + * @addr: on exit the address of the allocated memory + * @min: minimum address to used for the memory allocation + * + * Allocate at the lowest possible address that is not below @min as + * EFI_LOADER_DATA. The allocated pages are aligned according to @align but at + * least EFI_ALLOC_ALIGN. The first allocated page will not below the address + * given by @min. + * + * Return: status code + */ +efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, + unsigned long *addr, unsigned long min) +{ + struct efi_boot_memmap *map __free(efi_pool) = NULL; + efi_status_t status; + unsigned long nr_pages; + int i; + + status = efi_get_memory_map(&map, false); + if (status != EFI_SUCCESS) + return status; + + /* + * Enforce minimum alignment that EFI or Linux requires when + * requesting a specific address. We are doing page-based (or + * larger) allocations, and both the address and size must meet + * alignment constraints. + */ + if (align < EFI_ALLOC_ALIGN) + align = EFI_ALLOC_ALIGN; + + size = round_up(size, EFI_ALLOC_ALIGN); + nr_pages = size / EFI_PAGE_SIZE; + for (i = 0; i < map->map_size / map->desc_size; i++) { + efi_memory_desc_t *desc; + unsigned long m = (unsigned long)map->map; + u64 start, end; + + desc = efi_memdesc_ptr(m, map->desc_size, i); + + if (desc->type != EFI_CONVENTIONAL_MEMORY) + continue; + + if (desc->attribute & EFI_MEMORY_HOT_PLUGGABLE) + continue; + + if (efi_soft_reserve_enabled() && + (desc->attribute & EFI_MEMORY_SP)) + continue; + + if (desc->num_pages < nr_pages) + continue; + + start = desc->phys_addr; + end = start + desc->num_pages * EFI_PAGE_SIZE; + + if (start < min) + start = min; + + start = round_up(start, align); + if ((start + size) > end) + continue; + + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, nr_pages, &start); + if (status == EFI_SUCCESS) { + *addr = start; + break; + } + } + + if (i == map->map_size / map->desc_size) + return EFI_NOT_FOUND; + + return EFI_SUCCESS; +} diff --git a/drivers/firmware/efi/libstub/relocate.c b/drivers/firmware/efi/libstub/relocate.c deleted file mode 100644 index d4264bfb6dc1..000000000000 --- a/drivers/firmware/efi/libstub/relocate.c +++ /dev/null @@ -1,166 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include - -#include "efistub.h" - -/** - * efi_low_alloc_above() - allocate pages at or above given address - * @size: size of the memory area to allocate - * @align: minimum alignment of the allocated memory area. It should - * a power of two. - * @addr: on exit the address of the allocated memory - * @min: minimum address to used for the memory allocation - * - * Allocate at the lowest possible address that is not below @min as - * EFI_LOADER_DATA. The allocated pages are aligned according to @align but at - * least EFI_ALLOC_ALIGN. The first allocated page will not below the address - * given by @min. - * - * Return: status code - */ -efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, - unsigned long *addr, unsigned long min) -{ - struct efi_boot_memmap *map __free(efi_pool) = NULL; - efi_status_t status; - unsigned long nr_pages; - int i; - - status = efi_get_memory_map(&map, false); - if (status != EFI_SUCCESS) - return status; - - /* - * Enforce minimum alignment that EFI or Linux requires when - * requesting a specific address. We are doing page-based (or - * larger) allocations, and both the address and size must meet - * alignment constraints. - */ - if (align < EFI_ALLOC_ALIGN) - align = EFI_ALLOC_ALIGN; - - size = round_up(size, EFI_ALLOC_ALIGN); - nr_pages = size / EFI_PAGE_SIZE; - for (i = 0; i < map->map_size / map->desc_size; i++) { - efi_memory_desc_t *desc; - unsigned long m = (unsigned long)map->map; - u64 start, end; - - desc = efi_memdesc_ptr(m, map->desc_size, i); - - if (desc->type != EFI_CONVENTIONAL_MEMORY) - continue; - - if (desc->attribute & EFI_MEMORY_HOT_PLUGGABLE) - continue; - - if (efi_soft_reserve_enabled() && - (desc->attribute & EFI_MEMORY_SP)) - continue; - - if (desc->num_pages < nr_pages) - continue; - - start = desc->phys_addr; - end = start + desc->num_pages * EFI_PAGE_SIZE; - - if (start < min) - start = min; - - start = round_up(start, align); - if ((start + size) > end) - continue; - - status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, nr_pages, &start); - if (status == EFI_SUCCESS) { - *addr = start; - break; - } - } - - if (i == map->map_size / map->desc_size) - return EFI_NOT_FOUND; - - return EFI_SUCCESS; -} - -/** - * efi_relocate_kernel() - copy memory area - * @image_addr: pointer to address of memory area to copy - * @image_size: size of memory area to copy - * @alloc_size: minimum size of memory to allocate, must be greater or - * equal to image_size - * @preferred_addr: preferred target address - * @alignment: minimum alignment of the allocated memory area. It - * should be a power of two. - * @min_addr: minimum target address - * - * Copy a memory area to a newly allocated memory area aligned according - * to @alignment but at least EFI_ALLOC_ALIGN. If the preferred address - * is not available, the allocated address will not be below @min_addr. - * On exit, @image_addr is updated to the target copy address that was used. - * - * This function is used to copy the Linux kernel verbatim. It does not apply - * any relocation changes. - * - * Return: status code - */ -efi_status_t efi_relocate_kernel(unsigned long *image_addr, - unsigned long image_size, - unsigned long alloc_size, - unsigned long preferred_addr, - unsigned long alignment, - unsigned long min_addr) -{ - unsigned long cur_image_addr; - unsigned long new_addr = 0; - efi_status_t status; - unsigned long nr_pages; - efi_physical_addr_t efi_addr = preferred_addr; - - if (!image_addr || !image_size || !alloc_size) - return EFI_INVALID_PARAMETER; - if (alloc_size < image_size) - return EFI_INVALID_PARAMETER; - - cur_image_addr = *image_addr; - - /* - * The EFI firmware loader could have placed the kernel image - * anywhere in memory, but the kernel has restrictions on the - * max physical address it can run at. Some architectures - * also have a preferred address, so first try to relocate - * to the preferred address. If that fails, allocate as low - * as possible while respecting the required alignment. - */ - nr_pages = round_up(alloc_size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; - status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, nr_pages, &efi_addr); - new_addr = efi_addr; - /* - * If preferred address allocation failed allocate as low as - * possible. - */ - if (status != EFI_SUCCESS) { - status = efi_low_alloc_above(alloc_size, alignment, &new_addr, - min_addr); - } - if (status != EFI_SUCCESS) { - efi_err("Failed to allocate usable memory for kernel.\n"); - return status; - } - - /* - * We know source/dest won't overlap since both memory ranges - * have been allocated by UEFI, so we can safely use memcpy. - */ - memcpy((void *)new_addr, (void *)cur_image_addr, image_size); - - /* Return the new address of the relocated image. */ - *image_addr = new_addr; - - return status; -} -- cgit v1.2.3 From ad6f4f3ea72f866176f9dd6031c8778da088c686 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Mon, 27 Apr 2026 16:47:20 +0800 Subject: efi/loongarch: Implement efi_cache_sync_image() Provide a LoongArch implementation of efi_cache_sync_image() to ensure instruction cache coherency after the kernel image is relocated. Signed-off-by: WANG Rui Reviewed-by: Huacai Chen Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/loongarch.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/firmware/efi/libstub/loongarch.c b/drivers/firmware/efi/libstub/loongarch.c index 9825f5218137..f7938d5c196a 100644 --- a/drivers/firmware/efi/libstub/loongarch.c +++ b/drivers/firmware/efi/libstub/loongarch.c @@ -18,6 +18,11 @@ efi_status_t check_platform_features(void) return EFI_SUCCESS; } +void efi_cache_sync_image(unsigned long image_base, unsigned long alloc_size) +{ + asm volatile ("ibar 0" ::: "memory"); +} + struct exit_boot_struct { efi_memory_desc_t *runtime_map; int runtime_entry_count; -- cgit v1.2.3 From cda92ac47c024d84f6b8294e462d6272039a10ac Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Mon, 27 Apr 2026 16:47:21 +0800 Subject: efi/libstub: Synchronize instruction cache after kernel relocation The relocated kernel image is copied to its new location using memcpy(). On architectures with separate instruction and data caches, the copied instructions may remain stale in the instruction cache, leading to the execution of outdated contents. Call efi_cache_sync_image() after the relocation copy to ensure the instruction cache is synchronized with the updated memory contents before control is transferred to the relocated kernel. Signed-off-by: WANG Rui Reviewed-by: Huacai Chen Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/loongarch-stub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/libstub/loongarch-stub.c b/drivers/firmware/efi/libstub/loongarch-stub.c index 8c538a5243d9..c87ac7025107 100644 --- a/drivers/firmware/efi/libstub/loongarch-stub.c +++ b/drivers/firmware/efi/libstub/loongarch-stub.c @@ -86,6 +86,7 @@ efi_status_t efi_relocate_kernel(unsigned long *image_addr, * have been allocated by UEFI, so we can safely use memcpy. */ memcpy((void *)new_addr, (void *)cur_image_addr, image_size); + efi_cache_sync_image(new_addr, image_size); /* Return the new address of the relocated image. */ *image_addr = new_addr; -- cgit v1.2.3 From bc7304f3ae20972d11db6e0b1b541c63feda5f05 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 28 Apr 2026 12:34:25 +0200 Subject: futex: Prevent lockup in requeue-PI during signal/ timeout wakeup During wait-requeue-pi (task A) and requeue-PI (task B) the following race can happen: Task A Task B futex_wait_requeue_pi() futex_setup_timer() futex_do_wait() futex_requeue() CLASS(hb, hb1)(&key1); CLASS(hb, hb2)(&key2); *timeout* futex_requeue_pi_wakeup_sync() requeue_state = Q_REQUEUE_PI_IGNORE *blocks on hb->lock* futex_proxy_trylock_atomic() futex_requeue_pi_prepare() Q_REQUEUE_PI_IGNORE => -EAGAIN double_unlock_hb(hb1, hb2) *retry* Task B acquires both hb locks and attempts to acquire the PI-lock of the top most waiter (task B). Task A is leaving early due to a signal/ timeout and started removing itself from the queue. It updates its requeue_state but can not remove it from the list because this requires the hb lock which is owned by task B. Usually task A is able to swoop the lock after task B unlocked it. However if task B is of higher priority then task A may not be able to wake up in time and acquire the lock before task B gets it again. Especially on a UP system where A is never scheduled. As a result task A blocks on the lock and task B busy loops, trying to make progress but live locks the system instead. Tragic. This can be fixed by removing the top most waiter from the list in this case. This allows task B to grab the next top waiter (if any) in the next iteration and make progress. Remove the top most waiter if futex_requeue_pi_prepare() fails. Let the waiter conditionally remove itself from the list in handle_early_requeue_pi_wakeup(). Fixes: 07d91ef510fb1 ("futex: Prevent requeue_pi() lock nesting issue on RT") Reported-by: Moritz Klammler Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260428103425.dywXyPd3@linutronix.de Closes: https://lore.kernel.org/all/VE1PR06MB6894BE61C173D802365BE19DFF4CA@VE1PR06MB6894.eurprd06.prod.outlook.com --- kernel/futex/requeue.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c index d818b4d47f1b..b597cb3d17fc 100644 --- a/kernel/futex/requeue.c +++ b/kernel/futex/requeue.c @@ -319,8 +319,11 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, return -EINVAL; /* Ensure that this does not race against an early wakeup */ - if (!futex_requeue_pi_prepare(top_waiter, NULL)) + if (!futex_requeue_pi_prepare(top_waiter, NULL)) { + plist_del(&top_waiter->list, &hb1->chain); + futex_hb_waiters_dec(hb1); return -EAGAIN; + } /* * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit @@ -722,10 +725,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, /* * We were woken prior to requeue by a timeout or a signal. - * Unqueue the futex_q and determine which it was. + * Conditionally unqueue the futex_q and determine which it was. */ - plist_del(&q->list, &hb->chain); - futex_hb_waiters_dec(hb); + if (!plist_node_empty(&q->list)) { + plist_del(&q->list, &hb->chain); + futex_hb_waiters_dec(hb); + } /* Handle spurious wakeups gracefully */ ret = -EWOULDBLOCK; -- cgit v1.2.3 From 28465227c80fe417b4013c432be1f3737cb9f9a3 Mon Sep 17 00:00:00 2001 From: Ruijie Li Date: Wed, 29 Apr 2026 00:41:43 +0800 Subject: xfrm: provide message size for XFRM_MSG_MAPPING The compat 64=>32 translation path handles XFRM_MSG_MAPPING, but xfrm_msg_min[] does not provide the native payload size for this message type. Add the missing XFRM_MSG_MAPPING entry so compat translation can size and translate mapping notifications correctly. Fixes: 5461fc0c8d9f ("xfrm/compat: Add 64=>32-bit messages translator") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Ruijie Li Signed-off-by: Ren Wei Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d56450f61669..38a90e5ee3d9 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -3323,6 +3323,7 @@ const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_MAPPING - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_mapping), [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), }; -- cgit v1.2.3 From 14acf9652e5690de3c7486c6db5fb8dafd0a32a3 Mon Sep 17 00:00:00 2001 From: Michal Kosiorek Date: Wed, 29 Apr 2026 10:54:51 +0200 Subject: xfrm: defensively unhash xfrm_state lists in __xfrm_state_delete KASAN reproduces a slab-use-after-free in __xfrm_state_delete()'s hlist_del_rcu calls under syzkaller load on linux-6.12.y stable (reproduced on 6.12.47, also reachable via the same code path on torvalds/master and on the ipsec tree). Nine unique signatures cluster in the xfrm_state lifecycle, the load-bearing one being: BUG: KASAN: slab-use-after-free in __hlist_del include/linux/list.h:990 [inline] BUG: KASAN: slab-use-after-free in hlist_del_rcu include/linux/rculist.h:516 [inline] BUG: KASAN: slab-use-after-free in __xfrm_state_delete net/xfrm/xfrm_state.c Write of size 8 at addr ffff8881198bcb70 by task kworker/u8:9/435 Workqueue: netns cleanup_net Call Trace: __hlist_del / hlist_del_rcu __xfrm_state_delete xfrm_state_delete xfrm_state_flush xfrm_state_fini ops_exit_list cleanup_net The other observed signatures hit the same slab object from __xfrm_state_lookup, xfrm_alloc_spi, __xfrm_state_insert and an OOB write variant of __xfrm_state_delete, all on the byseq/byspi hash chains. __xfrm_state_delete() guards its byseq and byspi unhashes with value-based predicates: if (x->km.seq) hlist_del_rcu(&x->byseq); if (x->id.spi) hlist_del_rcu(&x->byspi); while everywhere else in the file (e.g. state_cache, state_cache_input) the safer hlist_unhashed() check is used. xfrm_alloc_spi() sets x->id.spi = newspi inside xfrm_state_lock and then immediately inserts into byspi, but a path that observes x->id.spi != 0 outside of xfrm_state_lock can still skip-or-hit the byspi unhash inconsistently with whether x is actually on the list. The same holds for x->km.seq versus byseq, and the bydst/bysrc unhashes have no predicate at all, so a second __xfrm_state_delete() on the same object writes through LIST_POISON pprev. The defensive change here: - Use hlist_del_init_rcu() instead of hlist_del_rcu() on bydst, bysrc, byseq and byspi so a second deletion is a no-op rather than a write through LIST_POISON pprev. The byseq/byspi nodes are already initialised in xfrm_state_alloc(). - Test hlist_unhashed() rather than the value predicate for byseq/byspi, so the unhash decision tracks list state rather than mutable scalar fields. Empirical verification: applied this patch on top of v6.12.47, rebuilt, and re-ran the same syzkaller harness for 1h16m on a previously-crashy configuration that produced ~100 hits each of slab-use-after-free Read in xfrm_alloc_spi / Read in __xfrm_state_lookup / Write in __xfrm_state_delete. After the patch, 7.1M execs across 32 VMs at ~1550 exec/sec produced zero xfrm_state UAF/OOB hits. /proc/slabinfo confirms the xfrm_state slab is actively allocated and freed during the run (~143 KiB resident), so the fuzzer is still exercising those code paths -- they just no longer crash. Reproduction: - Linux 6.12.47 x86_64 + KASAN_GENERIC + KASAN_INLINE + KCOV - syzkaller @ 746545b8b1e4c3a128db8652b340d3df90ce61db - 32 QEMU/KVM VMs x 2 vCPU on AWS c5.metal bare metal - 9 unique signatures collected in ~9h, all within xfrm_state lifecycle Fixes: fe9f1d8779cb ("xfrm: add state hashtable keyed by seq") Fixes: 7b4dc3600e48 ("[XFRM]: Do not add a state whose SPI is zero to the SPI hash.") Reported-by: Michal Kosiorek Tested-by: Michal Kosiorek Cc: stable@vger.kernel.org Signed-off-by: Michal Kosiorek Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1748d374abca..686014d39429 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -818,17 +818,17 @@ int __xfrm_state_delete(struct xfrm_state *x) spin_lock(&net->xfrm.xfrm_state_lock); list_del(&x->km.all); - hlist_del_rcu(&x->bydst); - hlist_del_rcu(&x->bysrc); - if (x->km.seq) - hlist_del_rcu(&x->byseq); + hlist_del_init_rcu(&x->bydst); + hlist_del_init_rcu(&x->bysrc); + if (!hlist_unhashed(&x->byseq)) + hlist_del_init_rcu(&x->byseq); if (!hlist_unhashed(&x->state_cache)) hlist_del_rcu(&x->state_cache); if (!hlist_unhashed(&x->state_cache_input)) hlist_del_rcu(&x->state_cache_input); - if (x->id.spi) - hlist_del_rcu(&x->byspi); + if (!hlist_unhashed(&x->byspi)) + hlist_del_init_rcu(&x->byspi); net->xfrm.state_num--; xfrm_nat_keepalive_state_updated(x); spin_unlock(&net->xfrm.xfrm_state_lock); -- cgit v1.2.3 From a1fc7bf6677eb547167cb72b3bcafdc34b976692 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Wed, 22 Apr 2026 12:29:56 -0400 Subject: drm/amd/display: Restore 5s vbl offdelay for NV3x+ DGPUs [Why] Rapid vblank off is causing flip-done timeouts for NV3x and newer family of GPUs that support more idle optimization features. A proper fix requires further investigation. In lieu of it, let's workaround it for now. [How] For NV3x and newer family of DGPUs, restore the old 5s vblank off timer. Fixes: 9b47278cec98 ("drm/amd/display: temp w/a for dGPU to enter idle optimizations") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3787 Link: https://lore.kernel.org/amd-gfx/20260217191632.1243826-1-sysdadmin@m1k.cloud/ Tested-by: Michele Palazzi Reviewed-by: Mario Limonciello Signed-off-by: Leo Li Signed-off-by: Alex Deucher (cherry picked from commit df482c2d441b090161633566b7a0755f1bbd55c2) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f8f9953565f6..5fc5d5608506 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9408,9 +9408,21 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, if (acrtc_state) { timing = &acrtc_state->stream->timing; - if (amdgpu_ip_version(adev, DCE_HWIP, 0) < - IP_VERSION(3, 5, 0) || - !(adev->flags & AMD_IS_APU)) { + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= + IP_VERSION(3, 2, 0) && + !(adev->flags & AMD_IS_APU)) { + /* + * DGPUs NV3x and newer that support idle optimizations + * experience intermittent flip-done timeouts on cursor + * updates. Restore 5s offdelay behavior for now. + * + * Discussion on the issue: + * https://lore.kernel.org/amd-gfx/20260217191632.1243826-1-sysdadmin@m1k.cloud/ + */ + config.offdelay_ms = 5000; + config.disable_immediate = false; + } else if (amdgpu_ip_version(adev, DCE_HWIP, 0) < + IP_VERSION(3, 5, 0)) { /* * Older HW and DGPU have issues with instant off; * use a 2 frame offdelay. -- cgit v1.2.3 From 494941aa772dab79251543764db6cd14bd337e43 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Tue, 28 Apr 2026 13:40:40 +0200 Subject: drm/amd/display: Allow embedded connectors without DDC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some laptops, the embedded panel may not have a DDC (display data channel) available. On these, the EDID may be hardcoded in ACPI or the VBIOS. In this case, use GPIO_DDC_LINE_UNKNOWN and don't fail. Fixes: def3488eb0fd ("drm/amd/display: refactor HPD to increase flexibility") Link: https://gitlab.freedesktop.org/drm/amd/-/work_items/5192 Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit 75b8a6ca0e8bc3ce24572f854e95f8721b321179) --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c | 3 +++ drivers/gpu/drm/amd/display/dc/link/link_factory.c | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7f55ba09b191..37714d4371fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1682,7 +1682,7 @@ struct dc_scratch_space { struct dc_link_training_overrides preferred_training_settings; struct dp_audio_test_data audio_test_data; - uint8_t ddc_hw_inst; + enum gpio_ddc_line ddc_hw_inst; uint8_t hpd_src; diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c index a2c46350e44e..95f8b7c7d657 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c @@ -646,6 +646,9 @@ failure: enum gpio_ddc_line dal_ddc_get_line( const struct ddc *ddc) { + if (!ddc) + return GPIO_DDC_LINE_UNKNOWN; + return (enum gpio_ddc_line)dal_gpio_get_enum(ddc->pin_data); } diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 7e7682d7dfc8..ae4c4ad05baa 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -568,7 +568,9 @@ static bool construct_phy(struct dc_link *link, goto ddc_create_fail; } - if (!link->ddc->ddc_pin) { + /* Embedded display connectors such as LVDS may not have DDC. */ + if (!link->ddc->ddc_pin && + !dc_is_embedded_signal(link->connector_signal)) { DC_ERROR("Failed to get I2C info for connector!\n"); goto ddc_create_fail; } -- cgit v1.2.3 From ac27e3f99035f132f23bc0409d0e57f11f054c70 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Tue, 28 Apr 2026 13:40:41 +0200 Subject: drm/amd/display: Allow DCE link encoder without AUX registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow constructing the DCE link encoder without DDC, which means the AUX registers array will be NULL. This is necessary to support embedded connectors without DDC. Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)") Link: https://gitlab.freedesktop.org/drm/amd/-/work_items/5192 Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit 87f30b101af62590faf6020d106da07efdda199b) --- drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 5f40ae9e3120..e15fd1454d3b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -1102,7 +1102,9 @@ void dce110_link_encoder_hw_init( ASSERT(result == BP_RESULT_OK); } - aux_initialize(enc110); + + if (enc110->aux_regs) + aux_initialize(enc110); /* reinitialize HPD. * hpd_initialize() will pass DIG_FE id to HW context. -- cgit v1.2.3 From 880498a1943f865529819f778df3b9945ca57262 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Tue, 28 Apr 2026 13:40:42 +0200 Subject: drm/amd/display: Allow constructing DCE6 link encoder without DDC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the DDC channel ID is set to CHANNEL_ID_UNKNOWN, pass NULL to the AUX regs array. This is necessary to support embedded connectors without DDC. Fixes: 7c15fd86aaec ("drm/amd/display: dc/dce: add initial DCE6 support (v10)") Link: https://gitlab.freedesktop.org/drm/amd/-/work_items/5192 Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit 38a70e50b22a188ff601740d64dd75f46213121f) --- drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index 6a25dcfcdf17..d2d56a1c4b8b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -753,7 +753,8 @@ static struct link_encoder *dce60_link_encoder_create( enc_init_data, &link_enc_feature, &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], + enc_init_data->channel == CHANNEL_ID_UNKNOWN ? + NULL : &link_enc_aux_regs[enc_init_data->channel - 1], enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; -- cgit v1.2.3 From 60af4605ef35ecb7ad649a8534b83a2f7c69576d Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Tue, 28 Apr 2026 13:40:43 +0200 Subject: drm/amd/display: Allow constructing DCE8 link encoder without DDC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the DDC channel ID is set to CHANNEL_ID_UNKNOWN, pass NULL to the AUX regs array. This is necessary to support embedded connectors without DDC. Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)") Link: https://gitlab.freedesktop.org/drm/amd/-/work_items/5192 Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit 155baf3038c1af50b602723022ed869b38e86a99) --- drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index 33be49b3c1b1..6c00497e9a01 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -760,7 +760,8 @@ static struct link_encoder *dce80_link_encoder_create( enc_init_data, &link_enc_feature, &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], + enc_init_data->channel == CHANNEL_ID_UNKNOWN ? + NULL : &link_enc_aux_regs[enc_init_data->channel - 1], enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; -- cgit v1.2.3 From 9ea16f64189bf7b6ba50fc7f0325b3c1f836d105 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Tue, 28 Apr 2026 13:40:44 +0200 Subject: drm/amd/display: Read EDID from VBIOS embedded panel info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some board manufacturers hardcode the EDID for the embedded panel in the VBIOS. This EDID should be used when the panel doesn't have a DDC. For reference, see the legacy non-DC display code: amdgpu_atombios_encoder_get_lcd_info() This is necessary to support embedded connectors without DDC. Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)") Link: https://gitlab.freedesktop.org/drm/amd/-/work_items/5192 Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit eb105e63b474c11ef6a84a1c6b18100d851ff364) --- drivers/gpu/drm/amd/display/dc/bios/bios_parser.c | 62 ++++++++++++++++++++++ .../amd/display/include/grph_object_ctrl_defs.h | 4 ++ 2 files changed, 66 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index e270b1d2457c..c307f42fe0b9 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -1313,6 +1313,60 @@ static enum bp_result bios_parser_get_embedded_panel_info( return BP_RESULT_FAILURE; } +static enum bp_result get_embedded_panel_extra_info( + struct bios_parser *bp, + struct embedded_panel_info *info, + const uint32_t table_offset) +{ + uint8_t *record = bios_get_image(&bp->base, table_offset, 1); + ATOM_PANEL_RESOLUTION_PATCH_RECORD *panel_res_record; + ATOM_FAKE_EDID_PATCH_RECORD *fake_edid_record; + + while (*record != ATOM_RECORD_END_TYPE) { + switch (*record) { + case LCD_MODE_PATCH_RECORD_MODE_TYPE: + record += sizeof(ATOM_PATCH_RECORD_MODE); + break; + case LCD_RTS_RECORD_TYPE: + record += sizeof(ATOM_LCD_RTS_RECORD); + break; + case LCD_CAP_RECORD_TYPE: + record += sizeof(ATOM_LCD_MODE_CONTROL_CAP); + break; + case LCD_FAKE_EDID_PATCH_RECORD_TYPE: + fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; + if (fake_edid_record->ucFakeEDIDLength) { + if (fake_edid_record->ucFakeEDIDLength == 128) + info->fake_edid_size = + fake_edid_record->ucFakeEDIDLength; + else + info->fake_edid_size = + fake_edid_record->ucFakeEDIDLength * 128; + + info->fake_edid = fake_edid_record->ucFakeEDIDString; + + record += struct_size(fake_edid_record, + ucFakeEDIDString, + info->fake_edid_size); + } else { + /* empty fake edid record must be 3 bytes long */ + record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; + } + break; + case LCD_PANEL_RESOLUTION_RECORD_TYPE: + panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; + info->panel_width_mm = panel_res_record->usHSize; + info->panel_height_mm = panel_res_record->usVSize; + record += sizeof(ATOM_PANEL_RESOLUTION_PATCH_RECORD); + break; + default: + return BP_RESULT_BADBIOSTABLE; + } + } + + return BP_RESULT_OK; +} + static enum bp_result get_embedded_panel_info_v1_2( struct bios_parser *bp, struct embedded_panel_info *info) @@ -1429,6 +1483,10 @@ static enum bp_result get_embedded_panel_info_v1_2( if (ATOM_PANEL_MISC_API_ENABLED & lvds->ucLVDS_Misc) info->lcd_timing.misc_info.API_ENABLED = true; + if (lvds->usExtInfoTableOffset) + return get_embedded_panel_extra_info(bp, info, + le16_to_cpu(lvds->usExtInfoTableOffset) + DATA_TABLES(LCD_Info)); + return BP_RESULT_OK; } @@ -1554,6 +1612,10 @@ static enum bp_result get_embedded_panel_info_v1_3( (uint32_t) (ATOM_PANEL_MISC_V13_GREY_LEVEL & lvds->ucLCD_Misc) >> ATOM_PANEL_MISC_V13_GREY_LEVEL_SHIFT; + if (lvds->usExtInfoTableOffset) + return get_embedded_panel_extra_info(bp, info, + le16_to_cpu(lvds->usExtInfoTableOffset) + DATA_TABLES(LCD_Info)); + return BP_RESULT_OK; } diff --git a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h index 38a77fa9b4af..a0f03fb67605 100644 --- a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h +++ b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h @@ -153,6 +153,10 @@ struct embedded_panel_info { uint32_t drr_enabled; uint32_t min_drr_refresh_rate; bool realtek_eDPToLVDS; + uint16_t panel_width_mm; + uint16_t panel_height_mm; + uint16_t fake_edid_size; + const uint8_t *fake_edid; }; struct dc_firmware_info { -- cgit v1.2.3 From 019155e2bd3e2cec425553195e9f9bc76bb0f848 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Tue, 28 Apr 2026 13:40:45 +0200 Subject: drm/amd/display: Use EDID from VBIOS embedded panel info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an embedded panel has no DDC, read the EDID from the VBIOS embedded panel info and use that. Fixes: 7c7f5b15be65 ("drm/amd/display: Refactor edid read.") Link: https://gitlab.freedesktop.org/drm/amd/-/work_items/5192 Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit 399b9abc353c62f6e37d38325edbdb6c2c00411c) --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 44 ++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 3b8ae7798a93..a3cb05490dc9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -1032,6 +1032,45 @@ dm_helpers_read_acpi_edid(struct amdgpu_dm_connector *aconnector) return drm_edid_read_custom(connector, dm_helpers_probe_acpi_edid, connector); } +static const struct drm_edid * +dm_helpers_read_vbios_hardcoded_edid(struct dc_link *link, struct amdgpu_dm_connector *aconnector) +{ + struct dc_bios *bios = link->ctx->dc_bios; + struct embedded_panel_info info; + const struct drm_edid *edid; + enum bp_result r; + + if (!dc_is_embedded_signal(link->connector_signal) || + !bios->funcs->get_embedded_panel_info) + return NULL; + + memset(&info, 0, sizeof(info)); + r = bios->funcs->get_embedded_panel_info(bios, &info); + + if (r != BP_RESULT_OK) { + dm_error("Error when reading embedded panel info: %u\n", r); + return NULL; + } + + if (!info.fake_edid || !info.fake_edid_size) { + dm_error("Embedded panel info doesn't contain an EDID\n"); + return NULL; + } + + edid = drm_edid_alloc(info.fake_edid, info.fake_edid_size); + + if (!drm_edid_valid(edid)) { + dm_error("EDID from embedded panel info is invalid\n"); + drm_edid_free(edid); + return NULL; + } + + aconnector->base.display_info.width_mm = info.panel_width_mm; + aconnector->base.display_info.height_mm = info.panel_height_mm; + + return edid; +} + void populate_hdmi_info_from_connector(struct drm_hdmi_info *hdmi, struct dc_edid_caps *edid_caps) { edid_caps->scdc_present = hdmi->scdc.supported; @@ -1052,6 +1091,9 @@ enum dc_edid_status dm_helpers_read_local_edid( if (link->aux_mode) ddc = &aconnector->dm_dp_aux.aux.ddc; + else if (link->ddc_hw_inst == GPIO_DDC_LINE_UNKNOWN && + dc_is_embedded_signal(link->connector_signal)) + ddc = NULL; else ddc = &aconnector->i2c->base; @@ -1065,6 +1107,8 @@ enum dc_edid_status dm_helpers_read_local_edid( drm_edid = dm_helpers_read_acpi_edid(aconnector); if (drm_edid) drm_info(connector->dev, "Using ACPI provided EDID for %s\n", connector->name); + else if (!ddc) + drm_edid = dm_helpers_read_vbios_hardcoded_edid(link, aconnector); else drm_edid = drm_edid_read_ddc(connector, ddc); drm_edid_connector_update(connector, drm_edid); -- cgit v1.2.3 From ab4ad35e58a74c0fc51e5b0bcfb56523e97ff65f Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Wed, 22 Apr 2026 12:49:00 +0200 Subject: smb: server: handle readdir_info_level_struct_sz() error early exit in smb2_populate_readdir_entry() if the requested info_level is unknown. Signed-off-by: Marios Makassikis Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 21825a69c29a..47b7af631f7b 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3946,7 +3946,13 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level, goto free_conv_name; } - struct_sz = readdir_info_level_struct_sz(info_level) + conv_len; + struct_sz = readdir_info_level_struct_sz(info_level); + if (struct_sz == -EOPNOTSUPP) { + rc = -EINVAL; + goto free_conv_name; + } + + struct_sz += conv_len; next_entry_offset = ALIGN(struct_sz, KSMBD_DIR_INFO_ALIGNMENT); d_info->last_entry_off_align = next_entry_offset - struct_sz; -- cgit v1.2.3 From c444139cb747bf6de1922b39900fdf02281490f4 Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Sat, 25 Apr 2026 18:38:28 +0900 Subject: ksmbd: rewrite stop_sessions() with restartable iteration stop_sessions() walks conn_list with hash_for_each() and, for every entry, drops conn_list_lock across the transport ->shutdown() call before re-acquiring the read lock to continue the loop. The hash walk relies on cross-iteration state (the current bucket and the hlist position), which is not preserved across unlock/relock: if another thread performs a list mutation during the unlocked window, the ongoing iteration becomes unreliable and can re-visit connections that have already been handled or skip connections that have not. The outer `if (!hash_empty(conn_list)) goto again;` retry masks the symptom in the common case but does not address the unsafe iteration itself. Reframe the loop so it never relies on iterator state across unlock/relock. Under conn_list_lock held for read, pick the first connection whose ->shutdown() has not yet been issued by this path, pin it by taking an extra reference, record that fact on the connection and mark it EXITING while still inside the locked walk, then drop the lock. Then call ->shutdown() outside the lock, drop the pin (freeing the connection if the handler already released its reference), and restart from the top. Use a new per-connection flag, conn->stop_called, as the "shutdown issued from stop_sessions()" marker rather than reusing the status state. ksmbd_conn_set_exiting() is also invoked by ksmbd_sessions_deregister() on sibling channels of a multichannel session without issuing a transport shutdown, so treating KSMBD_SESS_EXITING as "already handled here" would skip connections that still need shutdown() to wake their handler out of recv(), leaving the outer retry waiting indefinitely for the hash to drain. stop_sessions() is serialised by init_lock in ksmbd_conn_transport_destroy(), so writing stop_called under the read lock has no other writer. Set EXITING inside the locked walk so the selection, the stop_called marker, and the status transition all happen together, and guard against regressing a connection that has already advanced to KSMBD_SESS_RELEASING on its own (for example, if the handler exited its receive loop for an unrelated reason between teardown steps). When the pin drop is the last put, release the transport and pair ida_destroy(&target->async_ida) with the ida_init() done in ksmbd_conn_alloc(), so stop_sessions() retiring a connection on its own does not leak the xarray backing of the embedded async_ida. The outer retry with msleep() is kept to wait for handler threads to reach ksmbd_conn_free() and drain the hash. Observed with an instrumented build that logs one line per visit and widens the unlocked window before ->shutdown() by 200 ms, under five concurrent cifs mounts (nosharesock, one connection each): * Current code: the same connection address is revisited many times during a single stop_sessions() call and ->shutdown() is invoked well beyond the number of live connections before the hash finally drains. * Rewritten code: each live connection produces exactly one ->shutdown() call; the function returns as soon as the hash is empty. Functional teardown via `ksmbd.control --shutdown` with the same five mounts completes cleanly on the rewritten path. Performance is observably unchanged. Tearing down N concurrent nosharesock cifs connections with `ksmbd.control --shutdown` + `rmmod ksmbd` takes essentially the same wall time before and after the rewrite: N before after 10 4.93s 5.34s 30 7.34s 7.03s 50 7.31s 7.01s (3-run avg: 7.04s vs 7.25s) 100 6.98s 6.78s 200 6.77s 6.89s and the number of ->shutdown() calls equals the number of live connections on both paths when the race is not widened. The teardown is dominated by the msleep(100)-based outer retry waiting for handler threads to run ksmbd_conn_free(), not by the iteration itself; the restartable loop's worst-case O(N^2) visit cost is in the microseconds even at N=200 and sits far below the msleep(100) granularity. Applied alone on top of ksmbd-for-next-next, this patch does not introduce a new leak site. Under the same reproducer (10x concurrent-holders + ss -K + ksmbd.control --shutdown + rmmod), the tree still shows the pre-existing per-connection transport leak count that arises when the last refcount drop lands in one of ksmbd_conn_r_count_dec(), __free_opinfo() or session_fd_check() - all of which end with a bare kfree() today. kmemleak backtraces for the unreferenced objects point into the TCP accept path (sk_clone -> inet_csk_clone_lock, sock_alloc_inode) and none involve stop_sessions(). Plugging those bare-kfree sites is the responsibility of the follow-up patch. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: stable@vger.kernel.org Signed-off-by: DaeMyung Kang Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 46 ++++++++++++++++++++++++++++++++++++++-------- fs/smb/server/connection.h | 1 + 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index fbbc0529743f..c5aac4946cbe 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -540,24 +540,54 @@ out: static void stop_sessions(void) { - struct ksmbd_conn *conn; + struct ksmbd_conn *conn, *target; struct ksmbd_transport *t; + bool any; int bkt; + /* + * Serialised via init_lock; no concurrent stop_sessions() can + * touch conn->stop_called, so writing it under the read lock is + * safe. + */ again: + target = NULL; + any = false; down_read(&conn_list_lock); hash_for_each(conn_list, bkt, conn, hlist) { - t = conn->transport; - ksmbd_conn_set_exiting(conn); - if (t->ops->shutdown) { - up_read(&conn_list_lock); + any = true; + if (conn->stop_called) + continue; + atomic_inc(&conn->refcnt); + conn->stop_called = true; + /* + * Mark the connection EXITING while still holding the + * read lock so the selection and the status transition + * happen together. Do not regress a connection that has + * already advanced to RELEASING on its own (e.g. the + * handler exited its receive loop for an unrelated + * reason). + */ + if (READ_ONCE(conn->status) != KSMBD_SESS_RELEASING) + ksmbd_conn_set_exiting(conn); + target = conn; + break; + } + up_read(&conn_list_lock); + + if (target) { + t = target->transport; + if (t->ops->shutdown) t->ops->shutdown(t); - down_read(&conn_list_lock); + if (atomic_dec_and_test(&target->refcnt)) { + ida_destroy(&target->async_ida); + t->ops->free_transport(t); + kfree(target); } + goto again; } - up_read(&conn_list_lock); - if (!hash_empty(conn_list)) { + if (any) { msleep(100); goto again; } diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index ae21a1bd4c70..de2d46941c93 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -49,6 +49,7 @@ struct ksmbd_conn { struct mutex srv_mutex; int status; unsigned int cli_cap; + bool stop_called; union { __be32 inet_addr; #if IS_ENABLED(CONFIG_IPV6) -- cgit v1.2.3 From a0fc362f095330f7b3f68ac0c55ef8da18290c87 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 26 Mar 2026 14:01:16 -0700 Subject: drm/xe: Drop registration of guc_submit_wedged_fini from xe_guc_submit_wedge() xe_guc_submit_wedge() runs in the DMA-fence signaling path, where GFP_KERNEL memory allocations are not permitted. However, registering guc_submit_wedged_fini via drmm_add_action_or_reset() triggers such an allocation. Avoid this by moving the logic from guc_submit_wedged_fini() into guc_submit_fini(), where wedged exec queue references are dropped during normal teardown. Fixes: 8ed9aaae39f3 ("drm/xe: Force wedged state and block GT reset upon any GPU hang") Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patch.msgid.link/20260326210116.202585-3-matthew.brost@intel.com (cherry picked from commit 4a706bd93c4fb156a13477e26ffdf2e633edeb10) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_submit.c | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index a145234f662b..10556156eaad 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -259,24 +259,12 @@ static void guc_submit_sw_fini(struct drm_device *drm, void *arg) } static void guc_submit_fini(void *arg) -{ - struct xe_guc *guc = arg; - - /* Forcefully kill any remaining exec queues */ - xe_guc_ct_stop(&guc->ct); - guc_submit_reset_prepare(guc); - xe_guc_softreset(guc); - xe_guc_submit_stop(guc); - xe_uc_fw_sanitize(&guc->fw); - xe_guc_submit_pause_abort(guc); -} - -static void guc_submit_wedged_fini(void *arg) { struct xe_guc *guc = arg; struct xe_exec_queue *q; unsigned long index; + /* Drop any wedged queue refs */ mutex_lock(&guc->submission_state.lock); xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { if (exec_queue_wedged(q)) { @@ -286,6 +274,14 @@ static void guc_submit_wedged_fini(void *arg) } } mutex_unlock(&guc->submission_state.lock); + + /* Forcefully kill any remaining exec queues */ + xe_guc_ct_stop(&guc->ct); + guc_submit_reset_prepare(guc); + xe_guc_softreset(guc); + xe_guc_submit_stop(guc); + xe_uc_fw_sanitize(&guc->fw); + xe_guc_submit_pause_abort(guc); } static const struct xe_exec_queue_ops guc_exec_queue_ops; @@ -1320,10 +1316,8 @@ static void disable_scheduling_deregister(struct xe_guc *guc, void xe_guc_submit_wedge(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); - struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; unsigned long index; - int err; xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); @@ -1335,15 +1329,6 @@ void xe_guc_submit_wedge(struct xe_guc *guc) return; if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) { - err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, - guc_submit_wedged_fini, guc); - if (err) { - xe_gt_err(gt, "Failed to register clean-up on wedged.mode=%s; " - "Although device is wedged.\n", - xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)); - return; - } - mutex_lock(&guc->submission_state.lock); xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) if (xe_exec_queue_get_unless_zero(q)) -- cgit v1.2.3 From 2bc0cce2724f74dde914d11fabb35b3a912c8329 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Tue, 31 Mar 2026 18:12:17 +0000 Subject: drm/xe/vm: Add missing pad and extensions check Add missing pad and extensions check to xe_vm_get_property_ioctl v2: - Combine with other check (Auld) Fixes: 50c577eab051 ("drm/xe/xe_vm: Implement xe_vm_get_property_ioctl") Suggested-by: Matthew Auld Signed-off-by: Jonathan Cavitt Reviewed-by: Matthew Auld Reviewed-by: Matthew Brost Signed-off-by: Matthew Auld Link: https://patch.msgid.link/20260331181216.37775-2-jonathan.cavitt@intel.com (cherry picked from commit 896070686b16cc45cca7854be2049923b2b303d3) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 56e2db50bb36..1720205c09ca 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -4156,7 +4156,8 @@ int xe_vm_get_property_ioctl(struct drm_device *drm, void *data, int ret = 0; if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] || - args->reserved[2]))) + args->reserved[2] || args->extensions || + args->pad))) return -EINVAL; vm = xe_vm_lookup(xef, args->vm_id); -- cgit v1.2.3 From 9d7ca81b3019905c36c8cae9c306827325ba5878 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 1 Apr 2026 13:12:44 -0700 Subject: drm/xe: Drop redundant rtp entries for Wa_14019988906 & Wa_14019877138 There appears to have been a silent merge conflict between some commits updating the workaround tables on Xe's -fixes and -next branches: - Commit bc6387a2e0c1 ("drm/xe/xe2_hpg: Fix handling of Wa_14019988906 & Wa_14019877138") from the fixes branch moved the Xe2_HPG instance of two workarounds touching the PSS_CHICKEN register from the engine_was[] table to the lrc_was[] table; the equivalent implementation for all other platforms/IPs were already properly located on lrc_was[]. This commit on the fixes branch is a cherry-pick of commit e04c609eedf4 ("drm/xe/xe2_hpg: Fix handling of Wa_14019988906 & Wa_14019877138") that already existed on the next branch. - Commit 55b19abb6c44 ("drm/xe: Consolidate workaround entries for Wa_14019877138") and commit c2142a1a8415 ("drm/xe: Consolidate workaround entries for Wa_14019988906") consolidated the individual entries per IP generation for each workaround into single, larger range-based entries. During merge conflict resolution the Xe2_HPG-specific entries (i.e., those with rule "GRAPHICS_VERSION_RANGE(2001, 2002)") were accidentally resurrected, even though the table already contains the consolidated entries that match a superset of thse ranges. These redundant entries don't cause any build failures but do trigger a dmesg error during probe on BMG-G21 devices: xe 0000:03:00.0: [drm] *ERROR* Tile0: GT0: discarding save-restore reg 7044 (clear: 00000400, set: 00000400, masked: yes, mcr: yes): ret=-22 xe 0000:03:00.0: [drm] *ERROR* Tile0: GT0: discarding save-restore reg 7044 (clear: 00000020, set: 00000020, masked: yes, mcr: yes): ret=-22 Re-drop the Xe2_HPG-specific table entries to eliminate the error. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/work_items/7433 Fixes: 17b95278ae6a ("Merge tag 'drm-xe-next-2026-03-02' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next") Cc: Dave Airlie Signed-off-by: Matt Roper Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260401-wa_merge_conflict-v1-1-b477ab53fedc@intel.com Signed-off-by: Maarten Lankhorst (cherry picked from commit c79bc999442ff3c0908ab8bce92b2a3cb7d59861) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_wa.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 546296f0220b..4b1cbced06be 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -743,14 +743,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) }, - { XE_RTP_NAME("14019988906"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) - }, - { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) - }, { XE_RTP_NAME("14021490052"), XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(FF_MODE, -- cgit v1.2.3 From 68fdf2c943bbba75d4f3a5c5546bc764f5886c13 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Wed, 1 Apr 2026 19:10:51 -0300 Subject: drm/xe/xe3p_lpg: Add missing indirect ring state feature flag Even though commit 8fcb7dfb8bbf ("drm/xe/xe3p_lpg: Add support for graphics IP 35.10") mentions that the support for Indirect Ring State exists for Xe3p_LPG, it missed actually setting the feature flag in graphics_xe3p_lpg. Fix that by adding the missing member. Fixes: 8fcb7dfb8bbf ("drm/xe/xe3p_lpg: Add support for graphics IP 35.10") Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260401-xe3p_lpg-indirect-ring-state-v1-1-0e4b5edf6898@intel.com Signed-off-by: Gustavo Sousa (cherry picked from commit ec4f4970eb744fd7d6d135f40f5c83bd05982e72) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 01673d2b2464..9f98d0334164 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -118,6 +118,7 @@ static const struct xe_graphics_desc graphics_xe2 = { static const struct xe_graphics_desc graphics_xe3p_lpg = { XE2_GFX_FEATURES, + .has_indirect_ring_state = 1, .multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) | BIT(XE_ENGINE_CLASS_COMPUTE), .num_geometry_xecore_fuse_regs = 3, .num_compute_xecore_fuse_regs = 3, -- cgit v1.2.3 From 2299d73562e68e85e358289438924572b01cfe19 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 10 Apr 2026 15:50:29 -0700 Subject: drm/xe/tuning: Use proper register offset for GAMSTLB_CTRL From Xe2 onward (i.e., all platforms officially supported by the Xe driver), the GAMSTLB_CTRL register is located at offset 0x477C and represented by the macro "GAMSTLB_CTRL" in code. However the register formerly resided at offset 0xCF4C on Xe1-era platforms, and we also have macro XEHP_GAMSTLB_CTRL that represents this old offset in the unofficial/developer-only Xe1 code. When tuning for the register was added for Xe3p_LPG, the old Xe1-era macro was accidentally used instead of the proper macro for Xe2 and beyond, causing the tuning to not be applied properly. Use the proper definition so that the correct offset is written to. Bspec: 59298 Fixes: 377c89bfaa5d ("drm/xe/xe3p_lpg: Set STLB bank hash mode to 4KB") Reviewed-by: Gustavo Sousa Link: https://patch.msgid.link/20260410-xe3p_tuning-v1-2-e206a62ee38f@intel.com Signed-off-by: Matt Roper (cherry picked from commit 0b1676eafdd1ba5a5436bdca0d2a25ce56699783) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_tuning.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index f8de6a4bf189..0b78ec2bc6a4 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -97,7 +97,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { { XE_RTP_NAME("Tuning: Set STLB Bank Hash Mode to 4KB"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED), IS_INTEGRATED), - XE_RTP_ACTIONS(FIELD_SET(XEHP_GAMSTLB_CTRL, BANK_HASH_MODE, + XE_RTP_ACTIONS(FIELD_SET(GAMSTLB_CTRL, BANK_HASH_MODE, BANK_HASH_4KB_MODE)) }, }; -- cgit v1.2.3 From 9407936237c98104873550219efedc286f28bbe9 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 10 Apr 2026 15:50:30 -0700 Subject: drm/xe: Mark ROW_CHICKEN5 as a masked register ROW_CHICKEN5 is a masked register (i.e., to adjust the value of any of the lower 16 bits, the corresponding bit in the upper 16 bits must also be set). Add the XE_REG_OPTION_MASKED to its definition; failure to do so will cause workaround updates of this register to not apply properly. Bspec: 56853 Fixes: 835cd6cbb0d0 ("drm/xe/xe3p_lpg: Add initial workarounds for graphics version 35.10") Reviewed-by: Gustavo Sousa Link: https://patch.msgid.link/20260410-xe3p_tuning-v1-3-e206a62ee38f@intel.com Signed-off-by: Matt Roper (cherry picked from commit cd84bfbba7feb4c1e72356f14de026dfda1a9e2a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 4ebaa0888a43..9c88ca3ce768 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -583,7 +583,7 @@ #define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) #define LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE REG_BIT(35 - 32) -#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0) +#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0, XE_REG_OPTION_MASKED) #define CPSS_AWARE_DIS REG_BIT(3) #define SARB_CHICKEN1 XE_REG_MCR(0xe90c) -- cgit v1.2.3 From 03f2499c51dffce611b065b2894406beb9f2ebe0 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 8 Apr 2026 15:27:44 -0700 Subject: drm/xe/debugfs: Correct printing of register whitelist ranges The register-save-restore debugfs prints whitelist entries as offset ranges. E.g., REG[0x39319c-0x39319f]: allow read access for a single dword-sized register. However the GENMASK value used to set the lower bits to '1' for the upper bound of the whitelist range incorrectly included one more bit than it should have, causing the whitelist ranges to sometimes appear twice as large as they really were. For example, REG[0x6210-0x6217]: allow rw access was also intended to be a single dword-sized register whitelist (with a range 0x6210-0x6213) but was printed incorrectly as a qword-sized range because one too many bits was flipped on. Similar 'off by one' logic was applied when printing 4-dword register ranges and 64-dword register ranges as well. Correct the GENMASK logic to print these ranges in debugfs correctly. No impact outside of correcting the misleading debugfs output. Fixes: d855d2246ea6 ("drm/xe: Print whitelist while applying") Reviewed-by: Stuart Summers Link: https://patch.msgid.link/20260408-regsr_wl_range-v1-1-e9a28c8b4264@intel.com Signed-off-by: Matt Roper (cherry picked from commit 1a2a722ff96749734a5585dfe7f0bea7719caa8b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_reg_whitelist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 80577e4b7437..8cc313182968 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -226,7 +226,7 @@ void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent, } range_start = reg & REG_GENMASK(25, range_bit); - range_end = range_start | REG_GENMASK(range_bit, 0); + range_end = range_start | REG_GENMASK(range_bit - 1, 0); switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) { case RING_FORCE_TO_NONPRIV_ACCESS_RW: -- cgit v1.2.3 From 0a5e695095c557d2380131b613dea4e8d90371be Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 28 Apr 2026 19:33:25 +0100 Subject: firmware: arm_ffa: Check for NULL FF-A ID table while driver registration The bus match callback assumes that every FF-A driver provides an id_table and dereferences it unconditionally. Enforce that contract at registration time so a buggy client driver cannot crash the bus during match. Fixes: 92743071464f ("firmware: arm_ffa: Ensure drivers provide a probe function") Link: https://patch.msgid.link/20260428-ffa_fixes-v2-1-8595ae450034@kernel.org Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/bus.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c index 9576862d89c4..601c3418e0d9 100644 --- a/drivers/firmware/arm_ffa/bus.c +++ b/drivers/firmware/arm_ffa/bus.c @@ -26,6 +26,8 @@ static int ffa_device_match(struct device *dev, const struct device_driver *drv) id_table = to_ffa_driver(drv)->id_table; ffa_dev = to_ffa_dev(dev); + if (!id_table) + return 0; while (!uuid_is_null(&id_table->uuid)) { /* @@ -123,7 +125,7 @@ int ffa_driver_register(struct ffa_driver *driver, struct module *owner, { int ret; - if (!driver->probe) + if (!driver->probe || !driver->id_table) return -EINVAL; driver->driver.bus = &ffa_bus_type; -- cgit v1.2.3 From 09527e2c534911619d7e098729711100290bc3e1 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 28 Apr 2026 19:33:26 +0100 Subject: firmware: arm_ffa: Skip free_pages on RX buffer alloc failure If the RX buffer allocation fails in ffa_init(), the error path jumps to free_pages even though no buffer has been allocated yet. Route that case directly to free_drv_info so the cleanup path is only used after at least one RX/TX buffer allocation has succeeded. Fixes: 3bbfe9871005 ("firmware: arm_ffa: Add initial Arm FFA driver support") Link: https://patch.msgid.link/20260428-ffa_fixes-v2-2-8595ae450034@kernel.org Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index eb2782848283..e6a051b20cb7 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -2067,7 +2067,7 @@ static int __init ffa_init(void) drv_info->rx_buffer = alloc_pages_exact(rxtx_bufsz, GFP_KERNEL); if (!drv_info->rx_buffer) { ret = -ENOMEM; - goto free_pages; + goto free_drv_info; } drv_info->tx_buffer = alloc_pages_exact(rxtx_bufsz, GFP_KERNEL); -- cgit v1.2.3 From 9b5597af8bc51c25342ab11896532644b181d302 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 28 Apr 2026 19:33:27 +0100 Subject: firmware: arm_ffa: Avoid collapsing NPI work from different CPUs Notification pending interrupts are registered as per-CPU IRQs, but the driver queues all NPI handling through a single shared work_struct. That allows queue_work_on() calls from different CPUs to collapse onto a single pending work item even though the work function uses the CPU it runs on to fetch and handle per-CPU notifications. Move notif_pcpu_work into the per-CPU ffa_pcpu_irq state and initialize one work item per CPU. This keeps NPI handling independent per CPU and avoids losing notifications when multiple CPUs queue work concurrently. Link: https://patch.msgid.link/20260428-ffa_fixes-v2-3-8595ae450034@kernel.org Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index e6a051b20cb7..4e66c7325a4e 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -87,6 +87,7 @@ static inline int ffa_to_linux_errno(int errno) struct ffa_pcpu_irq { struct ffa_drv_info *info; + struct work_struct notif_pcpu_work; }; struct ffa_drv_info { @@ -106,7 +107,6 @@ struct ffa_drv_info { unsigned int cpuhp_state; struct ffa_pcpu_irq __percpu *irq_pcpu; struct workqueue_struct *notif_pcpu_wq; - struct work_struct notif_pcpu_work; struct work_struct sched_recv_irq_work; struct xarray partition_info; DECLARE_HASHTABLE(notifier_hash, ilog2(FFA_MAX_NOTIFICATIONS)); @@ -1539,8 +1539,9 @@ ffa_self_notif_handle(u16 vcpu, bool is_per_vcpu, void *cb_data) static void notif_pcpu_irq_work_fn(struct work_struct *work) { - struct ffa_drv_info *info = container_of(work, struct ffa_drv_info, + struct ffa_pcpu_irq *pcpu = container_of(work, struct ffa_pcpu_irq, notif_pcpu_work); + struct ffa_drv_info *info = pcpu->info; ffa_self_notif_handle(smp_processor_id(), true, info); } @@ -1811,7 +1812,7 @@ static irqreturn_t notif_pend_irq_handler(int irq, void *irq_data) struct ffa_drv_info *info = pcpu->info; queue_work_on(smp_processor_id(), info->notif_pcpu_wq, - &info->notif_pcpu_work); + &pcpu->notif_pcpu_work); return IRQ_HANDLED; } @@ -1928,8 +1929,11 @@ static int ffa_init_pcpu_irq(void) if (!irq_pcpu) return -ENOMEM; - for_each_present_cpu(cpu) + for_each_present_cpu(cpu) { per_cpu_ptr(irq_pcpu, cpu)->info = drv_info; + INIT_WORK(&per_cpu_ptr(irq_pcpu, cpu)->notif_pcpu_work, + notif_pcpu_irq_work_fn); + } drv_info->irq_pcpu = irq_pcpu; @@ -1958,7 +1962,6 @@ static int ffa_init_pcpu_irq(void) } INIT_WORK(&drv_info->sched_recv_irq_work, ffa_sched_recv_irq_work_fn); - INIT_WORK(&drv_info->notif_pcpu_work, notif_pcpu_irq_work_fn); drv_info->notif_pcpu_wq = create_workqueue("ffa_pcpu_irq_notification"); if (!drv_info->notif_pcpu_wq) return -EINVAL; -- cgit v1.2.3 From 9985d5357ed93af0d1933969c247e966957730e1 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 28 Apr 2026 19:33:28 +0100 Subject: firmware: arm_ffa: Fix per-vcpu self notifications handling in workqueue Per-vcpu notification handling already runs from a per-cpu work item on the target cpu. Routing that path back through smp_call_function_single() re-enters the call-function IPI path and executes the notification handler with interrupts disabled. That makes the framework path unsafe, since it takes a mutex, allocates memory with GFP_KERNEL, and invokes client callbacks. Handle per-vcpu self notifications directly from the existing per-cpu work item instead. This keeps the per-vcpu path in task context and avoids the extra IPI hop entirely. Fixes: 3a3e2b83e805 ("firmware: arm_ffa: Avoid queuing work when running on the worker queue") Link: https://patch.msgid.link/20260428-ffa_fixes-v2-4-8595ae450034@kernel.org Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 4e66c7325a4e..2241e851f7ae 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1543,7 +1543,7 @@ static void notif_pcpu_irq_work_fn(struct work_struct *work) notif_pcpu_work); struct ffa_drv_info *info = pcpu->info; - ffa_self_notif_handle(smp_processor_id(), true, info); + notif_get_and_handle(info); } static const struct ffa_info_ops ffa_drv_info_ops = { -- cgit v1.2.3 From 6d3daa9b8d313f42d52e75590310f26a29b61b44 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 28 Apr 2026 19:33:29 +0100 Subject: firmware: arm_ffa: Unregister bus notifier on teardown for FF-A v1.0 For FF-A v1.0 the driver registers a bus notifier to backfill UUID matching, but the notifier was never unregistered on cleanup paths. Track the registration state and unregister it during teardown and early partition-setup failure. Fixes: 9dd15934f60d ("firmware: arm_ffa: Move the FF-A v1.0 NULL UUID workaround to bus notifier") Link: https://patch.msgid.link/20260428-ffa_fixes-v2-5-8595ae450034@kernel.org Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 2241e851f7ae..a122814eb6d7 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -101,6 +101,7 @@ struct ffa_drv_info { bool mem_ops_native; bool msg_direct_req2_supp; bool bitmap_created; + bool bus_notifier_registered; bool notif_enabled; unsigned int sched_recv_irq; unsigned int notif_pend_irq; @@ -1630,6 +1631,15 @@ static struct notifier_block ffa_bus_nb = { .notifier_call = ffa_bus_notifier, }; +static void ffa_bus_notifier_unregister(void) +{ + if (!drv_info->bus_notifier_registered) + return; + + bus_unregister_notifier(&ffa_bus_type, &ffa_bus_nb); + drv_info->bus_notifier_registered = false; +} + static int ffa_xa_add_partition_info(struct ffa_device *dev) { struct ffa_dev_part_info *info; @@ -1713,6 +1723,8 @@ static void ffa_partitions_cleanup(void) struct list_head *phead; unsigned long idx; + ffa_bus_notifier_unregister(); + /* Clean up/free all registered devices */ ffa_devices_unregister(); @@ -1740,11 +1752,14 @@ static int ffa_setup_partitions(void) ret = bus_register_notifier(&ffa_bus_type, &ffa_bus_nb); if (ret) pr_err("Failed to register FF-A bus notifiers\n"); + else + drv_info->bus_notifier_registered = true; } count = ffa_partition_probe(&uuid_null, &pbuf); if (count <= 0) { pr_info("%s: No partitions found, error %d\n", __func__, count); + ffa_bus_notifier_unregister(); return -EINVAL; } -- cgit v1.2.3 From 36c6bac158816ede655f298a3f76e5a350eaa90e Mon Sep 17 00:00:00 2001 From: Satyanarayana K V P Date: Wed, 8 Apr 2026 11:01:47 +0000 Subject: drm/xe: Add memory pool with shadow support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a memory pool to allocate sub-ranges from a BO-backed pool using drm_mm. Signed-off-by: Satyanarayana K V P Cc: Matthew Brost Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Michal Wajdeczko Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260408110145.1639937-5-satyanarayana.k.v.p@intel.com (cherry picked from commit 1ce3229f8f269a245ff3b8c65ffae36b4d6afb93) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_mem_pool.c | 403 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_mem_pool.h | 35 +++ drivers/gpu/drm/xe/xe_mem_pool_types.h | 21 ++ 4 files changed, 460 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_mem_pool.c create mode 100644 drivers/gpu/drm/xe/xe_mem_pool.h create mode 100644 drivers/gpu/drm/xe/xe_mem_pool_types.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 49de1c22a469..03242e8b3d87 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -88,6 +88,7 @@ xe-y += xe_bb.o \ xe_irq.o \ xe_late_bind_fw.o \ xe_lrc.o \ + xe_mem_pool.o \ xe_migrate.o \ xe_mmio.o \ xe_mmio_gem.o \ diff --git a/drivers/gpu/drm/xe/xe_mem_pool.c b/drivers/gpu/drm/xe/xe_mem_pool.c new file mode 100644 index 000000000000..d5e24d6aa88d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mem_pool.c @@ -0,0 +1,403 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2026 Intel Corporation + */ + +#include + +#include + +#include "instructions/xe_mi_commands.h" +#include "xe_bo.h" +#include "xe_device_types.h" +#include "xe_map.h" +#include "xe_mem_pool.h" +#include "xe_mem_pool_types.h" +#include "xe_tile_printk.h" + +/** + * struct xe_mem_pool - DRM MM pool for sub-allocating memory from a BO on an + * XE tile. + * + * The XE memory pool is a DRM MM manager that provides sub-allocation of memory + * from a backing buffer object (BO) on a specific XE tile. It is designed to + * manage memory for GPU workloads, allowing for efficient allocation and + * deallocation of memory regions within the BO. + * + * The memory pool maintains a primary BO that is pinned in the GGTT and mapped + * into the CPU address space for direct access. Optionally, it can also maintain + * a shadow BO that can be used for atomic updates to the primary BO's contents. + * + * The API provided by the memory pool allows clients to allocate and free memory + * regions, retrieve GPU and CPU addresses, and synchronize data between the + * primary and shadow BOs as needed. + */ +struct xe_mem_pool { + /** @base: Range allocator over [0, @size) in bytes */ + struct drm_mm base; + /** @bo: Active pool BO (GGTT-pinned, CPU-mapped). */ + struct xe_bo *bo; + /** @shadow: Shadow BO for atomic command updates. */ + struct xe_bo *shadow; + /** @swap_guard: Timeline guard updating @bo and @shadow */ + struct mutex swap_guard; + /** @cpu_addr: CPU virtual address of the active BO. */ + void *cpu_addr; + /** @is_iomem: Indicates if the BO mapping is I/O memory. */ + bool is_iomem; +}; + +static struct xe_mem_pool *node_to_pool(struct xe_mem_pool_node *node) +{ + return container_of(node->sa_node.mm, struct xe_mem_pool, base); +} + +static struct xe_tile *pool_to_tile(struct xe_mem_pool *pool) +{ + return pool->bo->tile; +} + +static void fini_pool_action(struct drm_device *drm, void *arg) +{ + struct xe_mem_pool *pool = arg; + + if (pool->is_iomem) + kvfree(pool->cpu_addr); + + drm_mm_takedown(&pool->base); +} + +static int pool_shadow_init(struct xe_mem_pool *pool) +{ + struct xe_tile *tile = pool->bo->tile; + struct xe_device *xe = tile_to_xe(tile); + struct xe_bo *shadow; + int ret; + + xe_assert(xe, !pool->shadow); + + ret = drmm_mutex_init(&xe->drm, &pool->swap_guard); + if (ret) + return ret; + + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&pool->swap_guard); + fs_reclaim_release(GFP_KERNEL); + } + shadow = xe_managed_bo_create_pin_map(xe, tile, + xe_bo_size(pool->bo), + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); + if (IS_ERR(shadow)) + return PTR_ERR(shadow); + + pool->shadow = shadow; + + return 0; +} + +/** + * xe_mem_pool_init() - Initialize memory pool. + * @tile: the &xe_tile where allocate. + * @size: number of bytes to allocate. + * @guard: the size of the guard region at the end of the BO that is not + * sub-allocated, in bytes. + * @flags: flags to use to create shadow pool. + * + * Initializes a memory pool for sub-allocating memory from a backing BO on the + * specified XE tile. The backing BO is pinned in the GGTT and mapped into + * the CPU address space for direct access. Optionally, a shadow BO can also be + * initialized for atomic updates to the primary BO's contents. + * + * Returns: a pointer to the &xe_mem_pool, or an error pointer on failure. + */ +struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size, + u32 guard, int flags) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_mem_pool *pool; + struct xe_bo *bo; + u32 managed_size; + int ret; + + xe_tile_assert(tile, size > guard); + managed_size = size - guard; + + pool = drmm_kzalloc(&xe->drm, sizeof(*pool), GFP_KERNEL); + if (!pool) + return ERR_PTR(-ENOMEM); + + bo = xe_managed_bo_create_pin_map(xe, tile, size, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); + if (IS_ERR(bo)) { + xe_tile_err(tile, "Failed to prepare %uKiB BO for mem pool (%pe)\n", + size / SZ_1K, bo); + return ERR_CAST(bo); + } + pool->bo = bo; + pool->is_iomem = bo->vmap.is_iomem; + + if (pool->is_iomem) { + pool->cpu_addr = kvzalloc(size, GFP_KERNEL); + if (!pool->cpu_addr) + return ERR_PTR(-ENOMEM); + } else { + pool->cpu_addr = bo->vmap.vaddr; + } + + if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY) { + ret = pool_shadow_init(pool); + + if (ret) + goto out_err; + } + + drm_mm_init(&pool->base, 0, managed_size); + ret = drmm_add_action_or_reset(&xe->drm, fini_pool_action, pool); + if (ret) + return ERR_PTR(ret); + + return pool; + +out_err: + if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY) + xe_tile_err(tile, + "Failed to initialize shadow BO for mem pool (%d)\n", ret); + if (bo->vmap.is_iomem) + kvfree(pool->cpu_addr); + return ERR_PTR(ret); +} + +/** + * xe_mem_pool_sync() - Copy the entire contents of the main pool to shadow pool. + * @pool: the memory pool containing the primary and shadow BOs. + * + * Copies the entire contents of the primary pool to the shadow pool. This must + * be done after xe_mem_pool_init() with the XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY + * flag to ensure that the shadow pool has the same initial contents as the primary + * pool. After this initial synchronization, clients can choose to synchronize the + * shadow pool with the primary pool on a node basis using + * xe_mem_pool_sync_shadow_locked() as needed. + * + * Return: None. + */ +void xe_mem_pool_sync(struct xe_mem_pool *pool) +{ + struct xe_tile *tile = pool_to_tile(pool); + struct xe_device *xe = tile_to_xe(tile); + + xe_tile_assert(tile, pool->shadow); + + xe_map_memcpy_to(xe, &pool->shadow->vmap, 0, + pool->cpu_addr, xe_bo_size(pool->bo)); +} + +/** + * xe_mem_pool_swap_shadow_locked() - Swap the primary BO with the shadow BO. + * @pool: the memory pool containing the primary and shadow BOs. + * + * Swaps the primary buffer object with the shadow buffer object in the mem + * pool. This allows for atomic updates to the contents of the primary BO + * by first writing to the shadow BO and then swapping it with the primary BO. + * Swap_guard must be held to ensure synchronization with any concurrent swap + * operations. + * + * Return: None. + */ +void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool) +{ + struct xe_tile *tile = pool_to_tile(pool); + + xe_tile_assert(tile, pool->shadow); + lockdep_assert_held(&pool->swap_guard); + + swap(pool->bo, pool->shadow); + if (!pool->bo->vmap.is_iomem) + pool->cpu_addr = pool->bo->vmap.vaddr; +} + +/** + * xe_mem_pool_sync_shadow_locked() - Copy node from primary pool to shadow pool. + * @node: the node allocated in the memory pool. + * + * Copies the specified batch buffer from the primary pool to the shadow pool. + * Swap_guard must be held to ensure synchronization with any concurrent swap + * operations. + * + * Return: None. + */ +void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node) +{ + struct xe_mem_pool *pool = node_to_pool(node); + struct xe_tile *tile = pool_to_tile(pool); + struct xe_device *xe = tile_to_xe(tile); + struct drm_mm_node *sa_node = &node->sa_node; + + xe_tile_assert(tile, pool->shadow); + lockdep_assert_held(&pool->swap_guard); + + xe_map_memcpy_to(xe, &pool->shadow->vmap, + sa_node->start, + pool->cpu_addr + sa_node->start, + sa_node->size); +} + +/** + * xe_mem_pool_gpu_addr() - Retrieve GPU address of memory pool. + * @pool: the memory pool + * + * Returns: GGTT address of the memory pool. + */ +u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool) +{ + return xe_bo_ggtt_addr(pool->bo); +} + +/** + * xe_mem_pool_cpu_addr() - Retrieve CPU address of manager pool. + * @pool: the memory pool + * + * Returns: CPU virtual address of memory pool. + */ +void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool) +{ + return pool->cpu_addr; +} + +/** + * xe_mem_pool_bo_swap_guard() - Retrieve the mutex used to guard swap + * operations on a memory pool. + * @pool: the memory pool + * + * Returns: Swap guard mutex or NULL if shadow pool is not created. + */ +struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool) +{ + if (!pool->shadow) + return NULL; + + return &pool->swap_guard; +} + +/** + * xe_mem_pool_bo_flush_write() - Copy the data from the sub-allocation + * to the GPU memory. + * @node: the node allocated in the memory pool to flush. + */ +void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node) +{ + struct xe_mem_pool *pool = node_to_pool(node); + struct xe_tile *tile = pool_to_tile(pool); + struct xe_device *xe = tile_to_xe(tile); + struct drm_mm_node *sa_node = &node->sa_node; + + if (!pool->bo->vmap.is_iomem) + return; + + xe_map_memcpy_to(xe, &pool->bo->vmap, sa_node->start, + pool->cpu_addr + sa_node->start, + sa_node->size); +} + +/** + * xe_mem_pool_bo_sync_read() - Copy the data from GPU memory to the + * sub-allocation. + * @node: the node allocated in the memory pool to read back. + */ +void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node) +{ + struct xe_mem_pool *pool = node_to_pool(node); + struct xe_tile *tile = pool_to_tile(pool); + struct xe_device *xe = tile_to_xe(tile); + struct drm_mm_node *sa_node = &node->sa_node; + + if (!pool->bo->vmap.is_iomem) + return; + + xe_map_memcpy_from(xe, pool->cpu_addr + sa_node->start, + &pool->bo->vmap, sa_node->start, sa_node->size); +} + +/** + * xe_mem_pool_alloc_node() - Allocate a new node for use with xe_mem_pool. + * + * Returns: node structure or an ERR_PTR(-ENOMEM). + */ +struct xe_mem_pool_node *xe_mem_pool_alloc_node(void) +{ + struct xe_mem_pool_node *node = kzalloc_obj(*node); + + if (!node) + return ERR_PTR(-ENOMEM); + + return node; +} + +/** + * xe_mem_pool_insert_node() - Insert a node into the memory pool. + * @pool: the memory pool to insert into + * @node: the node to insert + * @size: the size of the node to be allocated in bytes. + * + * Inserts a node into the specified memory pool using drm_mm for + * allocation. + * + * Returns: 0 on success or a negative error code on failure. + */ +int xe_mem_pool_insert_node(struct xe_mem_pool *pool, + struct xe_mem_pool_node *node, u32 size) +{ + if (!pool) + return -EINVAL; + + return drm_mm_insert_node(&pool->base, &node->sa_node, size); +} + +/** + * xe_mem_pool_free_node() - Free a node allocated from the memory pool. + * @node: the node to free + * + * Returns: None. + */ +void xe_mem_pool_free_node(struct xe_mem_pool_node *node) +{ + if (!node) + return; + + drm_mm_remove_node(&node->sa_node); + kfree(node); +} + +/** + * xe_mem_pool_node_cpu_addr() - Retrieve CPU address of the node. + * @node: the node allocated in the memory pool + * + * Returns: CPU virtual address of the node. + */ +void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node) +{ + struct xe_mem_pool *pool = node_to_pool(node); + + return xe_mem_pool_cpu_addr(pool) + node->sa_node.start; +} + +/** + * xe_mem_pool_dump() - Dump the state of the DRM MM manager for debugging. + * @pool: the memory pool info be dumped. + * @p: The DRM printer to use for output. + * + * Only the drm managed region is dumped, not the state of the BOs or any other + * pool information. + * + * Returns: None. + */ +void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p) +{ + drm_mm_print(&pool->base, p); +} diff --git a/drivers/gpu/drm/xe/xe_mem_pool.h b/drivers/gpu/drm/xe/xe_mem_pool.h new file mode 100644 index 000000000000..89cd2555fe91 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mem_pool.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ +#ifndef _XE_MEM_POOL_H_ +#define _XE_MEM_POOL_H_ + +#include +#include + +#include +#include "xe_mem_pool_types.h" + +struct drm_printer; +struct xe_mem_pool; +struct xe_tile; + +struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size, + u32 guard, int flags); +void xe_mem_pool_sync(struct xe_mem_pool *pool); +void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool); +void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node); +u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool); +void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool); +struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool); +void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node); +void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node); +struct xe_mem_pool_node *xe_mem_pool_alloc_node(void); +int xe_mem_pool_insert_node(struct xe_mem_pool *pool, + struct xe_mem_pool_node *node, u32 size); +void xe_mem_pool_free_node(struct xe_mem_pool_node *node); +void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node); +void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_mem_pool_types.h b/drivers/gpu/drm/xe/xe_mem_pool_types.h new file mode 100644 index 000000000000..d5e926c93351 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mem_pool_types.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ + +#ifndef _XE_MEM_POOL_TYPES_H_ +#define _XE_MEM_POOL_TYPES_H_ + +#include + +#define XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY BIT(0) + +/** + * struct xe_mem_pool_node - Sub-range allocations from mem pool. + */ +struct xe_mem_pool_node { + /** @sa_node: drm_mm_node for this allocation. */ + struct drm_mm_node sa_node; +}; + +#endif -- cgit v1.2.3 From 1460eae74fbbb27d5c5b159dba021e41c6ace4c1 Mon Sep 17 00:00:00 2001 From: Satyanarayana K V P Date: Wed, 8 Apr 2026 11:01:48 +0000 Subject: drm/xe/vf: Use drm mm instead of drm sa for CCS read/write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The suballocator algorithm tracks a hole cursor at the last allocation and tries to allocate after it. This is optimized for fence-ordered progress, where older allocations are expected to become reusable first. In fence-enabled mode, that ordering assumption holds. In fence-disabled mode, allocations may be freed in arbitrary order, so limiting allocation to the current hole window can miss valid free space and fail allocations despite sufficient total space. Use DRM memory manager instead of sub-allocator to get rid of this issue as CCS read/write operations do not use fences. Fixes: 864690cf4dd6 ("drm/xe/vf: Attach and detach CCS copy commands with BO") Signed-off-by: Satyanarayana K V P Cc: Matthew Brost Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Michal Wajdeczko Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260408110145.1639937-6-satyanarayana.k.v.p@intel.com (cherry picked from commit 6c84b493012aeb05dec29c709377bf0e17ac6815) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo_types.h | 3 +- drivers/gpu/drm/xe/xe_migrate.c | 56 +++++++++++++++++------------- drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 54 +++++++++++++++------------- drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h | 5 +-- 4 files changed, 63 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index ff8317bfc1ae..9d19940b8fc0 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -18,6 +18,7 @@ #include "xe_ggtt_types.h" struct xe_device; +struct xe_mem_pool_node; struct xe_vm; #define XE_BO_MAX_PLACEMENTS 3 @@ -88,7 +89,7 @@ struct xe_bo { bool ccs_cleared; /** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */ - struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT]; + struct xe_mem_pool_node *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT]; /** * @cpu_caching: CPU caching mode. Currently only used for userspace diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index fc918b4fba54..5fdc89ed5256 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -29,6 +29,7 @@ #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_map.h" +#include "xe_mem_pool.h" #include "xe_mocs.h" #include "xe_printk.h" #include "xe_pt.h" @@ -1166,11 +1167,12 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, u32 batch_size, batch_size_allocated; struct xe_device *xe = gt_to_xe(gt); struct xe_res_cursor src_it, ccs_it; + struct xe_mem_pool *bb_pool; struct xe_sriov_vf_ccs_ctx *ctx; - struct xe_sa_manager *bb_pool; u64 size = xe_bo_size(src_bo); - struct xe_bb *bb = NULL; + struct xe_mem_pool_node *bb; u64 src_L0, src_L0_ofs; + struct xe_bb xe_bb_tmp; u32 src_L0_pt; int err; @@ -1208,18 +1210,18 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, size -= src_L0; } - bb = xe_bb_alloc(gt); + bb = xe_mem_pool_alloc_node(); if (IS_ERR(bb)) return PTR_ERR(bb); bb_pool = ctx->mem.ccs_bb_pool; - scoped_guard(mutex, xe_sa_bo_swap_guard(bb_pool)) { - xe_sa_bo_swap_shadow(bb_pool); + scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) { + xe_mem_pool_swap_shadow_locked(bb_pool); - err = xe_bb_init(bb, bb_pool, batch_size); + err = xe_mem_pool_insert_node(bb_pool, bb, batch_size * sizeof(u32)); if (err) { xe_gt_err(gt, "BB allocation failed.\n"); - xe_bb_free(bb, NULL); + kfree(bb); return err; } @@ -1227,6 +1229,7 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, size = xe_bo_size(src_bo); batch_size = 0; + xe_bb_tmp = (struct xe_bb){ .cs = xe_mem_pool_node_cpu_addr(bb), .len = 0 }; /* * Emit PTE and copy commands here. * The CCS copy command can only support limited size. If the size to be @@ -1255,24 +1258,27 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); batch_size += EMIT_COPY_CCS_DW; - emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src); + emit_pte(m, &xe_bb_tmp, src_L0_pt, false, true, &src_it, src_L0, src); - emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); + emit_pte(m, &xe_bb_tmp, ccs_pt, false, false, &ccs_it, ccs_size, src); - bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); - flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt, + xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len, + flush_flags); + flush_flags = xe_migrate_ccs_copy(m, &xe_bb_tmp, src_L0_ofs, src_is_pltt, src_L0_ofs, dst_is_pltt, src_L0, ccs_ofs, true); - bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); + xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len, + flush_flags); size -= src_L0; } - xe_assert(xe, (batch_size_allocated == bb->len)); + xe_assert(xe, (batch_size_allocated == xe_bb_tmp.len)); + xe_assert(xe, bb->sa_node.size == xe_bb_tmp.len * sizeof(u32)); src_bo->bb_ccs[read_write] = bb; xe_sriov_vf_ccs_rw_update_bb_addr(ctx); - xe_sa_bo_sync_shadow(bb->bo); + xe_mem_pool_sync_shadow_locked(bb); } return 0; @@ -1297,10 +1303,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo, enum xe_sriov_vf_ccs_rw_ctxs read_write) { - struct xe_bb *bb = src_bo->bb_ccs[read_write]; + struct xe_mem_pool_node *bb = src_bo->bb_ccs[read_write]; struct xe_device *xe = xe_bo_device(src_bo); + struct xe_mem_pool *bb_pool; struct xe_sriov_vf_ccs_ctx *ctx; - struct xe_sa_manager *bb_pool; u32 *cs; xe_assert(xe, IS_SRIOV_VF(xe)); @@ -1308,17 +1314,17 @@ void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo, ctx = &xe->sriov.vf.ccs.contexts[read_write]; bb_pool = ctx->mem.ccs_bb_pool; - guard(mutex) (xe_sa_bo_swap_guard(bb_pool)); - xe_sa_bo_swap_shadow(bb_pool); - - cs = xe_sa_bo_cpu_addr(bb->bo); - memset(cs, MI_NOOP, bb->len * sizeof(u32)); - xe_sriov_vf_ccs_rw_update_bb_addr(ctx); + scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) { + xe_mem_pool_swap_shadow_locked(bb_pool); - xe_sa_bo_sync_shadow(bb->bo); + cs = xe_mem_pool_node_cpu_addr(bb); + memset(cs, MI_NOOP, bb->sa_node.size); + xe_sriov_vf_ccs_rw_update_bb_addr(ctx); - xe_bb_free(bb, NULL); - src_bo->bb_ccs[read_write] = NULL; + xe_mem_pool_sync_shadow_locked(bb); + xe_mem_pool_free_node(bb); + src_bo->bb_ccs[read_write] = NULL; + } } /** diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c index db023fb66a27..09b99fb2608b 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c @@ -14,9 +14,9 @@ #include "xe_guc.h" #include "xe_guc_submit.h" #include "xe_lrc.h" +#include "xe_mem_pool.h" #include "xe_migrate.h" #include "xe_pm.h" -#include "xe_sa.h" #include "xe_sriov_printk.h" #include "xe_sriov_vf.h" #include "xe_sriov_vf_ccs.h" @@ -141,43 +141,47 @@ static u64 get_ccs_bb_pool_size(struct xe_device *xe) static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx) { + struct xe_mem_pool *pool; struct xe_device *xe = tile_to_xe(tile); - struct xe_sa_manager *sa_manager; + u32 *pool_cpu_addr, *last_dw_addr; u64 bb_pool_size; - int offset, err; + int err; bb_pool_size = get_ccs_bb_pool_size(xe); xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n", ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M); - sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16, - XE_SA_BO_MANAGER_FLAG_SHADOW); - - if (IS_ERR(sa_manager)) { - xe_sriov_err(xe, "Suballocator init failed with error: %pe\n", - sa_manager); - err = PTR_ERR(sa_manager); + pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32), + XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY); + if (IS_ERR(pool)) { + xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n", + pool); + err = PTR_ERR(pool); return err; } - offset = 0; - xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP, - bb_pool_size); - xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP, - bb_pool_size); + pool_cpu_addr = xe_mem_pool_cpu_addr(pool); + memset(pool_cpu_addr, 0, bb_pool_size); - offset = bb_pool_size - sizeof(u32); - xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END); - xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END); + last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1; + *last_dw_addr = MI_BATCH_BUFFER_END; - ctx->mem.ccs_bb_pool = sa_manager; + /** + * Sync the main copy and shadow copy so that the shadow copy is + * replica of main copy. We sync only BBs after init part. So, we + * need to make sure the main pool and shadow copy are in sync after + * this point. This is needed as GuC may read the BB commands from + * shadow copy. + */ + xe_mem_pool_sync(pool); + ctx->mem.ccs_bb_pool = pool; return 0; } static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx) { - u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool); + u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool); struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); u32 dw[10], i = 0; @@ -388,7 +392,7 @@ err_ret: #define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32)) void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx) { - u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool); + u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool); struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); struct xe_device *xe = gt_to_xe(ctx->mig_q->gt); @@ -412,8 +416,8 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo) struct xe_device *xe = xe_bo_device(bo); enum xe_sriov_vf_ccs_rw_ctxs ctx_id; struct xe_sriov_vf_ccs_ctx *ctx; + struct xe_mem_pool_node *bb; struct xe_tile *tile; - struct xe_bb *bb; int err = 0; xe_assert(xe, IS_VF_CCS_READY(xe)); @@ -445,7 +449,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo) { struct xe_device *xe = xe_bo_device(bo); enum xe_sriov_vf_ccs_rw_ctxs ctx_id; - struct xe_bb *bb; + struct xe_mem_pool_node *bb; xe_assert(xe, IS_VF_CCS_READY(xe)); @@ -471,8 +475,8 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo) */ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p) { - struct xe_sa_manager *bb_pool; enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_mem_pool *bb_pool; if (!IS_VF_CCS_READY(xe)) return; @@ -485,7 +489,7 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p) drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read"); drm_printf(p, "-------------------------\n"); - drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool)); + xe_mem_pool_dump(bb_pool, p); drm_puts(p, "\n"); } } diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h index 22c499943d2a..6fc8f97ef3f4 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h @@ -17,9 +17,6 @@ enum xe_sriov_vf_ccs_rw_ctxs { XE_SRIOV_VF_CCS_CTX_COUNT }; -struct xe_migrate; -struct xe_sa_manager; - /** * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data. */ @@ -33,7 +30,7 @@ struct xe_sriov_vf_ccs_ctx { /** @mem: memory data */ struct { /** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */ - struct xe_sa_manager *ccs_bb_pool; + struct xe_mem_pool *ccs_bb_pool; } mem; }; -- cgit v1.2.3 From f8c4151d50b12923b67819ebf03c1c6782c984c1 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 9 Apr 2026 00:34:49 +0000 Subject: drm/xe: Fix potential NULL deref in xe_exec_queue_tlb_inval_last_fence_put_unlocked xe_exec_queue_tlb_inval_last_fence_put_unlocked() uses q->vm->xe as the first argument to xe_assert(). This function is called unconditionally from xe_exec_queue_destroy() for all queues, including kernel queues that have q->vm == NULL (e.g., queues created during GT init in xe_gt_record_default_lrcs() with vm=NULL). While current compilers optimize away the q->vm->xe dereference (even in CONFIG_DRM_XE_DEBUG=y builds, the compiler pushes the dereference into the WARN branch that is only taken when the assert condition is false), the code is semantically incorrect and constitutes undefined behavior in the C abstract machine for the NULL pointer case. Use gt_to_xe(q->gt) instead, which is always valid for any exec queue. This is consistent with how xe_exec_queue_destroy() itself obtains the xe_device pointer in its own xe_assert at the top of the function. Fixes: b2d7ec41f2a3 ("drm/xe: Attach last fence to TLB invalidation job queues") Assisted-by: Claude:claude-opus-4.6 Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260409003449.3405767-1-shuicheng.lin@intel.com Signed-off-by: Shuicheng Lin (cherry picked from commit 96078a1c68bf97f17fd1d08c3f58f5c5cc9ccd65) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index b287d0e0e60a..8de8ec784a03 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -1760,7 +1760,7 @@ void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q, void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q, unsigned int type) { - xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + xe_assert(gt_to_xe(q->gt), type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); dma_fence_put(q->tlb_inval[type].last_fence); -- cgit v1.2.3 From 09a8f3c1c11977a6e10c167f26dd298790b31c32 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 8 Apr 2026 17:52:52 +0000 Subject: drm/xe/bo: Fix bo leak on unaligned size validation in xe_bo_init_locked() When type is ttm_bo_type_device and aligned_size != size, the function returns an error without freeing a caller-provided bo, violating the documented contract that bo is freed on failure. Add xe_bo_free(bo) before returning the error. Fixes: 4e03b584143e ("drm/xe/uapi: Reject bo creation of unaligned size") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4.6 Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260408175255.3402838-2-shuicheng.lin@intel.com Signed-off-by: Shuicheng Lin (cherry picked from commit 601c2aa087b6f21014300a3f107a08ee4dde7bdf) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index a7c2dc7f224c..c5e9befc6ba3 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2342,8 +2342,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, alignment = SZ_4K >> PAGE_SHIFT; } - if (type == ttm_bo_type_device && aligned_size != size) + if (type == ttm_bo_type_device && aligned_size != size) { + xe_bo_free(bo); return ERR_PTR(-EINVAL); + } if (!bo) { bo = xe_bo_alloc(); -- cgit v1.2.3 From 1d0adf2fd94fb0c0037c643fadd8f2cf3cffc009 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 8 Apr 2026 17:52:53 +0000 Subject: drm/xe/bo: Fix bo leak on GGTT flag validation in xe_bo_init_locked() When XE_BO_FLAG_GGTT_ALL is set without XE_BO_FLAG_GGTT, the function returns an error without freeing a caller-provided bo, violating the documented contract that bo is freed on failure. Add xe_bo_free(bo) before returning the error. Fixes: 5a3b0df25d6a ("drm/xe: Allow bo mapping on multiple ggtts") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4.6 Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260408175255.3402838-3-shuicheng.lin@intel.com Signed-off-by: Shuicheng Lin (cherry picked from commit 3fbd6cf43cac7b60757f3ce3d95195d3843a902c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index c5e9befc6ba3..4075edf97421 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2322,8 +2322,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, } /* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */ - if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) + if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) { + xe_bo_free(bo); return ERR_PTR(-EINVAL); + } if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) && !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) && -- cgit v1.2.3 From 93a528f67ce5095bcab46a69839eca97f43dd352 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 8 Apr 2026 17:52:54 +0000 Subject: drm/xe: Fix bo leak in xe_dma_buf_init_obj() on allocation failure When drm_gpuvm_resv_object_alloc() fails, the pre-allocated storage bo is not freed. Add xe_bo_free(storage) before returning the error. xe_dma_buf_init_obj() calls xe_bo_init_locked(), which frees the bo on error. Therefore, xe_dma_buf_init_obj() must also free the bo on its own error paths. Otherwise, since xe_gem_prime_import() cannot distinguish whether the failure originated from xe_dma_buf_init_obj() or from xe_bo_init_locked(), it cannot safely decide whether the bo should be freed. Add comments documenting the ownership semantics: on success, ownership of storage is transferred to the returned drm_gem_object; on failure, storage is freed before returning. v2: Add comments to explain the free logic. Fixes: eb289a5f6cc6 ("drm/xe: Convert xe_dma_buf.c for exhaustive eviction") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4.6 Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260408175255.3402838-4-shuicheng.lin@intel.com Signed-off-by: Shuicheng Lin (cherry picked from commit 78a6c5f899f22338bbf48b44fb8950409c5a69b9) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_dma_buf.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 7f9602b3363d..c0937c090d33 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -258,6 +258,13 @@ out_unlock: return ERR_PTR(ret); } +/* + * Takes ownership of @storage: on success it is transferred to the returned + * drm_gem_object; on failure it is freed before returning the error. + * This matches the contract of xe_bo_init_locked() which frees @storage on + * its error paths, so callers need not (and must not) free @storage after + * this call. + */ static struct drm_gem_object * xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, struct dma_buf *dma_buf) @@ -271,8 +278,10 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, int ret = 0; dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm); - if (!dummy_obj) + if (!dummy_obj) { + xe_bo_free(storage); return ERR_PTR(-ENOMEM); + } dummy_obj->resv = resv; xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) { @@ -281,6 +290,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, if (ret) break; + /* xe_bo_init_locked() frees storage on error */ bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size, 0, /* Will require 1way or 2way for vm_bind */ ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec); -- cgit v1.2.3 From 111ab678471bf1f90d078d5513bb086b70596c3c Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 8 Apr 2026 17:52:55 +0000 Subject: drm/xe: Fix dma-buf attachment leak in xe_gem_prime_import() When xe_dma_buf_init_obj() fails, the attachment from dma_buf_dynamic_attach() is not detached. Add dma_buf_detach() before returning the error. Note: we cannot use goto out_err here because xe_dma_buf_init_obj() already frees bo on failure, and out_err would double-free it. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4.6 Reviewed-by: Mattheq Brost Link: https://patch.msgid.link/20260408175255.3402838-5-shuicheng.lin@intel.com Signed-off-by: Shuicheng Lin (cherry picked from commit a828eb185aac41800df8eae4b60501ccc0dbbe51) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_dma_buf.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index c0937c090d33..b9828da15897 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -378,12 +378,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, goto out_err; } - /* Errors here will take care of freeing the bo. */ + /* + * xe_dma_buf_init_obj() takes ownership of bo on both success + * and failure, so we must not touch bo after this call. + */ obj = xe_dma_buf_init_obj(dev, bo, dma_buf); - if (IS_ERR(obj)) + if (IS_ERR(obj)) { + dma_buf_detach(dma_buf, attach); return obj; - - + } get_dma_buf(dma_buf); obj->import_attach = attach; return obj; -- cgit v1.2.3 From f3cc22d4df3ed58439ea7e21daa54c3608e03b78 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 8 Apr 2026 02:06:47 +0000 Subject: drm/xe: Fix error cleanup in xe_exec_queue_create_ioctl() Two error handling issues exist in xe_exec_queue_create_ioctl(): 1. When xe_hw_engine_group_add_exec_queue() fails, the error path jumps to put_exec_queue which skips xe_exec_queue_kill(). If the VM is in preempt fence mode, xe_vm_add_compute_exec_queue() has already added the queue to the VM's compute exec queue list. Skipping the kill leaves the queue on that list, leading to a dangling pointer after the queue is freed. 2. When xa_alloc() fails after xe_hw_engine_group_add_exec_queue() has succeeded, the error path does not call xe_hw_engine_group_del_exec_queue() to remove the queue from the hw engine group list. The queue is then freed while still linked into the hw engine group, causing a use-after-free. Fix both by: - Changing the xe_hw_engine_group_add_exec_queue() failure path to jump to kill_exec_queue so that xe_exec_queue_kill() properly removes the queue from the VM's compute list. - Adding a del_hw_engine_group label before kill_exec_queue for the xa_alloc() failure path, which removes the queue from the hw engine group before proceeding with the rest of the cleanup. Fixes: 7970cb36966c ("'drm/xe/hw_engine_group: Register hw engine group's exec queues") Cc: Francois Dugast Cc: Matthew Brost Cc: Niranjana Vishwanathapura Assisted-by: Claude:claude-opus-4.6 Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260408020647.3397933-1-shuicheng.lin@intel.com Signed-off-by: Shuicheng Lin (cherry picked from commit 37c831f401746a45d510b312b0ed7a77b1e06ec8) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 8de8ec784a03..071b8c41df43 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -1405,7 +1405,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (q->vm && q->hwe->hw_engine_group) { err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q); if (err) - goto put_exec_queue; + goto kill_exec_queue; } } @@ -1416,12 +1416,15 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, /* user id alloc must always be last in ioctl to prevent UAF */ err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); if (err) - goto kill_exec_queue; + goto del_hw_engine_group; args->exec_queue_id = id; return 0; +del_hw_engine_group: + if (q->vm && q->hwe && q->hwe->hw_engine_group) + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); kill_exec_queue: xe_exec_queue_kill(q); delete_queue_group: -- cgit v1.2.3 From dc2d9842c67d883d3200ae33b9c3859dd9492408 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 15 Apr 2026 22:54:28 +0000 Subject: drm/xe/eustall: Fix drm_dev_put called before stream disable in close In xe_eu_stall_stream_close(), drm_dev_put() is called before the stream is disabled and its resources are freed. If this drops the last reference, the device structures could be freed while the subsequent cleanup code still accesses them, leading to a use-after-free. Fix this by moving drm_dev_put() after all device accesses are complete. This matches the ordering in xe_oa_release(). Fixes: 9a0b11d4cf3b ("drm/xe/eustall: Add support to init, enable and disable EU stall sampling") Cc: Harish Chegondi Assisted-by: Claude:claude-opus-4.6 Signed-off-by: Shuicheng Lin Reviewed-by: Harish Chegondi Link: https://patch.msgid.link/20260415225428.3399934-1-shuicheng.lin@intel.com Signed-off-by: Matt Roper (cherry picked from commit 35aff528f7297e949e5e19c9cd7fd748cf1cf21c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_eu_stall.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index c34408cfd292..dddcdd0bb7a3 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -869,14 +869,14 @@ static int xe_eu_stall_stream_close(struct inode *inode, struct file *file) struct xe_eu_stall_data_stream *stream = file->private_data; struct xe_gt *gt = stream->gt; - drm_dev_put(>->tile->xe->drm); - mutex_lock(>->eu_stall->stream_lock); xe_eu_stall_disable_locked(stream); xe_eu_stall_data_buf_destroy(stream); xe_eu_stall_stream_free(stream); mutex_unlock(>->eu_stall->stream_lock); + drm_dev_put(>->tile->xe->drm); + return 0; } -- cgit v1.2.3 From 3762d6c36549accea7068c4a175483fafdd03657 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 17 Apr 2026 16:33:08 +0000 Subject: drm/xe/gsc: Fix BO leak on error in query_compatibility_version() When xe_gsc_read_out_header() fails, query_compatibility_version() returns directly instead of jumping to the out_bo label. This skips the xe_bo_unpin_map_no_vm() call, leaving the BO pinned and mapped with no remaining reference to free it. Fix by using goto out_bo so the error path properly cleans up the BO, consistent with the other error handling in the same function. Fixes: 0881cbe04077 ("drm/xe/gsc: Query GSC compatibility version") Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Link: https://patch.msgid.link/20260417163308.3416147-1-shuicheng.lin@intel.com Signed-off-by: Shuicheng Lin (cherry picked from commit 8de86d0a843c32ca9d36864bdb92f0376a830bce) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index e5c234f3d795..0d13e357fb43 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -166,7 +166,7 @@ static int query_compatibility_version(struct xe_gsc *gsc) &rd_offset); if (err) { xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err); - return err; + goto out_bo; } compat->major = version_query_rd(xe, &bo->vmap, rd_offset, proj_major); -- cgit v1.2.3 From 0df99689eb790bcad3ad82b38fa4ce1cbf3cffa3 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 20 Apr 2026 14:16:03 +0100 Subject: drm/xe/xelp: Fix Wa_18022495364 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Command parser relative MMIO addressing needs to be enabled when writing to the register. Signed-off-by: Tvrtko Ursulin Fixes: ca33cd271ef9 ("drm/xe/xelp: Add Wa_18022495364") Cc: Matt Roper Cc: Matthew Brost Cc: Thomas Hellström Cc: Rodrigo Vivi Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260420131603.70357-1-tvrtko.ursulin@igalia.com Signed-off-by: Matt Roper (cherry picked from commit 5627392001802a98ed6cf8cf79a303abd00d1c0f) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 9d12a0d2f0b5..c725cde4508d 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1214,7 +1214,7 @@ static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc, if (xe_gt_WARN_ON(lrc->gt, max_len < 3)) return -ENOSPC; - *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_LRM_CS_MMIO | MI_LRI_NUM_REGS(1); *cmd++ = CS_DEBUG_MODE2(0).addr; *cmd++ = REG_MASKED_FIELD_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE); -- cgit v1.2.3 From 4e5591c2fc1b30f4ea5e2eab4c3a695acc404e39 Mon Sep 17 00:00:00 2001 From: Jia Yao Date: Fri, 17 Apr 2026 05:59:16 +0000 Subject: drm/xe/uapi: Reject coh_none PAT index for CPU cached memory in madvise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add validation in xe_vm_madvise_ioctl() to reject PAT indices with XE_COH_NONE coherency mode when applied to CPU cached memory. Using coh_none with CPU cached buffers is a security issue. When the kernel clears pages before reallocation, the clear operation stays in CPU cache (dirty). GPU with coh_none can bypass CPU caches and read stale sensitive data directly from DRAM, potentially leaking data from previously freed pages of other processes. This aligns with the existing validation in vm_bind path (xe_vm_bind_ioctl_validate_bo). v2(Matthew brost) - Add fixes - Move one debug print to better place v3(Matthew Auld) - Should be drm/xe/uapi - More Cc v4(Shuicheng Lin) - Fix kmem leak issues by the way v5 - Remove kmem leak because it has been merged by another patch v6 - Remove the fix which is not related to current fix v7 - No change v8 - Rebase v9 - Limit the restrictions to iGPU v10 - No change Fixes: ada7486c5668 ("drm/xe: Implement madvise ioctl for xe") Cc: # v6.18+ Cc: Shuicheng Lin Cc: Mathew Alwin Cc: Michal Mrozek Cc: Matthew Brost Cc: Matthew Auld Signed-off-by: Jia Yao Reviewed-by: Matthew Auld Acked-by: Michal Mrozek Acked-by: José Roberto de Souza Signed-off-by: Matthew Auld Link: https://patch.msgid.link/20260417055917.2027459-2-jia.yao@intel.com (cherry picked from commit 016ccdb674b8c899940b3944952c96a6a490d10a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm_madvise.c | 47 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 66f00d3f5c07..c78906dea82b 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -621,6 +621,45 @@ static int xe_madvise_purgeable_retained_to_user(const struct xe_madvise_details return 0; } +static bool check_pat_args_are_sane(struct xe_device *xe, + struct xe_vmas_in_madvise_range *madvise_range, + u16 pat_index) +{ + u16 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); + int i; + + /* + * Using coh_none with CPU cached buffers is not allowed on iGPU. + * On iGPU the GPU shares the LLC with the CPU, so with coh_none + * the GPU bypasses CPU caches and reads directly from DRAM, + * potentially seeing stale sensitive data from previously freed + * pages. On dGPU this restriction does not apply, because the + * platform does not provide a non-coherent system memory access + * path that would violate the DMA coherency contract. + */ + if (coh_mode != XE_COH_NONE || IS_DGFX(xe)) + return true; + + for (i = 0; i < madvise_range->num_vmas; i++) { + struct xe_vma *vma = madvise_range->vmas[i]; + struct xe_bo *bo = xe_vma_bo(vma); + + if (bo) { + /* BO with WB caching + COH_NONE is not allowed */ + if (XE_IOCTL_DBG(xe, bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) + return false; + /* Imported dma-buf without caching info, assume cached */ + if (XE_IOCTL_DBG(xe, !bo->cpu_caching)) + return false; + } else if (XE_IOCTL_DBG(xe, xe_vma_is_cpu_addr_mirror(vma) || + xe_vma_is_userptr(vma))) + /* System memory (userptr/SVM) is always CPU cached */ + return false; + } + + return true; +} + static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, u32 atomic_val) { @@ -750,6 +789,14 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil } } + if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) { + if (!check_pat_args_are_sane(xe, &madvise_range, + args->pat_index.val)) { + err = -EINVAL; + goto free_vmas; + } + } + if (madvise_range.has_bo_vmas) { if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) { if (!check_bo_args_are_sane(vm, madvise_range.vmas, -- cgit v1.2.3 From 662f9ddc8077792129440d05cbef2f944a07777a Mon Sep 17 00:00:00 2001 From: Jia Yao Date: Fri, 17 Apr 2026 05:59:17 +0000 Subject: drm/xe/uapi: Reject coh_none PAT index for CPU_ADDR_MIRROR Add validation in xe_vm_bind_ioctl() to reject PAT indices with XE_COH_NONE coherency mode when used with DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR. CPU address mirror mappings use system memory that is CPU cached, which makes them incompatible with COH_NONE PAT indices. Allowing COH_NONE with CPU cached buffers is a security risk, as the GPU may bypass CPU caches and read stale sensitive data from DRAM. Although CPU_ADDR_MIRROR does not create an immediate mapping, the backing system memory is still CPU cached. Apply the same PAT coherency restrictions as DRM_XE_VM_BIND_OP_MAP_USERPTR. v2: - Correct fix tag v6: - No change v7: - Correct fix tag v8: - Rebase v9: - Limit the restrictions to iGPU v10: - Just add the iGPU logic but keep dGPU logic Fixes: b43e864af0d4 ("drm/xe/uapi: Add DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR") Cc: # v6.15+ Cc: Shuicheng Lin Cc: Mathew Alwin Cc: Michal Mrozek Cc: Matthew Brost Cc: Matthew Auld Signed-off-by: Jia Yao Reviewed-by: Matthew Auld Acked-by: Michal Mrozek Signed-off-by: Matthew Auld Link: https://patch.msgid.link/20260417055917.2027459-3-jia.yao@intel.com (cherry picked from commit 4d58d7535e826a3175527b6174502f0db319d7f6) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 1720205c09ca..a717a2b8dea3 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3658,6 +3658,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || + XE_IOCTL_DBG(xe, !IS_DGFX(xe) && coh_mode == XE_COH_NONE && + is_cpu_addr_mirror) || XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) && (op == DRM_XE_VM_BIND_OP_MAP_USERPTR || is_cpu_addr_mirror) && -- cgit v1.2.3 From 76b0d0baa9ae9c60e726bbe1b6ff0bec2c993634 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 25 Apr 2026 22:07:06 -0700 Subject: Input: elan_i2c - validate firmware size before use Ensure that the firmware file is large enough to contain the expected number of pages and the signature (which resides at the end of the firmware blob) before accessing them to prevent potential out-of-bounds reads. Cc: stable@vger.kernel.org Link: https://patch.msgid.link/ae2dOgiFvXRm4BHo@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 7475803c6ce4..5cba02a156ce 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -648,6 +648,11 @@ static ssize_t elan_sysfs_update_fw(struct device *dev, return error; } + if (fw->size < data->fw_signature_address + sizeof(signature)) { + dev_err(dev, "firmware file too small\n"); + return -EBADF; + } + /* Firmware file must match signature data */ fw_signature = &fw->data[data->fw_signature_address]; if (memcmp(fw_signature, signature, sizeof(signature)) != 0) { -- cgit v1.2.3 From 38694f4639c45599161860e828dc4ac77abf8cea Mon Sep 17 00:00:00 2001 From: Edward Srouji Date: Mon, 27 Apr 2026 14:02:32 +0300 Subject: RDMA/mlx5: Fix UAF in SRQ destroy due to race with create A race condition exists between mlx5_cmd_destroy_srq() and mlx5_cmd_create_srq() that can lead to a use-after-free (UAF) [1]. After destroy_srq_split() releases the SRQ to firmware, the SRQN can be immediately reallocated for a new SRQ being created concurrently. If the create path stores the new SRQ in the xarray before the destroy path erases it, the destroy will incorrectly delete the new SRQ's entry. Later accesses then hit freed memory. Fix by replacing the unconditional xa_erase_irq() with xa_cmpxchg_irq() that only erases the entry if it hasn't already been replaced (still contains XA_ZERO_ENTRY), preserving any newly created SRQ. [1] RIP: 0010:mlx5_cmd_destroy_srq+0xd8/0x110 [mlx5_ib] Code: 89 e1 ba 06 04 00 00 4c 89 f6 48 89 ef e8 80 19 70 e1 c6 83 a0 0f 00 00 00 fb 5b 44 89 e8 5d 41 5c 41 5d 41 5e c3 cc cc cc cc <0f> 0b 48 89 c2 83 e2 03 48 83 fa 02 75 08 48 3d 05 c0 ff ff 77 08 RSP: 0018:ff110001037b7d08 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ff1100010bb9c000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ff110001037b7c90 RBP: ff1100010bb9cfa0 R08: 0000000000000000 R09: 0000000000000000 R10: ff110001037b7da0 R11: ff11000104f29580 R12: ff1100010e2ac090 R13: 000000000000000d R14: 0000000000000001 R15: ff11000105336300 FS: 00007fa24787c740(0000) GS:ff1100046eb8d000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa247984e90 CR3: 0000000109d59005 CR4: 0000000000373eb0 Call Trace: mlx5_ib_destroy_srq+0x25/0xa0 [mlx5_ib] ib_destroy_srq_user+0x21/0x90 [ib_core] uverbs_free_srq+0x1b/0x50 [ib_uverbs] destroy_hw_idr_uobject+0x1e/0x50 [ib_uverbs] uverbs_destroy_uobject+0x35/0x180 [ib_uverbs] __uverbs_cleanup_ufile+0xdd/0x140 [ib_uverbs] uverbs_destroy_ufile_hw+0x38/0xf0 [ib_uverbs] ib_uverbs_close+0x17/0xa0 [ib_uverbs] __fput+0xe0/0x2a0 __x64_sys_close+0x3a/0x80 do_syscall_64+0x55/0xac0 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7fa247984ea4 Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 80 3d a5 51 0e 00 00 74 13 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 3c c3 0f 1f 00 55 48 89 e5 48 83 ec 10 89 7d RSP: 002b:00007ffecfa79498 EFLAGS: 00000202 ORIG_RAX: 0000000000000003 RAX: ffffffffffffffda RBX: 0000200000000080 RCX: 00007fa247984ea4 RDX: 0000000000000040 RSI: 0000200000000200 RDI: 0000000000000003 RBP: 00007ffecfa794e0 R08: 00007ffecfa794e0 R09: 00007ffecfa794e0 R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000001 R13: 0000000000000000 R14: 0000200000000000 R15: 0000200000000009 ---[ end trace 0000000000000000 ]--- Fixes: fd89099d635e ("RDMA/mlx5: Issue FW command to destroy SRQ on reentry") Link: https://patch.msgid.link/r/20260427-security-bug-fixes-v3-1-4621fa52de0e@nvidia.com Signed-off-by: Edward Srouji Reviewed-by: Michael Guralnik Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/srq_cmd.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 8b3385396599..c1a088120915 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -683,7 +683,14 @@ int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, srq, 0); return err; } - xa_erase_irq(&table->array, srq->srqn); + + /* + * A race can occur where a concurrent create gets the same srqn + * (after hardware released it) and overwrites XA_ZERO_ENTRY with + * its new SRQ before we reach here. In that case, we must not erase + * the entry as it now belongs to the new SRQ. + */ + xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, NULL, 0); mlx5_core_res_put(&srq->common); wait_for_completion(&srq->common.free); -- cgit v1.2.3 From 9bee81cc5e8811c8bbe67fbf5214a7998457324b Mon Sep 17 00:00:00 2001 From: Edward Srouji Date: Mon, 27 Apr 2026 14:02:33 +0300 Subject: RDMA/mlx5: Fix UAF in DCT destroy due to race with create A potential race condition exists between mlx5_core_destroy_dct() and mlx5_core_create_dct() that can lead to a use-after-free. After _mlx5_core_destroy_dct() releases the DCT to firmware, the DCTN can be immediately reallocated for a new DCT being created concurrently. If the create path stores the new DCT in the xarray before the destroy path erases it, the destroy will incorrectly delete the new DCT's entry. Later accesses then hit freed memory. Fix by replacing the unconditional xa_erase_irq() with xa_cmpxchg_irq() that only erases the entry if it hasn't already been replaced (still contains XA_ZERO_ENTRY), preserving any newly created DCT. Fixes: afff24899846 ("RDMA/mlx5: Handle DCT QP logic separately from low level QP interface") Link: https://patch.msgid.link/r/20260427-security-bug-fixes-v3-2-4621fa52de0e@nvidia.com Signed-off-by: Edward Srouji Reviewed-by: Michael Guralnik Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qpc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index 146d03ae40bd..a7a4f9420271 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -314,7 +314,14 @@ destroy: xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, XA_ZERO_ENTRY, dct, 0); return err; } - xa_erase_irq(&table->dct_xa, dct->mqp.qpn); + + /* + * A race can occur where a concurrent create gets the same dctn + * (after hardware released it) and overwrites XA_ZERO_ENTRY with + * its new DCT before we reach here. In that case, we must not erase + * the entry as it now belongs to the new DCT. + */ + xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, XA_ZERO_ENTRY, NULL, 0); return 0; } -- cgit v1.2.3 From 610771c62e2ac5bca851fc5a6f8af1cdd83f189a Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Mon, 27 Apr 2026 14:02:34 +0300 Subject: IB/core: Fix IPv6 netlink message size in ib_nl_ip_send_msg() When resolving an RDMA-CM IPv6 address, ib_nl_ip_send_msg() sends a netlink request to the userspace daemon to perform IP-to-GID resolution in certain cases. The function allocates the netlink message buffer using nla_total_size(sizeof(size)), which passes 8 bytes (the size of size_t) instead of 16 bytes (the size of an IPv6 address). This results in an 8-byte under-allocation. This is currently masked by nlmsg_new() over-allocation of the skb in its internal logic. However, the code remains incorrect. Fix the issue by supplying the proper IPv6 address length to nla_total_size(). Fixes: ae43f8286730 ("IB/core: Add IP to GID netlink offload") Link: https://patch.msgid.link/r/20260427-security-bug-fixes-v3-3-4621fa52de0e@nvidia.com Signed-off-by: Maher Sanalla Reviewed-by: Patrisious Haddad Signed-off-by: Edward Srouji Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index a40a765f0307..27992c38ad90 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -149,7 +149,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6; } - len = nla_total_size(sizeof(size)); + len = nla_total_size(size); len += NLMSG_ALIGN(sizeof(*header)); skb = nlmsg_new(len, GFP_KERNEL); -- cgit v1.2.3 From 1f3b337af2231b1e83c9052f771b201f5cbb9997 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Mon, 27 Apr 2026 14:02:35 +0300 Subject: RDMA/core: Fix rereg_mr use-after-free race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a driver creates a new MR during rereg_user_mr, a race window exists between rdma_alloc_commit_uobject() for the new MR and the point where the code reads that MR to populate the response keys. A concurrent rereg_mr or destroy_mr could destroy the MR in this window and cause UAF in the first thread. Racing flow between two rereg_mr calls: CPU0 CPU1 ---- ---- rereg_user_mr(mr_handle) uobj_get_write(mr_handle) -> mr0 mr1 = driver→rereg() rdma_alloc_commit_uobject(mr1) // mr1 replaced mr0 and is unlocked uobj_put_destroy(mr0) rereg_user_mr(mr_handle) uobj_get_write(mr_handle) -> mr1 mr2 = driver→rereg() rdma_alloc_commit_uobject(mr2) // mr2 replaced mr1 and is unlocked uobj_put_destroy(mr1) // Destroys mr1! resp.lkey = mr1->lkey; // UAF - mr1 was freed! resp.rkey = mr1->rkey; // UAF - mr1 was freed! Fix by storing lkey/rkey in local variables before the new MR is unlocked and using the local variables to set the user response. Fixes: 6e0954b11c05 ("RDMA/uverbs: Allow drivers to create a new HW object during rereg_mr") Link: https://patch.msgid.link/r/20260427-security-bug-fixes-v3-4-4621fa52de0e@nvidia.com Signed-off-by: Michael Guralnik Reviewed-by: Maher Sanalla Signed-off-by: Edward Srouji Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a768436ba468..91a62d2ade4d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -778,6 +778,7 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) struct ib_pd *orig_pd; struct ib_pd *new_pd; struct ib_mr *new_mr; + u32 lkey, rkey; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) @@ -846,6 +847,8 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) new_mr->uobject = uobj; atomic_inc(&new_pd->usecnt); new_uobj->object = new_mr; + lkey = new_mr->lkey; + rkey = new_mr->rkey; rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR); rdma_restrack_set_name(&new_mr->res, NULL); @@ -871,11 +874,13 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) mr->iova = cmd.hca_va; mr->length = cmd.length; } + lkey = mr->lkey; + rkey = mr->rkey; } memset(&resp, 0, sizeof(resp)); - resp.lkey = mr->lkey; - resp.rkey = mr->rkey; + resp.lkey = lkey; + resp.rkey = rkey; ret = uverbs_response(attrs, &resp, sizeof(resp)); -- cgit v1.2.3 From 6009cca96fcb01182cede725ad61e9e3810f3932 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Mon, 27 Apr 2026 14:02:36 +0300 Subject: RDMA/mlx5: Fix null-ptr-deref in Raw Packet QP creation Raw Packet QPs are unique in that they support separate send and receive queues, using 2 different user-provided buffers. They can also be created with one of the queues having size 0, allowing a send-only or receive-only QP. The Raw Packet RQ umem is created in the common user QP creation path, which allows zero-length queues. Add a later validation of the RQ umem in Raw Packet QP creation path when an RQ was requested. This prevents possible null-ptr dereference crashes, as seen in the below trace: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] CPU: 6 UID: 0 PID: 3539 Comm: raw_packet_umem Not tainted 6.19.0-rc1+ #166 NONE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 RIP: 0010:__mlx5_umem_find_best_quantized_pgoff+0x37/0x280 [mlx5_ib] Code: ff df 41 57 49 89 ff 41 56 41 55 41 89 d5 41 54 4d 89 cc 4c 8d 4f 30 55 4c 89 ca 48 89 f5 53 48 c1 ea 03 48 89 cb 48 83 ec 18 <80> 3c 02 00 44 89 04 24 0f 85 01 02 00 00 48 ba 00 00 00 00 00 fc RSP: 0018:ff1100013966f4e0 EFLAGS: 00010282 RAX: dffffc0000000000 RBX: 00000000ffffffc0 RCX: 00000000ffffffc0 RDX: 0000000000000006 RSI: 00000ffffffff000 RDI: 0000000000000000 RBP: 00000ffffffff000 R08: 0000000000000040 R09: 0000000000000030 R10: 0000000000000000 R11: 0000000000000000 R12: ff1100013966f648 R13: 0000000000000005 R14: ff1100013966f980 R15: 0000000000000000 FS: 00007fae6c82f740(0000) GS:ff11000898ba1000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000200000000000 CR3: 000000010f96c005 CR4: 0000000000373eb0 Call Trace: create_qp+0x747d/0xc740 [mlx5_ib] ? is_module_address+0x18/0x110 ? _create_user_qp.constprop.0+0x18e0/0x18e0 [mlx5_ib] ? __module_address+0x49/0x210 ? is_module_address+0x68/0x110 ? static_obj+0x67/0x90 ? lockdep_init_map_type+0x58/0x200 mlx5_ib_create_qp+0xc85/0x2620 [mlx5_ib] ? find_held_lock+0x2b/0x80 ? create_qp+0xc740/0xc740 [mlx5_ib] ? lock_release+0xcb/0x260 ? lockdep_init_map_type+0x58/0x200 ? __init_swait_queue_head+0xcb/0x150 create_qp.part.0+0x558/0x7c0 [ib_core] ib_create_qp_user+0xa0/0x4f0 [ib_core] ? rdma_lookup_get_uobject+0x1e4/0x400 [ib_uverbs] create_qp+0xe4f/0x1d10 [ib_uverbs] ? ib_uverbs_rereg_mr+0xd40/0xd40 [ib_uverbs] ? ib_uverbs_cq_event_handler+0x120/0x120 [ib_uverbs] ? __might_fault+0x81/0x100 ? lock_release+0xcb/0x260 ? _copy_from_user+0x3e/0x90 ib_uverbs_create_qp+0x10a/0x150 [ib_uverbs] ? ib_uverbs_ex_create_qp+0xe0/0xe0 [ib_uverbs] ? __might_fault+0x81/0x100 ? lock_release+0xcb/0x260 ib_uverbs_write+0x7e5/0xc90 [ib_uverbs] ? uverbs_devnode+0xc0/0xc0 [ib_uverbs] ? lock_acquire+0xfa/0x2b0 ? find_held_lock+0x2b/0x80 ? finish_task_switch.isra.0+0x189/0x6c0 vfs_write+0x1c0/0xf70 ? lockdep_hardirqs_on_prepare+0xde/0x170 ? kernel_write+0x5a0/0x5a0 ? __switch_to+0x527/0xe60 ? __schedule+0x10a3/0x3950 ? io_schedule_timeout+0x110/0x110 ksys_write+0x170/0x1c0 ? __x64_sys_read+0xb0/0xb0 ? trace_hardirqs_off.part.0+0x4e/0xe0 do_syscall_64+0x70/0x1360 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x7fae6ca3118d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 5b cc 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffe678ca308 EFLAGS: 00000213 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007ffe678ca448 RCX: 00007fae6ca3118d RDX: 0000000000000070 RSI: 0000200000000280 RDI: 0000000000000003 RBP: 00007ffe678ca320 R08: 00000000ffffffff R09: 00007fae6c8ec5b8 R10: 0000000000000064 R11: 0000000000000213 R12: 0000000000000001 R13: 0000000000000000 R14: 00007fae6cb71000 R15: 0000000000404df0 Modules linked in: mlx5_ib mlx5_fwctl mlx5_core bonding ip6_gre ip6_tunnel tunnel6 ip_gre gre rdma_ucm ib_uverbs rdma_cm iw_cm ib_ipoib ib_cm ib_umad ib_core rpcsec_gss_krb5 auth_rpcgss oid_registry overlay nfnetlink zram zsmalloc fuse scsi_transport_iscsi [last unloaded: mlx5_core] ---[ end trace 0000000000000000 ]--- RIP: 0010:__mlx5_umem_find_best_quantized_pgoff+0x37/0x280 [mlx5_ib] Fixes: 0fb2ed66a14c ("IB/mlx5: Add create and destroy functionality for Raw Packet QP") Link: https://patch.msgid.link/r/20260427-security-bug-fixes-v3-5-4621fa52de0e@nvidia.com Signed-off-by: Michael Guralnik Reviewed-by: Maher Sanalla Signed-off-by: Edward Srouji Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8f50e7342a76..1611a704c1b3 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1603,6 +1603,11 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } if (qp->rq.wqe_cnt) { + if (!rq->base.ubuffer.umem) { + err = -EINVAL; + goto err_destroy_sq; + } + rq->base.container_mibqp = qp; if (qp->flags & IB_QP_CREATE_CVLAN_STRIPPING) -- cgit v1.2.3 From 20e81c64c905bd765e69ef07920d2b1130dc79b6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 29 Apr 2026 09:44:16 -1000 Subject: workqueue: Annotate alloc_workqueue_va() with __printf(1, 0) alloc_workqueue_va() forwards its va_list to __alloc_workqueue() which ultimately feeds vsnprintf(). __alloc_workqueue() already carries __printf(1, 0); the new wrapper needs the same annotation so format string checking propagates through the forwarding. Fixes: 0de4cb473aed ("workqueue: fix devm_alloc_workqueue() va_list misuse") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202604300347.2LgXyteh-lkp@intel.com/ Signed-off-by: Tejun Heo --- kernel/workqueue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 24d0265191d4..3d2e3b2ec528 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5906,6 +5906,7 @@ err_destroy: return NULL; } +__printf(1, 0) static struct workqueue_struct *alloc_workqueue_va(const char *fmt, unsigned int flags, int max_active, -- cgit v1.2.3 From b2aa3b4d64e460ac606f386c24e7d8a873ce6f1a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 28 Apr 2026 12:23:02 -0400 Subject: tracing/probes: Limit size of event probe to 3K There currently isn't a max limit an event probe can be. One could make an event greater than PAGE_SIZE, which makes the event useless because if it's bigger than the max event that can be recorded into the ring buffer, then it will never be recorded. A event probe should never need to be greater than 3K, so make that the max size. As long as the max is less than the max that can be recorded onto the ring buffer, it should be fine. Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Acked-by: Masami Hiramatsu (Google) Fixes: 93ccae7a22274 ("tracing/kprobes: Support basic types on dynamic events") Link: https://patch.msgid.link/20260428122302.706610ba@gandalf.local.home Signed-off-by: Steven Rostedt --- kernel/trace/trace_probe.c | 6 ++++++ kernel/trace/trace_probe.h | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index e1c73065dae5..e0d3a0da26af 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1523,6 +1523,12 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, parg->offset = *size; *size += parg->type->size * (parg->count ?: 1); + if (*size > MAX_PROBE_EVENT_SIZE) { + ret = -E2BIG; + trace_probe_log_err(ctx->offset, EVENT_TOO_BIG); + goto fail; + } + if (parg->count) { len = strlen(parg->type->fmttype) + 6; parg->fmt = kmalloc(len, GFP_KERNEL); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 9fc56c937130..262d8707a3df 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -38,6 +38,7 @@ #define MAX_BTF_ARGS_LEN 128 #define MAX_DENTRY_ARGS_LEN 256 #define MAX_STRING_SIZE PATH_MAX +#define MAX_PROBE_EVENT_SIZE 3072 /* Reserved field names */ #define FIELD_STRING_IP "__probe_ip" @@ -561,7 +562,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(BAD_TYPE4STR, "This type does not fit for string."),\ C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"),\ C(TOO_MANY_ARGS, "Too many arguments are specified"), \ - C(TOO_MANY_EARGS, "Too many entry arguments specified"), + C(TOO_MANY_EARGS, "Too many entry arguments specified"), \ + C(EVENT_TOO_BIG, "Event too big (too many fields?)"), #undef C #define C(a, b) TP_ERR_##a -- cgit v1.2.3 From 4ebcf3f94924d54706de0d2492c80944d85410fd Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Tue, 28 Apr 2026 10:34:10 +0900 Subject: ntfs: drop nlink once for WIN32/DOS aliases NTFS could store a filename as paired WIN32 and DOS $FILE_NAME attributes for directories. But ntfs_delete() deleted both attributes for unlinking a directory, but it also called drop_nlink() for each attributes. This could trigger warnings when unlinking directories. Signed-off-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/namei.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 10894de519c3..96c450e62efc 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -945,7 +945,8 @@ search: ni_mrec = actx->base_mrec ? actx->base_mrec : actx->mrec; ni_mrec->link_count = cpu_to_le16(le16_to_cpu(ni_mrec->link_count) - 1); - drop_nlink(VFS_I(ni)); + if (!S_ISDIR(VFS_I(ni)->i_mode)) + drop_nlink(VFS_I(ni)); mark_mft_record_dirty(ni); if (looking_for_dos_name) { @@ -955,6 +956,13 @@ search: goto search; } + /* + * For directories, Drop VFS nlink only when mft record link count + * becomes zero. Because we fixes VFS nlink to 1 for directories. + */ + if (S_ISDIR(VFS_I(ni)->i_mode) && !le16_to_cpu(ni_mrec->link_count)) + drop_nlink(VFS_I(ni)); + /* * If hard link count is not equal to zero then we are done. In other * case there are no reference to this inode left, so we should free all @@ -1221,7 +1229,8 @@ static int __ntfs_link(struct ntfs_inode *ni, struct ntfs_inode *dir_ni, } /* Increment hard links count. */ ni_mrec->link_count = cpu_to_le16(le16_to_cpu(ni_mrec->link_count) + 1); - inc_nlink(VFS_I(ni)); + if (!S_ISDIR(vi->i_mode)) + inc_nlink(VFS_I(ni)); /* Done! */ mark_mft_record_dirty(ni); -- cgit v1.2.3 From 9e9354075d5a15cfc0aba965f3d0d77b7d4303e9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 28 Apr 2026 15:21:38 -0400 Subject: ntfs: Use return instead of goto in ntfs_mapping_pairs_decompress() Clang warns (or errors with CONFIG_WERROR=y / W=e): fs/ntfs/runlist.c:755:6: error: variable 'rl' is used uninitialized whenever 'if' condition is true [-Werror,-Wsometimes-uninitialized] 755 | if (overflows_type(lowest_vcn, vcn)) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ... fs/ntfs/runlist.c:971:9: note: uninitialized use occurs here 971 | kvfree(rl); | ^~ ... rl has not been allocated at this point so the 'goto err_out' should really just be a return of the error pointer -EIO. Signed-off-by: Nathan Chancellor Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/runlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c index be6ca3d374bb..da21dbeaaf66 100644 --- a/fs/ntfs/runlist.c +++ b/fs/ntfs/runlist.c @@ -754,7 +754,7 @@ struct runlist_element *ntfs_mapping_pairs_decompress(const struct ntfs_volume * /* Validate lowest_vcn from on-disk metadata to ensure it is sane. */ if (overflows_type(lowest_vcn, vcn)) { ntfs_error(vol->sb, "Invalid lowest_vcn in mapping pairs."); - goto err_out; + return ERR_PTR(-EIO); } /* Start at vcn = lowest_vcn and lcn 0. */ vcn = lowest_vcn; -- cgit v1.2.3 From 8e13b1b4093e0cbcb3dc2906c13b1fdc95cdf0a0 Mon Sep 17 00:00:00 2001 From: Fredric Cover Date: Wed, 29 Apr 2026 14:34:53 -0700 Subject: smb: client: change allocation requirements in smb2_compound_op Currently, smb2_compound_op() allocates struct smb2_compound_vars *vars using GFP_ATOMIC, although smb2_compound_op() can sleep when it calls compound_send_recv() before vars is freed. Allocate vars using GFP_KERNEL. Signed-off-by: Fredric Cover Signed-off-by: Steve French --- fs/smb/client/smb2inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index c6dd282fc3a9..286912616c73 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -230,7 +230,7 @@ replay_again: num_rqst = 0; server = cifs_pick_channel(ses); - vars = kzalloc_obj(*vars, GFP_ATOMIC); + vars = kzalloc_obj(*vars, GFP_KERNEL); if (vars == NULL) { rc = -ENOMEM; goto out; -- cgit v1.2.3 From c208a2b95811d6e1ebae65d0d2fc13f73707f8e7 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 30 Mar 2026 16:19:59 +0530 Subject: cifs: change_conf needs to be called for session setup Today we skip calling change_conf for negotiates and session setup requests. This can be a problem for mchan as the immediate next call after session setup could be due to an I/O that is made on the mount point. For single channel, this is not a problem as there will be several calls after setting up session. This change enforces calling change_conf when the total credits contain enough for reservations for echoes and oplocks. We expect this to happen during the last session setup response. This way, echoes and oplocks are not disabled before the first request to the server. So if that first request is an open, it does not need to disable requesting leases. Cc: Reviewed-by: Bharath SM Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 7f346ee50289..e6cb9b144530 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -111,10 +111,21 @@ smb2_add_credits(struct TCP_Server_Info *server, cifs_trace_rw_credits_zero_in_flight); } server->in_flight--; + + /* + * Rebalance credits when an op drains in_flight. For session setup, + * do this only when the total accumulated credits are high enough (>2) + * so that a newly established secondary channel can reserve credits for + * echoes and oplocks. We expect this to happen at the end of the final + * session setup response. + */ if (server->in_flight == 0 && ((optype & CIFS_OP_MASK) != CIFS_NEG_OP) && ((optype & CIFS_OP_MASK) != CIFS_SESS_OP)) rc = change_conf(server); + else if (server->in_flight == 0 && + ((optype & CIFS_OP_MASK) == CIFS_SESS_OP) && *val > 2) + rc = change_conf(server); /* * Sometimes server returns 0 credits on oplock break ack - we need to * rebalance credits in this case. -- cgit v1.2.3 From 1049970d7583194eedc30e45a3c898b2cb1c30ba Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 27 Apr 2026 14:34:45 +0200 Subject: netfilter: replace skb_try_make_writable() by skb_ensure_writable() skb_try_make_writable() only works on clones and uncloned packets might have their network header in paged fragments. nft_fwd needs to work for the ingress and egress hooks, but the egress hook where skb->data points to the mac header, use skb_network_offset() to include the mac header. The flowtable is fine since it already uses the transport offset. Fixes: d32de98ea70f ("netfilter: nft_fwd_netdev: allow to forward packets via neighbour layer") Fixes: 7d2086871762 ("netfilter: nf_flow_table: move ipv4 offload hook code to nf_flow_table") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 4 ++-- net/netfilter/nft_fwd_netdev.c | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index fd56d663cb5b..dbd7644fdbeb 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -524,7 +524,7 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx, return 0; } - if (skb_try_make_writable(skb, thoff + ctx->hdrsize)) + if (skb_ensure_writable(skb, thoff + ctx->hdrsize)) return -1; flow_offload_refresh(flow_table, flow, false); @@ -1037,7 +1037,7 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx, return 0; } - if (skb_try_make_writable(skb, thoff + ctx->hdrsize)) + if (skb_ensure_writable(skb, thoff + ctx->hdrsize)) return -1; flow_offload_refresh(flow_table, flow, false); diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 4bce36c3a6a0..2cc809303ce8 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -100,6 +100,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, int oif = regs->data[priv->sreg_dev]; unsigned int verdict = NF_STOLEN; struct sk_buff *skb = pkt->skb; + int nhoff = skb_network_offset(skb); struct net_device *dev; int neigh_table; @@ -111,7 +112,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, verdict = NFT_BREAK; goto out; } - if (skb_try_make_writable(skb, sizeof(*iph))) { + if (skb_ensure_writable(skb, nhoff + sizeof(*iph))) { verdict = NF_DROP; goto out; } @@ -132,7 +133,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, verdict = NFT_BREAK; goto out; } - if (skb_try_make_writable(skb, sizeof(*ip6h))) { + if (skb_ensure_writable(skb, nhoff + sizeof(*ip6h))) { verdict = NF_DROP; goto out; } -- cgit v1.2.3 From 0a0b35f0bf10b4c2be607465f5c9c12c8681305b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 27 Apr 2026 14:34:48 +0200 Subject: netfilter: nft_fwd_netdev: add device and headroom validate with neigh forwarding The ttl field has been decremented already and evaluation of this rule would proceed, just drop this packet instead if there is no destination device to forwards this packet. This is exactly what nf_dup already does in this case. Moreover, check for headroom and call skb_expand_head() like in the IP output path to ensure there is sufficient headroom when forwarding this via neigh_xmit(). Fixes: d32de98ea70f ("netfilter: nft_fwd_netdev: allow to forward packets via neighbour layer") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_fwd_netdev.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 2cc809303ce8..605b1d42abce 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -102,6 +102,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, struct sk_buff *skb = pkt->skb; int nhoff = skb_network_offset(skb); struct net_device *dev; + unsigned int hh_len; int neigh_table; switch (priv->nfproto) { @@ -153,8 +154,19 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, } dev = dev_get_by_index_rcu(nft_net(pkt), oif); - if (dev == NULL) - return; + if (dev == NULL) { + verdict = NF_DROP; + goto out; + } + + hh_len = LL_RESERVED_SPACE(dev); + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { + skb = skb_expand_head(skb, hh_len); + if (!skb) { + verdict = NF_STOLEN; + goto out; + } + } skb->dev = dev; skb_clear_tstamp(skb); -- cgit v1.2.3 From 1d47b55b36d2ec73fe6901212c8b28a593c3b27c Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Mon, 27 Apr 2026 14:34:50 +0200 Subject: netfilter: nft_fwd_netdev: use recursion counter in neigh egress path nft_fwd_neigh can be used in egress chains (NF_NETDEV_EGRESS). When the forwarding rule targets the same device or two devices forward to each other, neigh_xmit() triggers dev_queue_xmit() which re-enters nf_hook_egress(), causing infinite recursion and stack overflow. Move the nf_get_nf_dup_skb_recursion() accessor and NF_RECURSION_LIMIT to the shared header nf_dup_netdev.h as a static inline, so that nft_fwd_netdev can use the recursion counter directly without exported function call overhead. Guard neigh_xmit() with the same recursion limit already used in nf_do_netdev_egress(). [ Updated to cache the nf_get_nf_dup_skb_recursion pointer. --pablo ] Fixes: f87b9464d152 ("netfilter: nft_fwd_netdev: Support egress hook") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_dup_netdev.h | 13 +++++++++++++ net/netfilter/nf_dup_netdev.c | 16 ---------------- net/netfilter/nft_fwd_netdev.c | 8 ++++++++ 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h index b175d271aec9..609bcf422a9b 100644 --- a/include/net/netfilter/nf_dup_netdev.h +++ b/include/net/netfilter/nf_dup_netdev.h @@ -3,10 +3,23 @@ #define _NF_DUP_NETDEV_H_ #include +#include +#include void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif); void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif); +#define NF_RECURSION_LIMIT 2 + +static inline u8 *nf_get_nf_dup_skb_recursion(void) +{ +#ifndef CONFIG_PREEMPT_RT + return this_cpu_ptr(&softnet_data.xmit.nf_dup_skb_recursion); +#else + return ¤t->net_xmit.nf_dup_skb_recursion; +#endif +} + struct nft_offload_ctx; struct nft_flow_rule; diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index e348fb90b8dc..3b0a70e154cd 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -13,22 +13,6 @@ #include #include -#define NF_RECURSION_LIMIT 2 - -#ifndef CONFIG_PREEMPT_RT -static u8 *nf_get_nf_dup_skb_recursion(void) -{ - return this_cpu_ptr(&softnet_data.xmit.nf_dup_skb_recursion); -} -#else - -static u8 *nf_get_nf_dup_skb_recursion(void) -{ - return ¤t->net_xmit.nf_dup_skb_recursion; -} - -#endif - static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev, enum nf_dev_hooks hook) { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 605b1d42abce..b9e88d7cf308 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -95,6 +95,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { + u8 *nf_dup_skb_recursion = nf_get_nf_dup_skb_recursion(); struct nft_fwd_neigh *priv = nft_expr_priv(expr); void *addr = ®s->data[priv->sreg_addr]; int oif = regs->data[priv->sreg_dev]; @@ -153,6 +154,11 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, goto out; } + if (*nf_dup_skb_recursion > NF_RECURSION_LIMIT) { + verdict = NF_DROP; + goto out; + } + dev = dev_get_by_index_rcu(nft_net(pkt), oif); if (dev == NULL) { verdict = NF_DROP; @@ -170,7 +176,9 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, skb->dev = dev; skb_clear_tstamp(skb); + (*nf_dup_skb_recursion)++; neigh_xmit(neigh_table, dev, addr, skb); + (*nf_dup_skb_recursion)--; out: regs->verdict.code = verdict; } -- cgit v1.2.3 From 735a309b4bfb9e1e26636ff4a3e8a146f53c54f9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 27 Apr 2026 19:53:20 -0700 Subject: net: add net_iov_init() and use it to initialize ->page_type Commit db359fccf212 ("mm: introduce a new page type for page pool in page type") added a page_type field to struct net_iov at the same offset as struct page::page_type, so that page_pool_set_pp_info() can call __SetPageNetpp() uniformly on both pages and net_iovs. The page-type API requires the field to hold the UINT_MAX "no type" sentinel before a type can be set; for real struct page that invariant is established by the page allocator on free. struct net_iov is not allocated through the page allocator, so the field is left as zero (io_uring zcrx, which uses __GFP_ZERO) or as slab garbage (devmem, which uses kvmalloc_objs() without zeroing). When the page pool then calls page_pool_set_pp_info() on a freshly-bound niov, __SetPageNetpp()'s VM_BUG_ON_PAGE(page->page_type != UINT_MAX) fires and the kernel BUGs. Triggered in selftests by io_uring zcrx setup through the fbnic queue restart path: kernel BUG at ./include/linux/page-flags.h:1062! RIP: 0010:page_pool_set_pp_info (./include/linux/page-flags.h:1062 net/core/page_pool.c:716) Call Trace: net_mp_niov_set_page_pool (net/core/page_pool.c:1360) io_pp_zc_alloc_netmems (io_uring/zcrx.c:1089 io_uring/zcrx.c:1110) fbnic_fill_bdq (./include/net/page_pool/helpers.h:160 drivers/net/ethernet/meta/fbnic/fbnic_txrx.c:906) __fbnic_nv_restart (drivers/net/ethernet/meta/fbnic/fbnic_txrx.c:2470 drivers/net/ethernet/meta/fbnic/fbnic_txrx.c:2874) fbnic_queue_start (drivers/net/ethernet/meta/fbnic/fbnic_txrx.c:2903) netdev_rx_queue_reconfig (net/core/netdev_rx_queue.c:137) __netif_mp_open_rxq (net/core/netdev_rx_queue.c:234) io_register_zcrx (io_uring/zcrx.c:818 io_uring/zcrx.c:903) __io_uring_register (io_uring/register.c:931) __do_sys_io_uring_register (io_uring/register.c:1029) do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94) The same path is reachable through devmem dmabuf binding via netdev_nl_bind_rx_doit() -> net_devmem_bind_dmabuf_to_queue(). Add a net_iov_init() helper that stamps ->owner, ->type and the ->page_type sentinel, and use it from both the devmem and io_uring zcrx niov init loops. Fixes: db359fccf212 ("mm: introduce a new page type for page pool in page type") Acked-by: Vlastimil Babka (SUSE) Acked-by: Byungchul Park Reviewed-by: Jens Axboe Acked-by: Pavel Begunkov Link: https://patch.msgid.link/20260428025320.853452-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netmem.h | 15 +++++++++++++++ io_uring/zcrx.c | 3 +-- net/core/devmem.c | 3 +-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/net/netmem.h b/include/net/netmem.h index 507b74c9f52d..78fe51e5756b 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -127,6 +127,21 @@ static inline unsigned int net_iov_idx(const struct net_iov *niov) return niov - net_iov_owner(niov)->niovs; } +/* Initialize a niov: stamp the owning area, the memory provider type, + * and the page_type "no type" sentinel expected by the page-type API + * (see PAGE_TYPE_OPS in ) so that + * page_pool_set_pp_info() can later call __SetPageNetpp() on a niov + * cast to struct page. + */ +static inline void net_iov_init(struct net_iov *niov, + struct net_iov_area *owner, + enum net_iov_type type) +{ + niov->owner = owner; + niov->type = type; + niov->page_type = UINT_MAX; +} + /* netmem */ /** diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 7b93c87b8371..19837e0b5e91 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -495,10 +495,9 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, for (i = 0; i < nr_iovs; i++) { struct net_iov *niov = &area->nia.niovs[i]; - niov->owner = &area->nia; + net_iov_init(niov, &area->nia, NET_IOV_IOURING); area->freelist[i] = i; atomic_set(&area->user_refs[i], 0); - niov->type = NET_IOV_IOURING; } if (ifq->dev) { diff --git a/net/core/devmem.c b/net/core/devmem.c index cde4c89bc146..468344739db2 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -297,8 +297,7 @@ net_devmem_bind_dmabuf(struct net_device *dev, for (i = 0; i < owner->area.num_niovs; i++) { niov = &owner->area.niovs[i]; - niov->type = NET_IOV_DMABUF; - niov->owner = &owner->area; + net_iov_init(niov, &owner->area, NET_IOV_DMABUF); page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), net_devmem_get_dma_addr(niov)); if (direction == DMA_TO_DEVICE) -- cgit v1.2.3 From c3388f8c1cbb5aae3731749b586499ed126b4156 Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Tue, 28 Apr 2026 16:24:38 +0200 Subject: MAINTAINERS: Add myself as NFC subsystem maintainer Add myself and update the mailing list. Signed-off-by: David Heidelberg Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 21288a3a7d93..176390ef4275 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18829,8 +18829,10 @@ F: include/uapi/linux/nexthop.h F: net/ipv4/nexthop.c NFC SUBSYSTEM -L: netdev@vger.kernel.org -S: Orphan +M: David Heidelberg +L: oe-linux-nfc@lists.linux.dev +S: Maintained +T: git https://codeberg.org/linux-nfc/linux.git F: Documentation/devicetree/bindings/net/nfc/ F: drivers/nfc/ F: include/net/nfc/ -- cgit v1.2.3 From 72e9647e2b20c31b4f2febf981566e3c5cdef90e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 28 Apr 2026 13:33:57 -0700 Subject: selftests: drv-net: clarify linters and frameworks in README Minor clarifications in the README: - call out what linters we expect to be clean - make it clear that by "frameworks" we mean code under lib/ not just factoring code out in the same file Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/README.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst index c8588436c224..c6bed9a985bc 100644 --- a/tools/testing/selftests/drivers/net/README.rst +++ b/tools/testing/selftests/drivers/net/README.rst @@ -211,8 +211,8 @@ Avoid libraries and frameworks Test files should be relatively self contained. The libraries should only include very core or non-trivial code. -It may be tempting to "factor out" the common code, but fight that urge. -Library code increases the barrier of entry, and complexity in general. +It may be tempting to "factor out" the common code to lib/py/, but fight that +urge. Library code increases the barrier of entry, and complexity in general. Avoid mixing test code and boilerplate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -290,6 +290,12 @@ or:: def test(cfg, mode, protocol): pass +Linters +~~~~~~~ + +We expect clean ``ruff check`` and ``pylint --disable=R``. +The code should be clean, avoid disabling pylint warnings explicitly! + Running tests CI-style ====================== -- cgit v1.2.3 From e73cafaf4acea5445df2e5ee021a335d717c1697 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 28 Apr 2026 13:39:24 -0700 Subject: MAINTAINERS: update the IPv4/IPv6 entry and add Ido Schimmel The IPv4/IPv6 and routing code is not very well separated from the TCP/UDP code. Scope it down properly by providing a more accurate file list, instead of net/ipv4/ and net/ipv6/ Now that the entry is more accurately representing layer 3 and routing merge in the nexthop entry into it. Add Ido Schimmel as a co-maintainer, Ido's git history speaks for itself. Reviewed-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260428203924.1229169-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 63 +++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 176390ef4275..27a073f53cea 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18672,19 +18672,59 @@ F: net/xfrm/ F: tools/testing/selftests/net/ipsec.c NETWORKING [IPv4/IPv6] -M: "David S. Miller" M: David Ahern +M: Ido Schimmel L: netdev@vger.kernel.org S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git -F: arch/x86/net/* -F: include/linux/ip.h -F: include/linux/ipv6* +F: Documentation/netlink/specs/rt-addr.yaml +F: Documentation/netlink/specs/rt-neigh.yaml +F: Documentation/netlink/specs/rt-route.yaml +F: Documentation/netlink/specs/rt-rule.yaml +F: include/linux/inetdevice.h +F: include/linux/mroute* +F: include/net/addrconf.h +F: include/net/arp.h F: include/net/fib* +F: include/net/if_inet6.h +F: include/net/inetpeer.h F: include/net/ip* +F: include/net/lwtunnel.h +F: include/net/ndisc.h +F: include/net/netns/nexthop.h +F: include/net/nexthop.h F: include/net/route.h -F: net/ipv4/ -F: net/ipv6/ +F: include/uapi/linux/fib_rules.h +F: include/uapi/linux/in_route.h +F: include/uapi/linux/mroute* +F: include/uapi/linux/nexthop.h +F: net/core/fib* +F: net/core/lwtunnel.c +F: net/ipv4/arp.c +F: net/ipv4/devinet.c +F: net/ipv4/fib* +F: net/ipv4/icmp.c +F: net/ipv4/igmp.c +F: net/ipv4/inet_fragment.c +F: net/ipv4/inetpeer.c +F: net/ipv4/ip* +F: net/ipv4/metrics.c +F: net/ipv4/netlink.c +F: net/ipv4/nexthop.c +F: net/ipv4/route.c +F: net/ipv6/addr* +F: net/ipv6/anycast.c +F: net/ipv6/exthdrs.c +F: net/ipv6/exthdrs_core.c +F: net/ipv6/fib* +F: net/ipv6/icmp.c +F: net/ipv6/ip* +F: net/ipv6/mcast* +F: net/ipv6/ndisc.c +F: net/ipv6/output_core.c +F: net/ipv6/reassembly.c +F: net/ipv6/route.c +F: tools/testing/selftests/net/fib* +F: tools/testing/selftests/net/forwarding/ NETWORKING [LABELED] (NetLabel, Labeled IPsec, SECMARK) M: Paul Moore @@ -18819,15 +18859,6 @@ F: Documentation/networking/net_failover.rst F: drivers/net/net_failover.c F: include/net/net_failover.h -NEXTHOP -M: David Ahern -L: netdev@vger.kernel.org -S: Maintained -F: include/net/netns/nexthop.h -F: include/net/nexthop.h -F: include/uapi/linux/nexthop.h -F: net/ipv4/nexthop.c - NFC SUBSYSTEM M: David Heidelberg L: oe-linux-nfc@lists.linux.dev -- cgit v1.2.3 From b31681206e3f527970a7c7ed807fbf6a028fc25b Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 28 Apr 2026 08:53:39 -0400 Subject: hv_sock: fix ARM64 support VMBUS ring buffers must be page aligned. Therefore, the current value of 24K presents a challenge on ARM64 kernels (with 64K pages). So, use VMBUS_RING_SIZE() to ensure they are always aligned and large enough to hold all of the relevant data. Cc: stable@vger.kernel.org Fixes: 77ffe33363c0 ("hv_sock: use HV_HYP_PAGE_SIZE for Hyper-V communication") Tested-by: Dexuan Cui Reviewed-by: Dexuan Cui Signed-off-by: Hamza Mahfooz Acked-by: Stefano Garzarella Link: https://patch.msgid.link/20260428125339.13963-1-hamzamahfooz@linux.microsoft.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/hyperv_transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index f862988c1e86..7a8963595bf9 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -375,10 +375,10 @@ static void hvs_open_connection(struct vmbus_channel *chan) } else { sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE); sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE); - sndbuf = ALIGN(sndbuf, HV_HYP_PAGE_SIZE); + sndbuf = VMBUS_RING_SIZE(sndbuf); rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE); rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE); - rcvbuf = ALIGN(rcvbuf, HV_HYP_PAGE_SIZE); + rcvbuf = VMBUS_RING_SIZE(rcvbuf); } chan->max_pkt_size = HVS_MAX_PKT_SIZE; -- cgit v1.2.3 From 4ca01292ea2f2363660610a65ba0285d7c3309ed Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 28 Apr 2026 08:53:16 +0200 Subject: net: airoha: Do not return err in ndo_stop() callback Always complete the airoha_dev_stop() routine regardless of the airoha_set_vip_for_gdm_port() return value, since errors from ndo_stop() are ignored by the networking stack and the interface is always considered down after the call. Fixes: 23020f049327 ("net: airoha: Introduce ethernet support for EN7581 SoC") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260428-airoha-ndo-stop-not-err-v1-1-674506d29a91@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 5effb4a4ae84..f8b3d53bccad 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1747,13 +1747,10 @@ static int airoha_dev_stop(struct net_device *dev) { struct airoha_gdm_port *port = netdev_priv(dev); struct airoha_qdma *qdma = port->qdma; - int i, err; + int i; netif_tx_disable(dev); - err = airoha_set_vip_for_gdm_port(port, false); - if (err) - return err; - + airoha_set_vip_for_gdm_port(port, false); for (i = 0; i < dev->num_tx_queues; i++) netdev_tx_reset_subqueue(dev, i); -- cgit v1.2.3 From c4f050ce06c56cfb5993268af4a5cb66ed1cd04e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Apr 2026 12:32:07 +0000 Subject: bonding: 3ad: implement proper RCU rules for port->aggregator syzbot found a data-race in bond_3ad_get_active_agg_info / bond_3ad_state_machine_handler [1] which hints at lack of proper RCU implementation. Add __rcu qualifier to port->aggregator, and add proper RCU API. [1] BUG: KCSAN: data-race in bond_3ad_get_active_agg_info / bond_3ad_state_machine_handler write to 0xffff88813cf5c4b0 of 8 bytes by task 36 on cpu 0: ad_port_selection_logic drivers/net/bonding/bond_3ad.c:1659 [inline] bond_3ad_state_machine_handler+0x9d5/0x2d60 drivers/net/bonding/bond_3ad.c:2569 process_one_work kernel/workqueue.c:3302 [inline] process_scheduled_works+0x4f0/0x9c0 kernel/workqueue.c:3385 worker_thread+0x58a/0x780 kernel/workqueue.c:3466 kthread+0x22a/0x280 kernel/kthread.c:436 ret_from_fork+0x146/0x330 arch/x86/kernel/process.c:158 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 read to 0xffff88813cf5c4b0 of 8 bytes by task 22063 on cpu 1: __bond_3ad_get_active_agg_info drivers/net/bonding/bond_3ad.c:2858 [inline] bond_3ad_get_active_agg_info+0x8c/0x230 drivers/net/bonding/bond_3ad.c:2881 bond_fill_info+0xe0f/0x10f0 drivers/net/bonding/bond_netlink.c:853 rtnl_link_info_fill net/core/rtnetlink.c:906 [inline] rtnl_link_fill+0x1d7/0x4e0 net/core/rtnetlink.c:927 rtnl_fill_ifinfo+0xf8e/0x1380 net/core/rtnetlink.c:2168 rtmsg_ifinfo_build_skb+0x11c/0x1b0 net/core/rtnetlink.c:4453 rtmsg_ifinfo_event net/core/rtnetlink.c:4486 [inline] rtmsg_ifinfo+0x6d/0x110 net/core/rtnetlink.c:4495 __dev_notify_flags+0x76/0x390 net/core/dev.c:9790 netif_change_flags+0xac/0xd0 net/core/dev.c:9823 do_setlink+0x905/0x2950 net/core/rtnetlink.c:3180 rtnl_group_changelink net/core/rtnetlink.c:3813 [inline] __rtnl_newlink net/core/rtnetlink.c:3981 [inline] rtnl_newlink+0xf55/0x1400 net/core/rtnetlink.c:4109 rtnetlink_rcv_msg+0x64b/0x720 net/core/rtnetlink.c:6995 netlink_rcv_skb+0x123/0x220 net/netlink/af_netlink.c:2550 rtnetlink_rcv+0x1c/0x30 net/core/rtnetlink.c:7022 netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline] netlink_unicast+0x5a8/0x680 net/netlink/af_netlink.c:1344 netlink_sendmsg+0x5c8/0x6f0 net/netlink/af_netlink.c:1894 sock_sendmsg_nosec net/socket.c:787 [inline] __sock_sendmsg net/socket.c:802 [inline] ____sys_sendmsg+0x563/0x5b0 net/socket.c:2698 ___sys_sendmsg+0x195/0x1e0 net/socket.c:2752 __sys_sendmsg net/socket.c:2784 [inline] __do_sys_sendmsg net/socket.c:2789 [inline] __se_sys_sendmsg net/socket.c:2787 [inline] __x64_sys_sendmsg+0xd4/0x160 net/socket.c:2787 x64_sys_call+0x194c/0x3020 arch/x86/include/generated/asm/syscalls_64.h:47 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x12c/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f value changed: 0x0000000000000000 -> 0xffff88813cf5c400 Reported by Kernel Concurrency Sanitizer on: CPU: 1 UID: 0 PID: 22063 Comm: syz.0.31122 Tainted: G W syzkaller #0 PREEMPT(full) Tainted: [W]=WARN Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/18/2026 Fixes: 47e91f56008b ("bonding: use RCU protection for 3ad xmit path") Reported-by: syzbot+9bb2ff2a4ab9e17307e1@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/69f0a82f.050a0220.3aadc4.0000.GAE@google.com/ Signed-off-by: Eric Dumazet Cc: Jay Vosburgh Cc: Andrew Lunn Link: https://patch.msgid.link/20260428123207.3809211-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_3ad.c | 109 ++++++++++++++++++--------------- drivers/net/bonding/bond_main.c | 8 ++- drivers/net/bonding/bond_netlink.c | 16 +++-- drivers/net/bonding/bond_procfs.c | 3 +- drivers/net/bonding/bond_sysfs_slave.c | 17 +++-- include/net/bond_3ad.h | 2 +- 6 files changed, 89 insertions(+), 66 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index af7f74cfdc08..f0aa7d2f2171 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1029,6 +1029,7 @@ static void ad_cond_set_peer_notif(struct port *port) static void ad_mux_machine(struct port *port, bool *update_slave_arr) { struct bonding *bond = __get_bond_by_port(port); + struct aggregator *aggregator; mux_states_t last_state; /* keep current State Machine state to compare later if it was @@ -1036,6 +1037,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) */ last_state = port->sm_mux_state; + aggregator = rcu_dereference(port->aggregator); if (port->sm_vars & AD_PORT_BEGIN) { port->sm_mux_state = AD_MUX_DETACHED; } else { @@ -1055,7 +1057,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) * cycle to update ready variable, we check * READY_N and update READY here */ - __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); + __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator)); port->sm_mux_state = AD_MUX_DETACHED; break; } @@ -1070,7 +1072,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) * update ready variable, we check READY_N and update * READY here */ - __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); + __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator)); /* if the wait_while_timer expired, and the port is * in READY state, move to ATTACHED state @@ -1086,7 +1088,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) && !__check_agg_selection_timer(port)) { - if (port->aggregator->is_active) { + if (aggregator->is_active) { int state = AD_MUX_COLLECTING_DISTRIBUTING; if (!bond->params.coupled_control) @@ -1102,9 +1104,9 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) * cycle to update ready variable, we check * READY_N and update READY here */ - __set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); + __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator)); port->sm_mux_state = AD_MUX_DETACHED; - } else if (port->aggregator->is_active) { + } else if (aggregator->is_active) { port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; } @@ -1115,7 +1117,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) * sure that a collecting distributing * port in an active aggregator is enabled */ - if (port->aggregator->is_active && + if (aggregator->is_active && !__port_is_collecting_distributing(port)) { __enable_port(port); *update_slave_arr = true; @@ -1134,7 +1136,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) */ struct slave *slave = port->slave; - if (port->aggregator->is_active && + if (aggregator->is_active && bond_is_slave_rx_disabled(slave)) { ad_enable_collecting(port); *update_slave_arr = true; @@ -1154,8 +1156,8 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) * sure that a collecting distributing * port in an active aggregator is enabled */ - if (port->aggregator && - port->aggregator->is_active && + if (aggregator && + aggregator->is_active && !__port_is_collecting_distributing(port)) { __enable_port(port); *update_slave_arr = true; @@ -1187,7 +1189,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) port->sm_mux_timer_counter = __ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0); break; case AD_MUX_ATTACHED: - if (port->aggregator->is_active) + if (aggregator->is_active) port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; else @@ -1561,9 +1563,9 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) bond = __get_bond_by_port(port); /* if the port is connected to other aggregator, detach it */ - if (port->aggregator) { + temp_aggregator = rcu_dereference(port->aggregator); + if (temp_aggregator) { /* detach the port from its former aggregator */ - temp_aggregator = port->aggregator; for (curr_port = temp_aggregator->lag_ports; curr_port; last_port = curr_port, curr_port = curr_port->next_port_in_aggregator) { @@ -1586,7 +1588,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) /* clear the port's relations to this * aggregator */ - port->aggregator = NULL; + RCU_INIT_POINTER(port->aggregator, NULL); port->next_port_in_aggregator = NULL; port->actor_port_aggregator_identifier = 0; @@ -1609,7 +1611,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) port->slave->bond->dev->name, port->slave->dev->name, port->actor_port_number, - port->aggregator->aggregator_identifier); + temp_aggregator->aggregator_identifier); } } /* search on all aggregators for a suitable aggregator for this port */ @@ -1633,15 +1635,15 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) ) ) { /* attach to the founded aggregator */ - port->aggregator = aggregator; + rcu_assign_pointer(port->aggregator, aggregator); port->actor_port_aggregator_identifier = - port->aggregator->aggregator_identifier; + aggregator->aggregator_identifier; port->next_port_in_aggregator = aggregator->lag_ports; - port->aggregator->num_of_ports++; + aggregator->num_of_ports++; aggregator->lag_ports = port; slave_dbg(bond->dev, slave->dev, "Port %d joined LAG %d (existing LAG)\n", port->actor_port_number, - port->aggregator->aggregator_identifier); + aggregator->aggregator_identifier); /* mark this port as selected */ port->sm_vars |= AD_PORT_SELECTED; @@ -1656,39 +1658,40 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) if (!found) { if (free_aggregator) { /* assign port a new aggregator */ - port->aggregator = free_aggregator; port->actor_port_aggregator_identifier = - port->aggregator->aggregator_identifier; + free_aggregator->aggregator_identifier; /* update the new aggregator's parameters * if port was responsed from the end-user */ if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS) /* if port is full duplex */ - port->aggregator->is_individual = false; + free_aggregator->is_individual = false; else - port->aggregator->is_individual = true; + free_aggregator->is_individual = true; - port->aggregator->actor_admin_aggregator_key = + free_aggregator->actor_admin_aggregator_key = port->actor_admin_port_key; - port->aggregator->actor_oper_aggregator_key = + free_aggregator->actor_oper_aggregator_key = port->actor_oper_port_key; - port->aggregator->partner_system = + free_aggregator->partner_system = port->partner_oper.system; - port->aggregator->partner_system_priority = + free_aggregator->partner_system_priority = port->partner_oper.system_priority; - port->aggregator->partner_oper_aggregator_key = port->partner_oper.key; - port->aggregator->receive_state = 1; - port->aggregator->transmit_state = 1; - port->aggregator->lag_ports = port; - port->aggregator->num_of_ports++; + free_aggregator->partner_oper_aggregator_key = port->partner_oper.key; + free_aggregator->receive_state = 1; + free_aggregator->transmit_state = 1; + free_aggregator->lag_ports = port; + free_aggregator->num_of_ports++; + + rcu_assign_pointer(port->aggregator, free_aggregator); /* mark this port as selected */ port->sm_vars |= AD_PORT_SELECTED; slave_dbg(bond->dev, port->slave->dev, "Port %d joined LAG %d (new LAG)\n", port->actor_port_number, - port->aggregator->aggregator_identifier); + free_aggregator->aggregator_identifier); } else { slave_err(bond->dev, port->slave->dev, "Port %d did not find a suitable aggregator\n", @@ -1700,13 +1703,12 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) * in all aggregator's ports, else set ready=FALSE in all * aggregator's ports */ - __set_agg_ports_ready(port->aggregator, - __agg_ports_are_ready(port->aggregator)); + aggregator = rcu_dereference(port->aggregator); + __set_agg_ports_ready(aggregator, __agg_ports_are_ready(aggregator)); - aggregator = __get_first_agg(port); - ad_agg_selection_logic(aggregator, update_slave_arr); + ad_agg_selection_logic(__get_first_agg(port), update_slave_arr); - if (!port->aggregator->is_active) + if (!aggregator->is_active) port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; } @@ -2075,13 +2077,15 @@ static void ad_initialize_port(struct port *port, const struct bond_params *bond */ static void ad_enable_collecting(struct port *port) { - if (port->aggregator->is_active) { + struct aggregator *aggregator = rcu_dereference(port->aggregator); + + if (aggregator->is_active) { struct slave *slave = port->slave; slave_dbg(slave->bond->dev, slave->dev, "Enabling collecting on port %d (LAG %d)\n", port->actor_port_number, - port->aggregator->aggregator_identifier); + aggregator->aggregator_identifier); __enable_collecting_port(port); } } @@ -2093,11 +2097,13 @@ static void ad_enable_collecting(struct port *port) */ static void ad_disable_distributing(struct port *port, bool *update_slave_arr) { - if (port->aggregator && __agg_has_partner(port->aggregator)) { + struct aggregator *aggregator = rcu_dereference(port->aggregator); + + if (aggregator && __agg_has_partner(aggregator)) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Disabling distributing on port %d (LAG %d)\n", port->actor_port_number, - port->aggregator->aggregator_identifier); + aggregator->aggregator_identifier); __disable_distributing_port(port); /* Slave array needs an update */ *update_slave_arr = true; @@ -2114,11 +2120,13 @@ static void ad_disable_distributing(struct port *port, bool *update_slave_arr) static void ad_enable_collecting_distributing(struct port *port, bool *update_slave_arr) { - if (port->aggregator->is_active) { + struct aggregator *aggregator = rcu_dereference(port->aggregator); + + if (aggregator->is_active) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Enabling port %d (LAG %d)\n", port->actor_port_number, - port->aggregator->aggregator_identifier); + aggregator->aggregator_identifier); __enable_port(port); /* Slave array needs update */ *update_slave_arr = true; @@ -2135,11 +2143,13 @@ static void ad_enable_collecting_distributing(struct port *port, static void ad_disable_collecting_distributing(struct port *port, bool *update_slave_arr) { - if (port->aggregator && __agg_has_partner(port->aggregator)) { + struct aggregator *aggregator = rcu_dereference(port->aggregator); + + if (aggregator && __agg_has_partner(aggregator)) { slave_dbg(port->slave->bond->dev, port->slave->dev, "Disabling port %d (LAG %d)\n", port->actor_port_number, - port->aggregator->aggregator_identifier); + aggregator->aggregator_identifier); __disable_port(port); /* Slave array needs an update */ *update_slave_arr = true; @@ -2379,7 +2389,7 @@ void bond_3ad_unbind_slave(struct slave *slave) */ for (temp_port = aggregator->lag_ports; temp_port; temp_port = temp_port->next_port_in_aggregator) { - temp_port->aggregator = new_aggregator; + rcu_assign_pointer(temp_port->aggregator, new_aggregator); temp_port->actor_port_aggregator_identifier = new_aggregator->aggregator_identifier; } @@ -2848,15 +2858,16 @@ out: int __bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) { - struct aggregator *aggregator = NULL; + struct aggregator *aggregator = NULL, *tmp; struct list_head *iter; struct slave *slave; struct port *port; bond_for_each_slave_rcu(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave)->port); - if (port->aggregator && port->aggregator->is_active) { - aggregator = port->aggregator; + tmp = rcu_dereference(port->aggregator); + if (tmp && tmp->is_active) { + aggregator = tmp; break; } } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c7baa5c4bf40..af82a3df2c5d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1433,7 +1433,7 @@ static void bond_poll_controller(struct net_device *bond_dev) if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct aggregator *agg = - SLAVE_AD_INFO(slave)->port.aggregator; + rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator); if (agg && agg->aggregator_identifier != ad_info.aggregator_id) @@ -5179,15 +5179,16 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) spin_unlock_bh(&bond->mode_lock); agg_id = ad_info.aggregator_id; } + rcu_read_lock(); bond_for_each_slave(bond, slave, iter) { if (skipslave == slave) continue; all_slaves->arr[all_slaves->count++] = slave; if (BOND_MODE(bond) == BOND_MODE_8023AD) { - struct aggregator *agg; + const struct aggregator *agg; - agg = SLAVE_AD_INFO(slave)->port.aggregator; + agg = rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator); if (!agg || agg->aggregator_identifier != agg_id) continue; } @@ -5199,6 +5200,7 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) usable_slaves->arr[usable_slaves->count++] = slave; } + rcu_read_unlock(); bond_set_slave_arr(bond, usable_slaves, all_slaves); return ret; diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index ea1a80e658ae..c7d3e0602c83 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -66,27 +66,29 @@ static int bond_fill_slave_info(struct sk_buff *skb, const struct port *ad_port; ad_port = &SLAVE_AD_INFO(slave)->port; - agg = SLAVE_AD_INFO(slave)->port.aggregator; + rcu_read_lock(); + agg = rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator); if (agg) { if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, agg->aggregator_identifier)) - goto nla_put_failure; + goto nla_put_failure_rcu; if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, ad_port->actor_oper_port_state)) - goto nla_put_failure; + goto nla_put_failure_rcu; if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, ad_port->partner_oper.port_state)) - goto nla_put_failure; + goto nla_put_failure_rcu; if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE, ad_port->sm_churn_actor_state)) - goto nla_put_failure; + goto nla_put_failure_rcu; if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE, ad_port->sm_churn_partner_state)) - goto nla_put_failure; + goto nla_put_failure_rcu; } + rcu_read_unlock(); if (nla_put_u16(skb, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, SLAVE_AD_INFO(slave)->port_priority)) @@ -95,6 +97,8 @@ static int bond_fill_slave_info(struct sk_buff *skb, return 0; +nla_put_failure_rcu: + rcu_read_unlock(); nla_put_failure: return -EMSGSIZE; } diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index e34f80305191..3714aab1a3d9 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -188,6 +188,7 @@ static void bond_info_show_master(struct seq_file *seq) } } +/* Note: runs under rcu_read_lock() */ static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) { @@ -214,7 +215,7 @@ static void bond_info_show_slave(struct seq_file *seq, if (BOND_MODE(bond) == BOND_MODE_8023AD) { const struct port *port = &SLAVE_AD_INFO(slave)->port; - const struct aggregator *agg = port->aggregator; + const struct aggregator *agg = rcu_dereference(port->aggregator); if (agg) { seq_printf(seq, "Aggregator ID: %d\n", diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c index 36d0e8440b5b..fc6fe7181789 100644 --- a/drivers/net/bonding/bond_sysfs_slave.c +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -62,10 +62,15 @@ static ssize_t ad_aggregator_id_show(struct slave *slave, char *buf) const struct aggregator *agg; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { - agg = SLAVE_AD_INFO(slave)->port.aggregator; - if (agg) - return sysfs_emit(buf, "%d\n", - agg->aggregator_identifier); + rcu_read_lock(); + agg = rcu_dereference(SLAVE_AD_INFO(slave)->port.aggregator); + if (agg) { + ssize_t res = sysfs_emit(buf, "%d\n", + agg->aggregator_identifier); + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); } return sysfs_emit(buf, "N/A\n"); @@ -78,7 +83,7 @@ static ssize_t ad_actor_oper_port_state_show(struct slave *slave, char *buf) if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { ad_port = &SLAVE_AD_INFO(slave)->port; - if (ad_port->aggregator) + if (rcu_access_pointer(ad_port->aggregator)) return sysfs_emit(buf, "%u\n", ad_port->actor_oper_port_state); } @@ -93,7 +98,7 @@ static ssize_t ad_partner_oper_port_state_show(struct slave *slave, char *buf) if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { ad_port = &SLAVE_AD_INFO(slave)->port; - if (ad_port->aggregator) + if (rcu_access_pointer(ad_port->aggregator)) return sysfs_emit(buf, "%u\n", ad_port->partner_oper.port_state); } diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index c92d4a976246..05572c19e14b 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -243,7 +243,7 @@ typedef struct port { churn_state_t sm_churn_actor_state; churn_state_t sm_churn_partner_state; struct slave *slave; /* pointer to the bond slave that this port belongs to */ - struct aggregator *aggregator; /* pointer to an aggregator that this port related to */ + struct aggregator __rcu *aggregator; /* pointer to an aggregator that this port related to */ struct port *next_port_in_aggregator; /* Next port on the linked list of the parent aggregator */ u32 transaction_id; /* continuous number for identification of Marker PDU's; */ struct lacpdu lacpdu; /* the lacpdu that will be sent for this port */ -- cgit v1.2.3 From 5ef343614db766acdc01c56d66e780a1b43c6ac6 Mon Sep 17 00:00:00 2001 From: Hasan Basbunar Date: Tue, 28 Apr 2026 19:07:39 +0200 Subject: page_pool: fix memory-provider leak in page_pool_create_percpu() error path When page_pool_create_percpu() fails on page_pool_list(), it falls through to its err_uninit: label, which calls page_pool_uninit(). At that point page_pool_init() has already taken two references when the user requested PP_FLAG_ALLOW_UNREADABLE_NETMEM: pool->mp_ops->init(pool) static_branch_inc(&page_pool_mem_providers); Neither is undone by page_pool_uninit(); both are only undone by __page_pool_destroy() (success-side teardown). The error path therefore leaks the per-provider reference taken by mp_ops->init (io_zcrx_ifq->refs in the io_uring zcrx provider, the dmabuf binding refcount in the devmem provider) plus one increment of the page_pool_mem_providers static branch on every failure of xa_alloc_cyclic() inside page_pool_list(). The leaked io_zcrx_ifq->refs in turn pins everything io_zcrx_ifq_free() would release on cleanup: ifq->user (uid), ifq->mm_account (mmdrop), ifq->dev (device refcount), ifq->netdev_tracker (netdev refcount), and the rbuf region. The leaked static branch increment forces all subsequent page_pool_alloc_netmems() and page_pool_return_page() callers to take the slow mp_ops branch for the lifetime of the kernel. Reachable via the io_uring zcrx path: io_uring_register(IORING_REGISTER_ZCRX_IFQ) /* CAP_NET_ADMIN */ -> __io_uring_register -> io_register_zcrx -> zcrx_register_netdev -> netif_mp_open_rxq -> driver ndo_queue_mem_alloc -> page_pool_create_percpu -> page_pool_init succeeds (mp_ops->init runs, branch++) -> page_pool_list fails (xa_alloc_cyclic -ENOMEM) -> goto err_uninit <-- leak The same shape applies to the devmem dmabuf provider via mp_dmabuf_devmem_init()/mp_dmabuf_devmem_destroy(). Restore the cleanup symmetry by moving the mp_ops->destroy() and static_branch_dec() calls out of __page_pool_destroy() and into page_pool_uninit(), so page_pool_uninit() is again the strict inverse of page_pool_init(). page_pool_uninit() has only two callers (the err_uninit: path and __page_pool_destroy()), so this preserves the single-call invariant on the success path while fixing the err path. The error path of page_pool_init() itself still skips the mp_ops cleanup correctly: mp_ops->init is the last action that takes a reference before page_pool_init() returns 0, so when it returns an error neither the refcount nor the static branch has been touched. Triggering the bug requires xa_alloc_cyclic() to fail with -ENOMEM, which under normal GFP_KERNEL retry behaviour is rare. It is deterministic under CONFIG_FAULT_INJECTION with fail_page_alloc / xa fault injection, or under sustained memory pressure. The leak is silent: there is no warning, and the released kernel build continues running with a permanently-incremented static branch. Fixes: 0f9214046893 ("memory-provider: dmabuf devmem memory provider") Signed-off-by: Hasan Basbunar Link: https://patch.msgid.link/20260428170739.34881-1-basbunarhasan@gmail.com Signed-off-by: Jakub Kicinski --- net/core/page_pool.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 877bbf7a1938..6e576dec80db 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -327,6 +327,11 @@ static void page_pool_uninit(struct page_pool *pool) if (!pool->system) free_percpu(pool->recycle_stats); #endif + + if (pool->mp_ops) { + pool->mp_ops->destroy(pool); + static_branch_dec(&page_pool_mem_providers); + } } /** @@ -1146,11 +1151,6 @@ static void __page_pool_destroy(struct page_pool *pool) page_pool_unlist(pool); page_pool_uninit(pool); - if (pool->mp_ops) { - pool->mp_ops->destroy(pool); - static_branch_dec(&page_pool_mem_providers); - } - kfree(pool); } -- cgit v1.2.3 From f2abc305aa93f5b12d5c929d7a9c1cf7d7fee8af Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 29 Apr 2026 20:38:17 -0600 Subject: riscv: Define __riscv_copy_{,vec_}{words,bytes}_unaligned() using SYM_TYPED_FUNC_START After commit 67bdd7b01387 ("riscv: Split out measure_cycles() for reuse") and commit c03ad15f7cf6 ("riscv: Reuse measure_cycles() in check_vector_unaligned_access()"), there are CFI failure when booting kernels with CONFIG_CFI=y: CFI failure at measure_cycles+0x38/0xe0 (target: __riscv_copy_words_unaligned+0x0/0x50; expected type: ...) CFI failure at measure_cycles+0x38/0xe0 (target: __riscv_copy_vec_words_unaligned+0x0/0x24; expected type: ...) The __riscv_copy_*_unaligned() functions are now called indirectly but they are not defined with SYM_TYPED_FUNC_START, which is required for assembly functions called indirectly from C to pass CFI checking. Switch to SYM_TYPED_FUNC_START to clear up the CFI failures. Fixes: 67bdd7b01387 ("riscv: Split out measure_cycles() for reuse") Fixes: c03ad15f7cf6 ("riscv: Reuse measure_cycles() in check_vector_unaligned_access()") Signed-off-by: Nathan Chancellor Reviewed-by: Sami Tolvanen Reviewed-by: Nam Cao Link: https://patch.msgid.link/20260406-measure_cycles-cfi-failure-v1-1-03e0234ae02f@kernel.org Signed-off-by: Paul Walmsley --- arch/riscv/kernel/copy-unaligned.S | 5 +++-- arch/riscv/kernel/vec-copy-unaligned.S | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/copy-unaligned.S b/arch/riscv/kernel/copy-unaligned.S index 2b3d9398c113..90f3549621f7 100644 --- a/arch/riscv/kernel/copy-unaligned.S +++ b/arch/riscv/kernel/copy-unaligned.S @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2023 Rivos Inc. */ +#include #include #include @@ -9,7 +10,7 @@ /* void __riscv_copy_words_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using word loads and stores. */ /* Note: The size is truncated to a multiple of 8 * SZREG */ -SYM_FUNC_START(__riscv_copy_words_unaligned) +SYM_TYPED_FUNC_START(__riscv_copy_words_unaligned) andi a4, a2, ~((8*SZREG)-1) beqz a4, 2f add a3, a1, a4 @@ -41,7 +42,7 @@ SYM_FUNC_END(__riscv_copy_words_unaligned) /* void __riscv_copy_bytes_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using only byte accesses. */ /* Note: The size is truncated to a multiple of 8 */ -SYM_FUNC_START(__riscv_copy_bytes_unaligned) +SYM_TYPED_FUNC_START(__riscv_copy_bytes_unaligned) andi a4, a2, ~(8-1) beqz a4, 2f add a3, a1, a4 diff --git a/arch/riscv/kernel/vec-copy-unaligned.S b/arch/riscv/kernel/vec-copy-unaligned.S index 7ce4de6f6e69..361039f7b944 100644 --- a/arch/riscv/kernel/vec-copy-unaligned.S +++ b/arch/riscv/kernel/vec-copy-unaligned.S @@ -2,6 +2,7 @@ /* Copyright (C) 2024 Rivos Inc. */ #include +#include #include #include @@ -16,7 +17,7 @@ /* void __riscv_copy_vec_words_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using word loads and stores. */ /* Note: The size is truncated to a multiple of WORD_EEW */ -SYM_FUNC_START(__riscv_copy_vec_words_unaligned) +SYM_TYPED_FUNC_START(__riscv_copy_vec_words_unaligned) andi a4, a2, ~(WORD_EEW-1) beqz a4, 2f add a3, a1, a4 @@ -38,7 +39,7 @@ SYM_FUNC_END(__riscv_copy_vec_words_unaligned) /* void __riscv_copy_vec_bytes_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using only byte accesses. */ /* Note: The size is truncated to a multiple of 8 */ -SYM_FUNC_START(__riscv_copy_vec_bytes_unaligned) +SYM_TYPED_FUNC_START(__riscv_copy_vec_bytes_unaligned) andi a4, a2, ~(8-1) beqz a4, 2f add a3, a1, a4 -- cgit v1.2.3 From 7dfc0063022078a80fe5774815723c185e4b7b57 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 29 Apr 2026 15:57:34 +0200 Subject: regulator: rpi-panel-attiny: add back GPIOLIB dependency This driver provides a gpio chip, which is only possible when GPIOLIB is enabled, which was previously guaranteed by the CONFIG_OF_GPIO dependency that is now gone: ERROR: modpost: "gpiochip_get_data" [drivers/regulator/rpi-panel-attiny-regulator.ko] undefined! ERROR: modpost: "devm_gpiochip_add_data_with_key" [drivers/regulator/rpi-panel-attiny-regulator.ko] undefined! Add an explicit GPIOLIB dependency instead. Fixes: bf017304fce1 ("regulator: drop unneeded dependencies on OF_GPIO") Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20260429135812.112514-1-arnd@kernel.org Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index e8002526cfb0..d71dac9436e3 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -1231,6 +1231,7 @@ config REGULATOR_RASPBERRYPI_TOUCHSCREEN_ATTINY tristate "Raspberry Pi 7-inch touchscreen panel ATTINY regulator" depends on ARM || ARM64 || COMPILE_TEST depends on BACKLIGHT_CLASS_DEVICE + depends on GPIOLIB depends on I2C select REGMAP_I2C help -- cgit v1.2.3 From 6813985ca456d1f5677ad9554f55805cbf27e16f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Apr 2026 17:35:18 +0200 Subject: netfilter: x_tables: add .check_hooks to matches and targets Add a new .check_hooks interface for checking if the match/target is used from the validate hook according to its configuration. Move existing conditional hook check based on the match/target configuration from .checkentry to .check_hooks for the following matches/targets: - addrtype - devgroup - physdev - policy - set - TCPMSS - SET This is a preparation patch to fix nft_compat, not functional changes are intended. Based on patch from Florian Westphal. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 8 ++++ net/netfilter/x_tables.c | 79 ++++++++++++++++++++++++++++++++++---- net/netfilter/xt_TCPMSS.c | 33 ++++++++-------- net/netfilter/xt_addrtype.c | 25 +++++++++--- net/netfilter/xt_devgroup.c | 18 ++++++--- net/netfilter/xt_physdev.c | 20 +++++++--- net/netfilter/xt_policy.c | 24 +++++++++--- net/netfilter/xt_set.c | 39 ++++++++++++------- 8 files changed, 187 insertions(+), 59 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 77c778d84d4c..a81b46af5118 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -146,6 +146,9 @@ struct xt_match { /* Called when user tries to insert an entry of this type. */ int (*checkentry)(const struct xt_mtchk_param *); + /* Called to validate hooks based on the match configuration. */ + int (*check_hooks)(const struct xt_mtchk_param *); + /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_mtdtor_param *); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT @@ -187,6 +190,9 @@ struct xt_target { /* Should return 0 on success or an error code otherwise (-Exxxx). */ int (*checkentry)(const struct xt_tgchk_param *); + /* Called to validate hooks based on the target configuration. */ + int (*check_hooks)(const struct xt_tgchk_param *); + /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_tgdtor_param *); #ifdef CONFIG_NETFILTER_XTABLES_COMPAT @@ -279,8 +285,10 @@ bool xt_find_jump_offset(const unsigned int *offsets, int xt_check_proc_name(const char *name, unsigned int size); +int xt_check_hooks_match(struct xt_mtchk_param *par); int xt_check_match(struct xt_mtchk_param *, unsigned int size, u16 proto, bool inv_proto); +int xt_check_hooks_target(struct xt_tgchk_param *par); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u16 proto, bool inv_proto); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 9f837fb5ceb4..2c67c2e6b132 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -477,11 +477,9 @@ int xt_check_proc_name(const char *name, unsigned int size) } EXPORT_SYMBOL(xt_check_proc_name); -int xt_check_match(struct xt_mtchk_param *par, - unsigned int size, u16 proto, bool inv_proto) +static int xt_check_match_common(struct xt_mtchk_param *par, + unsigned int size, u16 proto, bool inv_proto) { - int ret; - if (XT_ALIGN(par->match->matchsize) != size && par->match->matchsize != -1) { /* @@ -530,6 +528,14 @@ int xt_check_match(struct xt_mtchk_param *par, par->match->proto); return -EINVAL; } + + return 0; +} + +static int xt_checkentry_match(struct xt_mtchk_param *par) +{ + int ret; + if (par->match->checkentry != NULL) { ret = par->match->checkentry(par); if (ret < 0) @@ -538,8 +544,34 @@ int xt_check_match(struct xt_mtchk_param *par, /* Flag up potential errors. */ return -EIO; } + + return 0; +} + +int xt_check_hooks_match(struct xt_mtchk_param *par) +{ + if (par->match->check_hooks != NULL) + return par->match->check_hooks(par); + return 0; } +EXPORT_SYMBOL_GPL(xt_check_hooks_match); + +int xt_check_match(struct xt_mtchk_param *par, + unsigned int size, u16 proto, bool inv_proto) +{ + int ret; + + ret = xt_check_match_common(par, size, proto, inv_proto); + if (ret < 0) + return ret; + + ret = xt_check_hooks_match(par); + if (ret < 0) + return ret; + + return xt_checkentry_match(par); +} EXPORT_SYMBOL_GPL(xt_check_match); /** xt_check_entry_match - check that matches end before start of target @@ -1012,11 +1044,9 @@ bool xt_find_jump_offset(const unsigned int *offsets, } EXPORT_SYMBOL(xt_find_jump_offset); -int xt_check_target(struct xt_tgchk_param *par, - unsigned int size, u16 proto, bool inv_proto) +static int xt_check_target_common(struct xt_tgchk_param *par, + unsigned int size, u16 proto, bool inv_proto) { - int ret; - if (XT_ALIGN(par->target->targetsize) != size) { pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n", xt_prefix[par->family], par->target->name, @@ -1061,6 +1091,23 @@ int xt_check_target(struct xt_tgchk_param *par, par->target->proto); return -EINVAL; } + + return 0; +} + +int xt_check_hooks_target(struct xt_tgchk_param *par) +{ + if (par->target->check_hooks != NULL) + return par->target->check_hooks(par); + + return 0; +} +EXPORT_SYMBOL_GPL(xt_check_hooks_target); + +static int xt_checkentry_target(struct xt_tgchk_param *par) +{ + int ret; + if (par->target->checkentry != NULL) { ret = par->target->checkentry(par); if (ret < 0) @@ -1071,6 +1118,22 @@ int xt_check_target(struct xt_tgchk_param *par, } return 0; } + +int xt_check_target(struct xt_tgchk_param *par, + unsigned int size, u16 proto, bool inv_proto) +{ + int ret; + + ret = xt_check_target_common(par, size, proto, inv_proto); + if (ret < 0) + return ret; + + ret = xt_check_hooks_target(par); + if (ret < 0) + return ret; + + return xt_checkentry_target(par); +} EXPORT_SYMBOL_GPL(xt_check_target); /** diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 116a885adb3c..80e1634bc51f 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -247,6 +247,21 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) } #endif +static int tcpmss_tg4_check_hooks(const struct xt_tgchk_param *par) +{ + const struct xt_tcpmss_info *info = par->targinfo; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + (par->hook_mask & ~((1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING))) != 0) { + pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); + return -EINVAL; + } + + return 0; +} + /* Must specify -p tcp --syn */ static inline bool find_syn_match(const struct xt_entry_match *m) { @@ -262,17 +277,9 @@ static inline bool find_syn_match(const struct xt_entry_match *m) static int tcpmss_tg4_check(const struct xt_tgchk_param *par) { - const struct xt_tcpmss_info *info = par->targinfo; const struct ipt_entry *e = par->entryinfo; const struct xt_entry_match *ematch; - if (info->mss == XT_TCPMSS_CLAMP_PMTU && - (par->hook_mask & ~((1 << NF_INET_FORWARD) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING))) != 0) { - pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); - return -EINVAL; - } if (par->nft_compat) return 0; @@ -286,17 +293,9 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static int tcpmss_tg6_check(const struct xt_tgchk_param *par) { - const struct xt_tcpmss_info *info = par->targinfo; const struct ip6t_entry *e = par->entryinfo; const struct xt_entry_match *ematch; - if (info->mss == XT_TCPMSS_CLAMP_PMTU && - (par->hook_mask & ~((1 << NF_INET_FORWARD) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING))) != 0) { - pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); - return -EINVAL; - } if (par->nft_compat) return 0; @@ -312,6 +311,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = { { .family = NFPROTO_IPV4, .name = "TCPMSS", + .check_hooks = tcpmss_tg4_check_hooks, .checkentry = tcpmss_tg4_check, .target = tcpmss_tg4, .targetsize = sizeof(struct xt_tcpmss_info), @@ -322,6 +322,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = { { .family = NFPROTO_IPV6, .name = "TCPMSS", + .check_hooks = tcpmss_tg4_check_hooks, .checkentry = tcpmss_tg6_check, .target = tcpmss_tg6, .targetsize = sizeof(struct xt_tcpmss_info), diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index a77088943107..913dbe3aa5e2 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -153,14 +153,10 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) return ret; } -static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) +static int addrtype_mt_check_hooks(const struct xt_mtchk_param *par) { - const char *errmsg = "both incoming and outgoing interface limitation cannot be selected"; struct xt_addrtype_info_v1 *info = par->matchinfo; - - if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && - info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) - goto err; + const char *errmsg; if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && @@ -176,6 +172,21 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) goto err; } + return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; +} + +static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) +{ + const char *errmsg = "both incoming and outgoing interface limitation cannot be selected"; + struct xt_addrtype_info_v1 *info = par->matchinfo; + + if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && + info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) + goto err; + #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) if (par->family == NFPROTO_IPV6) { if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) { @@ -211,6 +222,7 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = { .family = NFPROTO_IPV4, .revision = 1, .match = addrtype_mt_v1, + .check_hooks = addrtype_mt_check_hooks, .checkentry = addrtype_mt_checkentry_v1, .matchsize = sizeof(struct xt_addrtype_info_v1), .me = THIS_MODULE @@ -221,6 +233,7 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = { .family = NFPROTO_IPV6, .revision = 1, .match = addrtype_mt_v1, + .check_hooks = addrtype_mt_check_hooks, .checkentry = addrtype_mt_checkentry_v1, .matchsize = sizeof(struct xt_addrtype_info_v1), .me = THIS_MODULE diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c index 9520dd00070b..6d1a44ab5eee 100644 --- a/net/netfilter/xt_devgroup.c +++ b/net/netfilter/xt_devgroup.c @@ -33,14 +33,10 @@ static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) return true; } -static int devgroup_mt_checkentry(const struct xt_mtchk_param *par) +static int devgroup_mt_check_hooks(const struct xt_mtchk_param *par) { const struct xt_devgroup_info *info = par->matchinfo; - if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC | - XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST)) - return -EINVAL; - if (info->flags & XT_DEVGROUP_MATCH_SRC && par->hook_mask & ~((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | @@ -56,9 +52,21 @@ static int devgroup_mt_checkentry(const struct xt_mtchk_param *par) return 0; } +static int devgroup_mt_checkentry(const struct xt_mtchk_param *par) +{ + const struct xt_devgroup_info *info = par->matchinfo; + + if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC | + XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST)) + return -EINVAL; + + return 0; +} + static struct xt_match devgroup_mt_reg __read_mostly = { .name = "devgroup", .match = devgroup_mt, + .check_hooks = devgroup_mt_check_hooks, .checkentry = devgroup_mt_checkentry, .matchsize = sizeof(struct xt_devgroup_info), .family = NFPROTO_UNSPEC, diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index d2b0b52434fa..dd98f758176c 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -91,14 +91,10 @@ match_outdev: return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); } -static int physdev_mt_check(const struct xt_mtchk_param *par) +static int physdev_mt_check_hooks(const struct xt_mtchk_param *par) { const struct xt_physdev_info *info = par->matchinfo; - static bool brnf_probed __read_mostly; - if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || - info->bitmask & ~XT_PHYSDEV_OP_MASK) - return -EINVAL; if (info->bitmask & (XT_PHYSDEV_OP_OUT | XT_PHYSDEV_OP_ISOUT) && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && @@ -107,6 +103,18 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) return -EINVAL; } + return 0; +} + +static int physdev_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_physdev_info *info = par->matchinfo; + static bool brnf_probed __read_mostly; + + if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || + info->bitmask & ~XT_PHYSDEV_OP_MASK) + return -EINVAL; + #define X(memb) strnlen(info->memb, sizeof(info->memb)) >= sizeof(info->memb) if (info->bitmask & XT_PHYSDEV_OP_IN) { if (info->physindev[0] == '\0') @@ -141,6 +149,7 @@ static struct xt_match physdev_mt_reg[] __read_mostly = { { .name = "physdev", .family = NFPROTO_IPV4, + .check_hooks = physdev_mt_check_hooks, .checkentry = physdev_mt_check, .match = physdev_mt, .matchsize = sizeof(struct xt_physdev_info), @@ -149,6 +158,7 @@ static struct xt_match physdev_mt_reg[] __read_mostly = { { .name = "physdev", .family = NFPROTO_IPV6, + .check_hooks = physdev_mt_check_hooks, .checkentry = physdev_mt_check, .match = physdev_mt, .matchsize = sizeof(struct xt_physdev_info), diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index b5fa65558318..ff54e3a8581e 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -126,13 +126,10 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par) return ret; } -static int policy_mt_check(const struct xt_mtchk_param *par) +static int policy_mt_check_hooks(const struct xt_mtchk_param *par) { const struct xt_policy_info *info = par->matchinfo; - const char *errmsg = "neither incoming nor outgoing policy selected"; - - if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) - goto err; + const char *errmsg; if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) { @@ -144,6 +141,21 @@ static int policy_mt_check(const struct xt_mtchk_param *par) errmsg = "input policy not valid in POSTROUTING and OUTPUT"; goto err; } + + return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; +} + +static int policy_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_policy_info *info = par->matchinfo; + const char *errmsg = "neither incoming nor outgoing policy selected"; + + if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) + goto err; + if (info->len > XT_POLICY_MAX_ELEM) { errmsg = "too many policy elements"; goto err; @@ -158,6 +170,7 @@ static struct xt_match policy_mt_reg[] __read_mostly = { { .name = "policy", .family = NFPROTO_IPV4, + .check_hooks = policy_mt_check_hooks, .checkentry = policy_mt_check, .match = policy_mt, .matchsize = sizeof(struct xt_policy_info), @@ -166,6 +179,7 @@ static struct xt_match policy_mt_reg[] __read_mostly = { { .name = "policy", .family = NFPROTO_IPV6, + .check_hooks = policy_mt_check_hooks, .checkentry = policy_mt_check, .match = policy_mt, .matchsize = sizeof(struct xt_policy_info), diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 731bc2cafae4..4ae04bba9358 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -430,6 +430,29 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } +static int +set_target_v3_check_hooks(const struct xt_tgchk_param *par) +{ + const struct xt_set_info_target_v3 *info = par->targinfo; + + if (info->map_set.index != IPSET_INVALID_ID) { + if (strncmp(par->table, "mangle", 7)) { + pr_info_ratelimited("--map-set only usable from mangle table\n"); + return -EINVAL; + } + if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) | + (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) && + (par->hook_mask & ~(1 << NF_INET_FORWARD | + 1 << NF_INET_LOCAL_OUT | + 1 << NF_INET_POST_ROUTING))) { + pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); + return -EINVAL; + } + } + + return 0; +} + static int set_target_v3_checkentry(const struct xt_tgchk_param *par) { @@ -459,20 +482,6 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) } if (info->map_set.index != IPSET_INVALID_ID) { - if (strncmp(par->table, "mangle", 7)) { - pr_info_ratelimited("--map-set only usable from mangle table\n"); - ret = -EINVAL; - goto cleanup_del; - } - if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) | - (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) && - (par->hook_mask & ~(1 << NF_INET_FORWARD | - 1 << NF_INET_LOCAL_OUT | - 1 << NF_INET_POST_ROUTING))) { - pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); - ret = -EINVAL; - goto cleanup_del; - } index = ip_set_nfnl_get_byindex(par->net, info->map_set.index); if (index == IPSET_INVALID_ID) { @@ -672,6 +681,7 @@ static struct xt_target set_targets[] __read_mostly = { .family = NFPROTO_IPV4, .target = set_target_v3, .targetsize = sizeof(struct xt_set_info_target_v3), + .check_hooks = set_target_v3_check_hooks, .checkentry = set_target_v3_checkentry, .destroy = set_target_v3_destroy, .me = THIS_MODULE @@ -682,6 +692,7 @@ static struct xt_target set_targets[] __read_mostly = { .family = NFPROTO_IPV6, .target = set_target_v3, .targetsize = sizeof(struct xt_set_info_target_v3), + .check_hooks = set_target_v3_check_hooks, .checkentry = set_target_v3_checkentry, .destroy = set_target_v3_destroy, .me = THIS_MODULE -- cgit v1.2.3 From 2f768d638d977eff824f64dcc9639e3fea32da8f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Apr 2026 19:04:07 +0200 Subject: netfilter: nft_compat: run xt_check_hooks_{match,target}() from .validate Several matches and one target check that the hook is correct from checkentry(), however, the basechain is only available from nft_table_validate(). This patch uses xt_check_hooks_{match,target}() from the nft_compat expression .validate path. This patch sets the table in the nft_ctx struct in nft_table_validate() which is required by this patch. Based on patch from Florian Westphal. Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Reported-by: Xiang Mei Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + net/netfilter/nft_compat.c | 45 +++++++++++++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d20ce5c36d31..38e33c66c618 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4205,6 +4205,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) struct nft_chain *chain; struct nft_ctx ctx = { .net = net, + .table = (struct nft_table *)table, .family = table->family, }; int err = 0; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index decc725a33c2..0caa9304d2d0 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -261,10 +261,10 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return ret; } - nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); - nft_compat_wait_for_destructors(ctx->net); + nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); + ret = xt_check_target(&par, size, proto, inv); if (ret < 0) { if (ret == -ENOENT) { @@ -353,8 +353,6 @@ nla_put_failure: static int nft_target_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { - struct xt_target *target = expr->ops->data; - unsigned int hook_mask = 0; int ret; if (ctx->family != NFPROTO_IPV4 && @@ -377,11 +375,21 @@ static int nft_target_validate(const struct nft_ctx *ctx, const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops; + unsigned int hook_mask = 1 << ops->hooknum; + struct xt_target *target = expr->ops->data; + void *info = nft_expr_priv(expr); + struct xt_tgchk_param par; + union nft_entry e = {}; - hook_mask = 1 << ops->hooknum; if (target->hooks && !(hook_mask & target->hooks)) return -EINVAL; + nft_target_set_tgchk_param(&par, ctx, target, info, &e, 0, false); + + ret = xt_check_hooks_target(&par); + if (ret < 0) + return ret; + ret = nft_compat_chain_validate_dependency(ctx, target->table); if (ret < 0) return ret; @@ -515,10 +523,10 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return ret; } - nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); - nft_compat_wait_for_destructors(ctx->net); + nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); + return xt_check_match(&par, size, proto, inv); } @@ -614,8 +622,6 @@ static int nft_match_large_dump(struct sk_buff *skb, static int nft_match_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { - struct xt_match *match = expr->ops->data; - unsigned int hook_mask = 0; int ret; if (ctx->family != NFPROTO_IPV4 && @@ -638,11 +644,30 @@ static int nft_match_validate(const struct nft_ctx *ctx, const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops; + unsigned int hook_mask = 1 << ops->hooknum; + struct xt_match *match = expr->ops->data; + size_t size = XT_ALIGN(match->matchsize); + struct xt_mtchk_param par; + union nft_entry e = {}; + void *info; - hook_mask = 1 << ops->hooknum; if (match->hooks && !(hook_mask & match->hooks)) return -EINVAL; + if (NFT_EXPR_SIZE(size) > NFT_MATCH_LARGE_THRESH) { + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + + info = priv->info; + } else { + info = nft_expr_priv(expr); + } + + nft_match_set_mtchk_param(&par, ctx, match, info, &e, 0, false); + + ret = xt_check_hooks_match(&par); + if (ret < 0) + return ret; + ret = nft_compat_chain_validate_dependency(ctx, match->table); if (ret < 0) return ret; -- cgit v1.2.3 From 8bedb6c46945752a688d9b0cf2021e0e68b1876c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 28 Apr 2026 19:37:57 +0200 Subject: netfilter: xt_CT: fix usersize for v1 and v2 revision While resurrecting the conntrack-tool test cases I found following bug: In: iptables -I OUTPUT -t raw -p 13 -j CT --timeout test-generic Out: [0:0] -A OUTPUT -p 13 -j CT --timeout test Data after first four bytes of the timeout policy name is never copied to userspace because its treated as kernel-only. Fixes: ec2318904965 ("xtables: extend matches and targets with .usersize") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_CT.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 498f5871c84a..d2aeacf94230 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -354,7 +354,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV4, .revision = 1, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v1, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -366,7 +366,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV4, .revision = 2, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v2, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -398,7 +398,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV6, .revision = 1, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v1, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, @@ -410,7 +410,7 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .family = NFPROTO_IPV6, .revision = 2, .targetsize = sizeof(struct xt_ct_target_info_v1), - .usersize = offsetof(struct xt_ct_target_info, ct), + .usersize = offsetof(struct xt_ct_target_info_v1, ct), .checkentry = xt_ct_tg_check_v2, .destroy = xt_ct_tg_destroy_v1, .target = xt_ct_target_v1, -- cgit v1.2.3 From 63bac027860308d1344f761cb47aabb3b30973fd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 29 Apr 2026 08:21:35 +0200 Subject: netfilter: nf_tables: fix netdev hook allocation memleak with dormant tables sashiko says: could the related code in __nf_tables_abort() leak the struct nft_hook objects when the table is dormant? In __nf_tables_abort(), when rolling back a NEWCHAIN transaction that updates hooks, the code conditionally unregisters and frees the hooks only if the table is not dormant [..] if (!(table->flags & NFT_TABLE_F_DORMANT)) { nft_netdev_unregister_hooks(net, &nft_trans_chain_hooks(trans), true); } ... nft_trans_destroy(trans); Unfortunately netdev family mixes hook registration and allocation. Push table struct down and only check for the flag to unregister. Fixes: 216e7bf7402c ("netfilter: nf_tables: skip netdev hook unregistration if table is dormant") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 38e33c66c618..87387adbca65 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -407,6 +407,7 @@ static void nft_netdev_unregister_trans_hook(struct net *net, } static void nft_netdev_unregister_hooks(struct net *net, + const struct nft_table *table, struct list_head *hook_list, bool release_netdev) { @@ -414,8 +415,10 @@ static void nft_netdev_unregister_hooks(struct net *net, struct nf_hook_ops *ops; list_for_each_entry_safe(hook, next, hook_list, list) { - list_for_each_entry(ops, &hook->ops_list, list) - nf_unregister_net_hook(net, ops); + if (!(table->flags & NFT_TABLE_F_DORMANT)) { + list_for_each_entry(ops, &hook->ops_list, list) + nf_unregister_net_hook(net, ops); + } if (release_netdev) nft_netdev_hook_unlink_free_rcu(hook); } @@ -452,20 +455,25 @@ static void __nf_tables_unregister_hook(struct net *net, struct nft_base_chain *basechain; const struct nf_hook_ops *ops; - if (table->flags & NFT_TABLE_F_DORMANT || - !nft_is_base_chain(chain)) + if (!nft_is_base_chain(chain)) return; basechain = nft_base_chain(chain); ops = &basechain->ops; + /* must also be called for dormant tables */ + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { + nft_netdev_unregister_hooks(net, table, &basechain->hook_list, + release_netdev); + return; + } + + if (table->flags & NFT_TABLE_F_DORMANT) + return; + if (basechain->type->ops_unregister) return basechain->type->ops_unregister(net, ops); - if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) - nft_netdev_unregister_hooks(net, &basechain->hook_list, - release_netdev); - else - nf_unregister_net_hook(net, &basechain->ops); + nf_unregister_net_hook(net, &basechain->ops); } static void nf_tables_unregister_hook(struct net *net, @@ -11282,11 +11290,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - if (!(table->flags & NFT_TABLE_F_DORMANT)) { - nft_netdev_unregister_hooks(net, - &nft_trans_chain_hooks(trans), - true); - } + nft_netdev_unregister_hooks(net, table, + &nft_trans_chain_hooks(trans), + true); free_percpu(nft_trans_chain_stats(trans)); kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); -- cgit v1.2.3 From 22d0213e55fbb723c2c00dd5aa855a6eaad95b23 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Fri, 10 Apr 2026 13:35:06 +0200 Subject: dma-direct: fix use of max_pfn Calculate the correct physical address of the last byte of memory. Since max_pfn is in fact "the PFN of the first page after the highest system RAM in physical address space", the highest address that might be used for a DMA buffer is one byte below max_pfn << PAGE_SHIFT. This fix is unlikely to make any difference in practice. It's just that the current formula is slightly confusing. Signed-off-by: Petr Tesarik Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20260410113506.262579-1-ptesarik@suse.com --- kernel/dma/direct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index ec887f443741..583c5922bca2 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -39,7 +39,7 @@ static inline struct page *dma_direct_to_page(struct device *dev, u64 dma_direct_get_required_mask(struct device *dev) { - phys_addr_t phys = (phys_addr_t)(max_pfn - 1) << PAGE_SHIFT; + phys_addr_t phys = ((phys_addr_t)max_pfn << PAGE_SHIFT) - 1; u64 max_dma = phys_to_dma_direct(dev, phys); return (1ULL << (fls64(max_dma) - 1)) * 2 - 1; @@ -553,7 +553,7 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, int dma_direct_supported(struct device *dev, u64 mask) { - u64 min_mask = (max_pfn - 1) << PAGE_SHIFT; + u64 min_mask = ((u64)max_pfn << PAGE_SHIFT) - 1; /* * Because 32-bit DMA masks are so common we expect every architecture -- cgit v1.2.3 From e6a650acbd991bba279f2580853aed9a8d166e6f Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 29 Apr 2026 21:18:25 +0200 Subject: parisc: Fix build failure for 32-bit kernel with PA2.0 instruction set The CONFIG_PA11 option can not be used as a reliable check if we build a 32-bit kernel which needs the 32-bit VDSO. Instead depend on CONFIG_64BIT and CONFIG_COMPAT only. Reported-by: Christoph Biedl Tested-by: Christoph Biedl Signed-off-by: Helge Deller --- arch/parisc/Makefile | 16 +++++++++++----- arch/parisc/kernel/Makefile | 7 +++++-- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index edab2a948352..4391783521bd 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -174,15 +174,21 @@ ifeq ($(KBUILD_EXTMOD),) # this hack. prepare: vdso_prepare vdso_prepare: prepare0 - $(if $(CONFIG_64BIT),$(Q)$(MAKE) \ - $(build)=arch/parisc/kernel/vdso64 include/generated/vdso64-offsets.h) - $(if $(CONFIG_PA11)$(CONFIG_COMPAT),$(Q)$(MAKE) \ +ifdef CONFIG_64BIT + $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso64 include/generated/vdso64-offsets.h + $(if $(CONFIG_COMPAT),$(Q)$(MAKE) \ $(build)=arch/parisc/kernel/vdso32 include/generated/vdso32-offsets.h) +else + $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso32 include/generated/vdso32-offsets.h +endif endif -vdso-install-$(CONFIG_PA11) += arch/parisc/kernel/vdso32/vdso32.so +ifdef CONFIG_64BIT +vdso-install-y += arch/parisc/kernel/vdso64/vdso64.so vdso-install-$(CONFIG_COMPAT) += arch/parisc/kernel/vdso32/vdso32.so -vdso-install-$(CONFIG_64BIT) += arch/parisc/kernel/vdso64/vdso64.so +else +vdso-install-y += arch/parisc/kernel/vdso32/vdso32.so +endif install: KBUILD_IMAGE := vmlinux zinstall: KBUILD_IMAGE := vmlinuz diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index 2f3441769ac5..49f937c2abbe 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -46,6 +46,9 @@ obj-$(CONFIG_KEXEC_FILE) += kexec_file.o # vdso obj-y += vdso.o -obj-$(CONFIG_64BIT) += vdso64/ -obj-$(CONFIG_PA11) += vdso32/ +ifdef CONFIG_64BIT +obj-y += vdso64/ obj-$(CONFIG_COMPAT) += vdso32/ +else +obj-y += vdso32/ +endif -- cgit v1.2.3 From 100201d349edd226ca3470c894c92dccc67ee7a8 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Mon, 27 Apr 2026 11:17:46 +0200 Subject: USB: serial: option: add Telit Cinterion LE910Cx compositions Add the following Telit Cinterion LE910Cx compositions: 0x1251: RNDIS + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (SAP) T: Bus=01 Lev=01 Prnt=21 Port=06 Cnt=01 Dev#=108 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1251 Rev=03.18 S: Manufacturer=Android S: Product=LE910C1-EU S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=02 Prot=ff Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms 0x1253: ECM + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (SAP) T: Bus=01 Lev=01 Prnt=21 Port=06 Cnt=01 Dev#=121 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1253 Rev=03.18 S: Manufacturer=Android S: Product=LE910C1-EU S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms 0x1254: tty (AT) + tty (AT) T: Bus=01 Lev=01 Prnt=21 Port=06 Cnt=01 Dev#=122 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1254 Rev=03.18 S: Manufacturer=Android S: Product=LE910C1-EU S: SerialNumber=0123456789ABCDEF C: #Ifs= 2 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms 0x1255: tty (AT/NMEA) + tty (AT) + tty (AT) + tty (SAP) T: Bus=01 Lev=01 Prnt=21 Port=06 Cnt=01 Dev#=123 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1255 Rev=03.18 S: Manufacturer=Android S: Product=LE910C1-EU S: SerialNumber=0123456789ABCDEF C: #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms Cc: stable@vger.kernel.org Signed-off-by: Fabio Porcedda Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index c71461893d20..42e4cecd28ac 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1513,7 +1513,11 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1231, 0xff), /* Telit LE910Cx (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x1250, 0xff, 0x00, 0x00) }, /* Telit LE910Cx (rmnet) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1251, 0xff) }, /* Telit LE910Cx (RNDIS) */ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1252, 0xff) }, /* Telit LE910Cx (MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1253, 0xff) }, /* Telit LE910Cx (ECM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1254, 0xff) }, /* Telit LE910Cx */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1255, 0xff) }, /* Telit LE910Cx */ { USB_DEVICE(TELIT_VENDOR_ID, 0x1260), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1261), -- cgit v1.2.3 From 70d62b669f1f9080a25278fc90b64309f4ae8959 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Mon, 27 Apr 2026 22:22:13 -0700 Subject: iavf: rename IAVF_VLAN_IS_NEW to IAVF_VLAN_ADDING Rename the IAVF_VLAN_IS_NEW state to IAVF_VLAN_ADDING to better describe what the state represents: an ADD request has been sent to the PF and is waiting for a response. This is a pure rename with no behavioral change, preparing for a cleanup of the VLAN filter state machine. Signed-off-by: Petr Oros Reviewed-by: Aleksandr Loktionov Tested-by: Rafal Romanowski Reviewed-by: Simon Horman Reviewed-by: Przemek Kitszel Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260427-jk-iwl-net-petr-oros-fixes-v1-1-cdcb48303fd8@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/iavf/iavf.h | 2 +- drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index e9fb0a0919e3..47a862ca5e2c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -158,7 +158,7 @@ struct iavf_vlan { enum iavf_vlan_state_t { IAVF_VLAN_INVALID, IAVF_VLAN_ADD, /* filter needs to be added */ - IAVF_VLAN_IS_NEW, /* filter is new, wait for PF answer */ + IAVF_VLAN_ADDING, /* ADD sent to PF, waiting for response */ IAVF_VLAN_ACTIVE, /* filter is accepted by PF */ IAVF_VLAN_DISABLE, /* filter needs to be deleted by PF, then marked INACTIVE */ IAVF_VLAN_INACTIVE, /* filter is inactive, we are in IFF_DOWN */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index a52c100dcbc5..6b06ae872a0c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -746,7 +746,7 @@ static void iavf_vlan_add_reject(struct iavf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - if (f->state == IAVF_VLAN_IS_NEW) { + if (f->state == IAVF_VLAN_ADDING) { list_del(&f->list); kfree(f); adapter->num_vlan_filters--; @@ -812,7 +812,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter) if (f->state == IAVF_VLAN_ADD) { vvfl->vlan_id[i] = f->vlan.vid; i++; - f->state = IAVF_VLAN_IS_NEW; + f->state = IAVF_VLAN_ADDING; if (i == count) break; } @@ -874,7 +874,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter) vlan->tpid = f->vlan.tpid; i++; - f->state = IAVF_VLAN_IS_NEW; + f->state = IAVF_VLAN_ADDING; } } @@ -2910,7 +2910,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry(f, &adapter->vlan_filter_list, list) { - if (f->state == IAVF_VLAN_IS_NEW) + if (f->state == IAVF_VLAN_ADDING) f->state = IAVF_VLAN_ACTIVE; } spin_unlock_bh(&adapter->mac_vlan_list_lock); -- cgit v1.2.3 From f2ce65b9b917474a1a6ce68d357e15fac2aca0f2 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Mon, 27 Apr 2026 22:22:14 -0700 Subject: iavf: stop removing VLAN filters from PF on interface down When a VF goes down, the driver currently sends DEL_VLAN to the PF for every VLAN filter (ACTIVE -> DISABLE -> send DEL -> INACTIVE), then re-adds them all on UP (INACTIVE -> ADD -> send ADD -> ADDING -> ACTIVE). This round-trip is unnecessary because: 1. The PF disables the VF's queues via VIRTCHNL_OP_DISABLE_QUEUES, which already prevents all RX/TX traffic regardless of VLAN filter state. 2. The VLAN filters remaining in PF HW while the VF is down is harmless - packets matching those filters have nowhere to go with queues disabled. 3. The DEL+ADD cycle during down/up creates race windows where the VLAN filter list is incomplete. With spoofcheck enabled, the PF enables TX VLAN filtering on the first non-zero VLAN add, blocking traffic for any VLANs not yet re-added. Remove the entire DISABLE/INACTIVE state machinery: - Remove IAVF_VLAN_DISABLE and IAVF_VLAN_INACTIVE enum values - Remove iavf_restore_filters() and its call from iavf_open() - Remove VLAN filter handling from iavf_clear_mac_vlan_filters(), rename it to iavf_clear_mac_filters() - Remove DEL_VLAN_FILTER scheduling from iavf_down() - Remove all DISABLE/INACTIVE handling from iavf_del_vlans() VLAN filters now stay ACTIVE across down/up cycles. Only explicit user removal (ndo_vlan_rx_kill_vid) or PF/VF reset triggers VLAN filter deletion/re-addition. Fixes: ed1f5b58ea01 ("i40evf: remove VLAN filters on close") Signed-off-by: Petr Oros Reviewed-by: Aleksandr Loktionov Tested-by: Rafal Romanowski Reviewed-by: Simon Horman Reviewed-by: Przemek Kitszel Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260427-jk-iwl-net-petr-oros-fixes-v1-2-cdcb48303fd8@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/iavf/iavf.h | 6 ++-- drivers/net/ethernet/intel/iavf/iavf_main.c | 39 +++---------------------- drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 33 ++++----------------- 3 files changed, 12 insertions(+), 66 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 47a862ca5e2c..5765715914d6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -159,10 +159,8 @@ enum iavf_vlan_state_t { IAVF_VLAN_INVALID, IAVF_VLAN_ADD, /* filter needs to be added */ IAVF_VLAN_ADDING, /* ADD sent to PF, waiting for response */ - IAVF_VLAN_ACTIVE, /* filter is accepted by PF */ - IAVF_VLAN_DISABLE, /* filter needs to be deleted by PF, then marked INACTIVE */ - IAVF_VLAN_INACTIVE, /* filter is inactive, we are in IFF_DOWN */ - IAVF_VLAN_REMOVE, /* filter needs to be removed from list */ + IAVF_VLAN_ACTIVE, /* PF confirmed, filter is in HW */ + IAVF_VLAN_REMOVE, /* filter queued for DEL from PF */ }; struct iavf_vlan_filter { diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 3c1465cf0515..ca29038c0016 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -801,27 +801,6 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan) spin_unlock_bh(&adapter->mac_vlan_list_lock); } -/** - * iavf_restore_filters - * @adapter: board private structure - * - * Restore existing non MAC filters when VF netdev comes back up - **/ -static void iavf_restore_filters(struct iavf_adapter *adapter) -{ - struct iavf_vlan_filter *f; - - /* re-add all VLAN filters */ - spin_lock_bh(&adapter->mac_vlan_list_lock); - - list_for_each_entry(f, &adapter->vlan_filter_list, list) { - if (f->state == IAVF_VLAN_INACTIVE) - f->state = IAVF_VLAN_ADD; - } - - spin_unlock_bh(&adapter->mac_vlan_list_lock); - adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; -} /** * iavf_get_num_vlans_added - get number of VLANs added @@ -1246,13 +1225,12 @@ static void iavf_up_complete(struct iavf_adapter *adapter) } /** - * iavf_clear_mac_vlan_filters - Remove mac and vlan filters not sent to PF - * yet and mark other to be removed. + * iavf_clear_mac_filters - Remove MAC filters not sent to PF yet and mark + * others to be removed. * @adapter: board private structure **/ -static void iavf_clear_mac_vlan_filters(struct iavf_adapter *adapter) +static void iavf_clear_mac_filters(struct iavf_adapter *adapter) { - struct iavf_vlan_filter *vlf, *vlftmp; struct iavf_mac_filter *f, *ftmp; spin_lock_bh(&adapter->mac_vlan_list_lock); @@ -1271,11 +1249,6 @@ static void iavf_clear_mac_vlan_filters(struct iavf_adapter *adapter) } } - /* disable all VLAN filters */ - list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list, - list) - vlf->state = IAVF_VLAN_DISABLE; - spin_unlock_bh(&adapter->mac_vlan_list_lock); } @@ -1371,7 +1344,7 @@ void iavf_down(struct iavf_adapter *adapter) iavf_napi_disable_all(adapter); iavf_irq_disable(adapter); - iavf_clear_mac_vlan_filters(adapter); + iavf_clear_mac_filters(adapter); iavf_clear_cloud_filters(adapter); iavf_clear_fdir_filters(adapter); iavf_clear_adv_rss_conf(adapter); @@ -1388,8 +1361,6 @@ void iavf_down(struct iavf_adapter *adapter) */ if (!list_empty(&adapter->mac_filter_list)) adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER; - if (!list_empty(&adapter->vlan_filter_list)) - adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER; if (!list_empty(&adapter->cloud_filter_list)) adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER; if (!list_empty(&adapter->fdir_list_head)) @@ -4494,8 +4465,6 @@ static int iavf_open(struct net_device *netdev) iavf_add_filter(adapter, adapter->hw.mac.addr); spin_unlock_bh(&adapter->mac_vlan_list_lock); - /* Restore filters that were removed with IFF_DOWN */ - iavf_restore_filters(adapter); iavf_restore_fdir_filters(adapter); iavf_configure(adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 6b06ae872a0c..4f197d908124 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -911,22 +911,12 @@ void iavf_del_vlans(struct iavf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - /* since VLAN capabilities are not allowed, we dont want to send - * a VLAN delete request because it will most likely fail and - * create unnecessary errors/noise, so just free the VLAN - * filters marked for removal to enable bailing out before - * sending a virtchnl message - */ if (f->state == IAVF_VLAN_REMOVE && !VLAN_FILTERING_ALLOWED(adapter)) { list_del(&f->list); kfree(f); adapter->num_vlan_filters--; - } else if (f->state == IAVF_VLAN_DISABLE && - !VLAN_FILTERING_ALLOWED(adapter)) { - f->state = IAVF_VLAN_INACTIVE; - } else if (f->state == IAVF_VLAN_REMOVE || - f->state == IAVF_VLAN_DISABLE) { + } else if (f->state == IAVF_VLAN_REMOVE) { count++; } } @@ -959,13 +949,7 @@ void iavf_del_vlans(struct iavf_adapter *adapter) vvfl->vsi_id = adapter->vsi_res->vsi_id; vvfl->num_elements = count; list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - if (f->state == IAVF_VLAN_DISABLE) { - vvfl->vlan_id[i] = f->vlan.vid; - f->state = IAVF_VLAN_INACTIVE; - i++; - if (i == count) - break; - } else if (f->state == IAVF_VLAN_REMOVE) { + if (f->state == IAVF_VLAN_REMOVE) { vvfl->vlan_id[i] = f->vlan.vid; list_del(&f->list); kfree(f); @@ -1007,8 +991,7 @@ void iavf_del_vlans(struct iavf_adapter *adapter) vvfl_v2->vport_id = adapter->vsi_res->vsi_id; vvfl_v2->num_elements = count; list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - if (f->state == IAVF_VLAN_DISABLE || - f->state == IAVF_VLAN_REMOVE) { + if (f->state == IAVF_VLAN_REMOVE) { struct virtchnl_vlan_supported_caps *filtering_support = &adapter->vlan_v2_caps.filtering.filtering_support; struct virtchnl_vlan *vlan; @@ -1022,13 +1005,9 @@ void iavf_del_vlans(struct iavf_adapter *adapter) vlan->tci = f->vlan.vid; vlan->tpid = f->vlan.tpid; - if (f->state == IAVF_VLAN_DISABLE) { - f->state = IAVF_VLAN_INACTIVE; - } else { - list_del(&f->list); - kfree(f); - adapter->num_vlan_filters--; - } + list_del(&f->list); + kfree(f); + adapter->num_vlan_filters--; i++; if (i == count) break; -- cgit v1.2.3 From bbcbe4ed70dea948849549af7edf44bd42bbd695 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Mon, 27 Apr 2026 22:22:15 -0700 Subject: iavf: wait for PF confirmation before removing VLAN filters The VLAN filter DELETE path was asymmetric with the ADD path: ADD waits for PF confirmation (ADD -> ADDING -> ACTIVE), but DELETE immediately frees the filter struct after sending the DEL message without waiting for the PF response. This is problematic because: - If the PF rejects the DEL, the filter remains in HW but the driver has already freed the tracking structure, losing sync. - Race conditions between DEL pending and other operations (add, reset) cannot be properly resolved if the filter struct is already gone. Add IAVF_VLAN_REMOVING state to make the DELETE path symmetric: REMOVE -> REMOVING (send DEL) -> PF confirms -> kfree -> PF rejects -> ACTIVE In iavf_del_vlans(), transition filters from REMOVE to REMOVING instead of immediately freeing them. The new DEL completion handler in iavf_virtchnl_completion() frees filters on success or reverts them to ACTIVE on error. Update iavf_add_vlan() to handle the REMOVING state: if a DEL is pending and the user re-adds the same VLAN, queue it for ADD so it gets re-programmed after the PF processes the DEL. The !VLAN_FILTERING_ALLOWED early-exit path still frees filters directly since no PF message is sent in that case. Also update iavf_del_vlan() to skip filters already in REMOVING state: DEL has been sent to PF and the completion handler will free the filter when PF confirms. Without this guard, the sequence DEL(pending) -> user-del -> second DEL could cause the PF to return an error for the second DEL (filter already gone), causing the completion handler to incorrectly revert a deleted filter back to ACTIVE. Fixes: 968996c070ef ("iavf: Fix VLAN_V2 addition/rejection") Signed-off-by: Petr Oros Reviewed-by: Aleksandr Loktionov Tested-by: Rafal Romanowski Reviewed-by: Przemek Kitszel Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260427-jk-iwl-net-petr-oros-fixes-v1-3-cdcb48303fd8@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/iavf/iavf.h | 1 + drivers/net/ethernet/intel/iavf/iavf_main.c | 13 +++++---- drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 37 +++++++++++++++++-------- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 5765715914d6..050f8241ef5e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -161,6 +161,7 @@ enum iavf_vlan_state_t { IAVF_VLAN_ADDING, /* ADD sent to PF, waiting for response */ IAVF_VLAN_ACTIVE, /* PF confirmed, filter is in HW */ IAVF_VLAN_REMOVE, /* filter queued for DEL from PF */ + IAVF_VLAN_REMOVING, /* DEL sent to PF, waiting for response */ }; struct iavf_vlan_filter { diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index ca29038c0016..d2914c511e1e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -757,10 +757,10 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, adapter->num_vlan_filters++; iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_VLAN_FILTER); } else if (f->state == IAVF_VLAN_REMOVE) { - /* Re-add the filter since we cannot tell whether the - * pending delete has already been processed by the PF. - * A duplicate add is harmless. - */ + /* DEL not yet sent to PF, cancel it */ + f->state = IAVF_VLAN_ACTIVE; + } else if (f->state == IAVF_VLAN_REMOVING) { + /* DEL already sent to PF, re-add after completion */ f->state = IAVF_VLAN_ADD; iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_VLAN_FILTER); @@ -791,11 +791,14 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan) list_del(&f->list); kfree(f); adapter->num_vlan_filters--; - } else { + } else if (f->state != IAVF_VLAN_REMOVING) { f->state = IAVF_VLAN_REMOVE; iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_DEL_VLAN_FILTER); } + /* If REMOVING, DEL is already sent to PF; completion + * handler will free the filter when PF confirms. + */ } spin_unlock_bh(&adapter->mac_vlan_list_lock); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 4f197d908124..93ca79c3e3b5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -948,12 +948,10 @@ void iavf_del_vlans(struct iavf_adapter *adapter) vvfl->vsi_id = adapter->vsi_res->vsi_id; vvfl->num_elements = count; - list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { + list_for_each_entry(f, &adapter->vlan_filter_list, list) { if (f->state == IAVF_VLAN_REMOVE) { vvfl->vlan_id[i] = f->vlan.vid; - list_del(&f->list); - kfree(f); - adapter->num_vlan_filters--; + f->state = IAVF_VLAN_REMOVING; i++; if (i == count) break; @@ -990,7 +988,7 @@ void iavf_del_vlans(struct iavf_adapter *adapter) vvfl_v2->vport_id = adapter->vsi_res->vsi_id; vvfl_v2->num_elements = count; - list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { + list_for_each_entry(f, &adapter->vlan_filter_list, list) { if (f->state == IAVF_VLAN_REMOVE) { struct virtchnl_vlan_supported_caps *filtering_support = &adapter->vlan_v2_caps.filtering.filtering_support; @@ -1005,9 +1003,7 @@ void iavf_del_vlans(struct iavf_adapter *adapter) vlan->tci = f->vlan.vid; vlan->tpid = f->vlan.tpid; - list_del(&f->list); - kfree(f); - adapter->num_vlan_filters--; + f->state = IAVF_VLAN_REMOVING; i++; if (i == count) break; @@ -2370,10 +2366,6 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); wake_up(&adapter->vc_waitqueue); break; - case VIRTCHNL_OP_DEL_VLAN: - dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n", - iavf_stat_str(&adapter->hw, v_retval)); - break; case VIRTCHNL_OP_DEL_ETH_ADDR: dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n", iavf_stat_str(&adapter->hw, v_retval)); @@ -2895,6 +2887,27 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, spin_unlock_bh(&adapter->mac_vlan_list_lock); } break; + case VIRTCHNL_OP_DEL_VLAN: + case VIRTCHNL_OP_DEL_VLAN_V2: { + struct iavf_vlan_filter *f, *ftmp; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, + list) { + if (f->state == IAVF_VLAN_REMOVING) { + if (v_retval) { + /* PF rejected DEL, keep filter */ + f->state = IAVF_VLAN_ACTIVE; + } else { + list_del(&f->list); + kfree(f); + adapter->num_vlan_filters--; + } + } + } + spin_unlock_bh(&adapter->mac_vlan_list_lock); + } + break; case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: /* PF enabled vlan strip on this VF. * Update netdev->features if needed to be in sync with ethtool. -- cgit v1.2.3 From 34d33313b52eeac3a97ad2e3176d523ec70d9283 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Mon, 27 Apr 2026 22:22:16 -0700 Subject: iavf: add VIRTCHNL_OP_ADD_VLAN to success completion handler The V1 ADD_VLAN opcode had no success handler; filters sent via V1 stayed in ADDING state permanently. Add a fallthrough case so V1 filters also transition ADDING -> ACTIVE on PF confirmation. Critically, add an `if (v_retval) break` guard: the error switch in iavf_virtchnl_completion() does NOT return after handling errors, it falls through to the success switch. Without this guard, a PF-rejected ADD would incorrectly mark ADDING filters as ACTIVE, creating a driver/HW mismatch where the driver believes the filter is installed but the PF never accepted it. For V2, this is harmless: iavf_vlan_add_reject() in the error block already kfree'd all ADDING filters, so the success handler finds nothing to transition. Fixes: 968996c070ef ("iavf: Fix VLAN_V2 addition/rejection") Signed-off-by: Petr Oros Reviewed-by: Aleksandr Loktionov Tested-by: Rafal Romanowski Reviewed-by: Przemek Kitszel Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260427-jk-iwl-net-petr-oros-fixes-v1-4-cdcb48303fd8@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 93ca79c3e3b5..4f2defd2331b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -2876,9 +2876,13 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, spin_unlock_bh(&adapter->adv_rss_lock); } break; + case VIRTCHNL_OP_ADD_VLAN: case VIRTCHNL_OP_ADD_VLAN_V2: { struct iavf_vlan_filter *f; + if (v_retval) + break; + spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry(f, &adapter->vlan_filter_list, list) { if (f->state == IAVF_VLAN_ADDING) -- cgit v1.2.3 From 54ef02487914c24170c7e1c061e45212dc55365e Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Mon, 27 Apr 2026 22:22:17 -0700 Subject: ice: fix NULL pointer dereference in ice_reset_all_vfs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ice_reset_all_vfs() ignores the return value of ice_vf_rebuild_vsi(). When the VSI rebuild fails (e.g. during NVM firmware update via nvmupdate64e), ice_vsi_rebuild() tears down the VSI on its error path, leaving txq_map and rxq_map as NULL. The subsequent unconditional call to ice_vf_post_vsi_rebuild() leads to a NULL pointer dereference in ice_ena_vf_q_mappings() when it accesses vsi->txq_map[0]. The single-VF reset path in ice_reset_vf() already handles this correctly by checking the return value of ice_vf_reconfig_vsi() and skipping ice_vf_post_vsi_rebuild() on failure. Apply the same pattern to ice_reset_all_vfs(): check the return value of ice_vf_rebuild_vsi() and skip ice_vf_post_vsi_rebuild() and ice_eswitch_attach_vf() on failure. The VF is left safely disabled (ICE_VF_STATE_INIT not set, VFGEN_RSTAT not set to VFACTIVE) and can be recovered via a VFLR triggered by a PCI reset of the VF (sysfs reset or driver rebind). Note that this patch does not prevent the VF VSI rebuild from failing during NVM update — the underlying cause is firmware being in a transitional state while the EMP reset is processed, which can cause Admin Queue commands (ice_add_vsi, ice_cfg_vsi_lan) to fail. This patch only prevents the subsequent NULL pointer dereference that crashes the kernel when the rebuild does fail. crash> bt PID: 50795 TASK: ff34c9ee708dc680 CPU: 1 COMMAND: "kworker/u512:5" #0 [ff72159bcfe5bb50] machine_kexec at ffffffffaa8850ee #1 [ff72159bcfe5bba8] __crash_kexec at ffffffffaaa15fba #2 [ff72159bcfe5bc68] crash_kexec at ffffffffaaa16540 #3 [ff72159bcfe5bc70] oops_end at ffffffffaa837eda #4 [ff72159bcfe5bc90] page_fault_oops at ffffffffaa893997 #5 [ff72159bcfe5bce8] exc_page_fault at ffffffffab528595 #6 [ff72159bcfe5bd10] asm_exc_page_fault at ffffffffab600bb2 [exception RIP: ice_ena_vf_q_mappings+0x79] RIP: ffffffffc0a85b29 RSP: ff72159bcfe5bdc8 RFLAGS: 00010206 RAX: 00000000000f0000 RBX: ff34c9efc9c00000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000010 RDI: ff34c9efc9c00000 RBP: ff34c9efc27d4828 R8: 0000000000000093 R9: 0000000000000040 R10: ff34c9efc27d4828 R11: 0000000000000040 R12: 0000000000100000 R13: 0000000000000010 R14: R15: ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ff72159bcfe5bdf8] ice_sriov_post_vsi_rebuild at ffffffffc0a85e2e [ice] #8 [ff72159bcfe5be08] ice_reset_all_vfs at ffffffffc0a920b4 [ice] #9 [ff72159bcfe5be48] ice_service_task at ffffffffc0a31519 [ice] #10 [ff72159bcfe5be88] process_one_work at ffffffffaa93dca4 #11 [ff72159bcfe5bec8] worker_thread at ffffffffaa93e9de #12 [ff72159bcfe5bf18] kthread at ffffffffaa946663 #13 [ff72159bcfe5bf50] ret_from_fork at ffffffffaa8086b9 The panic occurs attempting to dereference the NULL pointer in RDX at ice_sriov.c:294, which loads vsi->txq_map (offset 0x4b8 in ice_vsi). The faulting VSI is an allocated slab object but not fully initialized after a failed ice_vsi_rebuild(): crash> struct ice_vsi 0xff34c9efc27d4828 netdev = 0x0, rx_rings = 0x0, tx_rings = 0x0, q_vectors = 0x0, txq_map = 0x0, rxq_map = 0x0, alloc_txq = 0x10, num_txq = 0x10, alloc_rxq = 0x10, num_rxq = 0x10, The nvmupdate64e process was performing NVM firmware update: crash> bt 0xff34c9edd1a30000 PID: 49858 TASK: ff34c9edd1a30000 CPU: 1 COMMAND: "nvmupdate64e" #0 [ff72159bcd617618] __schedule at ffffffffab5333f8 #4 [ff72159bcd617750] ice_sq_send_cmd at ffffffffc0a35347 [ice] #5 [ff72159bcd6177a8] ice_sq_send_cmd_retry at ffffffffc0a35b47 [ice] #6 [ff72159bcd617810] ice_aq_send_cmd at ffffffffc0a38018 [ice] #7 [ff72159bcd617848] ice_aq_read_nvm at ffffffffc0a40254 [ice] #8 [ff72159bcd6178b8] ice_read_flat_nvm at ffffffffc0a4034c [ice] #9 [ff72159bcd617918] ice_devlink_nvm_snapshot at ffffffffc0a6ffa5 [ice] dmesg: ice 0000:13:00.0: firmware recommends not updating fw.mgmt, as it may result in a downgrade. continuing anyways ice 0000:13:00.1: ice_init_nvm failed -5 ice 0000:13:00.1: Rebuild failed, unload and reload driver Fixes: 12bb018c538c ("ice: Refactor VF reset") Signed-off-by: Petr Oros Tested-by: Rafal Romanowski Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260427-jk-iwl-net-petr-oros-fixes-v1-5-cdcb48303fd8@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_vf_lib.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 772f6b07340d..b1f46707dcc0 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -804,7 +804,12 @@ void ice_reset_all_vfs(struct ice_pf *pf) ice_vf_ctrl_invalidate_vsi(vf); ice_vf_pre_vsi_rebuild(vf); - ice_vf_rebuild_vsi(vf); + if (ice_vf_rebuild_vsi(vf)) { + dev_err(dev, "VF %u VSI rebuild failed, leaving VF disabled\n", + vf->vf_id); + mutex_unlock(&vf->cfg_lock); + continue; + } ice_vf_post_vsi_rebuild(vf); ice_eswitch_attach_vf(pf, vf); -- cgit v1.2.3 From 70ad216411e030f67b1743774e245601194aee6a Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Mon, 27 Apr 2026 22:22:18 -0700 Subject: ice: fix infinite recursion in ice_cfg_tx_topo via ice_init_dev_hw On certain E810 configurations where firmware supports Tx scheduler topology switching (tx_sched_topo_comp_mode_en), ice_cfg_tx_topo() may need to apply a new 5-layer or 9-layer topology from the DDP package. If the AQ command to set the topology fails (e.g. due to invalid DDP data or firmware limitations), the global configuration lock must still be cleared via a CORER reset. Commit 86aae43f21cf ("ice: don't leave device non-functional if Tx scheduler config fails") correctly fixed this by refactoring ice_cfg_tx_topo() to always trigger CORER after acquiring the global lock and re-initialize hardware via ice_init_hw() afterwards. However, commit 8a37f9e2ff40 ("ice: move ice_deinit_dev() to the end of deinit paths") later moved ice_init_dev_hw() into ice_init_hw(), breaking the reinit path introduced by 86aae43f21cf. This creates an infinite recursive call chain: ice_init_hw() ice_init_dev_hw() ice_cfg_tx_topo() # topology change needed ice_deinit_hw() ice_init_hw() # reinit after CORER ice_init_dev_hw() # recurse ice_cfg_tx_topo() ... # stack overflow Fix by moving ice_init_dev_hw() back out of ice_init_hw() and calling it explicitly from ice_probe() and ice_devlink_reinit_up(). The third caller, ice_cfg_tx_topo(), intentionally does not need ice_init_dev_hw() during its reinit, it only needs the core HW reinitialization. This breaks the recursion cleanly without adding flags or guards. The deinit ordering changes from commit 8a37f9e2ff40 ("ice: move ice_deinit_dev() to the end of deinit paths") which fixed slow rmmod are preserved, only the init-side placement of ice_init_dev_hw() is reverted. Fixes: 8a37f9e2ff40 ("ice: move ice_deinit_dev() to the end of deinit paths") Signed-off-by: Petr Oros Reviewed-by: Paul Menzel Reviewed-by: Jacob Keller Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Tested-by: Alexander Nowlin Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260427-jk-iwl-net-petr-oros-fixes-v1-6-cdcb48303fd8@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/devlink/devlink.c | 2 ++ drivers/net/ethernet/intel/ice/ice_common.c | 2 -- drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index 6144cee8034d..641d6e289d5c 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -1245,6 +1245,8 @@ static int ice_devlink_reinit_up(struct ice_pf *pf) return err; } + ice_init_dev_hw(pf); + /* load MSI-X values */ ice_set_min_max_msix(pf); diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index ce11fea122d0..b617a6bff891 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1126,8 +1126,6 @@ int ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_fltr_mgmt_struct; - ice_init_dev_hw(hw->back); - mutex_init(&hw->tnl_lock); ice_init_chk_recipe_reuse_support(hw); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 5f92377d4dfc..1d1947a7fe11 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5245,6 +5245,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) return err; } + ice_init_dev_hw(pf); + adapter = ice_adapter_get(pdev); if (IS_ERR(adapter)) { err = PTR_ERR(adapter); -- cgit v1.2.3 From 56a643aed0f0af5c29ebb4593d4917b78344dd48 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Mon, 27 Apr 2026 22:22:19 -0700 Subject: ice: fix missing SMA pin initialization in DPLL subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DPLL SMA/U.FL pin redesign introduced ice_dpll_sw_pin_frequency_get() which gates frequency reporting on the pin's active flag. This flag is determined by ice_dpll_sw_pins_update() from the PCA9575 GPIO expander state. Before the redesign, SMA pins were exposed as direct HW input/output pins and ice_dpll_frequency_get() returned the CGU frequency unconditionally — the PCA9575 state was never consulted. The PCA9575 powers on with all outputs high, setting ICE_SMA1_DIR_EN, ICE_SMA1_TX_EN, ICE_SMA2_DIR_EN and ICE_SMA2_TX_EN. Nothing in the driver writes the register during initialization, so ice_dpll_sw_pins_update() sees all pins as inactive and ice_dpll_sw_pin_frequency_get() permanently returns 0 Hz for every SW pin. Fix this by writing a default SMA configuration in ice_dpll_init_info_sw_pins(): clear all SMA bits, then set SMA1 and SMA2 as active inputs (DIR_EN=0) with U.FL1 output and U.FL2 input disabled. Each SMA/U.FL pair shares a physical signal path so only one pin per pair can be active at a time. U.FL pins still report frequency 0 after this fix: U.FL1 (output-only) is disabled by ICE_SMA1_TX_EN which keeps the TX output buffer off, and U.FL2 (input-only) is disabled by ICE_SMA2_UFL2_RX_DIS. They can be activated by changing the corresponding SMA pin direction via dpll netlink. Fixes: 2dd5d03c77e2 ("ice: redesign dpll sma/u.fl pins control") Signed-off-by: Petr Oros Reviewed-by: Ivan Vecera Reviewed-by: Arkadiusz Kubalewski Tested-by: Alexander Nowlin Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260427-jk-iwl-net-petr-oros-fixes-v1-7-cdcb48303fd8@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_dpll.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 62f75701d652..498ec2c045f3 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -4014,6 +4014,7 @@ static int ice_dpll_init_info_sw_pins(struct ice_pf *pf) struct ice_dpll_pin *pin; u32 phase_adj_max, caps; int i, ret; + u8 data; if (pf->hw.device_id == ICE_DEV_ID_E810C_QSFP) input_idx_offset = ICE_E810_RCLK_PINS_NUM; @@ -4073,6 +4074,22 @@ static int ice_dpll_init_info_sw_pins(struct ice_pf *pf) } ice_dpll_phase_range_set(&pin->prop.phase_range, phase_adj_max); } + + /* Initialize the SMA control register to a known-good default state. + * Without this write the PCA9575 GPIO expander retains its power-on + * default (all outputs high) which makes all SW pins appear inactive. + * Set SMA1 and SMA2 as active inputs, disable U.FL1 output and + * U.FL2 input. + */ + ret = ice_read_sma_ctrl(&pf->hw, &data); + if (ret) + return ret; + data &= ~ICE_ALL_SMA_MASK; + data |= ICE_SMA1_TX_EN | ICE_SMA2_TX_EN | ICE_SMA2_UFL2_RX_DIS; + ret = ice_write_sma_ctrl(&pf->hw, data); + if (ret) + return ret; + ret = ice_dpll_pin_state_update(pf, pin, ICE_DPLL_PIN_TYPE_SOFTWARE, NULL); if (ret) -- cgit v1.2.3 From 6f9d8393c9f50fbc68b9c9e99f78ca5a7b43ff44 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Mon, 27 Apr 2026 22:22:20 -0700 Subject: ice: fix SMA and U.FL pin state changes affecting paired pin SMA and U.FL pins share physical signal paths in pairs (SMA1/U.FL1 and SMA2/U.FL2) controlled by the PCA9575 GPIO expander. Each pair can only have one active pin at a time: SMA1 output and U.FL1 output share the same CGU output, SMA2 input and U.FL2 input share the same CGU input. The PCA9575 register bits determine which connector in each pair owns the signal path. The driver does not account for this pairing in two places: ice_dpll_ufl_pin_state_set() modifies PCA9575 bits and disables the backing CGU pin without checking whether the U.FL pin is currently active. Disconnecting an already inactive U.FL pin flips bits that the paired SMA pin relies on, breaking its connection. ice_dpll_sma_direction_set() does not propagate direction changes to the paired U.FL pin. For SMA2/U.FL2 the ICE_SMA2_UFL2_RX_DIS bit is never managed, so U.FL2 stays disconnected after SMA2 switches to output. For both pairs the backing CGU pin of the U.FL side is never enabled when a direction change activates it, so userspace sees the pin as disconnected even though the routing is correct. Fix by guarding the U.FL disconnect path against inactive pins and by updating the paired U.FL pin fully on SMA direction changes: manage ICE_SMA2_UFL2_RX_DIS for the SMA2/U.FL2 pair and enable the backing CGU pin whenever the peer becomes active. Fixes: 2dd5d03c77e2 ("ice: redesign dpll sma/u.fl pins control") Signed-off-by: Petr Oros Tested-by: Alexander Nowlin Reviewed-by: Arkadiusz Kubalewski Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260427-jk-iwl-net-petr-oros-fixes-v1-8-cdcb48303fd8@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_dpll.c | 50 ++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 498ec2c045f3..3f8cd5b8298b 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -1171,6 +1171,8 @@ static int ice_dpll_sma_direction_set(struct ice_dpll_pin *p, enum dpll_pin_direction direction, struct netlink_ext_ack *extack) { + struct ice_dplls *d = &p->pf->dplls; + struct ice_dpll_pin *peer; u8 data; int ret; @@ -1189,8 +1191,9 @@ static int ice_dpll_sma_direction_set(struct ice_dpll_pin *p, case ICE_DPLL_PIN_SW_2_IDX: if (direction == DPLL_PIN_DIRECTION_INPUT) { data &= ~ICE_SMA2_DIR_EN; + data |= ICE_SMA2_UFL2_RX_DIS; } else { - data &= ~ICE_SMA2_TX_EN; + data &= ~(ICE_SMA2_TX_EN | ICE_SMA2_UFL2_RX_DIS); data |= ICE_SMA2_DIR_EN; } break; @@ -1202,6 +1205,34 @@ static int ice_dpll_sma_direction_set(struct ice_dpll_pin *p, ret = ice_dpll_pin_state_update(p->pf, p, ICE_DPLL_PIN_TYPE_SOFTWARE, extack); + if (ret) + return ret; + + /* When a direction change activates the paired U.FL pin, enable + * its backing CGU pin so the pin reports as connected. Without + * this the U.FL routing is correct but the CGU pin stays disabled + * and userspace sees the pin as disconnected. Do not disable the + * backing pin when U.FL becomes inactive because the SMA pin may + * still be using it. + */ + peer = &d->ufl[p->idx]; + if (peer->active) { + struct ice_dpll_pin *target; + enum ice_dpll_pin_type type; + + if (peer->output) { + target = peer->output; + type = ICE_DPLL_PIN_TYPE_OUTPUT; + } else { + target = peer->input; + type = ICE_DPLL_PIN_TYPE_INPUT; + } + ret = ice_dpll_pin_enable(&p->pf->hw, target, + d->eec.dpll_idx, type, extack); + if (!ret) + ret = ice_dpll_pin_state_update(p->pf, target, + type, extack); + } return ret; } @@ -1253,6 +1284,14 @@ ice_dpll_ufl_pin_state_set(const struct dpll_pin *pin, void *pin_priv, data &= ~ICE_SMA1_MASK; enable = true; } else if (state == DPLL_PIN_STATE_DISCONNECTED) { + /* Skip if U.FL1 is not active, setting TX_EN + * while DIR_EN is set would also deactivate + * the paired SMA1 output. + */ + if (data & (ICE_SMA1_DIR_EN | ICE_SMA1_TX_EN)) { + ret = 0; + goto unlock; + } data |= ICE_SMA1_TX_EN; enable = false; } else { @@ -1267,6 +1306,15 @@ ice_dpll_ufl_pin_state_set(const struct dpll_pin *pin, void *pin_priv, data &= ~ICE_SMA2_UFL2_RX_DIS; enable = true; } else if (state == DPLL_PIN_STATE_DISCONNECTED) { + /* Skip if U.FL2 is not active, setting + * UFL2_RX_DIS could also disable the paired + * SMA2 input. + */ + if (!(data & ICE_SMA2_DIR_EN) || + (data & ICE_SMA2_UFL2_RX_DIS)) { + ret = 0; + goto unlock; + } data |= ICE_SMA2_UFL2_RX_DIS; enable = false; } else { -- cgit v1.2.3 From 620055cb1036a6125fd912e7a14b47a6572b809b Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Mon, 27 Apr 2026 22:22:21 -0700 Subject: dpll: export __dpll_pin_change_ntf() for use under dpll_lock Export __dpll_pin_change_ntf() so that drivers can send pin change notifications from within pin callbacks, which are already called under dpll_lock. Using dpll_pin_change_ntf() in that context would deadlock. Add lockdep_assert_held() to catch misuse without the lock held. Acked-by: Vadim Fedorenko Signed-off-by: Ivan Vecera Signed-off-by: Petr Oros Tested-by: Alexander Nowlin Reviewed-by: Arkadiusz Kubalewski Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260427-jk-iwl-net-petr-oros-fixes-v1-9-cdcb48303fd8@intel.com Signed-off-by: Paolo Abeni --- drivers/dpll/dpll_netlink.c | 10 ++++++++++ drivers/dpll/dpll_netlink.h | 2 -- include/linux/dpll.h | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index af7ce62ec55c..0ff1658c2dc1 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -900,11 +900,21 @@ int dpll_pin_delete_ntf(struct dpll_pin *pin) return dpll_pin_event_send(DPLL_CMD_PIN_DELETE_NTF, pin); } +/** + * __dpll_pin_change_ntf - notify that the pin has been changed + * @pin: registered pin pointer + * + * Context: caller must hold dpll_lock. Suitable for use inside pin + * callbacks which are already invoked under dpll_lock. + * Return: 0 if succeeds, error code otherwise. + */ int __dpll_pin_change_ntf(struct dpll_pin *pin) { + lockdep_assert_held(&dpll_lock); dpll_pin_notify(pin, DPLL_PIN_CHANGED); return dpll_pin_event_send(DPLL_CMD_PIN_CHANGE_NTF, pin); } +EXPORT_SYMBOL_GPL(__dpll_pin_change_ntf); /** * dpll_pin_change_ntf - notify that the pin has been changed diff --git a/drivers/dpll/dpll_netlink.h b/drivers/dpll/dpll_netlink.h index dd28b56d27c5..a9cfd55f57fc 100644 --- a/drivers/dpll/dpll_netlink.h +++ b/drivers/dpll/dpll_netlink.h @@ -11,5 +11,3 @@ int dpll_device_delete_ntf(struct dpll_device *dpll); int dpll_pin_create_ntf(struct dpll_pin *pin); int dpll_pin_delete_ntf(struct dpll_pin *pin); - -int __dpll_pin_change_ntf(struct dpll_pin *pin); diff --git a/include/linux/dpll.h b/include/linux/dpll.h index b7277a8b484d..f8037f1ab20b 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -286,6 +286,7 @@ int dpll_pin_ref_sync_pair_add(struct dpll_pin *pin, int dpll_device_change_ntf(struct dpll_device *dpll); +int __dpll_pin_change_ntf(struct dpll_pin *pin); int dpll_pin_change_ntf(struct dpll_pin *pin); int register_dpll_notifier(struct notifier_block *nb); -- cgit v1.2.3 From 1a41b58fd4dc80dca16c717e6e77c88b9d4e83a7 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Mon, 27 Apr 2026 22:22:22 -0700 Subject: ice: fix missing dpll notifications for SW pins The SMA/U.FL pin redesign (commit 2dd5d03c77e2 ("ice: redesign dpll sma/u.fl pins control")) introduced software-controlled pins that wrap backing CGU input/output pins, but never updated the notification and data paths to propagate pin events to these SW wrappers. The periodic work sends dpll_pin_change_ntf() only for direct CGU input pins. SW pins that wrap these inputs never receive change or phase offset notifications, so userspace consumers such as synce4l monitoring SMA pins via dpll netlink never learn about state transitions or phase offset updates. Similarly, ice_dpll_phase_offset_get() reads the SW pin's own phase_offset field which is never updated; the PPS monitor writes to the backing CGU input's field instead. Fix by introducing ice_dpll_pin_ntf(), a wrapper around dpll_pin_change_ntf() that also notifies any registered SMA/U.FL pin whose backing CGU input matches. Replace all direct dpll_pin_change_ntf() calls in the periodic notification paths with this wrapper. Fix ice_dpll_phase_offset_get() to return the backing CGU input's phase_offset for input-direction SW pins. Fixes: 2dd5d03c77e2 ("ice: redesign dpll sma/u.fl pins control") Signed-off-by: Petr Oros Tested-by: Alexander Nowlin Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Aleksandr Loktionov Reviewed-by: Ivan Vecera Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260427-jk-iwl-net-petr-oros-fixes-v1-10-cdcb48303fd8@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_dpll.c | 47 +++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 3f8cd5b8298b..721a3f4d6a28 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -1963,7 +1963,10 @@ ice_dpll_phase_offset_get(const struct dpll_pin *pin, void *pin_priv, d->active_input == p->input->pin)) *phase_offset = d->phase_offset * ICE_DPLL_PHASE_OFFSET_FACTOR; else if (d->phase_offset_monitor_period) - *phase_offset = p->phase_offset * ICE_DPLL_PHASE_OFFSET_FACTOR; + *phase_offset = (p->input && + p->direction == DPLL_PIN_DIRECTION_INPUT ? + p->input->phase_offset : + p->phase_offset) * ICE_DPLL_PHASE_OFFSET_FACTOR; else *phase_offset = 0; mutex_unlock(&pf->dplls.lock); @@ -2657,6 +2660,27 @@ static u64 ice_generate_clock_id(struct ice_pf *pf) return pci_get_dsn(pf->pdev); } +/** + * ice_dpll_pin_ntf - notify pin change including any SW pin wrappers + * @dplls: pointer to dplls struct + * @pin: the dpll_pin that changed + * + * Send a change notification for @pin and for any registered SMA/U.FL pin + * whose backing CGU input matches @pin. + */ +static void ice_dpll_pin_ntf(struct ice_dplls *dplls, struct dpll_pin *pin) +{ + dpll_pin_change_ntf(pin); + for (int i = 0; i < ICE_DPLL_PIN_SW_NUM; i++) { + if (dplls->sma[i].pin && dplls->sma[i].input && + dplls->sma[i].input->pin == pin) + dpll_pin_change_ntf(dplls->sma[i].pin); + if (dplls->ufl[i].pin && dplls->ufl[i].input && + dplls->ufl[i].input->pin == pin) + dpll_pin_change_ntf(dplls->ufl[i].pin); + } +} + /** * ice_dpll_notify_changes - notify dpll subsystem about changes * @d: pointer do dpll @@ -2665,6 +2689,7 @@ static u64 ice_generate_clock_id(struct ice_pf *pf) */ static void ice_dpll_notify_changes(struct ice_dpll *d) { + struct ice_dplls *dplls = &d->pf->dplls; bool pin_notified = false; if (d->prev_dpll_state != d->dpll_state) { @@ -2673,17 +2698,17 @@ static void ice_dpll_notify_changes(struct ice_dpll *d) } if (d->prev_input != d->active_input) { if (d->prev_input) - dpll_pin_change_ntf(d->prev_input); + ice_dpll_pin_ntf(dplls, d->prev_input); d->prev_input = d->active_input; if (d->active_input) { - dpll_pin_change_ntf(d->active_input); + ice_dpll_pin_ntf(dplls, d->active_input); pin_notified = true; } } if (d->prev_phase_offset != d->phase_offset) { d->prev_phase_offset = d->phase_offset; if (!pin_notified && d->active_input) - dpll_pin_change_ntf(d->active_input); + ice_dpll_pin_ntf(dplls, d->active_input); } } @@ -2712,6 +2737,7 @@ static bool ice_dpll_is_pps_phase_monitor(struct ice_pf *pf) /** * ice_dpll_pins_notify_mask - notify dpll subsystem about bulk pin changes + * @dplls: pointer to dplls struct * @pins: array of ice_dpll_pin pointers registered within dpll subsystem * @pin_num: number of pins * @phase_offset_ntf_mask: bitmask of pin indexes to notify @@ -2721,15 +2747,14 @@ static bool ice_dpll_is_pps_phase_monitor(struct ice_pf *pf) * * Context: Must be called while pf->dplls.lock is released. */ -static void ice_dpll_pins_notify_mask(struct ice_dpll_pin *pins, +static void ice_dpll_pins_notify_mask(struct ice_dplls *dplls, + struct ice_dpll_pin *pins, u8 pin_num, u32 phase_offset_ntf_mask) { - int i = 0; - - for (i = 0; i < pin_num; i++) - if (phase_offset_ntf_mask & (1 << i)) - dpll_pin_change_ntf(pins[i].pin); + for (int i = 0; i < pin_num; i++) + if (phase_offset_ntf_mask & BIT(i)) + ice_dpll_pin_ntf(dplls, pins[i].pin); } /** @@ -2905,7 +2930,7 @@ static void ice_dpll_periodic_work(struct kthread_work *work) ice_dpll_notify_changes(de); ice_dpll_notify_changes(dp); if (phase_offset_ntf) - ice_dpll_pins_notify_mask(d->inputs, d->num_inputs, + ice_dpll_pins_notify_mask(d, d->inputs, d->num_inputs, phase_offset_ntf); resched: -- cgit v1.2.3 From 9e5dead140af10e8b5f975b8f04e46197d48d274 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Mon, 27 Apr 2026 22:22:23 -0700 Subject: ice: add dpll peer notification for paired SMA and U.FL pins SMA and U.FL pins share physical signal paths in pairs (SMA1/U.FL1 and SMA2/U.FL2). When one pin's state changes via a PCA9575 GPIO write, the paired pin's state also changes, but no notification is sent for the peer pin. Userspace consumers monitoring the peer via dpll netlink subscribe never learn about the update. Add ice_dpll_sw_pin_notify_peer() which sends a change notification for the paired SW pin. Call it from ice_dpll_pin_sma_direction_set(), ice_dpll_sma_pin_state_set(), and ice_dpll_ufl_pin_state_set() after pf->dplls.lock is released. Use __dpll_pin_change_ntf() because dpll_lock is still held by the dpll netlink layer (dpll_pin_pre_doit). Fixes: 2dd5d03c77e2 ("ice: redesign dpll sma/u.fl pins control") Signed-off-by: Petr Oros Tested-by: Alexander Nowlin Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260427-jk-iwl-net-petr-oros-fixes-v1-11-cdcb48303fd8@intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_dpll.c | 32 +++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 721a3f4d6a28..27b460926bac 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -1154,6 +1154,32 @@ ice_dpll_input_state_get(const struct dpll_pin *pin, void *pin_priv, extack, ICE_DPLL_PIN_TYPE_INPUT); } +/** + * ice_dpll_sw_pin_notify_peer - notify the paired SW pin after a state change + * @d: pointer to dplls struct + * @changed: the SW pin that was explicitly changed (already notified by dpll core) + * + * SMA and U.FL pins share physical signal paths in pairs (SMA1/U.FL1 and + * SMA2/U.FL2). When one pin's routing changes via the PCA9575 GPIO + * expander, the paired pin's state may also change. Send a change + * notification for the peer pin so userspace consumers monitoring the + * peer via dpll netlink learn about the update. + * + * Context: Called from dpll_pin_ops callbacks after pf->dplls.lock is + * released. Uses __dpll_pin_change_ntf() because dpll_lock is + * still held by the dpll netlink layer. + */ +static void ice_dpll_sw_pin_notify_peer(struct ice_dplls *d, + struct ice_dpll_pin *changed) +{ + struct ice_dpll_pin *peer; + + peer = (changed >= d->sma && changed < d->sma + ICE_DPLL_PIN_SW_NUM) ? + &d->ufl[changed->idx] : &d->sma[changed->idx]; + if (peer->pin) + __dpll_pin_change_ntf(peer->pin); +} + /** * ice_dpll_sma_direction_set - set direction of SMA pin * @p: pointer to a pin @@ -1344,6 +1370,8 @@ ice_dpll_ufl_pin_state_set(const struct dpll_pin *pin, void *pin_priv, unlock: mutex_unlock(&pf->dplls.lock); + if (!ret) + ice_dpll_sw_pin_notify_peer(&pf->dplls, p); return ret; } @@ -1462,6 +1490,8 @@ ice_dpll_sma_pin_state_set(const struct dpll_pin *pin, void *pin_priv, unlock: mutex_unlock(&pf->dplls.lock); + if (!ret) + ice_dpll_sw_pin_notify_peer(&pf->dplls, sma); return ret; } @@ -1657,6 +1687,8 @@ ice_dpll_pin_sma_direction_set(const struct dpll_pin *pin, void *pin_priv, mutex_lock(&pf->dplls.lock); ret = ice_dpll_sma_direction_set(p, direction, extack); mutex_unlock(&pf->dplls.lock); + if (!ret) + ice_dpll_sw_pin_notify_peer(&pf->dplls, p); return ret; } -- cgit v1.2.3 From 58689498ca3384851145a754dbb1d8ed1cf9fb54 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 28 Apr 2026 16:15:59 -0700 Subject: net: tls: fix strparser anchor skb leak on offload RX setup failure When tls_set_device_offload_rx() fails at tls_dev_add(), the error path calls tls_sw_free_resources_rx() to clean up the SW context that was initialized by tls_set_sw_offload(). This function calls tls_sw_release_resources_rx() (which stops the strparser via tls_strp_stop()) and tls_sw_free_ctx_rx() (which kfrees the context), but never frees the anchor skb that was allocated by alloc_skb(0) in tls_strp_init(). Note that tls_sw_free_resources_rx() is exclusively used for this "failed to start offload" code path, there's no other caller. The leak did not exist before commit 84c61fe1a75b ("tls: rx: do not use the standard strparser"), because the standard strparser doesn't try to pre-allocate an skb. The normal close path in tls_sk_proto_close() handles cleanup by calling tls_sw_strparser_done() (which calls tls_strp_done()) after dropping the socket lock, because tls_strp_done() does cancel_work_sync() and the strparser work handler takes the socket lock. Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser") Signed-off-by: Jakub Kicinski Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20260428231559.1358502-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/tls/tls.h | 1 + net/tls/tls_strp.c | 6 ++++++ net/tls/tls_sw.c | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/net/tls/tls.h b/net/tls/tls.h index e8f81a006520..12f44cb649c9 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -188,6 +188,7 @@ int tls_strp_dev_init(void); void tls_strp_dev_exit(void); void tls_strp_done(struct tls_strparser *strp); +void __tls_strp_done(struct tls_strparser *strp); void tls_strp_stop(struct tls_strparser *strp); int tls_strp_init(struct tls_strparser *strp, struct sock *sk); void tls_strp_data_ready(struct tls_strparser *strp); diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 98e12f0ff57e..c72e88317627 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -624,6 +624,12 @@ void tls_strp_done(struct tls_strparser *strp) WARN_ON(!strp->stopped); cancel_work_sync(&strp->work); + __tls_strp_done(strp); +} + +/* For setup error paths where the strparser was initialized but never armed. */ +void __tls_strp_done(struct tls_strparser *strp) +{ tls_strp_anchor_free(strp); } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 94d2ae0daa8c..798243eabb1f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2624,8 +2624,12 @@ void tls_sw_free_ctx_rx(struct tls_context *tls_ctx) void tls_sw_free_resources_rx(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx; + + ctx = tls_sw_ctx_rx(tls_ctx); tls_sw_release_resources_rx(sk); + __tls_strp_done(&ctx->strp); tls_sw_free_ctx_rx(tls_ctx); } -- cgit v1.2.3 From 051ffb001b8a232cfa6e72f38bb5f51c4270a60b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 29 Apr 2026 09:48:17 +0300 Subject: sfc: fix error code in efx_devlink_info_running_versions() Return -EIO if efx_mcdi_rpc() doesn't return enough space. Fixes: 14743ddd2495 ("sfc: add devlink info support for ef100") Signed-off-by: Dan Carpenter Reviewed-by: Edward Cree Link: https://patch.msgid.link/afGpsbLRHL4_H0KS@stanley.mountain Signed-off-by: Paolo Abeni --- drivers/net/ethernet/sfc/efx_devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c index d842c60dfc10..e5c6f81af48b 100644 --- a/drivers/net/ethernet/sfc/efx_devlink.c +++ b/drivers/net/ethernet/sfc/efx_devlink.c @@ -531,7 +531,7 @@ static int efx_devlink_info_running_versions(struct efx_nic *efx, if (rc || outlength < MC_CMD_GET_VERSION_OUT_LEN) { netif_err(efx, drv, efx->net_dev, "mcdi MC_CMD_GET_VERSION failed\n"); - return rc; + return rc ?: -EIO; } /* Handle previous output */ -- cgit v1.2.3 From 1e01abec856593e02cd69fd95b784c10dd46880c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 29 Apr 2026 09:39:11 +0200 Subject: net/sched: cls_flower: revert unintended changes While applying the blamed commit 4ca07b9239bd ("net: mctp i2c: check length before marking flow active"), I unintentionally included unrelated and unacceptable changes. Revert them. Fixes: 4ca07b9239bd ("net: mctp i2c: check length before marking flow active") Reported-by: Jeremy Kerr Closes: https://lore.kernel.org/netdev/bd8704fe0bd53e278add5cde4873256656623e2e.camel@codeconstruct.com.au/ Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/043026a53ff84da88b17648c4b0d17f0331749cb.1777447863.git.pabeni@redhat.com Signed-off-by: Paolo Abeni --- net/sched/cls_flower.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index b9672ea05747..88f8a32fab2b 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -556,7 +556,6 @@ static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, struct netlink_ext_ack *extack) { struct cls_fl_head *head = fl_head_dereference(tp); - struct fl_flow_mask *mask; *last = false; @@ -573,12 +572,11 @@ static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, list_del_rcu(&f->list); spin_unlock(&tp->lock); - mask = f->mask; + *last = fl_mask_put(head, f->mask); if (!tc_skip_hw(f->flags)) fl_hw_destroy_filter(tp, f, rtnl_held, extack); tcf_unbind_filter(tp, &f->res); __fl_put(f); - *last = fl_mask_put(head, mask); return 0; } -- cgit v1.2.3 From 7dd57d7a6350770dfc283287125c409e995200e0 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Thu, 30 Apr 2026 11:56:44 +0200 Subject: accel/ivpu: Disallow re-exporting imported GEM objects Prevent re-exporting of imported GEM buffers by adding a custom prime_handle_to_fd callback that checks if the object is imported and returns -EOPNOTSUPP if so. Re-exporting imported GEM buffers causes loss of buffer flags settings, leading to incorrect device access and data corruption. Reported-by: Yametsu Fixes: 57557964b582 ("accel/ivpu: Add support for userptr buffer objects") Reviewed-by: Andrzej Kacprowski Signed-off-by: Karol Wachowski Cc: # v6.19+ --- drivers/accel/ivpu/ivpu_drv.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 2801378e3e19..3b7b008bccfe 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -537,6 +537,26 @@ static const struct file_operations ivpu_fops = { #endif }; +static int ivpu_gem_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv, + u32 handle, u32 flags, int *prime_fd) +{ + struct drm_gem_object *obj; + + obj = drm_gem_object_lookup(file_priv, handle); + if (!obj) + return -ENOENT; + + if (drm_gem_is_imported(obj)) { + /* Do not allow re-exporting */ + drm_gem_object_put(obj); + return -EOPNOTSUPP; + } + + drm_gem_object_put(obj); + + return drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd); +} + static const struct drm_driver driver = { .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL, @@ -545,6 +565,7 @@ static const struct drm_driver driver = { .gem_create_object = ivpu_gem_create_object, .gem_prime_import = ivpu_gem_prime_import, + .prime_handle_to_fd = ivpu_gem_prime_handle_to_fd, .ioctls = ivpu_drm_ioctls, .num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls), -- cgit v1.2.3 From d3b7a868f1aeb6a43de880a3709ef3a0341f2c2a Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Wed, 29 Apr 2026 08:20:56 -0500 Subject: platform/wmi: Fix unchecked min_size in wmidev_invoke_method() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After calling wmidev_evaluate_method(), if the ACPI core does not return an out object, then wmidev_invoke_method() bypasses the min_size check and returns 0. Add a check for min_size if there is not an out object. Fixes: 1aeded2f55f0 ("platform/wmi: Extend wmidev_query_block() to reject undersized data") Closes: https://sashiko.dev/#/patchset/20260406203237.2970-1-W_Armin%40gmx.de Signed-off-by: Kurt Borja Reviewed-by: Armin Wolf Link: https://patch.msgid.link/20260429-invoke-fix-v1-1-ce938eb80cd3@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/wmi/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/wmi/core.c b/drivers/platform/wmi/core.c index 7aa40dab6145..5a2ffcbab6af 100644 --- a/drivers/platform/wmi/core.c +++ b/drivers/platform/wmi/core.c @@ -411,6 +411,9 @@ int wmidev_invoke_method(struct wmi_device *wdev, u8 instance, u32 method_id, obj = aout.pointer; if (!obj) { + if (min_size != 0) + return -ENOMSG; + out->length = 0; out->data = ZERO_SIZE_PTR; -- cgit v1.2.3 From c2d4b76458c9ebf81eae2916970320f1f61ad002 Mon Sep 17 00:00:00 2001 From: Krishna Chomal Date: Wed, 29 Apr 2026 23:39:53 +0530 Subject: platform/x86: hp-wmi: silence unknown board warning for 8D41 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HP Omen Max 16-ah0xxx, DMI board ID 8D41 is currently marked with victus_s_thermal_params in the victus_s_thermal_profile_boards[] list. This disables thermal profile readback and adds a dmesg warning during driver init for "unknown board". After testing we know that (similar to another HP Omen Max 16 device, board ID 8D87), the embedded controller on this board does not expose thermal profile which means we have to intentionally disable EC readback. Changing its driver_data to omen_v1_no_ec_thermal_params is sufficient to silence the warning. Tested-by: Benjamin Y Signed-off-by: Krishna Chomal Link: https://patch.msgid.link/20260429180953.129885-1-krishna.chomal108@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index d1cc6e7d176c..24c151289dd3 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -243,7 +243,7 @@ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8D41") }, - .driver_data = (void *)&victus_s_thermal_params, + .driver_data = (void *)&omen_v1_no_ec_thermal_params, }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8D87") }, -- cgit v1.2.3 From 863810d4985ad214f70c1623f24384ccc850f2a2 Mon Sep 17 00:00:00 2001 From: Yufei CHENG Date: Mon, 27 Apr 2026 00:50:34 +0800 Subject: platform/x86: lenovo: wmi-other: Fix uninitialized variable in lwmi_om_hwmon_write() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the flag relax_fan_constraint is set, local variable 'raw' is never assigned, and lwmi_om_hwmon_write() will pass uninitialized value to lwmi_om_fan_get_set() resulting in undefined behavior. This flag allows user to bypass minimum fan RPM divisor rounding, but assignment to 'raw' only happens in the non-relaxed path. Fix by defaulting 'raw' to user provided 'val' in the else branch. Fixes: 51ed34282f63 ("platform/x86: lenovo-wmi-other: Add HWMON for fan reporting/tuning") Reviewed-by: Rong Zhang Signed-off-by: Yufei CHENG Link: https://patch.msgid.link/20260426165034.9073-1-cd345al@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/lenovo/wmi-other.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/lenovo/wmi-other.c b/drivers/platform/x86/lenovo/wmi-other.c index 6040f45aa2b0..6c2febe1a595 100644 --- a/drivers/platform/x86/lenovo/wmi-other.c +++ b/drivers/platform/x86/lenovo/wmi-other.c @@ -349,6 +349,8 @@ static int lwmi_om_hwmon_write(struct device *dev, enum hwmon_sensor_types type, */ if (!relax_fan_constraint) raw = val / LWMI_FAN_DIV * LWMI_FAN_DIV; + else + raw = val; err = lwmi_om_fan_get_set(priv, channel, &raw, true); if (err) -- cgit v1.2.3 From 17666e2d7592c3e85260cafd3950121524acc2c5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Apr 2026 14:29:02 -0600 Subject: io_uring/tw: serialize ctx->retry_llist with ->uring_lock The DEFER_TASKRUN local task work paths all run under ctx->uring_lock, which serializes them with each other and with the rest of the ring's hot paths. io_move_task_work_from_local() is the exception - it's called from io_ring_exit_work() on a kworker without holding the lock and from the iopoll cancelation side right after dropping it. ->work_llist is fine with this, as it's only ever updated via the expected paths. But the ->retry_llist is updated while runing, and hence it could potentially race between normal task_work running and the task-has-exited shutdown path. Simply grab ->uring_lock while moving the local work to the fallback list for exit purposes, which nicely serializes it across both the normal additions and the exit prune path. Cc: stable@vger.kernel.org Fixes: f46b9cdb22f7 ("io_uring: limit local tw done") Reported-by: Robert Femmer Reported-by: Christian Reitter Reported-by: Michael Rodler Signed-off-by: Jens Axboe --- io_uring/tw.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/io_uring/tw.c b/io_uring/tw.c index fdff81eebc95..023d5e6bc491 100644 --- a/io_uring/tw.c +++ b/io_uring/tw.c @@ -273,8 +273,18 @@ void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags) void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx) { - struct llist_node *node = llist_del_all(&ctx->work_llist); + struct llist_node *node; + /* + * Running the work items may utilize ->retry_llist as a means + * for capping the number of task_work entries run at the same + * time. But that list can potentially race with moving the work + * from here, if the task is exiting. As any normal task_work + * running holds ->uring_lock already, just guard this slow path + * with ->uring_lock to avoid racing on ->retry_llist. + */ + guard(mutex)(&ctx->uring_lock); + node = llist_del_all(&ctx->work_llist); __io_fallback_tw(node, false); node = llist_del_all(&ctx->retry_llist); __io_fallback_tw(node, false); -- cgit v1.2.3 From 99ebc509eef52675ae5bae86aa2a53e15594e4a3 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Wed, 29 Apr 2026 15:31:05 +0800 Subject: mm: memcontrol: fix rcu unbalance in get_non_dying_memcg_end() Currently, get_non_dying_memcg_start() and get_non_dying_memcg_end() both evaluate cgroup_subsys_on_dfl(memory_cgrp_subsys) independently to determine whether to acquire or release the RCU read lock. However, the result of cgroup_subsys_on_dfl() can change dynamically at runtime due to cgroup hierarchy rebinding (e.g., when the memory controller is moved between cgroup v1 and v2 hierarchies). This can cause the following warning: ===================================== WARNING: bad unlock balance detected! 7.0.0-next-20260420+ #83 Tainted: G W ------------------------------------- memcg-repro/270 is trying to release lock (rcu_read_lock) at: [] rcu_read_unlock+0x17/0x60 but there are no more locks to release! other info that might help us debug this: 1 lock held by memcg-repro/270: #0: ffff888102fa2088 (vm_lock){++++}-{0:0}, at: do_user_addr_fault+0x285/0x880 stack backtrace: CPU: 0 UID: 0 PID: 270 Comm: memcg-repro Tainted: G W 7.0.0-next-20260420+ # Tainted: [W]=WARN Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Call Trace: ? rcu_read_unlock+0x17/0x60 dump_stack_lvl+0x77/0xb0 print_unlock_imbalance_bug+0xe0/0xf0 ? rcu_read_unlock+0x17/0x60 lock_release+0x21d/0x2a0 rcu_read_unlock+0x1c/0x60 do_pte_missing+0x233/0xb40 __handle_mm_fault+0x80e/0xcd0 handle_mm_fault+0x146/0x310 do_user_addr_fault+0x303/0x880 exc_page_fault+0x9b/0x270 asm_exc_page_fault+0x26/0x30 RIP: 0033:0x5590e4eb41ea Code: 61 cc 66 0f 6f e0 66 0f 61 c2 66 0f db cd 66 0f 69 e2 66 0f 6f d0 66 0f 69 d4 66 0f 61 0 RSP: 002b:00007ffcad25f030 EFLAGS: 00010202 RAX: 00005590e4eb8010 RBX: 00007ffcad260f7d RCX: 00007f73c474d44d RDX: 00005590e4eb80a0 RSI: 00005590e4eb503c RDI: 000000000000000f RBP: 00005590e4eb70a0 R08: 0000000000000000 R09: 00007f73c483a680 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffcad25f180 R14: 00005590e4eb6dd8 R15: 00007f73c4869020 ------------[ cut here ]------------ Fix this by explicitly tracking the RCU lock state, ensuring that rcu_read_unlock() in get_non_dying_memcg_end() is strictly paired with the lock acquisition, regardless of any runtime rebinding events. Link: https://lore.kernel.org/20260429073105.44472-1-qi.zheng@linux.dev Fixes: 8285917d6f38 ("mm: memcontrol: prepare for reparenting non-hierarchical stats") Signed-off-by: Qi Zheng Acked-by: Shakeel Butt Reviewed-by: Muchun Song Cc: Johannes Weiner Cc: Michal Hocko Cc: Qi Zheng Cc: Roman Gushchin Signed-off-by: Andrew Morton --- mm/memcontrol.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c3d98ab41f1f..c03d4787d466 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -805,12 +805,17 @@ static long memcg_state_val_in_pages(int idx, long val) * Used in mod_memcg_state() and mod_memcg_lruvec_state() to avoid race with * reparenting of non-hierarchical state_locals. */ -static inline struct mem_cgroup *get_non_dying_memcg_start(struct mem_cgroup *memcg) +static inline struct mem_cgroup *get_non_dying_memcg_start(struct mem_cgroup *memcg, + bool *rcu_locked) { - if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) + /* Rebinding can cause this value to be changed at runtime */ + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) { + *rcu_locked = false; return memcg; + } rcu_read_lock(); + *rcu_locked = true; while (memcg_is_dying(memcg)) memcg = parent_mem_cgroup(memcg); @@ -818,20 +823,21 @@ static inline struct mem_cgroup *get_non_dying_memcg_start(struct mem_cgroup *me return memcg; } -static inline void get_non_dying_memcg_end(void) +static inline void get_non_dying_memcg_end(bool rcu_locked) { - if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) + if (!rcu_locked) return; rcu_read_unlock(); } #else -static inline struct mem_cgroup *get_non_dying_memcg_start(struct mem_cgroup *memcg) +static inline struct mem_cgroup *get_non_dying_memcg_start(struct mem_cgroup *memcg, + bool *rcu_locked) { return memcg; } -static inline void get_non_dying_memcg_end(void) +static inline void get_non_dying_memcg_end(bool rcu_locked) { } #endif @@ -865,12 +871,14 @@ static void __mod_memcg_state(struct mem_cgroup *memcg, void mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, int val) { + bool rcu_locked = false; + if (mem_cgroup_disabled()) return; - memcg = get_non_dying_memcg_start(memcg); + memcg = get_non_dying_memcg_start(memcg, &rcu_locked); __mod_memcg_state(memcg, idx, val); - get_non_dying_memcg_end(); + get_non_dying_memcg_end(rcu_locked); } #ifdef CONFIG_MEMCG_V1 @@ -933,14 +941,15 @@ static void mod_memcg_lruvec_state(struct lruvec *lruvec, struct pglist_data *pgdat = lruvec_pgdat(lruvec); struct mem_cgroup_per_node *pn; struct mem_cgroup *memcg; + bool rcu_locked = false; pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - memcg = get_non_dying_memcg_start(pn->memcg); + memcg = get_non_dying_memcg_start(pn->memcg, &rcu_locked); pn = memcg->nodeinfo[pgdat->node_id]; __mod_memcg_lruvec_state(pn, idx, val); - get_non_dying_memcg_end(); + get_non_dying_memcg_end(rcu_locked); } /** -- cgit v1.2.3 From 92a8b5e2eff6920bf815cd6a80b088ec3fdf01a3 Mon Sep 17 00:00:00 2001 From: Yuriy Padlyak Date: Thu, 30 Apr 2026 01:09:03 +0300 Subject: ALSA: hda/realtek: Fix speaker silence after S3 resume on Xiaomi Mi Laptop Pro 15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Xiaomi Mi Laptop Pro 15 (TM1905, subsystem 1d72:1905) ships with the Realtek ALC256 codec on Intel Comet Lake PCH-LP. After S3 resume the codec sets coefficient register 0x10 to 0x0220 instead of 0x0020 — bit 9 is erroneously set, which silences the internal speaker. Bluetooth and HDMI audio are unaffected because they use different paths. This is the same mechanism fixed for Clevo NJ51CU by commit edca7cc4b0ac ("ALSA: hda/realtek: Fix quirk for Clevo NJ51CU"), but the existing ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME also reconfigures pin 0x19 as a front mic, which is wrong for this Xiaomi where pin 0x19 default is 0x411111f0 (disabled). Add a minimal fixup that only clears the stuck coef bit, and add the Xiaomi SSID to the quirk table. Verified by reading coef 0x10 with hda-verb after resume (returns 0x0220), writing 0x0020, and confirming the internal speaker resumes output. With this fixup applied the bit is cleared on every codec init, including post-resume. Signed-off-by: Yuriy Padlyak Cc: Tested-by: Yuriy Padlyak Link: https://patch.msgid.link/20260429220903.14918-1-yuriypadlyak@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index a9cd03bb73c4..9a5747c2e359 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -3390,6 +3390,19 @@ static void alc256_fixup_mic_no_presence_and_resume(struct hda_codec *codec, } } +static void alc256_fixup_xiaomi_pro15_resume(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + /* + * On the Xiaomi Mi Laptop Pro 15 (TM1905, SSID 1d72:1905) the ALC256 + * codec sets coefficient 0x10 bit 9 to 1 after S3 resume, silencing + * the internal speaker. Bluetooth and HDMI audio are unaffected. + * Clear the bit so the speaker keeps working across suspend cycles. + */ + alc_update_coef_idx(codec, 0x10, 1<<9, 0); +} + static void alc256_decrease_headphone_amp_val(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -4052,6 +4065,7 @@ enum { ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE, ALC233_FIXUP_NO_AUDIO_JACK, ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME, + ALC256_FIXUP_XIAOMI_PRO15_RESUME, ALC285_FIXUP_LEGION_Y9000X_SPEAKERS, ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE, ALC287_FIXUP_LEGION_16ACHG6, @@ -6240,6 +6254,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC }, + [ALC256_FIXUP_XIAOMI_PRO15_RESUME] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc256_fixup_xiaomi_pro15_resume, + }, [ALC287_FIXUP_LEGION_16ACHG6] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_legion_16achg6_speakers, @@ -7774,6 +7792,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1905, "Xiaomi Mi Laptop Pro 15", ALC256_FIXUP_XIAOMI_PRO15_RESUME), SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1e39, 0xca14, "MEDION NM14LNL", ALC233_FIXUP_MEDION_MTL_SPK), -- cgit v1.2.3 From 0bf00859d7a5ab685901c36f29df063b825cfaaa Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Tue, 28 Apr 2026 12:25:46 +0200 Subject: netfilter: nf_socket: skip socket lookup for non-first fragments Both nft_socket and xt_socket relies on L4 headers to perform socket lookup in the slow path. For fragmented packets, while the IP protocol remains constant across all fragments, only the first fragment contains the actual L4 header. As the expression/match could be attached to a chain with a priority lower than -400, it could bypass defragmentation. Add a check for fragmentation in the lookup functions directly so the problem is handled for both nft_socket and xt_socket at the same time. In addition, future users of the functions would not need to care about this. Fixes: 902d6a4c2a4f ("netfilter: nf_defrag: Skip defrag if NOTRACK is set") Fixes: 554ced0a6e29 ("netfilter: nf_tables: add support for native socket matching") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_socket_ipv4.c | 3 +++ net/ipv6/netfilter/nf_socket_ipv6.c | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c index 5080fa5fbf6a..f9c6755f5ec5 100644 --- a/net/ipv4/netfilter/nf_socket_ipv4.c +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -94,6 +94,9 @@ struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, #endif int doff = 0; + if (ntohs(iph->frag_off) & IP_OFFSET) + return NULL; + if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { struct tcphdr _hdr; struct udphdr *hp; diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c index ced8bd44828e..893f2aeb4711 100644 --- a/net/ipv6/netfilter/nf_socket_ipv6.c +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -100,6 +100,7 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, const struct in6_addr *daddr = NULL, *saddr = NULL; struct ipv6hdr *iph = ipv6_hdr(skb), ipv6_var; struct sk_buff *data_skb = NULL; + unsigned short fragoff = 0; int doff = 0; int thoff = 0, tproto; #if IS_ENABLED(CONFIG_NF_CONNTRACK) @@ -107,8 +108,8 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, struct nf_conn const *ct; #endif - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) { + tproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL); + if (tproto < 0 || fragoff) { pr_debug("unable to find transport header in IPv6 packet, dropping\n"); return NULL; } -- cgit v1.2.3 From 009d203e56dbe8db2589455b9e3644955f30313a Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Tue, 28 Apr 2026 12:25:47 +0200 Subject: netfilter: nf_tables: skip L4 header parsing for non-first fragments The tproxy, osf and exthdr (SCTP) expressions rely on the presence of transport layer headers to perform socket lookups, fingerprint matching, or chunk extraction. For fragmented packets, while the IP protocol remains constant across all fragments, only the first fragment contains the actual L4 header. The expressions could be attached to a chain with a priority lower than -400, bypassing defragmentation. Or could be used in stateless environments where defragmentation is not happening at all. This could result in garbage data being used for the matching. Add a check for pkt->fragoff so only unfragmented packets or the first fragment is processed. Fixes: 133dc203d77d ("netfilter: nft_exthdr: Support SCTP chunks") Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support") Fixes: b96af92d6eaf ("netfilter: nf_tables: implement Passive OS fingerprint module in nft_osf") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 2 +- net/netfilter/nft_exthdr.c | 2 +- net/netfilter/nft_osf.c | 2 +- net/netfilter/nft_tproxy.c | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 5ddd5b6e135f..8ab186f86dd4 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -153,7 +153,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) ptr = skb_network_header(skb) + pkt->nhoff; else { - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) return false; ptr = skb->data + nft_thoff(pkt); } diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 0407d6f708ae..e6a07c0df207 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -376,7 +376,7 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr, const struct sctp_chunkhdr *sch; struct sctp_chunkhdr _sch; - if (pkt->tprot != IPPROTO_SCTP) + if (pkt->tprot != IPPROTO_SCTP || pkt->fragoff) goto err; do { diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index c02d5cb52143..45fe56da5044 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -33,7 +33,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, return; } - if (pkt->tprot != IPPROTO_TCP) { + if (pkt->tprot != IPPROTO_TCP || pkt->fragoff) { regs->verdict.code = NFT_BREAK; return; } diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index f2101af8c867..89be443734f6 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -30,8 +30,8 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr, __be16 tport = 0; struct sock *sk; - if (pkt->tprot != IPPROTO_TCP && - pkt->tprot != IPPROTO_UDP) { + if ((pkt->tprot != IPPROTO_TCP && + pkt->tprot != IPPROTO_UDP) || pkt->fragoff) { regs->verdict.code = NFT_BREAK; return; } @@ -97,8 +97,8 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr, memset(&taddr, 0, sizeof(taddr)); - if (pkt->tprot != IPPROTO_TCP && - pkt->tprot != IPPROTO_UDP) { + if ((pkt->tprot != IPPROTO_TCP && + pkt->tprot != IPPROTO_UDP) || pkt->fragoff) { regs->verdict.code = NFT_BREAK; return; } -- cgit v1.2.3 From 952e121c96137c73bd3e59bb20a93ef659376947 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Tue, 28 Apr 2026 12:25:48 +0200 Subject: netfilter: xtables: fix L4 header parsing for non-first fragments Multiple targets and matches relies on L4 header to operate. For fragmented packets, every fragment carries the transport protocol identifier, but only the first fragment contains the L4 header. As the 'raw' table can be configured to run at priority -450 (before defragmentation at -400), the target/match can be reached before reassembly. In this case, non-first fragments have their payload incorrectly parsed as a TCP/UDP header. This would be of course a misconfiguration scenario. In most of the cases this just lead to a unreliable behavior for fragmented traffic. Add a fragment check to ensure target/match only evaluates unfragmented packets or the first fragment in the stream. Fixes: 902d6a4c2a4f ("netfilter: nf_defrag: Skip defrag if NOTRACK is set") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_TPROXY.c | 11 +++++++++-- net/netfilter/xt_ecn.c | 4 ++++ net/netfilter/xt_hashlimit.c | 4 +++- net/netfilter/xt_osf.c | 3 +++ net/netfilter/xt_tcpmss.c | 4 ++++ 5 files changed, 23 insertions(+), 3 deletions(-) diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index e4bea1d346cf..5f60e7298a1e 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -86,6 +86,9 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info *tgi = par->targinfo; + if (par->fragoff) + return NF_DROP; + return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); } @@ -95,6 +98,9 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; + if (par->fragoff) + return NF_DROP; + return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); } @@ -106,6 +112,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; + unsigned short fragoff = 0; struct udphdr _hdr, *hp; struct sock *sk; const struct in6_addr *laddr; @@ -113,8 +120,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) int thoff = 0; int tproto; - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) + tproto = ipv6_find_hdr(skb, &thoff, -1, &fragoff, NULL); + if (tproto < 0 || fragoff) return NF_DROP; hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c index b96e8203ac54..a8503f5d26bf 100644 --- a/net/netfilter/xt_ecn.c +++ b/net/netfilter/xt_ecn.c @@ -30,6 +30,10 @@ static bool match_tcp(const struct sk_buff *skb, struct xt_action_param *par) struct tcphdr _tcph; const struct tcphdr *th; + /* this is fine for IPv6 as ecn_mt_check6() enforces -p tcp */ + if (par->fragoff) + return false; + /* In practice, TCP match does this, so can't fail. But let's * be good citizens. */ diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 3bd127bfc114..2704b4b60d1e 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -658,6 +658,8 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, if (!(hinfo->cfg.mode & (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT))) return 0; + if (ntohs(ip_hdr(skb)->frag_off) & IP_OFFSET) + return -1; nexthdr = ip_hdr(skb)->protocol; break; #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) @@ -681,7 +683,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, return 0; nexthdr = ipv6_hdr(skb)->nexthdr; protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); - if ((int)protoff < 0) + if ((int)protoff < 0 || ntohs(frag_off) & IP6_OFFSET) return -1; break; } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index dc9485854002..e8807caede68 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -27,6 +27,9 @@ static bool xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) { + if (p->fragoff) + return false; + return nf_osf_match(skb, xt_family(p), xt_hooknum(p), xt_in(p), xt_out(p), p->matchinfo, xt_net(p), nf_osf_fingers); } diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index 0d32d4841cb3..b9da8269161d 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -32,6 +32,10 @@ tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par) u8 _opt[15 * 4 - sizeof(_tcph)]; unsigned int i, optlen; + /* this is fine for IPv6 as xt_tcpmss enforces -p tcp */ + if (par->fragoff) + return false; + /* If we don't have the whole header, drop packet. */ th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) -- cgit v1.2.3 From ef4f741e8627512cb8c82f59a1fc7aacd854aadf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 30 Apr 2026 16:49:48 +0200 Subject: netfilter: flowtable: ensure sufficient headroom in xmit path Check for headroom and call skb_expand_head() like in the IP output path to ensure there is sufficient headroom for the mac header when forwarding this packet as suggested by sashiko. Fixes: b5964aac51e0 ("netfilter: flowtable: consolidate xmit path") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index dbd7644fdbeb..8d5fb7e940a1 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -471,8 +471,17 @@ struct nf_flow_xmit { static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, struct nf_flow_xmit *xmit) { - skb->dev = xmit->outdev; - dev_hard_header(skb, skb->dev, ntohs(skb->protocol), + struct net_device *dev = xmit->outdev; + unsigned int hh_len = LL_RESERVED_SPACE(dev); + + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { + skb = skb_expand_head(skb, hh_len); + if (!skb) + return NF_STOLEN; + } + + skb->dev = dev; + dev_hard_header(skb, dev, ntohs(skb->protocol), xmit->dest, xmit->source, skb->len); dev_queue_xmit(skb); -- cgit v1.2.3 From 3fe7ecab1a0856aafe1026a35af1621a5c18d53f Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 27 Apr 2026 07:17:19 +0200 Subject: s390/pai: Disable duplicate read of kernel PAI counter value The PAI crypto counter design allows for user space and kernel space PAI counter increment recording. This is achieved by splitting the recording page in half. The upper part of the 4KB page records user space increments of PAI crypto counter and the lower half records kernel space increments. The page itself looks like: lowcore ptr ---> ++++++++++++++++++++++++ |user space area | +----------------------+ |kernel space area | ++++++++++++++++++++++++ User space and kernel space entries are handled via a kernel_offset value when wrting. For PAI crypto counters this offset is 2048 or half of a page size. For PAI NNPA counter design this distinction was not needed. There is no user and kernel space part for the page pointed to by lowcore. The set up is: lowcore ptr ---> ++++++++++++++++++++++++ |user + kernel space | |area | | | ++++++++++++++++++++++++ There is always only one counter value recorded and saved. Depending on number of CPUs and machine load, the number of PAI NNPA counter increment differs between counting (perf stat) and recording (perf record). The number reported by sampling was double the number shown by counting. This was caused by a double read of the PAI NNPA values in function pai_copy(). The first part of that function reads the kernel space part. The offset into the kernel page part must be larger than zero. The second part of that function reads the user space part, which begins of offset zero. This works fine for PAI crypto counters. It fails for PAI NNPA counters because the PMU device driver does not support that feature and has a kernel_offset value of 0x0. Executing both user and kernel space read out might end up reading user space value twice. For the PAI NNPA PMU prohibit the kernel space part read out. Cc: stable@vger.kernel.org Fixes: f12473541356 ("s390/pai_crypto: Rename paicrypt_copy() to pai_copy()") Signed-off-by: Thomas Richter Reviewed-by: Sumanth Korikkar Signed-off-by: Alexander Gordeev --- arch/s390/kernel/perf_pai.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_pai.c b/arch/s390/kernel/perf_pai.c index 86f71a3d1ef2..f13c5c5fbea6 100644 --- a/arch/s390/kernel/perf_pai.c +++ b/arch/s390/kernel/perf_pai.c @@ -651,7 +651,7 @@ static void pai_have_sample(struct perf_event *event, struct pai_map *cpump) rawsize = pai_copy(cpump->save, cpump->area, pp, (unsigned long *)PAI_SAVE_AREA(event), event->attr.exclude_user, - event->attr.exclude_kernel); + !pp->kernel_offset ? true : event->attr.exclude_kernel); if (rawsize) /* No incremented counters */ pai_push_sample(rawsize, cpump, event); } -- cgit v1.2.3 From d6cc7c99bf1f73eda7d565d224d791d16239bb41 Mon Sep 17 00:00:00 2001 From: Sanman Pradhan Date: Thu, 16 Apr 2026 21:59:30 +0000 Subject: hwmon: (ltc2992) Clamp threshold writes to hardware range ltc2992_set_voltage(), ltc2992_set_current(), and ltc2992_set_power() do not validate the user-supplied value before converting it to a register value. This can result in: 1. Negative input values wrapping to large positive register values. For power, the negative long is implicitly cast to u64 in mul_u64_u32_div(), producing an incorrect value. For voltage and current, the negative converted value wraps when passed to ltc2992_write_reg() as a u32. 2. Intermediate arithmetic exceeding the range representable in u64 on 64-bit platforms. In ltc2992_set_voltage(), (u64)val * 1000 can exceed U64_MAX when val is a large positive long. In ltc2992_set_current(), (u64)val * r_sense_uohm can overflow similarly. In ltc2992_set_power(), the computed value may not fit in u64. 3. Register values exceeding the hardware field width. Voltage and current threshold registers are 12-bit (stored left-justified in 16 bits), and power threshold registers are 24-bit. Without clamping, bits above the field width are truncated in ltc2992_write_reg(). Fix by clamping negative values to zero, clamping positive values to the rounded hardware-representable maximum (the value returned by the read path for a full-scale register) to prevent intermediate overflow, and clamping the converted register value to the hardware field width before writing. The existing conversion formula and rounding behavior are preserved. In the power write path, cancel the factor of 1000 from both the numerator (r_sense_uohm * 1000) and the denominator (VADC_UV_LSB * IADC_NANOV_LSB) to also eliminate a u32 overflow of r_sense_uohm * 1000 when r_sense_uohm exceeds about 4.29 ohms. Fixes: b0bd407e94b03 ("hwmon: (ltc2992) Add support") Cc: stable@vger.kernel.org Signed-off-by: Sanman Pradhan Link: https://lore.kernel.org/r/20260416215904.101969-2-sanman.pradhan@hpe.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ltc2992.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c index 1fcd320d6161..106973619676 100644 --- a/drivers/hwmon/ltc2992.c +++ b/drivers/hwmon/ltc2992.c @@ -431,10 +431,16 @@ static int ltc2992_get_voltage(struct ltc2992_state *st, u32 reg, u32 scale, lon static int ltc2992_set_voltage(struct ltc2992_state *st, u32 reg, u32 scale, long val) { - val = DIV_ROUND_CLOSEST(val * 1000, scale); - val = val << 4; + u32 reg_val; + long vmax; + + vmax = DIV_ROUND_CLOSEST_ULL(0xFFFULL * scale, 1000); + val = max(val, 0L); + val = min(val, vmax); + reg_val = min(DIV_ROUND_CLOSEST_ULL((u64)val * 1000, scale), + 0xFFFULL) << 4; - return ltc2992_write_reg(st, reg, 2, val); + return ltc2992_write_reg(st, reg, 2, reg_val); } static int ltc2992_read_gpio_alarm(struct ltc2992_state *st, int nr_gpio, u32 attr, long *val) @@ -559,9 +565,15 @@ static int ltc2992_get_current(struct ltc2992_state *st, u32 reg, u32 channel, l static int ltc2992_set_current(struct ltc2992_state *st, u32 reg, u32 channel, long val) { u32 reg_val; + long cmax; - reg_val = DIV_ROUND_CLOSEST(val * st->r_sense_uohm[channel], LTC2992_IADC_NANOV_LSB); - reg_val = reg_val << 4; + cmax = DIV_ROUND_CLOSEST_ULL(0xFFFULL * LTC2992_IADC_NANOV_LSB, + st->r_sense_uohm[channel]); + val = max(val, 0L); + val = min(val, cmax); + reg_val = min(DIV_ROUND_CLOSEST_ULL((u64)val * st->r_sense_uohm[channel], + LTC2992_IADC_NANOV_LSB), + 0xFFFULL) << 4; return ltc2992_write_reg(st, reg, 2, reg_val); } @@ -634,9 +646,18 @@ static int ltc2992_get_power(struct ltc2992_state *st, u32 reg, u32 channel, lon static int ltc2992_set_power(struct ltc2992_state *st, u32 reg, u32 channel, long val) { u32 reg_val; - - reg_val = mul_u64_u32_div(val, st->r_sense_uohm[channel] * 1000, - LTC2992_VADC_UV_LSB * LTC2992_IADC_NANOV_LSB); + u64 pmax, uval; + + uval = max(val, 0L); + pmax = mul_u64_u32_div(0xFFFFFFULL, + LTC2992_VADC_UV_LSB / 1000 * + LTC2992_IADC_NANOV_LSB, + st->r_sense_uohm[channel]); + uval = min(uval, pmax); + reg_val = min(mul_u64_u32_div(uval, st->r_sense_uohm[channel], + LTC2992_VADC_UV_LSB / 1000 * + LTC2992_IADC_NANOV_LSB), + 0xFFFFFFULL); return ltc2992_write_reg(st, reg, 3, reg_val); } -- cgit v1.2.3 From 2da0c1fd01dbd6b22844e8676585153dfc660cbe Mon Sep 17 00:00:00 2001 From: Sanman Pradhan Date: Thu, 16 Apr 2026 21:59:40 +0000 Subject: hwmon: (ltc2992) Fix u32 overflow in power read path ltc2992_get_power() computes the divisor for mul_u64_u32_div() as r_sense_uohm * 1000. This multiplication overflows u32 when r_sense_uohm exceeds about 4.29 ohms (4294967 micro-ohms), producing a truncated divisor and an incorrect power reading. Cancel the factor of 1000 from both the numerator (VADC_UV_LSB * IADC_NANOV_LSB = 312500000) and the divisor (r_sense_uohm * 1000), giving (VADC_UV_LSB / 1000) * IADC_NANOV_LSB = 312500 as the numerator and plain r_sense_uohm as the divisor. The cancellation is exact because LTC2992_VADC_UV_LSB (25000) is divisible by 1000. This is the read-path counterpart of the write-path fix applied in the preceding patch. Fixes: b0bd407e94b03 ("hwmon: (ltc2992) Add support") Cc: stable@vger.kernel.org Signed-off-by: Sanman Pradhan Link: https://lore.kernel.org/r/20260416215904.101969-3-sanman.pradhan@hpe.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ltc2992.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c index 106973619676..2617c4538af9 100644 --- a/drivers/hwmon/ltc2992.c +++ b/drivers/hwmon/ltc2992.c @@ -637,8 +637,10 @@ static int ltc2992_get_power(struct ltc2992_state *st, u32 reg, u32 channel, lon if (reg_val < 0) return reg_val; - *val = mul_u64_u32_div(reg_val, LTC2992_VADC_UV_LSB * LTC2992_IADC_NANOV_LSB, - st->r_sense_uohm[channel] * 1000); + *val = mul_u64_u32_div(reg_val, + LTC2992_VADC_UV_LSB / 1000 * + LTC2992_IADC_NANOV_LSB, + st->r_sense_uohm[channel]); return 0; } -- cgit v1.2.3 From 5ed26ffe57ffca054b7a141ff0c1a07bf3a80f6c Mon Sep 17 00:00:00 2001 From: Ninad Naik Date: Sat, 18 Apr 2026 00:44:11 +0530 Subject: Documentation: hwmon: fix link to ideapad-laptop.c file The ideapad-laptop.c file now exists inside drivers/platform/x86/lenovo/ directory. Updating the GitHub link to the correct path. Signed-off-by: Ninad Naik Link: https://lore.kernel.org/r/20260417191411.713958-1-ninadnaik07@gmail.com Signed-off-by: Guenter Roeck --- Documentation/hwmon/yogafan.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/hwmon/yogafan.rst b/Documentation/hwmon/yogafan.rst index c553a381f772..68761947a1a8 100644 --- a/Documentation/hwmon/yogafan.rst +++ b/Documentation/hwmon/yogafan.rst @@ -135,4 +135,4 @@ References 4. **Lenovo IdeaPad Laptop Driver:** Reference for DMI-based hardware feature gating in Lenovo laptops. - https://github.com/torvalds/linux/blob/master/drivers/platform/x86/ideapad-laptop.c + https://github.com/torvalds/linux/blob/master/drivers/platform/x86/lenovo/ideapad-laptop.c -- cgit v1.2.3 From 1a1414c675ee1b637bbe3840241555a49c61b123 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 25 Apr 2026 20:03:19 -0400 Subject: hwmon: Remove stale CONFIG_SENSORS_SBRMI Makefile reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kconfiglint reports: X001: CONFIG_SENSORS_SBRMI referenced in Makefile but not defined in any Kconfig The SB-RMI hardware monitoring driver was originally introduced in commit 5a0f50d110b3 ("hwmon: Add support for SB-RMI power module") with both a Kconfig entry (CONFIG_SENSORS_SBRMI) and a Makefile line (obj-$(CONFIG_SENSORS_SBRMI) += sbrmi.o) in drivers/hwmon/. Commit e156586764050 ("hwmon/misc: amd-sbi: Move core sbrmi from hwmon to misc") moved the driver to drivers/misc/amd-sbi/ to support additional functionality beyond hardware monitoring. That commit correctly removed the Kconfig entry from drivers/hwmon/Kconfig, moved the source file drivers/hwmon/sbrmi.c to drivers/misc/amd-sbi/sbrmi.c, and created new Kconfig/Makefile entries in drivers/misc/amd-sbi/ with a renamed symbol (CONFIG_AMD_SBRMI_I2C). However, the Makefile line in drivers/hwmon/Makefile was not removed in that commit. The orphaned line references a CONFIG symbol that no longer exists and a source file that is no longer present, so it has no effect on the build — but it is dead code that should be cleaned up. Remove the stale Makefile reference. Assisted-by: Claude:claude-opus-4-6 kconfiglint Signed-off-by: Sasha Levin Link: https://lore.kernel.org/r/20260426000319.55908-1-sashal@kernel.org Signed-off-by: Guenter Roeck --- drivers/hwmon/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 4788996aa137..982ee2c6f9de 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -201,7 +201,6 @@ obj-$(CONFIG_SENSORS_PWM_FAN) += pwm-fan.o obj-$(CONFIG_SENSORS_QNAP_MCU_HWMON) += qnap-mcu-hwmon.o obj-$(CONFIG_SENSORS_RASPBERRYPI_HWMON) += raspberrypi-hwmon.o obj-$(CONFIG_SENSORS_SBTSI) += sbtsi_temp.o -obj-$(CONFIG_SENSORS_SBRMI) += sbrmi.o obj-$(CONFIG_SENSORS_SCH56XX_COMMON)+= sch56xx-common.o obj-$(CONFIG_SENSORS_SCH5627) += sch5627.o obj-$(CONFIG_SENSORS_SCH5636) += sch5636.o -- cgit v1.2.3 From 174606451fbb17db506ebaacdd5e203e57773d5f Mon Sep 17 00:00:00 2001 From: Myeonghun Pak Date: Fri, 24 Apr 2026 22:50:51 +0900 Subject: hwmon: (corsair-psu) Close HID device on probe errors corsairpsu_probe() opens the HID device before sending the device init and firmware-info commands. If either command fails, the error path jumps directly to fail_and_stop and skips hid_hw_close(). Use the existing fail_and_close label for those post-open failures so the open count and low-level close callback are balanced before hid_hw_stop(). Fixes: d115b51e0e56 ("hwmon: add Corsair PSU HID controller driver") Cc: stable@vger.kernel.org Signed-off-by: Myeonghun Pak Reviewed-by: Wilken Gottwalt Link: https://lore.kernel.org/r/20260424135107.13720-1-mhun512@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/corsair-psu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c index dddbd2463f8d..76f3e1da68d0 100644 --- a/drivers/hwmon/corsair-psu.c +++ b/drivers/hwmon/corsair-psu.c @@ -796,13 +796,13 @@ static int corsairpsu_probe(struct hid_device *hdev, const struct hid_device_id ret = corsairpsu_init(priv); if (ret < 0) { dev_err(&hdev->dev, "unable to initialize device (%d)\n", ret); - goto fail_and_stop; + goto fail_and_close; } ret = corsairpsu_fwinfo(priv); if (ret < 0) { dev_err(&hdev->dev, "unable to query firmware (%d)\n", ret); - goto fail_and_stop; + goto fail_and_close; } corsairpsu_get_criticals(priv); -- cgit v1.2.3 From d272b8d2dd132de8579e3f79a77bc6ae58214a93 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 30 Apr 2026 12:53:50 +0800 Subject: riscv: cpufeature: Drop this_hwcap clear in T-Head vector workaround The variable this_hwcap is initialized to 0 for each loop, it is not necessary to do the bit clearance since this_hwcap is still 0 at this point, clearing the source_isa is enough here. Signed-off-by: Hui Wang Link: https://patch.msgid.link/20260430045350.22213-1-hui.wang@canonical.com Signed-off-by: Paul Walmsley --- arch/riscv/kernel/cpufeature.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 1734f9a4c2fd..3dc4c0d31550 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -896,10 +896,8 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) * CPU cores with the ratified spec will contain non-zero * marchid. */ - if (acpi_disabled && boot_vendorid == THEAD_VENDOR_ID && boot_archid == 0x0) { - this_hwcap &= ~isa2hwcap[RISCV_ISA_EXT_v]; + if (acpi_disabled && boot_vendorid == THEAD_VENDOR_ID && boot_archid == 0x0) clear_bit(RISCV_ISA_EXT_v, source_isa); - } riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap); -- cgit v1.2.3 From 18ed60e33e6c77d62409c1343dec1c61bae3d2e7 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 29 Apr 2026 16:21:41 +0800 Subject: net: mctp: test: use a zeroed struct sockaddr_mctp Invalid sockaddr padding will cause bind() to fail; ensure we have a zeroed address in the testcase. Fixes: 0d8647bc74cb ("net: mctp: don't require a route for null-EID ingress") Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260429-dev-mctp-test-fixes-v1-1-1127b7425809@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/test/route-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index e1033643fab0..e4b230ef6099 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -920,9 +920,9 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test) static void mctp_test_route_input_null_eid(struct kunit *test) { struct mctp_hdr hdr = RX_HDR(1, 10, 0, FL_S | FL_E | FL_TO); + struct sockaddr_mctp addr = { 0 }; struct sk_buff *skb_pkt, *skb_sk; struct mctp_test_dev *dev; - struct sockaddr_mctp addr; struct socket *sock; u8 type = 0; int rc; -- cgit v1.2.3 From 76872971064133474d9b891da05db8f7586fcc11 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 29 Apr 2026 16:21:42 +0800 Subject: net: mctp: test: Use dev_direct_xmit for TX to our test device In our test cases, we typically feed a packet sequence into the routing code, then inspect the device's TXed skbs to assert specific behaviours. Using dev_queue_xmit() for our TX path introduces a fair bit of complexity between the test packet sequence and the test device's ndo_start_xmit callback; which may mean that the skbs have not hit the device at the point we're inspecting the TXed skb list. Use dev_direct_xmit instead, as we want a direct a path as possible here, and the test dev does not need any queueing, scheduling or flow control. Fixes: 6ab578739a4c ("net: mctp: test: move TX packetqueue from dst to dev") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202604281320.525eee17-lkp@intel.com Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260429-dev-mctp-test-fixes-v1-2-1127b7425809@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/test/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c index c3987d5ade7a..6eef8d485c25 100644 --- a/net/mctp/test/utils.c +++ b/net/mctp/test/utils.c @@ -116,7 +116,7 @@ void mctp_test_destroy_dev(struct mctp_test_dev *dev) static int mctp_test_dst_output(struct mctp_dst *dst, struct sk_buff *skb) { skb->dev = dst->dev->dev; - dev_queue_xmit(skb); + dev_direct_xmit(skb, 0); return 0; } -- cgit v1.2.3 From ba6b328588f7cb7cf4aca33bae565c2914d74786 Mon Sep 17 00:00:00 2001 From: David Gow Date: Sat, 25 Apr 2026 11:41:23 +0800 Subject: rust: arch: um: Fix building 32-bit UML with GCC 32-bit UML builds can be configured either by setting CONFIG_64BIT=n or with SUBARCH=i386. Both work with Rust-for-Linux when clang is the compiler, but when SUBARCH=i386, we don't set a bindgen target correctly if gcc is the compiler. Add the appropriate bindgen target configuration for i386, as is done in Makefile.clang. [ For reference, the errors look like: BINDGEN rust/bindings/bindings_generated.rs error: unsupported option '-mno-sse' for target '' ... error: unknown target triple 'unknown' panicked at .../bindgen-0.72.1/ir/context.rs:562:15: libclang error; possible causes include: ... - Miguel ] Fixes: ab0f4cedc355 ("arch: um: rust: Add i386 support for Rust") Signed-off-by: David Gow Link: https://patch.msgid.link/20260425034125.53866-1-david@davidgow.net [ Added space in title. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/Makefile b/rust/Makefile index b361bfedfdf0..b9e9f512cec3 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -403,6 +403,8 @@ BINDGEN_TARGET_x86 := x86_64-linux-gnu BINDGEN_TARGET_arm64 := aarch64-linux-gnu BINDGEN_TARGET_arm := arm-linux-gnueabi BINDGEN_TARGET_loongarch := loongarch64-linux-gnusf +# This is only for i386 UM builds, which need the 32-bit target not -m32 +BINDGEN_TARGET_i386 := i386-linux-gnu BINDGEN_TARGET_um := $(BINDGEN_TARGET_$(SUBARCH)) BINDGEN_TARGET := $(BINDGEN_TARGET_$(SRCARCH)) -- cgit v1.2.3 From 83ac2870310b694775ab7e8f0244fdd94fc21926 Mon Sep 17 00:00:00 2001 From: Gary Guo Date: Mon, 27 Apr 2026 16:43:00 +0100 Subject: rust: pin-init: internal: move alignment check to `make_field_check` Instead of having the reference creation serving dual-purpose as both for let bindings and alignment check, detangle them so that the alignment check is done explicitly in `make_field_check`. This is more robust against refactors that may change the way let bindings are created. Cc: stable@vger.kernel.org Reviewed-by: Alice Ryhl Signed-off-by: Gary Guo Link: https://patch.msgid.link/20260427-pin-init-fix-v3-1-496a699674dd@garyguo.net [ Reworded for typo. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/pin-init/internal/src/init.rs | 78 ++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 41 deletions(-) diff --git a/rust/pin-init/internal/src/init.rs b/rust/pin-init/internal/src/init.rs index daa3f1c6466e..0a6600e8156c 100644 --- a/rust/pin-init/internal/src/init.rs +++ b/rust/pin-init/internal/src/init.rs @@ -249,10 +249,6 @@ fn init_fields( }); // Again span for better diagnostics let write = quote_spanned!(ident.span()=> ::core::ptr::write); - // NOTE: the field accessor ensures that the initialized field is properly aligned. - // Unaligned fields will cause the compiler to emit E0793. We do not support - // unaligned fields since `Init::__init` requires an aligned pointer; the call to - // `ptr::write` below has the same requirement. let accessor = if pinned { let project_ident = format_ident!("__project_{ident}"); quote! { @@ -367,49 +363,49 @@ fn init_fields( } } -/// Generate the check for ensuring that every field has been initialized. +/// Generate the check for ensuring that every field has been initialized and aligned. fn make_field_check( fields: &Punctuated, init_kind: InitKind, path: &Path, ) -> TokenStream { - let field_attrs = fields + let field_attrs: Vec<_> = fields .iter() - .filter_map(|f| f.kind.ident().map(|_| &f.attrs)); - let field_name = fields.iter().filter_map(|f| f.kind.ident()); - match init_kind { - InitKind::Normal => quote! { - // We use unreachable code to ensure that all fields have been mentioned exactly once, - // this struct initializer will still be type-checked and complain with a very natural - // error message if a field is forgotten/mentioned more than once. - #[allow(unreachable_code, clippy::diverging_sub_expression)] - // SAFETY: this code is never executed. - let _ = || unsafe { - ::core::ptr::write(slot, #path { - #( - #(#field_attrs)* - #field_name: ::core::panic!(), - )* - }) - }; - }, - InitKind::Zeroing => quote! { - // We use unreachable code to ensure that all fields have been mentioned at most once. - // Since the user specified `..Zeroable::zeroed()` at the end, all missing fields will - // be zeroed. This struct initializer will still be type-checked and complain with a - // very natural error message if a field is mentioned more than once, or doesn't exist. - #[allow(unreachable_code, clippy::diverging_sub_expression, unused_assignments)] - // SAFETY: this code is never executed. - let _ = || unsafe { - ::core::ptr::write(slot, #path { - #( - #(#field_attrs)* - #field_name: ::core::panic!(), - )* - ..::core::mem::zeroed() - }) - }; - }, + .filter_map(|f| f.kind.ident().map(|_| &f.attrs)) + .collect(); + let field_name: Vec<_> = fields.iter().filter_map(|f| f.kind.ident()).collect(); + let zeroing_trailer = match init_kind { + InitKind::Normal => None, + InitKind::Zeroing => Some(quote! { + ..::core::mem::zeroed() + }), + }; + quote! { + #[allow(unreachable_code, clippy::diverging_sub_expression)] + // We use unreachable code to perform field checks. They're still checked by the compiler. + // SAFETY: this code is never executed. + let _ = || unsafe { + // Create references to ensure that the initialized field is properly aligned. + // Unaligned fields will cause the compiler to emit E0793. We do not support + // unaligned fields since `Init::__init` requires an aligned pointer; the call to + // `ptr::write` for value-initialization case has the same requirement. + #( + #(#field_attrs)* + let _ = &(*slot).#field_name; + )* + + // If the zeroing trailer is not present, this checks that all fields have been + // mentioned exactly once. If the zeroing trailer is present, all missing fields will be + // zeroed, so this checks that all fields have been mentioned at most once. The use of + // struct initializer will still generate very natural error messages for any misuse. + ::core::ptr::write(slot, #path { + #( + #(#field_attrs)* + #field_name: ::core::panic!(), + )* + #zeroing_trailer + }) + }; } } -- cgit v1.2.3 From 68bf102226cf2199dc609b67c1e847cad4de4b57 Mon Sep 17 00:00:00 2001 From: Gary Guo Date: Mon, 27 Apr 2026 16:43:01 +0100 Subject: rust: pin-init: fix incorrect accessor reference lifetime When a field has been initialized, `init!`/`pin_init!` create a reference or pinned reference to the field so it can be accessed later during the initialization of other fields. However, the reference it created is incorrectly `&'static` rather than just the scope of the initializer. This means that you can do init!(Foo { a: 1, _: { let b: &'static u32 = a; } }) which is unsound. This is caused by `&mut (*#slot).#ident`, which actually allows arbitrary lifetime, so this is effectively `'static`. Somewhat ironically, the safety justification of creating the accessor is.. "SAFETY: TODO". Fix it by adding `let_binding` method on `DropGuard` to shorten lifetime. This results in exactly what we want for these accessors. The safety and invariant comments of `DropGuard` have been reworked; instead of reasoning about what caller can do with the guard, express it in a way that the ownership is transferred to the guard and `forget` takes it back, so the unsafe operations within the `DropGuard` can be more easily justified. Fixes: 42415d163e5d ("rust: pin-init: add references to previously initialized fields") Cc: stable@vger.kernel.org Signed-off-by: Gary Guo Link: https://patch.msgid.link/20260427-pin-init-fix-v3-2-496a699674dd@garyguo.net [ Reworded for missing word. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/pin-init/internal/src/init.rs | 106 ++++++++++++++++--------------------- rust/pin-init/src/__internal.rs | 28 ++++++---- 2 files changed, 66 insertions(+), 68 deletions(-) diff --git a/rust/pin-init/internal/src/init.rs b/rust/pin-init/internal/src/init.rs index 0a6600e8156c..487ee0013faf 100644 --- a/rust/pin-init/internal/src/init.rs +++ b/rust/pin-init/internal/src/init.rs @@ -249,18 +249,6 @@ fn init_fields( }); // Again span for better diagnostics let write = quote_spanned!(ident.span()=> ::core::ptr::write); - let accessor = if pinned { - let project_ident = format_ident!("__project_{ident}"); - quote! { - // SAFETY: TODO - unsafe { #data.#project_ident(&mut (*#slot).#ident) } - } - } else { - quote! { - // SAFETY: TODO - unsafe { &mut (*#slot).#ident } - } - }; quote! { #(#attrs)* { @@ -268,51 +256,31 @@ fn init_fields( // SAFETY: TODO unsafe { #write(&raw mut (*#slot).#ident, #value_ident) }; } - #(#cfgs)* - #[allow(unused_variables)] - let #ident = #accessor; } } InitializerKind::Init { ident, value, .. } => { // Again span for better diagnostics let init = format_ident!("init", span = value.span()); - // NOTE: the field accessor ensures that the initialized field is properly aligned. - // Unaligned fields will cause the compiler to emit E0793. We do not support - // unaligned fields since `Init::__init` requires an aligned pointer; the call to - // `ptr::write` below has the same requirement. - let (value_init, accessor) = if pinned { - let project_ident = format_ident!("__project_{ident}"); - ( - quote! { - // SAFETY: - // - `slot` is valid, because we are inside of an initializer closure, we - // return when an error/panic occurs. - // - We also use `#data` to require the correct trait (`Init` or `PinInit`) - // for `#ident`. - unsafe { #data.#ident(&raw mut (*#slot).#ident, #init)? }; - }, - quote! { - // SAFETY: TODO - unsafe { #data.#project_ident(&mut (*#slot).#ident) } - }, - ) + let value_init = if pinned { + quote! { + // SAFETY: + // - `slot` is valid, because we are inside of an initializer closure, we + // return when an error/panic occurs. + // - We also use `#data` to require the correct trait (`Init` or `PinInit`) + // for `#ident`. + unsafe { #data.#ident(&raw mut (*#slot).#ident, #init)? }; + } } else { - ( - quote! { - // SAFETY: `slot` is valid, because we are inside of an initializer - // closure, we return when an error/panic occurs. - unsafe { - ::pin_init::Init::__init( - #init, - &raw mut (*#slot).#ident, - )? - }; - }, - quote! { - // SAFETY: TODO - unsafe { &mut (*#slot).#ident } - }, - ) + quote! { + // SAFETY: `slot` is valid, because we are inside of an initializer + // closure, we return when an error/panic occurs. + unsafe { + ::pin_init::Init::__init( + #init, + &raw mut (*#slot).#ident, + )? + }; + } }; quote! { #(#attrs)* @@ -320,9 +288,6 @@ fn init_fields( let #init = #value; #value_init } - #(#cfgs)* - #[allow(unused_variables)] - let #ident = #accessor; } } InitializerKind::Code { block: value, .. } => quote! { @@ -335,18 +300,41 @@ fn init_fields( if let Some(ident) = kind.ident() { // `mixed_site` ensures that the guard is not accessible to the user-controlled code. let guard = format_ident!("__{ident}_guard", span = Span::mixed_site()); + + // NOTE: The reference is derived from the guard so that it only lives as long as the + // guard does and cannot escape the scope. If it's created via `&mut (*#slot).#ident` + // like the unaligned field guard, it will become effectively `'static`. + let accessor = if pinned { + let project_ident = format_ident!("__project_{ident}"); + quote! { + // SAFETY: the initialization is pinned. + unsafe { #data.#project_ident(#guard.let_binding()) } + } + } else { + quote! { + #guard.let_binding() + } + }; + res.extend(quote! { #(#cfgs)* - // Create the drop guard: + // Create the drop guard. // - // We rely on macro hygiene to make it impossible for users to access this local - // variable. - // SAFETY: We forget the guard later when initialization has succeeded. - let #guard = unsafe { + // SAFETY: + // - `&raw mut (*slot).#ident` is valid. + // - `make_field_check` checks that `&raw mut (*slot).#ident` is properly aligned. + // - `(*slot).#ident` has been initialized above. + // - We only need the ownership to the pointee back when initialization has + // succeeded, where we `forget` the guard. + let mut #guard = unsafe { ::pin_init::__internal::DropGuard::new( &raw mut (*slot).#ident ) }; + + #(#cfgs)* + #[allow(unused_variables)] + let #ident = #accessor; }); guards.push(guard); guard_attrs.push(cfgs); diff --git a/rust/pin-init/src/__internal.rs b/rust/pin-init/src/__internal.rs index 90adbdc1893b..5720a621aed7 100644 --- a/rust/pin-init/src/__internal.rs +++ b/rust/pin-init/src/__internal.rs @@ -238,32 +238,42 @@ fn stack_init_reuse() { /// When a value of this type is dropped, it drops a `T`. /// /// Can be forgotten to prevent the drop. +/// +/// # Invariants +/// +/// - `ptr` is valid and properly aligned. +/// - `*ptr` is initialized and owned by this guard. pub struct DropGuard { ptr: *mut T, } impl DropGuard { - /// Creates a new [`DropGuard`]. It will [`ptr::drop_in_place`] `ptr` when it gets dropped. + /// Creates a drop guard and transfer the ownership of the pointer content. /// - /// # Safety + /// The ownership is only relinguished if the guard is forgotten via [`core::mem::forget`]. /// - /// `ptr` must be a valid pointer. + /// # Safety /// - /// It is the callers responsibility that `self` will only get dropped if the pointee of `ptr`: - /// - has not been dropped, - /// - is not accessible by any other means, - /// - will not be dropped by any other means. + /// - `ptr` is valid and properly aligned. + /// - `*ptr` is initialized, and the ownership is transferred to this guard. #[inline] pub unsafe fn new(ptr: *mut T) -> Self { + // INVARIANT: By safety requirement. Self { ptr } } + + /// Create a let binding for accessor use. + #[inline] + pub fn let_binding(&mut self) -> &mut T { + // SAFETY: Per type invariant. + unsafe { &mut *self.ptr } + } } impl Drop for DropGuard { #[inline] fn drop(&mut self) { - // SAFETY: A `DropGuard` can only be constructed using the unsafe `new` function - // ensuring that this operation is safe. + // SAFETY: `self.ptr` is valid, properly aligned and `*self.ptr` is owned by this guard. unsafe { ptr::drop_in_place(self.ptr) } } } -- cgit v1.2.3 From 838d852da8503372f3a1779bfbd1ccb93153ab4e Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 26 Apr 2026 16:42:00 +0200 Subject: rust: allow `clippy::collapsible_match` globally The `clippy::collapsible_match` lint [1] can make code harder to read in certain cases [2], e.g. CLIPPY P rust/libmacros.so - due to command line change warning: this `if` can be collapsed into the outer `match` --> rust/pin-init/internal/src/helpers.rs:91:17 | 91 | / if nesting == 1 { 92 | | impl_generics.push(tt.clone()); 93 | | impl_generics.push(tt); 94 | | skip_until_comma = false; 95 | | } | |_________________^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_match = note: `-W clippy::collapsible-match` implied by `-W clippy::all` = help: to override `-W clippy::all` add `#[allow(clippy::collapsible_match)]` help: collapse nested if block | 90 ~ TokenTree::Punct(p) if skip_until_comma && p.as_char() == ',' 91 ~ && nesting == 1 => { 92 | impl_generics.push(tt.clone()); 93 | impl_generics.push(tt); 94 | skip_until_comma = false; 95 ~ } | The lint does not have much upside -- when the suggestion may be a good one, it would still read fine when nested anyway. And it is the kind of lint that may easily bias people to just apply the suggestion instead of allowing it. [ In addition, as Gary points out [3], the suggestion is also wrong [4] and in the process of being fixed [5], possibly for Rust 1.97.0: Link: https://lore.kernel.org/rust-for-linux/DI3YV94TH9I3.1SOHW51552497@garyguo.net/ [3] Link: https://github.com/rust-lang/rust-clippy/issues/16875 [4] Link: https://github.com/rust-lang/rust-clippy/pull/16878 [5] - Miguel ] Thus just let developers decide on their own. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_match [1] Link: https://lore.kernel.org/rust-for-linux/CANiq72nWYJna_hdFxjQCQZK6yJBrr1Mb86iKavivV0U0BgufeA@mail.gmail.com/ [2] Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260426144201.227108-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index e27c91ea56fc..621d84aa4700 100644 --- a/Makefile +++ b/Makefile @@ -486,6 +486,7 @@ export rust_common_flags := --edition=2021 \ -Wclippy::as_ptr_cast_mut \ -Wclippy::as_underscore \ -Wclippy::cast_lossless \ + -Aclippy::collapsible_match \ -Wclippy::ignored_unit_patterns \ -Aclippy::incompatible_msrv \ -Wclippy::mut_mut \ -- cgit v1.2.3 From 2adc8664018c1cc595c7c0c98474a33c7fe32a85 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 26 Apr 2026 16:42:01 +0200 Subject: rust: allow `clippy::collapsible_if` globally Similar to `clippy::collapsible_match` (globally allowed in the previous commit), the `clippy::collapsible_if` lint [1] can make code harder to read in certain cases. Thus just let developers decide on their own. In addition, remove the existing `expect` we had. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Suggested-by: Gary Guo Link: https://lore.kernel.org/rust-for-linux/DGROP5CHU1QZ.1OKJRAUZXE9WC@garyguo.net/ Link: https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_if [1] Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260426144201.227108-2-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- Makefile | 1 + drivers/android/binder/range_alloc/array.rs | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 621d84aa4700..28f4ae452441 100644 --- a/Makefile +++ b/Makefile @@ -486,6 +486,7 @@ export rust_common_flags := --edition=2021 \ -Wclippy::as_ptr_cast_mut \ -Wclippy::as_underscore \ -Wclippy::cast_lossless \ + -Aclippy::collapsible_if \ -Aclippy::collapsible_match \ -Wclippy::ignored_unit_patterns \ -Aclippy::incompatible_msrv \ diff --git a/drivers/android/binder/range_alloc/array.rs b/drivers/android/binder/range_alloc/array.rs index ada1d1b4302e..081d19b09d4b 100644 --- a/drivers/android/binder/range_alloc/array.rs +++ b/drivers/android/binder/range_alloc/array.rs @@ -204,7 +204,6 @@ impl ArrayRangeAllocator { // caller will mark them as unused, which means that they can be freed if the system comes // under memory pressure. let mut freed_range = FreedRange::interior_pages(offset, size); - #[expect(clippy::collapsible_if)] // reads better like this if offset % PAGE_SIZE != 0 { if i == 0 || self.ranges[i - 1].endpoint() <= (offset & PAGE_MASK) { freed_range.start_page_idx -= 1; -- cgit v1.2.3 From dff205550e714f1cb65f27409586e2612905fa88 Mon Sep 17 00:00:00 2001 From: Gui-Dong Han Date: Thu, 16 Apr 2026 21:57:03 +0800 Subject: hwmon: (lm63) Add locking to avoid TOCTOU The functions show_fan(), show_pwm1(), show_temp11(), temp2_crit_hyst_show(), and show_lut_temp_hyst() access shared cached data without holding the update lock. This can cause TOCTOU races if the cached values change between the checks and the later calculations. Those cached values are updated in lm63_update_device(). In the general case, the affected functions combine multiple cached values without locking and can therefore observe a mixed old/new snapshot. In addition, show_fan() reads data->fan[nr] locklessly while lm63_update_device() updates data->fan[0] in two steps, which can expose an intermediate torn value and potentially trigger a divide-by-zero error. This means that converting the macro to a function is not sufficient to fix show_fan(). Hold the update lock across the whole read and calculation sequence so that the values remain stable. Check the other functions in the driver as well. Keep them unchanged because they either do not access shared cached values multiple times or already do so under lock. Link: https://lore.kernel.org/linux-hwmon/CALbr=LYJ_ehtp53HXEVkSpYoub+XYSTU8Rg=o1xxMJ8=5z8B-g@mail.gmail.com/ Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Fixes: e872c91e726e ("hwmon: (lm63) Add support for unsigned upper temperature limits") Fixes: d216f6809eb6 ("hwmon: (lm63) Expose automatic fan speed control lookup table") Signed-off-by: Gui-Dong Han Link: https://lore.kernel.org/r/20260416135703.53262-1-hanguidong02@gmail.com [groeck: Use lm63_update_device() to get driver data in temp2_crit_hyst_store] Signed-off-by: Guenter Roeck --- drivers/hwmon/lm63.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c index 035176a98ce9..30500b4d2221 100644 --- a/drivers/hwmon/lm63.c +++ b/drivers/hwmon/lm63.c @@ -333,7 +333,13 @@ static ssize_t show_fan(struct device *dev, struct device_attribute *devattr, { struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); struct lm63_data *data = lm63_update_device(dev); - return sprintf(buf, "%d\n", FAN_FROM_REG(data->fan[attr->index])); + int fan; + + mutex_lock(&data->update_lock); + fan = FAN_FROM_REG(data->fan[attr->index]); + mutex_unlock(&data->update_lock); + + return sprintf(buf, "%d\n", fan); } static ssize_t set_fan(struct device *dev, struct device_attribute *dummy, @@ -366,12 +372,14 @@ static ssize_t show_pwm1(struct device *dev, struct device_attribute *devattr, int nr = attr->index; int pwm; + mutex_lock(&data->update_lock); if (data->pwm_highres) pwm = data->pwm1[nr]; else pwm = data->pwm1[nr] >= 2 * data->pwm1_freq ? 255 : (data->pwm1[nr] * 255 + data->pwm1_freq) / (2 * data->pwm1_freq); + mutex_unlock(&data->update_lock); return sprintf(buf, "%d\n", pwm); } @@ -529,6 +537,7 @@ static ssize_t show_temp11(struct device *dev, struct device_attribute *devattr, int nr = attr->index; int temp; + mutex_lock(&data->update_lock); if (!nr) { /* * Use unsigned temperature unless its value is zero. @@ -544,7 +553,10 @@ static ssize_t show_temp11(struct device *dev, struct device_attribute *devattr, else temp = TEMP11_FROM_REG(data->temp11[nr]); } - return sprintf(buf, "%d\n", temp + data->temp2_offset); + temp += data->temp2_offset; + mutex_unlock(&data->update_lock); + + return sprintf(buf, "%d\n", temp); } static ssize_t set_temp11(struct device *dev, struct device_attribute *devattr, @@ -592,9 +604,14 @@ static ssize_t temp2_crit_hyst_show(struct device *dev, struct device_attribute *dummy, char *buf) { struct lm63_data *data = lm63_update_device(dev); - return sprintf(buf, "%d\n", temp8_from_reg(data, 2) - + data->temp2_offset - - TEMP8_FROM_REG(data->temp2_crit_hyst)); + int temp; + + mutex_lock(&data->update_lock); + temp = temp8_from_reg(data, 2) + data->temp2_offset + - TEMP8_FROM_REG(data->temp2_crit_hyst); + mutex_unlock(&data->update_lock); + + return sprintf(buf, "%d\n", temp); } static ssize_t show_lut_temp_hyst(struct device *dev, @@ -602,10 +619,14 @@ static ssize_t show_lut_temp_hyst(struct device *dev, { struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); struct lm63_data *data = lm63_update_device(dev); + int temp; - return sprintf(buf, "%d\n", lut_temp_from_reg(data, attr->index) - + data->temp2_offset - - TEMP8_FROM_REG(data->lut_temp_hyst)); + mutex_lock(&data->update_lock); + temp = lut_temp_from_reg(data, attr->index) + data->temp2_offset + - TEMP8_FROM_REG(data->lut_temp_hyst); + mutex_unlock(&data->update_lock); + + return sprintf(buf, "%d\n", temp); } /* @@ -616,7 +637,7 @@ static ssize_t temp2_crit_hyst_store(struct device *dev, struct device_attribute *dummy, const char *buf, size_t count) { - struct lm63_data *data = dev_get_drvdata(dev); + struct lm63_data *data = lm63_update_device(dev); struct i2c_client *client = data->client; long val; int err; -- cgit v1.2.3 From c9e3878ae2f57fd6786279cf5d9dc6e6e1b52f5a Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 30 Apr 2026 17:38:29 -0500 Subject: Revert "drm/nouveau/gsp: add support for GA100" This reverts commit 20e0c197802c545db220157fafd567a10f2b7672. Despite claiming to add GA100 support, that commit actually has quite a few problems. It falsely claims that there is no VBIOS. GA100 does have a VBIOS, but it has no display engine, so it cannot use the PRAMIN method the read VBIOS and must fall back to using PROM. For whatever reason, the VBIOS on GA100 has an "Init-from-ROM" (IFR) header where the PCI Expansion ROM would normally be found. So to find that ROM, Nouveau needs to parse the IFR header. The commit also falsely claimed that there is no graphics (GR) engine. So rather than try to fix that commit, just revert it and start over from scratch. Signed-off-by: Timur Tabi Link: https://patch.msgid.link/20260430223838.2530778-2-ttabi@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 11 +++++++++-- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c | 4 ++++ drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c | 18 +++++------------- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 72848ed80df7..b101e14f841e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2513,6 +2513,7 @@ static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", .bar = { 0x00000001, tu102_bar_new }, + .bios = { 0x00000001, nvkm_bios_new }, .devinit = { 0x00000001, ga100_devinit_new }, .fault = { 0x00000001, tu102_fault_new }, .fb = { 0x00000001, ga100_fb_new }, @@ -2529,7 +2530,6 @@ nv170_chipset = { .vfn = { 0x00000001, ga100_vfn_new }, .ce = { 0x000003ff, ga100_ce_new }, .fifo = { 0x00000001, ga100_fifo_new }, - .sec2 = { 0x00000001, tu102_sec2_new }, }; static const struct nvkm_device_chip @@ -3341,7 +3341,6 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x166: device->chip = &nv166_chipset; break; case 0x167: device->chip = &nv167_chipset; break; case 0x168: device->chip = &nv168_chipset; break; - case 0x170: device->chip = &nv170_chipset; break; case 0x172: device->chip = &nv172_chipset; break; case 0x173: device->chip = &nv173_chipset; break; case 0x174: device->chip = &nv174_chipset; break; @@ -3361,6 +3360,14 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x1b6: device->chip = &nv1b6_chipset; break; case 0x1b7: device->chip = &nv1b7_chipset; break; default: + if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) { + switch (device->chipset) { + case 0x170: device->chip = &nv170_chipset; break; + default: + break; + } + } + if (!device->chip) { nvdev_error(device, "unknown chipset (%08x)\n", boot0); ret = -ENODEV; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c index fdd820eeef81..27a13aeccd3c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c @@ -41,11 +41,15 @@ ga100_gsp_flcn = { static const struct nvkm_gsp_func ga100_gsp = { .flcn = &ga100_gsp_flcn, + .fwsec = &tu102_gsp_fwsec, .sig_section = ".fwsignature_ga100", .booter.ctor = tu102_gsp_booter_ctor, + .fwsec_sb.ctor = tu102_gsp_fwsec_sb_ctor, + .fwsec_sb.dtor = tu102_gsp_fwsec_sb_dtor, + .dtor = r535_gsp_dtor, .oneinit = tu102_gsp_oneinit, .init = tu102_gsp_init, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c index dd82c76b8b9a..19cb269e7a26 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c @@ -318,13 +318,8 @@ tu102_gsp_oneinit(struct nvkm_gsp *gsp) if (ret) return ret; - /* - * Calculate FB layout. FRTS is a memory region created by the FWSEC-FRTS firmware. - * FWSEC comes from VBIOS. So on systems with no VBIOS (e.g. GA100), the FRTS does - * not exist. Therefore, use the existence of VBIOS to determine whether to reserve - * an FRTS region. - */ - gsp->fb.wpr2.frts.size = device->bios ? 0x100000 : 0; + /* Calculate FB layout. */ + gsp->fb.wpr2.frts.size = 0x100000; gsp->fb.wpr2.frts.addr = ALIGN_DOWN(gsp->fb.bios.addr, 0x20000) - gsp->fb.wpr2.frts.size; gsp->fb.wpr2.boot.size = gsp->boot.fw.size; @@ -348,12 +343,9 @@ tu102_gsp_oneinit(struct nvkm_gsp *gsp) if (ret) return ret; - /* Only boot FWSEC-FRTS if it actually exists */ - if (gsp->fb.wpr2.frts.size) { - ret = nvkm_gsp_fwsec_frts(gsp); - if (WARN_ON(ret)) - return ret; - } + ret = nvkm_gsp_fwsec_frts(gsp); + if (WARN_ON(ret)) + return ret; /* Reset GSP into RISC-V mode. */ ret = gsp->func->reset(gsp); -- cgit v1.2.3 From a177ae30f78688f75ef9c6277a152c5d6979b10e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 30 Apr 2026 16:49:51 +0200 Subject: netfilter: flowtable: fix inline vlan encapsulation in xmit path Several issues in the inline vlan support: - The layer 2 encapsulation representation in the tuple takes encap[0] as the outer header and encap[1] as the inner header as seen from the ingress path. Reverse the encap loop to push first the inner then the outer vlan header. - Postpone pushing the layer 2 header once destination device is known. This allows to calculate the needed hearoom via LL_RESERVED_SPACE to accommodate the layer 2 headers. - Add and use nf_flow_vlan_push() as suggested by Eric Woudstra, this is a simplified version of skb_vlan_push() for egress path only. Fixes: c653d5a78f34 ("netfilter: flowtable: inline vlan encapsulation in xmit path") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 110 ++++++++++++++++++++++++++------------- 1 file changed, 73 insertions(+), 37 deletions(-) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 8d5fb7e940a1..0ce3c209050c 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -462,32 +462,6 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx, nf_flow_ip_tunnel_pop(ctx, skb); } -struct nf_flow_xmit { - const void *dest; - const void *source; - struct net_device *outdev; -}; - -static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, - struct nf_flow_xmit *xmit) -{ - struct net_device *dev = xmit->outdev; - unsigned int hh_len = LL_RESERVED_SPACE(dev); - - if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { - skb = skb_expand_head(skb, hh_len); - if (!skb) - return NF_STOLEN; - } - - skb->dev = dev; - dev_hard_header(skb, dev, ntohs(skb->protocol), - xmit->dest, xmit->source, skb->len); - dev_queue_xmit(skb); - - return NF_STOLEN; -} - static struct flow_offload_tuple_rhash * nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx, struct nf_flowtable *flow_table, struct sk_buff *skb) @@ -553,6 +527,32 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx, return 1; } +/* Similar to skb_vlan_push. */ +static int nf_flow_vlan_push(struct sk_buff *skb, __be16 proto, u16 id, + u32 needed_headroom) +{ + if (skb_vlan_tag_present(skb)) { + struct vlan_hdr *vhdr; + + if (skb_cow_head(skb, needed_headroom + VLAN_HLEN)) + return -1; + + __skb_push(skb, VLAN_HLEN); + if (skb_mac_header_was_set(skb)) + skb->mac_header -= VLAN_HLEN; + + vhdr = (struct vlan_hdr *)skb->data; + skb->network_header -= VLAN_HLEN; + vhdr->h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + vhdr->h_vlan_encapsulated_proto = skb->protocol; + skb->protocol = skb->vlan_proto; + skb_postpush_rcsum(skb, skb->data, VLAN_HLEN); + } + __vlan_hwaccel_put_tag(skb, proto, id); + + return 0; +} + static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id) { int data_len = skb->len + sizeof(__be16); @@ -739,17 +739,19 @@ static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb, } static int nf_flow_encap_push(struct sk_buff *skb, - struct flow_offload_tuple *tuple) + struct flow_offload_tuple *tuple, + struct net_device *outdev) { + u32 needed_headroom = LL_RESERVED_SPACE(outdev); int i; - for (i = 0; i < tuple->encap_num; i++) { + for (i = tuple->encap_num - 1; i >= 0; i--) { switch (tuple->encap[i].proto) { case htons(ETH_P_8021Q): case htons(ETH_P_8021AD): - skb_reset_mac_header(skb); - if (skb_vlan_push(skb, tuple->encap[i].proto, - tuple->encap[i].id) < 0) + if (nf_flow_vlan_push(skb, tuple->encap[i].proto, + tuple->encap[i].id, + needed_headroom) < 0) return -1; break; case htons(ETH_P_PPP_SES): @@ -762,6 +764,44 @@ static int nf_flow_encap_push(struct sk_buff *skb, return 0; } +struct nf_flow_xmit { + const void *dest; + const void *source; + struct net_device *outdev; + struct flow_offload_tuple *tuple; +}; + +static void __nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, + struct nf_flow_xmit *xmit) +{ + struct net_device *dev = xmit->outdev; + unsigned int hh_len = LL_RESERVED_SPACE(dev); + + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { + skb = skb_expand_head(skb, hh_len); + if (!skb) + return; + } + + skb->dev = dev; + dev_hard_header(skb, dev, ntohs(skb->protocol), + xmit->dest, xmit->source, skb->len); + dev_queue_xmit(skb); +} + +static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, + struct nf_flow_xmit *xmit) +{ + if (xmit->tuple->encap_num) { + if (nf_flow_encap_push(skb, xmit->tuple, xmit->outdev) < 0) + return NF_DROP; + } + + __nf_flow_queue_xmit(net, skb, xmit); + + return NF_STOLEN; +} + unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -806,9 +846,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_tunnel_v4_push(state->net, skb, other_tuple, &ip_daddr) < 0) return NF_DROP; - if (nf_flow_encap_push(skb, other_tuple) < 0) - return NF_DROP; - switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: rt = dst_rtable(tuplehash->tuple.dst_cache); @@ -838,6 +875,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, WARN_ON_ONCE(1); return NF_DROP; } + xmit.tuple = other_tuple; return nf_flow_queue_xmit(state->net, skb, &xmit); } @@ -1128,9 +1166,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, &ip6_daddr, encap_limit) < 0) return NF_DROP; - if (nf_flow_encap_push(skb, other_tuple) < 0) - return NF_DROP; - switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: rt = dst_rt6_info(tuplehash->tuple.dst_cache); @@ -1160,6 +1195,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, WARN_ON_ONCE(1); return NF_DROP; } + xmit.tuple = other_tuple; return nf_flow_queue_xmit(state->net, skb, &xmit); } -- cgit v1.2.3 From 69c54f80f4a7072b51b5b5939185ca5e572be982 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 30 Apr 2026 16:49:53 +0200 Subject: netfilter: flowtable: fix inline pppoe encapsulation in xmit path Address two issues in the inline pppoe encapsulation: - Add needs_gso_segment flag to segment PPPoE packets in software given that there is no GSO support for this. - Use FLOW_OFFLOAD_XMIT_DIRECT since neighbour cache is not available in point-to-point device, use the hardware address that is obtained via flowtable path discovery (ie. fill_forward_path). Fixes: 18d27bed0880 ("netfilter: flowtable: inline pppoe encapsulation in xmit path") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 4 +++- net/netfilter/nf_flow_table_core.c | 1 + net/netfilter/nf_flow_table_ip.c | 42 ++++++++++++++++++++++++++++++++--- net/netfilter/nf_flow_table_path.c | 7 +++++- 4 files changed, 49 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index b09c11c048d5..7b23b245a5a8 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -148,9 +148,10 @@ struct flow_offload_tuple { /* All members above are keys for lookups, see flow_offload_hash(). */ struct { } __hash; - u8 dir:2, + u16 dir:2, xmit_type:3, encap_num:2, + needs_gso_segment:1, tun_num:2, in_vlan_ingress:2; u16 mtu; @@ -232,6 +233,7 @@ struct nf_flow_route { u32 hw_ifindex; u8 h_source[ETH_ALEN]; u8 h_dest[ETH_ALEN]; + u8 needs_gso_segment:1; } out; enum flow_offload_xmit_type xmit_type; } tuple[FLOW_OFFLOAD_DIR_MAX]; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 2c4140e6f53c..785d8c244a77 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -122,6 +122,7 @@ static int flow_offload_fill_route(struct flow_offload *flow, flow_tuple->tun = route->tuple[dir].in.tun; flow_tuple->encap_num = route->tuple[dir].in.num_encaps; + flow_tuple->needs_gso_segment = route->tuple[dir].out.needs_gso_segment; flow_tuple->tun_num = route->tuple[dir].in.num_tuns; switch (route->tuple[dir].xmit_type) { diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 0ce3c209050c..2eba64eb393a 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -553,7 +553,8 @@ static int nf_flow_vlan_push(struct sk_buff *skb, __be16 proto, u16 id, return 0; } -static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id) +static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id, + u32 needed_headroom) { int data_len = skb->len + sizeof(__be16); struct ppp_hdr { @@ -562,7 +563,7 @@ static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id) } *ph; __be16 proto; - if (skb_cow_head(skb, PPPOE_SES_HLEN)) + if (skb_cow_head(skb, needed_headroom + PPPOE_SES_HLEN)) return -1; switch (skb->protocol) { @@ -755,7 +756,8 @@ static int nf_flow_encap_push(struct sk_buff *skb, return -1; break; case htons(ETH_P_PPP_SES): - if (nf_flow_pppoe_push(skb, tuple->encap[i].id) < 0) + if (nf_flow_pppoe_push(skb, tuple->encap[i].id, + needed_headroom) < 0) return -1; break; } @@ -769,6 +771,7 @@ struct nf_flow_xmit { const void *source; struct net_device *outdev; struct flow_offload_tuple *tuple; + bool needs_gso_segment; }; static void __nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, @@ -789,10 +792,41 @@ static void __nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, dev_queue_xmit(skb); } +static unsigned int nf_flow_encap_gso_xmit(struct net *net, struct sk_buff *skb, + struct nf_flow_xmit *xmit) +{ + struct sk_buff *segs, *nskb; + + segs = skb_gso_segment(skb, 0); + if (IS_ERR(segs)) + return NF_DROP; + + if (segs) + consume_skb(skb); + else + segs = skb; + + skb_list_walk_safe(segs, segs, nskb) { + skb_mark_not_on_list(segs); + + if (nf_flow_encap_push(segs, xmit->tuple, xmit->outdev) < 0) { + kfree_skb(segs); + kfree_skb_list(nskb); + return NF_STOLEN; + } + __nf_flow_queue_xmit(net, segs, xmit); + } + + return NF_STOLEN; +} + static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb, struct nf_flow_xmit *xmit) { if (xmit->tuple->encap_num) { + if (skb_is_gso(skb) && xmit->needs_gso_segment) + return nf_flow_encap_gso_xmit(net, skb, xmit); + if (nf_flow_encap_push(skb, xmit->tuple, xmit->outdev) < 0) return NF_DROP; } @@ -876,6 +910,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, return NF_DROP; } xmit.tuple = other_tuple; + xmit.needs_gso_segment = tuplehash->tuple.needs_gso_segment; return nf_flow_queue_xmit(state->net, skb, &xmit); } @@ -1196,6 +1231,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, return NF_DROP; } xmit.tuple = other_tuple; + xmit.needs_gso_segment = tuplehash->tuple.needs_gso_segment; return nf_flow_queue_xmit(state->net, skb, &xmit); } diff --git a/net/netfilter/nf_flow_table_path.c b/net/netfilter/nf_flow_table_path.c index 6bb9579dcc2a..9e88ea6a2eef 100644 --- a/net/netfilter/nf_flow_table_path.c +++ b/net/netfilter/nf_flow_table_path.c @@ -86,6 +86,7 @@ struct nft_forward_info { u8 ingress_vlans; u8 h_source[ETH_ALEN]; u8 h_dest[ETH_ALEN]; + bool needs_gso_segment; enum flow_offload_xmit_type xmit_type; }; @@ -138,8 +139,11 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack, path->encap.proto; info->num_encaps++; } - if (path->type == DEV_PATH_PPPOE) + if (path->type == DEV_PATH_PPPOE) { memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN); + info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT; + info->needs_gso_segment = 1; + } break; case DEV_PATH_BRIDGE: if (is_zero_ether_addr(info->h_source)) @@ -279,6 +283,7 @@ static void nft_dev_forward_path(const struct nft_pktinfo *pkt, memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN); route->tuple[dir].xmit_type = info.xmit_type; } + route->tuple[dir].out.needs_gso_segment = info.needs_gso_segment; } int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct, -- cgit v1.2.3 From e027c218c482c6a0ae1948129ccda3b0a2033368 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Tue, 28 Apr 2026 15:41:01 +0200 Subject: net: phy: micrel: fix LAN8814 QSGMII soft reset LAN8814 QSGMII soft reset was moved into the probe function to avoid triggering it for each of 4 PHY-s in the package. However, that broke QSGMII link between the MAC and PHY on most LAN8814 PHY-s, specificaly for us on the Microchip LAN969x switch. Reading the QSGMII status registers it was visible that lanes were only partially synced. It looks like the reset timing is crucial, so lets move the reset back into the .config_init function but guard it with phy_package_init_once() to avoid it being triggered on each of 4 PHY-s in the package. Change the probe function to use phy_package_probe_once() for coma and PtP setup. Fixes: 96a9178a29a6 ("net: phy: micrel: lan8814 fix reset of the QSGMII interface") Signed-off-by: Robert Marko Link: https://patch.msgid.link/20260428134138.1741253-1-robert.marko@sartura.hr Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 2aa1dedd21b8..e211a523c258 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -4548,6 +4548,13 @@ static int lan8814_config_init(struct phy_device *phydev) struct kszphy_priv *lan8814 = phydev->priv; int ret; + if (phy_package_init_once(phydev)) + /* Reset the PHY */ + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_QSGMII_SOFT_RESET, + LAN8814_QSGMII_SOFT_RESET_BIT, + LAN8814_QSGMII_SOFT_RESET_BIT); + /* Based on the interface type select how the advertise ability is * encoded, to set as SGMII or as USGMII. */ @@ -4655,13 +4662,7 @@ static int lan8814_probe(struct phy_device *phydev) priv->is_ptp_available = err == LAN8814_REV_LAN8814 || err == LAN8814_REV_LAN8818; - if (phy_package_init_once(phydev)) { - /* Reset the PHY */ - lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, - LAN8814_QSGMII_SOFT_RESET, - LAN8814_QSGMII_SOFT_RESET_BIT, - LAN8814_QSGMII_SOFT_RESET_BIT); - + if (phy_package_probe_once(phydev)) { err = lan8814_release_coma_mode(phydev); if (err) return err; -- cgit v1.2.3 From 3744b0964d5267c0b651bcd8f8c25db6bf4ccbac Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 29 Apr 2026 17:46:48 +0200 Subject: ipv6: Implement limits on extension header parsing ipv6_{skip_exthdr,find_hdr}() and ip6_{tnl_parse_tlv_enc_lim, protocol_deliver_rcu}() iterate over IPv6 extension headers until they find a non-extension-header protocol or run out of packet data. The loops have no iteration counter, relying solely on the packet length to bound them. For a crafted packet with 8-byte extension headers filling a 64KB jumbogram, this means a worst case of up to ~8k iterations with a skb_header_pointer call each. ipv6_skip_exthdr(), for example, is used where it parses the inner quoted packet inside an incoming ICMPv6 error: - icmpv6_rcv - checksum validation - case ICMPV6_DEST_UNREACH - icmpv6_notify - pskb_may_pull() <- pull inner IPv6 header - ipv6_skip_exthdr() <- iterates here - pskb_may_pull() - ipprot->err_handler() <- sk lookup The per-iteration cost of ipv6_skip_exthdr itself is generally light, but skb_header_pointer becomes more costly on reassembled packets: the first ~1232 bytes of the inner packet are in the skb's linear area, but the remaining ~63KB are in the frag_list where skb_copy_bits is needed to read data. Initially, the idea was to add a configurable limit via a new sysctl knob with default 8, in line with knobs from commit 47d3d7ac656a ("ipv6: Implement limits on Hop-by-Hop and Destination options"), but two reasons eventually argued against it: - It adds to UAPI that needs to be maintained forever, and upcoming work is restricting extension header ordering anyway, leaving little reason for another sysctl knob - exthdrs_core.c is always built-in even when CONFIG_IPV6=n, where struct net has no .ipv6 member, so the read site would need an ifdef'd fallback to a constant anyway Therefore, just use a constant (IP6_MAX_EXT_HDRS_CNT). All four extension header walking functions are now bound by this limit. Note that the check in ip6_protocol_deliver_rcu() happens right before the goto resubmit, such that we don't have to have a test for ipv6_ext_hdr() in the fast-path. There's an ongoing IETF draft-iurman-6man-eh-occurrences to enforce IPv6 extension headers ordering and occurrence. The latter also discusses security implications. As per RFC8200 section 4.1, the occurrence rules for extension headers provide a practical upper bound which is 8. In order to be conservative, let's define IP6_MAX_EXT_HDRS_CNT as 12 to leave enough room for quirky setups. In the unlikely event that this is still not enough, then we might need to reconsider a sysctl. Signed-off-by: Daniel Borkmann Reviewed-by: Ido Schimmel Reviewed-by: Eric Dumazet Reviewed-by: Justin Iurman Link: https://patch.msgid.link/20260429154648.809751-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 6 ++++++ include/net/ipv6.h | 3 +++ net/ipv6/exthdrs_core.c | 7 +++++++ net/ipv6/ip6_input.c | 5 +++++ net/ipv6/ip6_tunnel.c | 4 ++++ 5 files changed, 25 insertions(+) diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index e0ca3904ff8e..2f312d1f67d6 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -99,6 +99,7 @@ FN(FRAG_TOO_FAR) \ FN(TCP_MINTTL) \ FN(IPV6_BAD_EXTHDR) \ + FN(IPV6_TOO_MANY_EXTHDRS) \ FN(IPV6_NDISC_FRAG) \ FN(IPV6_NDISC_HOP_LIMIT) \ FN(IPV6_NDISC_BAD_CODE) \ @@ -494,6 +495,11 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_MINTTL, /** @SKB_DROP_REASON_IPV6_BAD_EXTHDR: Bad IPv6 extension header. */ SKB_DROP_REASON_IPV6_BAD_EXTHDR, + /** + * @SKB_DROP_REASON_IPV6_TOO_MANY_EXTHDRS: Number of IPv6 extension + * headers in the packet exceeds IP6_MAX_EXT_HDRS_CNT. + */ + SKB_DROP_REASON_IPV6_TOO_MANY_EXTHDRS, /** @SKB_DROP_REASON_IPV6_NDISC_FRAG: invalid frag (suppress_frag_ndisc). */ SKB_DROP_REASON_IPV6_NDISC_FRAG, /** @SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT: invalid hop limit. */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d042afe7a245..1dec81faff28 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -90,6 +90,9 @@ struct ip_tunnel_info; #define IP6_DEFAULT_MAX_DST_OPTS_LEN INT_MAX /* No limit */ #define IP6_DEFAULT_MAX_HBH_OPTS_LEN INT_MAX /* No limit */ +/* Hard limit on traversed IPv6 extension headers */ +#define IP6_MAX_EXT_HDRS_CNT 12 + /* * Addr type * diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 49e31e4ae7b7..9d06d487e8b1 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -73,6 +73,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, __be16 *frag_offp) { u8 nexthdr = *nexthdrp; + int exthdr_cnt = 0; *frag_offp = 0; @@ -82,6 +83,8 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, if (nexthdr == NEXTHDR_NONE) return -1; + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) + return -1; hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (!hp) return -1; @@ -190,6 +193,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, { unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); u8 nexthdr = ipv6_hdr(skb)->nexthdr; + int exthdr_cnt = 0; bool found; if (fragoff) @@ -216,6 +220,9 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, return -ENOENT; } + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) + return -EBADMSG; + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (!hp) return -EBADMSG; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 967b07aeb683..8972863c93ee 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -403,6 +403,7 @@ INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *)); void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, bool have_final) { + int exthdr_cnt = IP6CB(skb)->flags & IP6SKB_HOPBYHOP ? 1 : 0; const struct inet6_protocol *ipprot; struct inet6_dev *idev; unsigned int nhoff; @@ -487,6 +488,10 @@ resubmit_final: nexthdr = ret; goto resubmit_final; } else { + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) { + SKB_DR_SET(reason, IPV6_TOO_MANY_EXTHDRS); + goto discard; + } goto resubmit; } } else if (ret == 0) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index c468c83af0f2..9d1037ac082f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -399,11 +399,15 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) unsigned int nhoff = raw - skb->data; unsigned int off = nhoff + sizeof(*ipv6h); u8 nexthdr = ipv6h->nexthdr; + int exthdr_cnt = 0; while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { struct ipv6_opt_hdr *hdr; u16 optlen; + if (unlikely(exthdr_cnt++ >= IP6_MAX_EXT_HDRS_CNT)) + break; + if (!pskb_may_pull(skb, off + sizeof(*hdr))) break; -- cgit v1.2.3 From 26ebd12e67bfc3543d77ce586c33ef29fcafab20 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 29 Apr 2026 16:19:30 +0800 Subject: net: enetc: fix VSI mailbox timeout handling and DMA lifecycle In the current VSI mailbox implementation, the VSI allocates a DMA buffer to store the message sent to the PSI. When the PSI receives the message request from the VSI, the hardware copies the message data from this DMA buffer to PSI's DMA buffer for processing. When enetc_msg_vsi_send() times out, two scenarios can occur: 1) Use-after-free: If the hardware hasn't completed message copying when the VSI frees the buffer, the hardware may subsequently copy the data from freed memory to PSI's DMA buffer. 2) Message race: If PSI hasn't processed the previous message when the next message is sent, the VSI may receive the previous message's reply, leading to incorrect handling. To address these issues, implement the following changes: - Check the mailbox busy status before sending a new message. If the mailbox is in busy state, it indicates the previous message is still being processed, so return an error immediately. - Add the 'msg' field to struct enetc_si to preserve the DMA buffer information. The caller of enetc_msg_vsi_send() no longer frees the DMA buffer. Instead, defer freeing until it is safe to do so (when mailbox is not busy on next send). - Add cleanup in enetc_vf_remove() to free the last message buffer. This ensures the DMA buffer remains valid during message copying and prevents message reply mismatches. Fixes: beb74ac878c8 ("enetc: Add vf to pf messaging support") Signed-off-by: Wei Fang Link: https://patch.msgid.link/20260429081930.3259824-1-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.h | 1 + drivers/net/ethernet/freescale/enetc/enetc_vf.c | 42 ++++++++++++++++++++----- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index e663bb5e614e..e691144e8756 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -330,6 +330,7 @@ struct enetc_si { struct workqueue_struct *workqueue; struct work_struct rx_mode_task; struct dentry *debugfs_root; + struct enetc_msg_swbd msg; /* Only valid for VSI */ }; #define ENETC_SI_ALIGN 32 diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 6c4b374bcb0e..df8e95cc47d0 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -17,11 +17,36 @@ static void enetc_msg_vsi_write_msg(struct enetc_hw *hw, enetc_wr(hw, ENETC_VSIMSGSNDAR0, val); } +static void enetc_msg_dma_free(struct device *dev, struct enetc_msg_swbd *msg) +{ + if (msg->vaddr) { + dma_free_coherent(dev, msg->size, msg->vaddr, msg->dma); + msg->vaddr = NULL; + } +} + static int enetc_msg_vsi_send(struct enetc_si *si, struct enetc_msg_swbd *msg) { + struct device *dev = &si->pdev->dev; int timeout = 100; u32 vsimsgsr; + /* The VSI mailbox may be busy if last message was not yet processed + * by PSI. So need to check the mailbox status before sending. + */ + vsimsgsr = enetc_rd(&si->hw, ENETC_VSIMSGSR); + if (vsimsgsr & ENETC_VSIMSGSR_MB) { + /* It is safe to free the DMA buffer here, the caller does + * not access the DMA buffer if enetc_msg_vsi_send() fails. + */ + enetc_msg_dma_free(dev, msg); + dev_err(dev, "VSI mailbox is busy\n"); + return -EIO; + } + + /* Free the DMA buffer of the last message */ + enetc_msg_dma_free(dev, &si->msg); + si->msg = *msg; enetc_msg_vsi_write_msg(&si->hw, msg); do { @@ -32,12 +57,15 @@ static int enetc_msg_vsi_send(struct enetc_si *si, struct enetc_msg_swbd *msg) usleep_range(1000, 2000); } while (--timeout); - if (!timeout) + if (!timeout) { + dev_err(dev, "VSI mailbox timeout\n"); + return -ETIMEDOUT; + } /* check for message delivery error */ if (vsimsgsr & ENETC_VSIMSGSR_MS) { - dev_err(&si->pdev->dev, "VSI command execute error: %d\n", + dev_err(dev, "VSI command execute error: %d\n", ENETC_SIMSGSR_GET_MC(vsimsgsr)); return -EIO; } @@ -50,7 +78,6 @@ static int enetc_msg_vsi_set_primary_mac_addr(struct enetc_ndev_priv *priv, { struct enetc_msg_cmd_set_primary_mac *cmd; struct enetc_msg_swbd msg; - int err; msg.size = ALIGN(sizeof(struct enetc_msg_cmd_set_primary_mac), 64); msg.vaddr = dma_alloc_coherent(priv->dev, msg.size, &msg.dma, @@ -67,11 +94,7 @@ static int enetc_msg_vsi_set_primary_mac_addr(struct enetc_ndev_priv *priv, memcpy(&cmd->mac, saddr, sizeof(struct sockaddr)); /* send the command and wait */ - err = enetc_msg_vsi_send(priv->si, &msg); - - dma_free_coherent(priv->dev, msg.size, msg.vaddr, msg.dma); - - return err; + return enetc_msg_vsi_send(priv->si, &msg); } static int enetc_vf_set_mac_addr(struct net_device *ndev, void *addr) @@ -259,6 +282,7 @@ static void enetc_vf_remove(struct pci_dev *pdev) { struct enetc_si *si = pci_get_drvdata(pdev); struct enetc_ndev_priv *priv; + struct enetc_msg_swbd msg; priv = netdev_priv(si->ndev); unregister_netdev(si->ndev); @@ -270,7 +294,9 @@ static void enetc_vf_remove(struct pci_dev *pdev) free_netdev(si->ndev); + msg = si->msg; enetc_pci_remove(pdev); + enetc_msg_dma_free(&pdev->dev, &msg); } static const struct pci_device_id enetc_vf_id_table[] = { -- cgit v1.2.3 From 694de316f607fe2473d52ca0707e3918e72c1562 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Wed, 29 Apr 2026 16:37:42 +0800 Subject: net: libwx: fix VF illegal register access Register WX_CFG_PORT_ST is a PF restricted register. When a VF is initialized, attempting to read this register triggers an illegal register access, which lead to a system hang. When the device is VF, the bus function ID can be obtained directly from the PCI_FUNC(pdev->devfn). Fixes: a04ea57aae37 ("net: libwx: fix device bus LAN ID") Cc: stable@vger.kernel.org Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/4D1F4452D21DE107+20260429083743.88961-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index d3772d01e00b..2451f6b20b11 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -2480,8 +2480,11 @@ int wx_sw_init(struct wx *wx) wx->oem_svid = pdev->subsystem_vendor; wx->oem_ssid = pdev->subsystem_device; wx->bus.device = PCI_SLOT(pdev->devfn); - wx->bus.func = FIELD_GET(WX_CFG_PORT_ST_LANID, - rd32(wx, WX_CFG_PORT_ST)); + if (pdev->is_virtfn) + wx->bus.func = PCI_FUNC(pdev->devfn); + else + wx->bus.func = FIELD_GET(WX_CFG_PORT_ST_LANID, + rd32(wx, WX_CFG_PORT_ST)); if (wx->oem_svid == PCI_VENDOR_ID_WANGXUN || pdev->is_virtfn) { -- cgit v1.2.3 From 7a33345153eeeda195c55f15be27074e4c3b5109 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Wed, 29 Apr 2026 16:37:43 +0800 Subject: net: libwx: use request_irq for VF misc interrupt Currently, request_threaded_irq() is used with a primary handler but a NULL threaded handler, while also setting the IRQF_ONESHOT flag. This specific combination triggers a WARNING since the commit aef30c8d569c ("genirq: Warn about using IRQF_ONESHOT without a threaded handler"). WARNING: kernel/irq/manage.c:1502 at __setup_irq+0x4fa/0x760 Fix the issue by switching to request_irq(), which is the appropriate interface or a non-threaded interrupt handler, and removing the unnecessary IRQF_ONESHOT flag. Fixes: eb4898fde1de ("net: libwx: add wangxun vf common api") Cc: stable@vger.kernel.org Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/786DDC7D5CCA6D0A+20260429083743.88961-2-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_vf_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c index 29cdbed2e5ec..94ff8f5f0b4c 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c @@ -99,8 +99,8 @@ int wx_request_msix_irqs_vf(struct wx *wx) } } - err = request_threaded_irq(wx->msix_entry->vector, wx_msix_misc_vf, - NULL, IRQF_ONESHOT, netdev->name, wx); + err = request_irq(wx->msix_entry->vector, wx_msix_misc_vf, + 0, netdev->name, wx); if (err) { wx_err(wx, "request_irq for msix_other failed: %d\n", err); goto free_queue_irqs; -- cgit v1.2.3 From 75df490c9e8457990c8b227650f6491218ce018b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 29 Apr 2026 14:02:31 +0200 Subject: net: airoha: Move entries to queue head in case of DMA mapping failure in airoha_dev_xmit() In order to respect the original descriptor order and avoid any potential IOMMU fault or memory corruption, move pending queue entries to the head of hw queue tx_list if the DMA mapping of current inflight packet fails in airoha_dev_xmit routine. Fixes: 3f47e67dff1f7 ("net: airoha: Add the capability to consume out-of-order DMA tx descriptors") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260429-airoha-xmit-unmap-error-path-v2-1-32e43b7c6d25@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index f8b3d53bccad..d0c0c0ec8a80 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2120,14 +2120,12 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, return NETDEV_TX_OK; error_unmap: - while (!list_empty(&tx_list)) { - e = list_first_entry(&tx_list, struct airoha_queue_entry, - list); + list_for_each_entry(e, &tx_list, list) { dma_unmap_single(dev->dev.parent, e->dma_addr, e->dma_len, DMA_TO_DEVICE); e->dma_addr = 0; - list_move_tail(&e->list, &q->tx_list); } + list_splice(&tx_list, &q->tx_list); spin_unlock_bh(&q->lock); error: -- cgit v1.2.3 From aaadccde312f1f6c752461e015adcaa25d463cbc Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 29 Apr 2026 07:57:13 +0530 Subject: octeontx2-af: npc: cn20k: Propagate MCAM key-type errors on cn20k npc_mcam_idx_2_key_type() can fail; callers used to ignore it and still used kw_type when enabling, configuring, copying, and reading MCAM entries. That could program or decode hardware with an undefined key type. Return -EINVAL when key-type lookup fails. Return -EINVAL from npc_cn20k_copy_mcam_entry() when src and dest key types differ instead of failing silently. Change npc_cn20k_{enable,config,copy,read}_mcam_entry() to return int on success or error. Thread those errors through the cn20k MCAM write and read mbox handlers, the cn20k baseline steer read path, NPC defrag move (disable/copy/enable with dev_err and -EFAULT), and the DMAC update path in rvu_npc_fs.c. Make npc_copy_mcam_entry() return int so the cn20k branch can return npc_cn20k_copy_mcam_entry() without a void/int mismatch, and fail NPC_MCAM_SHIFT_ENTRY when copy fails. Cc: Suman Ghosh Cc: Dan Carpenter Fixes: 6d1e70282f76 ("octeontx2-af: npc: cn20k: Use common APIs") Link: https://lore.kernel.org/netdev/adiQJvuKlEhq2ILx@stanley.mountain/ Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260429022722.1110289-2-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/cn20k/npc.c | 122 +++++++++++++++------ .../net/ethernet/marvell/octeontx2/af/cn20k/npc.h | 20 ++-- .../net/ethernet/marvell/octeontx2/af/rvu_npc.c | 18 ++- .../net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c | 20 ++-- 4 files changed, 124 insertions(+), 56 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 7291fdb89b03..7170dcf26200 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -798,7 +798,7 @@ program_mkex_extr: iounmap(mkex_prfl_addr); } -void +int npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, int index, bool enable) { @@ -808,7 +808,9 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, u64 cfg, hw_prio; u8 kw_type; - npc_mcam_idx_2_key_type(rvu, index, &kw_type); + if (npc_mcam_idx_2_key_type(rvu, index, &kw_type)) + return -EINVAL; + if (kw_type == NPC_MCAM_KEY_X2) { cfg = rvu_read64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, @@ -819,7 +821,7 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, rvu_write64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), cfg); - return; + return 0; } /* For NPC_CN20K_MCAM_KEY_X4 keys, both the banks @@ -836,6 +838,8 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), cfg); } + + return 0; } void @@ -1042,9 +1046,9 @@ npc_cn20k_set_mcam_bank_cfg(struct rvu *rvu, int blkaddr, int mcam_idx, } } -void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, - u8 intf, struct cn20k_mcam_entry *entry, - bool enable, u8 hw_prio, u8 req_kw_type) +int npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, + u8 intf, struct cn20k_mcam_entry *entry, + bool enable, u8 hw_prio, u8 req_kw_type) { struct npc_mcam *mcam = &rvu->hw->mcam; int mcam_idx = index % mcam->banksize; @@ -1052,10 +1056,13 @@ void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, int kw = 0; u8 kw_type; + if (npc_mcam_idx_2_key_type(rvu, index, &kw_type)) + return -EINVAL; + /* Disable before mcam entry update */ - npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, false); + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, false)) + return -EINVAL; - npc_mcam_idx_2_key_type(rvu, index, &kw_type); /* CAM1 takes the comparison value and * CAM0 specifies match for a bit in key being '0' or '1' or 'dontcare'. * CAM1 = 0 & CAM0 = 1 => match if key = 0 @@ -1120,9 +1127,11 @@ set_cfg: /* PF installing VF rule */ npc_cn20k_set_mcam_bank_cfg(rvu, blkaddr, mcam_idx, bank, kw_type, enable, hw_prio); + + return 0; } -void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) +int npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) { struct npc_mcam *mcam = &rvu->hw->mcam; u64 cfg, sreg, dreg, soff, doff; @@ -1132,10 +1141,15 @@ void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) dbank = npc_get_bank(mcam, dest); sbank = npc_get_bank(mcam, src); - npc_mcam_idx_2_key_type(rvu, src, &src_kwtype); - npc_mcam_idx_2_key_type(rvu, dest, &dest_kwtype); + + if (npc_mcam_idx_2_key_type(rvu, src, &src_kwtype)) + return -EINVAL; + + if (npc_mcam_idx_2_key_type(rvu, dest, &dest_kwtype)) + return -EINVAL; + if (src_kwtype != dest_kwtype) - return; + return -EINVAL; src &= (mcam->banksize - 1); dest &= (mcam->banksize - 1); @@ -1170,6 +1184,8 @@ void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) if (src_kwtype == NPC_MCAM_KEY_X2) break; } + + return 0; } static void npc_cn20k_fill_entryword(struct cn20k_mcam_entry *entry, int idx, @@ -1179,16 +1195,17 @@ static void npc_cn20k_fill_entryword(struct cn20k_mcam_entry *entry, int idx, entry->kw_mask[idx] = cam1 ^ cam0; } -void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, - struct cn20k_mcam_entry *entry, - u8 *intf, u8 *ena, u8 *hw_prio) +int npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, + struct cn20k_mcam_entry *entry, + u8 *intf, u8 *ena, u8 *hw_prio) { struct npc_mcam *mcam = &rvu->hw->mcam; u64 cam0, cam1, bank_cfg, cfg; int kw = 0, bank; u8 kw_type; - npc_mcam_idx_2_key_type(rvu, index, &kw_type); + if (npc_mcam_idx_2_key_type(rvu, index, &kw_type)) + return -EINVAL; bank = npc_get_bank(mcam, index); index &= (mcam->banksize - 1); @@ -1298,6 +1315,8 @@ read_action: cfg = rvu_read64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, 0, 1)); entry->vtag_action = cfg; + + return 0; } int rvu_mbox_handler_npc_cn20k_mcam_write_entry(struct rvu *rvu, @@ -1335,11 +1354,10 @@ int rvu_mbox_handler_npc_cn20k_mcam_write_entry(struct rvu *rvu, if (is_pffunc_af(req->hdr.pcifunc)) nix_intf = req->intf; - npc_cn20k_config_mcam_entry(rvu, blkaddr, req->entry, nix_intf, - &req->entry_data, req->enable_entry, - req->hw_prio, req->req_kw_type); + rc = npc_cn20k_config_mcam_entry(rvu, blkaddr, req->entry, nix_intf, + &req->entry_data, req->enable_entry, + req->hw_prio, req->req_kw_type); - rc = 0; exit: mutex_unlock(&mcam->lock); return rc; @@ -1361,11 +1379,13 @@ int rvu_mbox_handler_npc_cn20k_mcam_read_entry(struct rvu *rvu, mutex_lock(&mcam->lock); rc = npc_mcam_verify_entry(mcam, pcifunc, req->entry); - if (!rc) - npc_cn20k_read_mcam_entry(rvu, blkaddr, req->entry, - &rsp->entry_data, &rsp->intf, - &rsp->enable, &rsp->hw_prio); + if (rc) + goto fail; + rc = npc_cn20k_read_mcam_entry(rvu, blkaddr, req->entry, + &rsp->entry_data, &rsp->intf, + &rsp->enable, &rsp->hw_prio); +fail: mutex_unlock(&mcam->lock); return rc; } @@ -1375,11 +1395,13 @@ int rvu_mbox_handler_npc_cn20k_mcam_alloc_and_write_entry(struct rvu *rvu, struct npc_mcam_alloc_and_write_entry_rsp *rsp) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + struct npc_mcam_free_entry_req free_req = { 0 }; struct npc_mcam_alloc_entry_req entry_req; struct npc_mcam_alloc_entry_rsp entry_rsp; struct npc_mcam *mcam = &rvu->hw->mcam; u16 entry = NPC_MCAM_ENTRY_INVALID; - int blkaddr, rc; + struct msg_rsp free_rsp; + int blkaddr, rc, err; u8 nix_intf; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); @@ -1415,12 +1437,23 @@ int rvu_mbox_handler_npc_cn20k_mcam_alloc_and_write_entry(struct rvu *rvu, else nix_intf = pfvf->nix_rx_intf; - npc_cn20k_config_mcam_entry(rvu, blkaddr, entry, nix_intf, - &req->entry_data, req->enable_entry, - req->hw_prio, req->req_kw_type); + rc = npc_cn20k_config_mcam_entry(rvu, blkaddr, entry, nix_intf, + &req->entry_data, req->enable_entry, + req->hw_prio, req->req_kw_type); mutex_unlock(&mcam->lock); + if (rc) { + free_req.hdr.pcifunc = req->hdr.pcifunc; + free_req.entry = entry_rsp.entry; + err = rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &free_rsp); + if (err) + dev_err(rvu->dev, + "%s: Error to free mcam idx %u\n", + __func__, entry_rsp.entry); + return rc; + } + rsp->entry = entry_rsp.entry; return 0; } @@ -1480,9 +1513,9 @@ int rvu_mbox_handler_npc_cn20k_read_base_steer_rule(struct rvu *rvu, read_entry: /* Read the mcam entry */ - npc_cn20k_read_mcam_entry(rvu, blkaddr, index, - &rsp->entry, &intf, - &enable, &hw_prio); + rc = npc_cn20k_read_mcam_entry(rvu, blkaddr, index, + &rsp->entry, &intf, + &enable, &hw_prio); mutex_unlock(&mcam->lock); out: return rc; @@ -3607,9 +3640,30 @@ int npc_defrag_move_vdx_to_free(struct rvu *rvu, NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(midx, bank)); - npc_cn20k_enable_mcam_entry(rvu, blkaddr, old_midx, false); - npc_cn20k_copy_mcam_entry(rvu, blkaddr, old_midx, new_midx); - npc_cn20k_enable_mcam_entry(rvu, blkaddr, new_midx, true); + /* If bug happened during copy/enable mcam, then there is a bug in allocation + * algorithm itself. There is no point in rewinding and returning, as it + * will face further issue. Return error after printing error + */ + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, old_midx, false)) { + dev_err(rvu->dev, + "%s: Error happened while disabling old_mid=%u\n", + __func__, old_midx); + return -EFAULT; + } + + if (npc_cn20k_copy_mcam_entry(rvu, blkaddr, old_midx, new_midx)) { + dev_err(rvu->dev, + "%s: Error happened while copying old_midx=%u new_midx=%u\n", + __func__, old_midx, new_midx); + return -EFAULT; + } + + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, new_midx, true)) { + dev_err(rvu->dev, + "%s: Error happened while enabling new_mid=%u\n", + __func__, new_midx); + return -EFAULT; + } midx = new_midx % mcam->banksize; bank = new_midx / mcam->banksize; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h index 815d0b257a7e..8f3eea9cfb1d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h @@ -320,16 +320,16 @@ void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc); int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, u16 *mcast, u16 *promisc, u16 *ucast); -void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, - u8 intf, struct cn20k_mcam_entry *entry, - bool enable, u8 hw_prio, u8 req_kw_type); -void npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, - int index, bool enable); -void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, - u16 src, u16 dest); -void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, - struct cn20k_mcam_entry *entry, u8 *intf, - u8 *ena, u8 *hw_prio); +int npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, + u8 intf, struct cn20k_mcam_entry *entry, + bool enable, u8 hw_prio, u8 req_kw_type); +int npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, + int index, bool enable); +int npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, + u16 src, u16 dest); +int npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, + struct cn20k_mcam_entry *entry, u8 *intf, + u8 *ena, u8 *hw_prio); void npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int bank, int index); int npc_mcam_idx_2_key_type(struct rvu *rvu, u16 mcam_idx, u8 *key_type); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index c2ca5ed1d028..ecaf0946b852 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -241,7 +241,10 @@ void npc_enable_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, if (index < 0 || index >= mcam->banksize * mcam->banks) return; - return npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, enable); + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, enable)) + dev_err(rvu->dev, "Error to %s mcam %u entry\n", + enable ? "enable" : "disable", index); + return; } index &= (mcam->banksize - 1); @@ -589,8 +592,8 @@ void npc_read_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, NPC_AF_MCAMEX_BANKX_CFG(src, sbank)) & 1; } -static void npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, - int blkaddr, u16 src, u16 dest) +static int npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, + int blkaddr, u16 src, u16 dest) { int dbank = npc_get_bank(mcam, dest); int sbank = npc_get_bank(mcam, src); @@ -630,6 +633,7 @@ static void npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, NPC_AF_MCAMEX_BANKX_CFG(src, sbank)); rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_CFG(dest, dbank), cfg); + return 0; } u64 npc_get_mcam_action(struct rvu *rvu, struct npc_mcam *mcam, @@ -3266,7 +3270,10 @@ int rvu_mbox_handler_npc_mcam_shift_entry(struct rvu *rvu, npc_enable_mcam_entry(rvu, mcam, blkaddr, new_entry, false); /* Copy rule from old entry to new entry */ - npc_copy_mcam_entry(rvu, mcam, blkaddr, old_entry, new_entry); + if (npc_copy_mcam_entry(rvu, mcam, blkaddr, old_entry, new_entry)) { + rc = NPC_MCAM_INVALID_REQ; + break; + } /* Copy counter mapping, if any */ cntr = mcam->entry2cntr_map[old_entry]; @@ -3284,7 +3291,8 @@ int rvu_mbox_handler_npc_mcam_shift_entry(struct rvu *rvu, /* If shift has failed then report the failed index */ if (index != req->shift_count) { - rc = NPC_MCAM_PERM_DENIED; + if (!rc) + rc = NPC_MCAM_PERM_DENIED; rsp->failed_entry_idx = index; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index b45798d9fdab..fe10554b1f0e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -1980,13 +1980,15 @@ static int npc_update_dmac_value(struct rvu *rvu, int npcblkaddr, ether_addr_copy(rule->packet.dmac, pfvf->mac_addr); - if (is_cn20k(rvu->pdev)) - npc_cn20k_read_mcam_entry(rvu, npcblkaddr, rule->entry, - cn20k_entry, &intf, - &enable, &hw_prio); - else + if (is_cn20k(rvu->pdev)) { + if (npc_cn20k_read_mcam_entry(rvu, npcblkaddr, rule->entry, + cn20k_entry, &intf, + &enable, &hw_prio)) + return -EINVAL; + } else { npc_read_mcam_entry(rvu, mcam, npcblkaddr, rule->entry, entry, &intf, &enable); + } npc_update_entry(rvu, NPC_DMAC, &mdata, ether_addr_to_u64(pfvf->mac_addr), 0, @@ -2038,8 +2040,12 @@ void npc_mcam_enable_flows(struct rvu *rvu, u16 target) continue; } - if (rule->vfvlan_cfg) - npc_update_dmac_value(rvu, blkaddr, rule, pfvf); + if (rule->vfvlan_cfg) { + if (npc_update_dmac_value(rvu, blkaddr, rule, pfvf)) + dev_err(rvu->dev, + "Update dmac failed for %u, target=%#x\n", + rule->entry, target); + } if (rule->rx_action.op == NIX_RX_ACTION_DEFAULT) { if (!def_ucast_rule) -- cgit v1.2.3 From 1100af13fd14b523f1b0634c14be497b41c78958 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 29 Apr 2026 07:57:14 +0530 Subject: octeontx2-af: npc: cn20k: Drop debugfs_create_file() error checks in init debugfs is not intended to be checked for allocation failures the way other kernel APIs are: callers should not fail probe or subsystem init because a debugfs node could not be created, including when debugfs is disabled in Kconfig. Replacing NULL checks with IS_ERR() checks is similarly wrong for optional debugfs. Remove dentry checks and -EFAULT returns from npc_cn20k_debugfs_init(). See: https://staticthinking.wordpress.com/2023/07/24/ debugfs-functions-are-not-supposed-to-be-checked/ Cc: Dan Carpenter Fixes: 528530dff56b ("octeontx2-af: npc: cn20k: add debugfs support") Link: https://lore.kernel.org/netdev/adjNGPWKMOk3KgWL@stanley.mountain/ Reviewed-by: Simon Horman Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260429022722.1110289-3-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2/af/cn20k/debugfs.c | 33 +++++++--------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c index 3debf2fae1a4..6f13296303cb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c @@ -249,34 +249,21 @@ DEFINE_SHOW_ATTRIBUTE(npc_defrag); int npc_cn20k_debugfs_init(struct rvu *rvu) { struct npc_priv_t *npc_priv = npc_priv_get(); - struct dentry *npc_dentry; - npc_dentry = debugfs_create_file("mcam_layout", 0444, rvu->rvu_dbg.npc, - npc_priv, &npc_mcam_layout_fops); + debugfs_create_file("mcam_layout", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_mcam_layout_fops); - if (!npc_dentry) - return -EFAULT; + debugfs_create_file("mcam_default", 0444, rvu->rvu_dbg.npc, + rvu, &npc_mcam_default_fops); - npc_dentry = debugfs_create_file("mcam_default", 0444, rvu->rvu_dbg.npc, - rvu, &npc_mcam_default_fops); + debugfs_create_file("vidx2idx", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_vidx2idx_map_fops); - if (!npc_dentry) - return -EFAULT; + debugfs_create_file("idx2vidx", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_idx2vidx_map_fops); - npc_dentry = debugfs_create_file("vidx2idx", 0444, rvu->rvu_dbg.npc, - npc_priv, &npc_vidx2idx_map_fops); - if (!npc_dentry) - return -EFAULT; - - npc_dentry = debugfs_create_file("idx2vidx", 0444, rvu->rvu_dbg.npc, - npc_priv, &npc_idx2vidx_map_fops); - if (!npc_dentry) - return -EFAULT; - - npc_dentry = debugfs_create_file("defrag", 0444, rvu->rvu_dbg.npc, - npc_priv, &npc_defrag_fops); - if (!npc_dentry) - return -EFAULT; + debugfs_create_file("defrag", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_defrag_fops); return 0; } -- cgit v1.2.3 From adb5ff41efbc0a9d86fabf880076973379db6e49 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 29 Apr 2026 07:57:15 +0530 Subject: octeontx2-af: npc: cn20k: Propagate errors in defrag MCAM alloc rollback npc_defrag_alloc_free_slots() allocates MCAM indexes in up to two passes on bank0 then bank1. On failure it rolls back by freeing entries already placed in save[]. __npc_subbank_alloc() can return a negative errno while only part of the indexes are valid. The rollback loop used rc for npc_mcam_idx_2_subbank_idx() as well, so a successful lookup stored zero in rc and a later __npc_subbank_free() failure could still end with return 0 when the allocation path had also left rc at zero (for example shortfall after zero return values from the alloc helpers). Jump to the rollback path immediately when either __npc_subbank_alloc() call fails, preserving its errno. If both calls succeed but the total allocated count is still less than cnt, set rc to -ENOSPC before rollback. Use a separate err variable for npc_mcam_idx_2_subbank_idx() so a successful lookup no longer clears a non-zero rc from the allocation phase. Cc: Dan Carpenter Fixes: 645c6e3c1999 ("octeontx2-af: npc: cn20k: virtual index support") Link: https://lore.kernel.org/netdev/adjNJEpILRZATB2N@stanley.mountain/ Reviewed-by: Simon Horman Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260429022722.1110289-4-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 7170dcf26200..87da43088b67 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -2338,6 +2338,7 @@ err2: __npc_subbank_mark_free(rvu, sb); err1: kfree(save); + *alloc_cnt = 0; return rc; } @@ -3515,7 +3516,7 @@ static int npc_defrag_alloc_free_slots(struct rvu *rvu, { int alloc_cnt1, alloc_cnt2; struct npc_subbank *sb; - int rc, sb_off, i; + int rc, sb_off, i, err; bool deleted; sb = &npc_priv.sb[f->idx]; @@ -3529,6 +3530,7 @@ static int npc_defrag_alloc_free_slots(struct rvu *rvu, NPC_MCAM_LOWER_PRIO, false, cnt, save, cnt, true, &alloc_cnt1); + if (alloc_cnt1 < cnt) { rc = __npc_subbank_alloc(rvu, sb, NPC_MCAM_KEY_X2, sb->b1b, @@ -3544,15 +3546,17 @@ static int npc_defrag_alloc_free_slots(struct rvu *rvu, dev_err(rvu->dev, "%s: Failed to alloc cnt=%u alloc_cnt1=%u alloc_cnt2=%u\n", __func__, cnt, alloc_cnt1, alloc_cnt2); + rc = -ENOSPC; goto fail_free_alloc; } + return 0; fail_free_alloc: for (i = 0; i < alloc_cnt1 + alloc_cnt2; i++) { - rc = npc_mcam_idx_2_subbank_idx(rvu, save[i], - &sb, &sb_off); - if (rc) { + err = npc_mcam_idx_2_subbank_idx(rvu, save[i], + &sb, &sb_off); + if (err) { dev_err(rvu->dev, "%s: Error to find subbank for mcam idx=%u\n", __func__, save[i]); -- cgit v1.2.3 From d7e5940c4c508df73b15d9bc29628a83b3674fff Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 29 Apr 2026 07:57:16 +0530 Subject: octeontx2-af: npc: cn20k: Fix target map and rule npc_defrag_move_vdx_to_free() disables, copies, and enables the MCAM entry at a new index but previously left entry2target_pffunc[] and the mcam_rules list still keyed to the old index. Copy the target PF association to the new slot, clear the old one, and retarget the rule entry so software state matches the relocated hardware context. Fixes: 645c6e3c1999 ("octeontx2-af: npc: cn20k: virtual index support") Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260429022722.1110289-5-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 87da43088b67..70ce3f49adc1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -3602,9 +3602,10 @@ int npc_defrag_move_vdx_to_free(struct rvu *rvu, struct npc_defrag_node *v, int cnt, u16 *save) { + u16 new_midx, old_midx, vidx, target_pf; struct npc_mcam *mcam = &rvu->hw->mcam; + struct rvu_npc_mcam_rule *rule, *tmp; int i, vidx_cnt, rc, sb_off; - u16 new_midx, old_midx, vidx; struct npc_subbank *sb; bool deleted; u16 pcifunc; @@ -3723,8 +3724,21 @@ int npc_defrag_move_vdx_to_free(struct rvu *rvu, mcam->entry2pfvf_map[new_midx] = pcifunc; /* Counter is not preserved */ mcam->entry2cntr_map[new_midx] = new_midx; + target_pf = mcam->entry2target_pffunc[old_midx]; + mcam->entry2target_pffunc[new_midx] = target_pf; + mcam->entry2target_pffunc[old_midx] = NPC_MCAM_INVALID_MAP; + npc_mcam_set_bit(mcam, new_midx); + /* Note: list order is not functionally required for mcam_rules */ + list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) { + if (rule->entry != old_midx) + continue; + + rule->entry = new_midx; + break; + } + /* Mark as invalid */ v->vidx[vidx_cnt - i - 1] = -1; save[cnt - i - 1] = -1; -- cgit v1.2.3 From d2dabf09632c84b7acdc0fb2eeb6b6fe9c0f9106 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 29 Apr 2026 07:57:17 +0530 Subject: octeontx2-af: npc: cn20k: Clear MCAM entries by index and key width Replace the old four-argument CN20K MCAM clear with a per-bank static helper and npc_cn20k_clear_mcam_entry() that takes a logical MCAM index, resolves the key width via npc_mcam_idx_2_key_type(), and clears either one bank (X2) or every bank (X4). Call it from npc_clear_mcam_entry() on cn20k and log when key-type lookup fails. Use the per-bank helper from npc_cn20k_config_mcam_entry() for pre-program clears. For loopback VFs, use the promisc MCAM index as ucast_idx when copying RSS action for promisc, matching cn20k default-rule layout. Cc: Suman Ghosh Fixes: 6d1e70282f76 ("octeontx2-af: npc: cn20k: Use common APIs") Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260429022722.1110289-6-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/cn20k/npc.c | 37 +++++++++++++++++++--- .../net/ethernet/marvell/octeontx2/af/cn20k/npc.h | 3 +- .../net/ethernet/marvell/octeontx2/af/rvu_npc.c | 17 ++++++++-- 3 files changed, 48 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 70ce3f49adc1..112c37c190b1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -842,8 +842,8 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, return 0; } -void -npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int bank, int index) +static void +npc_clear_x2_entry(struct rvu *rvu, int blkaddr, int bank, int index) { rvu_write64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(index, bank, 1), @@ -877,6 +877,33 @@ npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int bank, int index) NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(index, bank), 0); } +int +npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int mcam_idx) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + int bank = npc_get_bank(mcam, mcam_idx); + u8 kw_type; + int index; + + if (npc_mcam_idx_2_key_type(rvu, mcam_idx, &kw_type)) + return -EINVAL; + + index = mcam_idx & (mcam->banksize - 1); + + if (kw_type == NPC_MCAM_KEY_X2) { + npc_clear_x2_entry(rvu, blkaddr, bank, index); + return 0; + } + + /* For NPC_MCAM_KEY_X4 keys, both the banks + * need to be programmed with the same value. + */ + for (bank = 0; bank < mcam->banks_per_entry; bank++) + npc_clear_x2_entry(rvu, blkaddr, bank, index); + + return 0; +} + static void npc_cn20k_get_keyword(struct cn20k_mcam_entry *entry, int idx, u64 *cam0, u64 *cam1) { @@ -1071,7 +1098,7 @@ int npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, */ if (kw_type == NPC_MCAM_KEY_X2) { /* Clear mcam entry to avoid writes being suppressed by NPC */ - npc_cn20k_clear_mcam_entry(rvu, blkaddr, bank, mcam_idx); + npc_clear_x2_entry(rvu, blkaddr, bank, mcam_idx); npc_cn20k_config_kw_x2(rvu, mcam, blkaddr, mcam_idx, intf, entry, bank, kw_type, kw, req_kw_type); @@ -1096,8 +1123,8 @@ int npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, } /* Clear mcam entry to avoid writes being suppressed by NPC */ - npc_cn20k_clear_mcam_entry(rvu, blkaddr, 0, mcam_idx); - npc_cn20k_clear_mcam_entry(rvu, blkaddr, 1, mcam_idx); + npc_clear_x2_entry(rvu, blkaddr, 0, mcam_idx); + npc_clear_x2_entry(rvu, blkaddr, 1, mcam_idx); npc_cn20k_config_kw_x4(rvu, mcam, blkaddr, mcam_idx, intf, entry, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h index 8f3eea9cfb1d..2f761b97f91b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h @@ -330,8 +330,7 @@ int npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, int npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, struct cn20k_mcam_entry *entry, u8 *intf, u8 *ena, u8 *hw_prio); -void npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, - int bank, int index); +int npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int index); int npc_mcam_idx_2_key_type(struct rvu *rvu, u16 mcam_idx, u8 *key_type); u16 npc_cn20k_vidx2idx(u16 index); u16 npc_cn20k_idx2vidx(u16 idx); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index ecaf0946b852..44ca65efc80f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -261,6 +261,13 @@ static void npc_clear_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, int bank = npc_get_bank(mcam, index); int actbank = bank; + if (is_cn20k(rvu->pdev)) { + if (npc_cn20k_clear_mcam_entry(rvu, blkaddr, index)) + dev_err(rvu->dev, "%s Failed to clear mcam %u\n", + __func__, index); + return; + } + index &= (mcam->banksize - 1); for (; bank < (actbank + mcam->banks_per_entry); bank++) { rvu_write64(rvu, blkaddr, @@ -755,9 +762,15 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, /* If the corresponding PF's ucast action is RSS, * use the same action for promisc also + * Please note that for lbk(s) "index" and "ucast_idx" + * will be same. */ - ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, - nixlf, NIXLF_UCAST_ENTRY); + if (is_lbk_vf(rvu, pcifunc)) + ucast_idx = index; + else + ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, + nixlf, NIXLF_UCAST_ENTRY); + if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx)) *(u64 *)&action = npc_get_mcam_action(rvu, mcam, blkaddr, ucast_idx); -- cgit v1.2.3 From 2b6d6bb7282c34dd8c04ee782393231acf5a26e2 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 29 Apr 2026 07:57:18 +0530 Subject: octeontx2-af: npc: cn20k: Fix bank value For X4 keys its loop reused the bank parameter as the loop counter, so bank no longer reflected the caller's bank after the loop and the control flow was hard to follow. Program NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT directly in npc_cn20k_config_mcam_entry(): one CFG write for X2 using the computed bank, and one CFG write per bank inside the X4 action loop. Enable the entry at the end with npc_cn20k_enable_mcam_entry(..., true) instead of embedding the enable bit in bank_cfg via the removed helper. Cc: Suman Ghosh Fixes: 4e527f1e5c15 ("octeontx2-af: npc: cn20k: Add new mailboxes for CN20K silicon") Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260429022722.1110289-7-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/cn20k/npc.c | 92 +++++++++------------- 1 file changed, 37 insertions(+), 55 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 112c37c190b1..4773277fd409 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -1045,34 +1045,6 @@ static void npc_cn20k_config_kw_x4(struct rvu *rvu, struct npc_mcam *mcam, kw, req_kw_type); } -static void -npc_cn20k_set_mcam_bank_cfg(struct rvu *rvu, int blkaddr, int mcam_idx, - int bank, u8 kw_type, bool enable, u8 hw_prio) -{ - struct npc_mcam *mcam = &rvu->hw->mcam; - u64 bank_cfg; - - bank_cfg = (u64)hw_prio << 24; - if (enable) - bank_cfg |= 0x1; - - if (kw_type == NPC_MCAM_KEY_X2) { - rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), - bank_cfg); - return; - } - - /* For NPC_MCAM_KEY_X4 keys, both the banks - * need to be programmed with the same value. - */ - for (bank = 0; bank < mcam->banks_per_entry; bank++) { - rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), - bank_cfg); - } -} - int npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, u8 intf, struct cn20k_mcam_entry *entry, bool enable, u8 hw_prio, u8 req_kw_type) @@ -1080,6 +1052,7 @@ int npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, struct npc_mcam *mcam = &rvu->hw->mcam; int mcam_idx = index % mcam->banksize; int bank = index / mcam->banksize; + u64 bank_cfg = (u64)hw_prio << 24; int kw = 0; u8 kw_type; @@ -1119,41 +1092,50 @@ int npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, bank, 1), entry->vtag_action); - goto set_cfg; - } - /* Clear mcam entry to avoid writes being suppressed by NPC */ - npc_clear_x2_entry(rvu, blkaddr, 0, mcam_idx); - npc_clear_x2_entry(rvu, blkaddr, 1, mcam_idx); - - npc_cn20k_config_kw_x4(rvu, mcam, blkaddr, - mcam_idx, intf, entry, - kw_type, req_kw_type); - for (bank = 0; bank < mcam->banks_per_entry; bank++) { - /* Set 'action' */ + /* Set HW priority */ rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, - bank, 0), - entry->action); + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), + bank_cfg); - /* Set TAG 'action' */ - rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, - bank, 1), - entry->vtag_action); + } else { + /* Clear mcam entry to avoid writes being suppressed by NPC */ + npc_clear_x2_entry(rvu, blkaddr, 0, mcam_idx); + npc_clear_x2_entry(rvu, blkaddr, 1, mcam_idx); - /* Set 'action2' for inline receive */ - rvu_write64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, - bank, 2), - entry->action2); + npc_cn20k_config_kw_x4(rvu, mcam, blkaddr, + mcam_idx, intf, entry, + kw_type, req_kw_type); + for (bank = 0; bank < mcam->banks_per_entry; bank++) { + /* Set 'action' */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 0), + entry->action); + + /* Set TAG 'action' */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 1), + entry->vtag_action); + + /* Set 'action2' for inline receive */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 2), + entry->action2); + + /* Set HW priority */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), + bank_cfg); + } } -set_cfg: /* TODO: */ /* PF installing VF rule */ - npc_cn20k_set_mcam_bank_cfg(rvu, blkaddr, mcam_idx, bank, - kw_type, enable, hw_prio); + if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, enable)) + return -EINVAL; return 0; } -- cgit v1.2.3 From f6803eb070bfb9a5114d16ae15053106bc7842ae Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 29 Apr 2026 07:57:19 +0530 Subject: octeontx2-af: npc: cn20k: Fix MCAM actions read npc_cn20k_read_mcam_entry() always reloaded action and vtag_action from bank 0 after programming the CAM words. Use the bank returned by npc_get_bank() for the ACTION reads as well, and read those registers once up front so both X2 and X4 paths share the same metadata. Return directly from the X2 keyword path now that the action fields are already populated. Cc: Suman Ghosh Fixes: 6d1e70282f76 ("octeontx2-af: npc: cn20k: Use common APIs") Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260429022722.1110289-8-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/cn20k/npc.c | 26 +++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 4773277fd409..bb0a9ac7aab3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -1219,6 +1219,18 @@ int npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, bank = npc_get_bank(mcam, index); index &= (mcam->banksize - 1); + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 0)); + entry->action = cfg; + + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 1)); + entry->vtag_action = cfg; + + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 2)); + entry->action2 = cfg; + cfg = rvu_read64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(index, bank, 1)) & 3; @@ -1268,7 +1280,7 @@ int npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, bank, 0)); npc_cn20k_fill_entryword(entry, kw + 3, cam0, cam1); - goto read_action; + return 0; } for (bank = 0; bank < mcam->banks_per_entry; bank++, kw = kw + 4) { @@ -1313,18 +1325,6 @@ int npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, npc_cn20k_fill_entryword(entry, kw + 3, cam0, cam1); } -read_action: - /* 'action' is set to same value for both bank '0' and '1'. - * Hence, reading bank '0' should be enough. - */ - cfg = rvu_read64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, 0, 0)); - entry->action = cfg; - - cfg = rvu_read64(rvu, blkaddr, - NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, 0, 1)); - entry->vtag_action = cfg; - return 0; } -- cgit v1.2.3 From afb474bd4ffc314de766afc295ac64b42856f48e Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 29 Apr 2026 07:57:20 +0530 Subject: octeontx2-af: npc: cn20k: Initialize default-rule index outputs up front npc_cn20k_dft_rules_idx_get() wrote USHRT_MAX into individual outputs only on some error paths (lbk promisc lookup, VF ucast lookup, and the PF rule walk), which could leave other caller slots stale across retries. Set every non-NULL bcast/mcast/promisc/ucast pointer to USHRT_MAX once at entry, then drop the duplicate assignments on failure. Successful lookups still overwrite the relevant slot before returning. Fixes: 09d3b7a1403f ("octeontx2-af: npc: cn20k: Allocate default MCAM indexes") Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260429022722.1110289-9-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index bb0a9ac7aab3..b3f34b84c114 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -4016,6 +4016,13 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, void *val; int i, j; + for (i = 0; i < ARRAY_SIZE(ptr); i++) { + if (!ptr[i]) + continue; + + *ptr[i] = USHRT_MAX; + } + if (!npc_priv.init_done) return 0; @@ -4031,7 +4038,6 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, npc_dft_rule_name[NPC_DFT_RULE_PROMISC_ID], pcifunc); - *ptr[0] = USHRT_MAX; return -ESRCH; } @@ -4051,7 +4057,6 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, npc_dft_rule_name[NPC_DFT_RULE_UCAST_ID], pcifunc); - *ptr[3] = USHRT_MAX; return -ESRCH; } @@ -4071,7 +4076,6 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, __func__, npc_dft_rule_name[i], pcifunc); - *ptr[j] = USHRT_MAX; continue; } -- cgit v1.2.3 From 013717353c03b65f5b00a5cefa1515b6b45777b7 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 29 Apr 2026 07:57:21 +0530 Subject: octeontx2-af: npc: cn20k: Tear down default MCAM rules explicitly on free npc_cn20k_dft_rules_free() used the NPC MCAM mbox "free all" path, which does not match how cn20k tracks default-rule MCAM slots indexes. Resolve the default-rule indices, then for each valid slot clear the bitmap entry, drop the PF/VF map, disable the MCAM line, clear the target function, and npc_cn20k_idx_free(). Remove any matching software mcam_rules nodes. On hard failure from idx_free, WARN and stop so the box stays up for analysis. In npc_mcam_free_all_entries(), prefetch the same default-rule indices and, on cn20k, skip bitmap clear and idx_free when the scanned entry is one of those reserved defaults (they are released by npc_cn20k_dft_rules_free). Fixes: 09d3b7a1403f ("octeontx2-af: npc: cn20k: Allocate default MCAM indexes") Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260429022722.1110289-10-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/cn20k/npc.c | 51 +++++++++++++++---- .../net/ethernet/marvell/octeontx2/af/rvu_npc.c | 59 +++++++++++++++------- 2 files changed, 82 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index b3f34b84c114..1129565a01bd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -4178,11 +4178,11 @@ static bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc) void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc) { - struct npc_mcam_free_entry_req free_req = { 0 }; + struct npc_mcam *mcam = &rvu->hw->mcam; + u16 ptr[4] = {[0 ... 3] = USHRT_MAX}; + struct rvu_npc_mcam_rule *rule, *tmp; unsigned long index; - struct msg_rsp rsp; - u16 ptr[4]; - int rc, i; + int blkaddr, rc, i; void *map; if (!npc_priv.init_done) @@ -4240,14 +4240,43 @@ void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc) } free_rules: + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return; + for (int i = 0; i < 4; i++) { + if (ptr[i] == USHRT_MAX) + continue; - free_req.hdr.pcifunc = pcifunc; - free_req.all = 1; - rc = rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); - if (rc) - dev_err(rvu->dev, - "%s: Error deleting default entries (pcifunc=%#x\n", - __func__, pcifunc); + mutex_lock(&mcam->lock); + npc_mcam_clear_bit(mcam, ptr[i]); + mcam->entry2pfvf_map[ptr[i]] = NPC_MCAM_INVALID_MAP; + npc_cn20k_enable_mcam_entry(rvu, blkaddr, ptr[i], false); + mcam->entry2target_pffunc[ptr[i]] = 0x0; + mutex_unlock(&mcam->lock); + + rc = npc_cn20k_idx_free(rvu, &ptr[i], 1); + if (rc) { + /* Non recoverable error. Let us WARN and return. Keep system alive to + * enable debugging + */ + WARN(1, "%s Error deleting default entries (pcifunc=%#x) mcam_idx=%u\n", + __func__, pcifunc, ptr[i]); + return; + } + } + + mutex_lock(&mcam->lock); + list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) { + for (int i = 0; i < 4; i++) { + if (ptr[i] != rule->entry) + continue; + + list_del(&rule->list); + kfree(rule); + break; + } + } + mutex_unlock(&mcam->lock); } int npc_cn20k_dft_rules_alloc(struct rvu *rvu, u16 pcifunc) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 44ca65efc80f..5d349d131fdb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -2521,33 +2521,58 @@ void npc_mcam_clear_bit(struct npc_mcam *mcam, u16 index) static void npc_mcam_free_all_entries(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, u16 pcifunc) { + u16 dft_idxs[NPC_DFT_RULE_MAX_ID] = {[0 ... NPC_DFT_RULE_MAX_ID - 1] = USHRT_MAX}; + bool cn20k_dft_rl; u16 index, cntr; int rc; + npc_cn20k_dft_rules_idx_get(rvu, pcifunc, + &dft_idxs[NPC_DFT_RULE_BCAST_ID], + &dft_idxs[NPC_DFT_RULE_MCAST_ID], + &dft_idxs[NPC_DFT_RULE_PROMISC_ID], + &dft_idxs[NPC_DFT_RULE_UCAST_ID]); + /* Scan all MCAM entries and free the ones mapped to 'pcifunc' */ for (index = 0; index < mcam->bmap_entries; index++) { - if (mcam->entry2pfvf_map[index] == pcifunc) { + if (mcam->entry2pfvf_map[index] != pcifunc) + continue; + + cn20k_dft_rl = false; + + if (is_cn20k(rvu->pdev)) { + if (dft_idxs[NPC_DFT_RULE_BCAST_ID] == index || + dft_idxs[NPC_DFT_RULE_MCAST_ID] == index || + dft_idxs[NPC_DFT_RULE_PROMISC_ID] == index || + dft_idxs[NPC_DFT_RULE_UCAST_ID] == index) { + cn20k_dft_rl = true; + } + } + + /* Disable the entry */ + npc_enable_mcam_entry(rvu, mcam, blkaddr, index, false); + + if (!cn20k_dft_rl) { mcam->entry2pfvf_map[index] = NPC_MCAM_INVALID_MAP; /* Free the entry in bitmap */ npc_mcam_clear_bit(mcam, index); - /* Disable the entry */ - npc_enable_mcam_entry(rvu, mcam, blkaddr, index, false); - - /* Update entry2counter mapping */ - cntr = mcam->entry2cntr_map[index]; - if (cntr != NPC_MCAM_INVALID_MAP) - npc_unmap_mcam_entry_and_cntr(rvu, mcam, - blkaddr, index, - cntr); mcam->entry2target_pffunc[index] = 0x0; - if (is_cn20k(rvu->pdev)) { - rc = npc_cn20k_idx_free(rvu, &index, 1); - if (rc) - dev_err(rvu->dev, - "Failed to free mcam idx=%u pcifunc=%#x\n", - index, pcifunc); - } } + + /* Update entry2counter mapping */ + cntr = mcam->entry2cntr_map[index]; + if (cntr != NPC_MCAM_INVALID_MAP) + npc_unmap_mcam_entry_and_cntr(rvu, mcam, + blkaddr, index, + cntr); + + if (!is_cn20k(rvu->pdev) || cn20k_dft_rl) + continue; + + rc = npc_cn20k_idx_free(rvu, &index, 1); + if (rc) + dev_err(rvu->dev, + "Failed to free mcam idx=%u pcifunc=%#x\n", + index, pcifunc); } } -- cgit v1.2.3 From bc968f61bf0ad4f085559e5e3d168105fdf88204 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 29 Apr 2026 07:57:22 +0530 Subject: octeontx2-af: npc: cn20k: Reject missing default-rule MCAM indices When cn20k default L2 rules are not installed, npc_cn20k_dft_rules_idx_get() leaves broadcast, multicast, promiscuous, and unicast slots at USHRT_MAX. npc_get_nixlf_mcam_index() previously returned that sentinel as a valid MCAM index, so callers could program hardware with an invalid index. Return -EINVAL from the cn20k branches of npc_get_nixlf_mcam_index() when the requested slot is still USHRT_MAX. Harden cn20k NPC MCAM entry helpers to reject out-of-range indices before touching hardware. Drop the early bounds check in npc_enable_mcam_entry() for cn20k so invalid indices are validated inside npc_cn20k_enable_mcam_entry() instead of being silently ignored. In rvu_npc_update_flowkey_alg_idx(), treat negative MCAM indices like out-of-range values, and only update RSS actions for promiscuous and all-multi paths when the resolved index is non-negative. Cc: Suman Ghosh Fixes: 6d1e70282f76 ("octeontx2-af: npc: cn20k: Use common APIs") Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260429022722.1110289-11-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/cn20k/npc.c | 14 ++- .../net/ethernet/marvell/octeontx2/af/cn20k/npc.h | 1 + .../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 3 + .../net/ethernet/marvell/octeontx2/af/rvu_npc.c | 137 +++++++++++++++++++-- .../net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c | 10 +- 5 files changed, 155 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 1129565a01bd..6b3f453fd500 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -808,6 +808,9 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, u64 cfg, hw_prio; u8 kw_type; + if (index < 0 || index >= mcam->total_entries) + return -EINVAL; + if (npc_mcam_idx_2_key_type(rvu, index, &kw_type)) return -EINVAL; @@ -1056,6 +1059,9 @@ int npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, int kw = 0; u8 kw_type; + if (index < 0 || index >= mcam->total_entries) + return -EINVAL; + if (npc_mcam_idx_2_key_type(rvu, index, &kw_type)) return -EINVAL; @@ -1148,6 +1154,9 @@ int npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) int bank, i, sb, db; int dbank, sbank; + if (src >= mcam->total_entries || dest >= mcam->total_entries) + return -EINVAL; + dbank = npc_get_bank(mcam, dest); sbank = npc_get_bank(mcam, src); @@ -1213,6 +1222,9 @@ int npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, int kw = 0, bank; u8 kw_type; + if (index >= mcam->total_entries) + return -EINVAL; + if (npc_mcam_idx_2_key_type(rvu, index, &kw_type)) return -EINVAL; @@ -4170,7 +4182,7 @@ int rvu_mbox_handler_npc_get_dft_rl_idxs(struct rvu *rvu, struct msg_req *req, return 0; } -static bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc) +bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc) { return is_pf_cgxmapped(rvu, rvu_get_pf(rvu->pdev, pcifunc)) || is_lbk_vf(rvu, pcifunc); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h index 2f761b97f91b..3d5eb952cc07 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h @@ -335,5 +335,6 @@ int npc_mcam_idx_2_key_type(struct rvu *rvu, u16 mcam_idx, u8 *key_type); u16 npc_cn20k_vidx2idx(u16 index); u16 npc_cn20k_idx2vidx(u16 idx); int npc_cn20k_defrag(struct rvu *rvu); +bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc); #endif /* NPC_CN20K_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index ef5b081162eb..f977734ae712 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -3577,6 +3577,9 @@ static int nix_update_mce_rule(struct rvu *rvu, u16 pcifunc, mcam_index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK, nixlf, type); + if (mcam_index < 0) + return -EINVAL; + err = nix_update_mce_list(rvu, pcifunc, mce_list, mce_idx, mcam_index, add); return err; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 5d349d131fdb..3c814d157ab9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -163,14 +163,35 @@ int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, if (rc) return -EFAULT; + if (is_lbk_vf(rvu, pcifunc)) { + if (promisc == USHRT_MAX) + return -EINVAL; + return promisc; + } + + if (is_cgx_vf(rvu, pcifunc)) { + if (ucast == USHRT_MAX) + return -EINVAL; + + return ucast; + } + switch (type) { case NIXLF_BCAST_ENTRY: + if (bcast == USHRT_MAX) + return -EINVAL; return bcast; case NIXLF_ALLMULTI_ENTRY: + if (mcast == USHRT_MAX) + return -EINVAL; return mcast; case NIXLF_PROMISC_ENTRY: + if (promisc == USHRT_MAX) + return -EINVAL; return promisc; case NIXLF_UCAST_ENTRY: + if (ucast == USHRT_MAX) + return -EINVAL; return ucast; default: return -EINVAL; @@ -238,9 +259,6 @@ void npc_enable_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, int actbank = bank; if (is_cn20k(rvu->pdev)) { - if (index < 0 || index >= mcam->banksize * mcam->banks) - return; - if (npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, enable)) dev_err(rvu->dev, "Error to %s mcam %u entry\n", enable ? "enable" : "disable", index); @@ -434,6 +452,15 @@ static u64 npc_get_default_entry_action(struct rvu *rvu, struct npc_mcam *mcam, index = npc_get_nixlf_mcam_index(mcam, pf_func, nixlf, NIXLF_UCAST_ENTRY); + + if (index < 0) { + dev_err(rvu->dev, + "%s: failed to get ucast entry pcifunc:0x%x\n", + __func__, pf_func); + /* Action 0 is drop */ + return 0; + } + bank = npc_get_bank(mcam, index); index &= (mcam->banksize - 1); @@ -700,6 +727,12 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } /* Don't change the action if entry is already enabled * Otherwise RSS action may get overwritten. @@ -755,11 +788,21 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_PROMISC_ENTRY); + /* In cn20k, default indexes are installed only for CGX mapped + * and lbk interfaces + */ if (is_cgx_vf(rvu, pcifunc)) index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK, nixlf, NIXLF_PROMISC_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get promisc entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } + /* If the corresponding PF's ucast action is RSS, * use the same action for promisc also * Please note that for lbk(s) "index" and "ucast_idx" @@ -770,6 +813,12 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, else ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (ucast_idx < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast/promisc entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx)) *(u64 *)&action = npc_get_mcam_action(rvu, mcam, @@ -844,6 +893,14 @@ void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_PROMISC_ENTRY); + + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get promisc entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } + npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); } @@ -884,6 +941,12 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_BCAST_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get bcast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } if (!hw->cap.nix_rx_multicast) { /* Early silicon doesn't support pkt replication, @@ -948,12 +1011,25 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_ALLMULTI_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get mcast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } /* If the corresponding PF's ucast action is RSS, * use the same action for multicast entry also */ ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (ucast_idx < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } + if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx)) *(u64 *)&action = npc_get_mcam_action(rvu, mcam, blkaddr, ucast_idx); @@ -1018,6 +1094,13 @@ void rvu_npc_enable_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_ALLMULTI_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get mcast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } + npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); } @@ -1130,8 +1213,12 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, index = mcam_index; } - if (index >= mcam->total_entries) + if (index < 0 || index >= mcam->total_entries) { + dev_err(rvu->dev, + "%s: Invalid mcam index, pcifunc=%#x\n", + __func__, pcifunc); return; + } bank = npc_get_bank(mcam, index); index &= (mcam->banksize - 1); @@ -1175,16 +1262,18 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, /* If PF's promiscuous entry is enabled, * Set RSS action for that entry as well */ - npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, - blkaddr, alg_idx); + if (index >= 0) + npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, + blkaddr, alg_idx); index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_ALLMULTI_ENTRY); /* If PF's allmulti entry is enabled, * Set RSS action for that entry as well */ - npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, - blkaddr, alg_idx); + if (index >= 0) + npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, + blkaddr, alg_idx); } } @@ -1197,12 +1286,22 @@ void npc_enadis_default_mce_entry(struct rvu *rvu, u16 pcifunc, int index, blkaddr, mce_idx; struct rvu_pfvf *pfvf; + /* multicast pkt replication is not enabled for AF's VFs & SDP links */ + if (is_lbk_vf(rvu, pcifunc) || is_sdp_pfvf(rvu, pcifunc)) + return; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return; index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK, nixlf, type); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get entry for pcifunc=%#x, type=%u\n", + __func__, pcifunc, type); + return; + } /* disable MCAM entry when packet replication is not supported by hw */ if (!hw->cap.nix_rx_multicast && !is_vf(pcifunc)) { @@ -1231,6 +1330,10 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc, struct npc_mcam *mcam = &rvu->hw->mcam; int index, blkaddr; + /* only CGX or LBK interfaces have default entries */ + if (is_cn20k(rvu->pdev) && !npc_is_cgx_or_lbk(rvu, pcifunc)) + return; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return; @@ -1240,6 +1343,12 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc, pfvf->nix_rx_intf)) { index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (index < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); } @@ -3897,6 +4006,12 @@ int rvu_mbox_handler_npc_read_base_steer_rule(struct rvu *rvu, /* Read the default ucast entry if there is no pkt steering rule */ index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (index < 0) { + mutex_unlock(&mcam->lock); + rc = NIX_AF_ERR_AF_LF_INVALID; + goto out; + } + read_entry: /* Read the mcam entry */ npc_read_mcam_entry(rvu, mcam, blkaddr, index, &rsp->entry, &intf, @@ -3970,6 +4085,12 @@ void rvu_npc_clear_ucast_entry(struct rvu *rvu, int pcifunc, int nixlf) ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); + if (ucast_idx < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for pcifunc=%#x\n", + __func__, pcifunc); + return; + } npc_enable_mcam_entry(rvu, mcam, blkaddr, ucast_idx, false); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index fe10554b1f0e..6ae9cdcb608b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -1444,7 +1444,7 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, struct msg_rsp write_rsp; struct mcam_entry *entry; bool new = false; - u16 entry_index; + int entry_index; int err; installed_features = req->features; @@ -1477,6 +1477,14 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, if (req->default_rule) { entry_index = npc_get_nixlf_mcam_index(mcam, target, nixlf, NIXLF_UCAST_ENTRY); + + if (entry_index < 0) { + dev_err(rvu->dev, + "%s: Error to get ucast entry for target=%#x\n", + __func__, target); + return -EINVAL; + } + enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, entry_index); } -- cgit v1.2.3 From a2e5b58811c7bb7d63f366f586cc7317f20e62e7 Mon Sep 17 00:00:00 2001 From: Avi Radinsky Date: Wed, 29 Apr 2026 18:35:23 -0400 Subject: Documentation: riscv: cmodx: fix typos Fix typos in the dynamic ftrace section: atmoic -> atomic (twice), pacthable -> patchable, derect -> directed. Signed-off-by: Avi Radinsky Acked-by: Randy Dunlap Link: https://patch.msgid.link/391d16fb-5f11-45fa-8f3b-1debe095695e@tennr.com Signed-off-by: Paul Walmsley --- Documentation/arch/riscv/cmodx.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst index 40ba53bed5df..cbfa812a11b4 100644 --- a/Documentation/arch/riscv/cmodx.rst +++ b/Documentation/arch/riscv/cmodx.rst @@ -21,13 +21,13 @@ call at each patchable function entry, and patches it dynamically at runtime to enable or disable the redirection. In the case of RISC-V, 2 instructions, AUIPC + JALR, are required to compose a function call. However, it is impossible to patch 2 instructions and expect that a concurrent read-side executes them -without a race condition. This series makes atmoic code patching possible in +without a race condition. This series makes atomic code patching possible in RISC-V ftrace. Kernel preemption makes things even worse as it allows the old state to persist across the patching process with stop_machine(). In order to get rid of stop_machine() and run dynamic ftrace with full kernel preemption, we partially initialize each patchable function entry at boot-time, -setting the first instruction to AUIPC, and the second to NOP. Now, atmoic +setting the first instruction to AUIPC, and the second to NOP. Now, atomic patching is possible because the kernel only has to update one instruction. According to Ziccif, as long as an instruction is naturally aligned, the ISA guarantee an atomic update. @@ -36,8 +36,8 @@ By fixing down the first instruction, AUIPC, the range of the ftrace trampoline is limited to +-2K from the predetermined target, ftrace_caller, due to the lack of immediate encoding space in RISC-V. To address the issue, we introduce CALL_OPS, where an 8B naturally align metadata is added in front of each -pacthable function. The metadata is resolved at the first trampoline, then the -execution can be derect to another custom trampoline. +patchable function. The metadata is resolved at the first trampoline, then the +execution can be directed to another custom trampoline. CMODX in the User Space ----------------------- -- cgit v1.2.3 From 4d2b03699460b8fd5df34408a03a84a1a7ff8aa1 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 9 Apr 2026 09:11:39 +0000 Subject: riscv: errata: Fix bitwise vs logical AND in MIPS errata patching The condition checking whether a specific errata needs patching uses logical AND (&&) instead of bitwise AND (&). Since logical AND only checks that both operands are non-zero, this causes all errata patches to be applied whenever any single errata is detected, rather than only applying the matching one. The SiFive errata implementation correctly uses bitwise AND for the same check. Fixes: 0b0ca959d206 ("riscv: errata: Fix the PAUSE Opcode for MIPS P8700") Signed-off-by: Michael Neuling Assisted-by: Cursor:claude-4.6-opus-high-thinking Link: https://patch.msgid.link/20260409091143.1348853-2-mikey@neuling.org [pjw@kernel.org: fixed checkpatch warning] Signed-off-by: Paul Walmsley --- arch/riscv/errata/mips/errata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/errata/mips/errata.c b/arch/riscv/errata/mips/errata.c index e984a8152208..2c3dc2259e93 100644 --- a/arch/riscv/errata/mips/errata.c +++ b/arch/riscv/errata/mips/errata.c @@ -57,7 +57,7 @@ void mips_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, } tmp = (1U << alt->patch_id); - if (cpu_req_errata && tmp) { + if (cpu_req_errata & tmp) { mutex_lock(&text_mutex); patch_text_nosync(ALT_OLD_PTR(alt), ALT_ALT_PTR(alt), alt->alt_len); -- cgit v1.2.3 From c897cf120696b94f56ed0f3197ba9a77071a59ec Mon Sep 17 00:00:00 2001 From: Dmitriy Zharov Date: Thu, 30 Apr 2026 22:35:22 +0400 Subject: Input: xpad - add support for ASUS ROG RAIKIRI II Add the VID/PIDs for the ASUS ROG RAIKIRI II controller to xpad_device and the VID to xpad_table. The controller has a physical PC/XBOX toggle which switches between XBOX360 and XBOXONE protocols. Signed-off-by: Dmitriy Zharov Link: https://patch.msgid.link/20260430183522.122151-1-contact@zharov.dev Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 19ce90da89e9..97daec39bfee 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -186,6 +186,10 @@ static const struct xpad_device { { 0x07ff, 0xffff, "Mad Catz GamePad", 0, XTYPE_XBOX360 }, { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x0b05, 0x1abb, "ASUS ROG RAIKIRI PRO", 0, XTYPE_XBOXONE }, + { 0x0b05, 0x1c91, "ASUS ROG RAIKIRI II", 0, XTYPE_XBOX360 }, + { 0x0b05, 0x1c92, "ASUS ROG RAIKIRI II WIRELESS", 0, XTYPE_XBOX360 }, + { 0x0b05, 0x1c96, "ASUS ROG RAIKIRI II XBOX", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, + { 0x0b05, 0x1d04, "ASUS ROG RAIKIRI II XBOX WIRELESS", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x0c12, 0x0005, "Intec wireless", 0, XTYPE_XBOX }, { 0x0c12, 0x8801, "Nyko Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x8802, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, @@ -507,6 +511,7 @@ static const struct usb_device_id xpad_table[] = { { USB_DEVICE(0x0738, 0x4540) }, /* Mad Catz Beat Pad */ XPAD_XBOXONE_VENDOR(0x0738), /* Mad Catz FightStick TE 2 */ XPAD_XBOX360_VENDOR(0x07ff), /* Mad Catz Gamepad */ + XPAD_XBOX360_VENDOR(0x0b05), /* ASUS controllers */ XPAD_XBOXONE_VENDOR(0x0b05), /* ASUS controllers */ XPAD_XBOX360_VENDOR(0x0c12), /* Zeroplus X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x0db0), /* Micro Star International X-Box 360 controllers */ -- cgit v1.2.3 From 1f6ac0f8441c48c4cc250141e1da8486c13512ba Mon Sep 17 00:00:00 2001 From: Qbeliw Tanaka Date: Thu, 30 Apr 2026 21:44:12 -0700 Subject: Input: xpad - add "Nova 2 Lite" from GameSir Add support for the gamepad "Nova 2 Lite" from GameSir, compatible with the Xbox 360 gamepad. Signed-off-by: Qbeliw Tanaka Link: https://patch.msgid.link/20260429.162040.930225048583399359.q.tanaka@gmx.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 97daec39bfee..feb8f368f834 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -395,6 +395,7 @@ static const struct xpad_device { { 0x3285, 0x0662, "Nacon Revolution5 Pro", 0, XTYPE_XBOX360 }, { 0x3285, 0x0663, "Nacon Evol-X", 0, XTYPE_XBOXONE }, { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 }, + { 0x3537, 0x100f, "GameSir Nova 2 Lite", 0, XTYPE_XBOX360 }, { 0x3537, 0x1010, "GameSir G7 SE", 0, XTYPE_XBOXONE }, { 0x3651, 0x1000, "CRKD SG", 0, XTYPE_XBOX360 }, { 0x366c, 0x0005, "ByoWave Proteus Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE, FLAG_DELAY_INIT }, -- cgit v1.2.3 From baa3c65435fb3f450b262672bc06db887a92d397 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 30 Apr 2026 21:55:01 +0200 Subject: netfilter: flowtable: use skb_pull_rcsum() to pop vlan/pppoe header This adjusts the checksum, if required, after pulling the layer 2 header, either the pppoe header or the inner vlan header in the double-tagged vlan packets. Fixes: 4cd91f7c290f ("netfilter: flowtable: add vlan support") Fixes: 72efd585f714 ("netfilter: flowtable: add pppoe support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 2eba64eb393a..9c05a50d6013 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -445,13 +445,13 @@ static void nf_flow_encap_pop(struct nf_flowtable_ctx *ctx, switch (skb->protocol) { case htons(ETH_P_8021Q): vlan_hdr = (struct vlan_hdr *)skb->data; - __skb_pull(skb, VLAN_HLEN); + skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vlan_hdr); skb_reset_network_header(skb); break; case htons(ETH_P_PPP_SES): skb->protocol = __nf_flow_pppoe_proto(skb); - skb_pull(skb, PPPOE_SES_HLEN); + skb_pull_rcsum(skb, PPPOE_SES_HLEN); skb_reset_network_header(skb); break; } -- cgit v1.2.3 From 845db023a8aeba8b14315a846dcfba31ee727fb1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 1 May 2026 19:23:12 +0800 Subject: ublk: don't issue uring_cmd from fallback task work When ublk_ch_uring_cmd_cb() runs as fallback task work (e.g., because the submitting task is exiting), the command should not be issued as current is a kworker, not the daemon task. This can cause io->task to capture the wrong task in __ublk_fetch(), leading to a task mismatch warning in ublk_uring_cmd_cancel_fn(). Check tw.cancel and return -ECANCELED instead of issuing the command from fallback context. Fixes: 3421c7f68bba ("ublk: make sure io cmd handled in submitter task context") Signed-off-by: Ming Lei Link: https://patch.msgid.link/20260501112312.947327-1-tom.leiming@gmail.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 8e5f3738c203..d10460d29e4a 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -3496,8 +3496,10 @@ static void ublk_ch_uring_cmd_cb(struct io_tw_req tw_req, io_tw_token_t tw) { unsigned int issue_flags = IO_URING_CMD_TASK_WORK_ISSUE_FLAGS; struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req); - int ret = ublk_ch_uring_cmd_local(cmd, issue_flags); + int ret = -ECANCELED; + if (!tw.cancel) + ret = ublk_ch_uring_cmd_local(cmd, issue_flags); if (ret != -EIOCBQUEUED) io_uring_cmd_done(cmd, ret, issue_flags); } -- cgit v1.2.3 From 56722cfbb78d7eb41756cd78dc5192d08bd14f3d Mon Sep 17 00:00:00 2001 From: Rámon van Raaij Date: Thu, 30 Apr 2026 21:12:24 +0200 Subject: ALSA: hda/realtek: Add codec SSID quirk for Lenovo Yoga Pro 9 16IMH9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Yoga Pro 9 16IMH9 (codec SSID 17aa:38d6) shares PCI audio device subsystem ID 17aa:3811 with the Legion S7 15IMH05. The existing SND_PCI_QUIRK entry for the Legion routes both machines to ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS, which does not bind the TAS2781 smart amplifiers, resulting in near-silent built-in speakers. Add an HDA_CODEC_QUIRK entry immediately before the conflicting PCI quirk that matches the Yoga Pro 9's unique codec SSID and routes it to ALC287_FIXUP_TAS2781_I2C. Codec quirks are evaluated after PCI quirks and take precedence, leaving the Legion S7 15IMH05 entry unaffected. This follows the same pattern used to disambiguate PCI SSID 17aa:3847 (shared between Yoga Pro 7 14IMH9 and Legion 7 16ACHG6), where a HDA_CODEC_QUIRK for codec SSID 17aa:38cf resolves the conflict. Signed-off-by: Rámon van Raaij Link: https://patch.msgid.link/20260430191224.patch1-ramon@vanraaij.eu Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 9a5747c2e359..e061673d3c2e 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7647,6 +7647,10 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3801, "Lenovo Yoga9 14IAP7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), HDA_CODEC_QUIRK(0x17aa, 0x3802, "DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga Pro 9 14IRP8", ALC287_FIXUP_TAS2781_I2C), + /* Yoga Pro 9 16IMH9 shares PCI SSID 17aa:3811 with Legion S7 15IMH05; + * use codec SSID to distinguish them + */ + HDA_CODEC_QUIRK(0x17aa, 0x38d6, "Lenovo Yoga Pro 9 16IMH9", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3811, "Legion S7 15IMH05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7), -- cgit v1.2.3 From ad39a189bfebb3de580f390bc000f9e121c6aca3 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Thu, 30 Apr 2026 23:49:50 +0300 Subject: ALSA: usb-audio: add min_mute quirk for Razer Nommo V2 X ID 1532:055e Razer USA, Ltd Razer Nommo V2 X is tested to have muted min playback volume. Apply quirk for that. Link: https://gitlab.freedesktop.org/pipewire/pipewire/-/work_items/5235 Signed-off-by: Pauli Virtanen Link: https://patch.msgid.link/94449577332d14d7974864903825f27e5824ddbc.1777579951.git.pav@iki.fi Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 0b4ecc2c6bcc..f77ff05dff0b 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2366,6 +2366,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x152a, 0x880a, /* NeuralDSP Quad Cortex */ 0), /* Doesn't have the vendor quirk which would otherwise apply */ + DEVICE_FLG(0x1532, 0x055e, /* Razer Nommo V2 X */ + QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE), DEVICE_FLG(0x154e, 0x1002, /* Denon DCD-1500RE */ QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY), DEVICE_FLG(0x154e, 0x1003, /* Denon DA-300USB */ -- cgit v1.2.3 From bb7235e226888607e6aac1288062fcb1ac105589 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 29 Apr 2026 15:30:10 +0100 Subject: kselftest/arm64: Include for user_gcs definition kselftest includes kernel uAPI headers with option: -isystem $(top_srcdir)/usr/include Include in libc-gcs.c for the definition of struct user_gcs from the uAPI headers, and remove the redundant definition in gcs-util.h. This fixes a compilation error on systems where the toolchain defines NT_ARM_GCS. Fixes: a505a52b4e29 ("kselftest/arm64: Add a GCS test program built with the system libc") Signed-off-by: Leo Yan Reviewed-by: Mark Brown Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/gcs/gcs-util.h | 6 ------ tools/testing/selftests/arm64/gcs/libc-gcs.c | 1 + 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/testing/selftests/arm64/gcs/gcs-util.h b/tools/testing/selftests/arm64/gcs/gcs-util.h index c99a6b39ac14..7a81bb07ed4b 100644 --- a/tools/testing/selftests/arm64/gcs/gcs-util.h +++ b/tools/testing/selftests/arm64/gcs/gcs-util.h @@ -18,12 +18,6 @@ #ifndef NT_ARM_GCS #define NT_ARM_GCS 0x410 - -struct user_gcs { - __u64 features_enabled; - __u64 features_locked; - __u64 gcspr_el0; -}; #endif /* Shadow Stack/Guarded Control Stack interface */ diff --git a/tools/testing/selftests/arm64/gcs/libc-gcs.c b/tools/testing/selftests/arm64/gcs/libc-gcs.c index 17b2fabfec38..72e82bfbecc9 100644 --- a/tools/testing/selftests/arm64/gcs/libc-gcs.c +++ b/tools/testing/selftests/arm64/gcs/libc-gcs.c @@ -16,6 +16,7 @@ #include #include +#include #include -- cgit v1.2.3 From 2e42a17b8f6bc3c0cd69d7556b588011d3ec2394 Mon Sep 17 00:00:00 2001 From: Eliot Courtney Date: Thu, 23 Apr 2026 21:36:52 +0900 Subject: rust: drm: gem: clean up GEM state in init failure case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, if `drm_gem_object_init` fails, the object is freed without any cleanup. Perform the cleanup in that case. Cc: stable@vger.kernel.org Fixes: c284d3e42338 ("rust: drm: gem: Add GEM object abstraction") Signed-off-by: Eliot Courtney Reviewed-by: Alice Ryhl Reviewed-by: Onur Özkan Link: https://patch.msgid.link/20260423-fix-gem-1-v1-1-e12e35f7bba9@nvidia.com [ Move safety comment closer to unsafe block to avoid a clippy warning. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/drm/gem/mod.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index 75acda7ba500..01b5bd47a333 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs @@ -277,8 +277,17 @@ impl Object { // SAFETY: `obj.as_raw()` is guaranteed to be valid by the initialization above. unsafe { (*obj.as_raw()).funcs = &Self::OBJECT_FUNCS }; - // SAFETY: The arguments are all valid per the type invariants. - to_result(unsafe { bindings::drm_gem_object_init(dev.as_raw(), obj.obj.get(), size) })?; + if let Err(err) = + // SAFETY: The arguments are all valid per the type invariants. + to_result(unsafe { + bindings::drm_gem_object_init(dev.as_raw(), obj.obj.get(), size) + }) + { + // SAFETY: `drm_gem_object_init()` initializes the private GEM object state before + // failing, so `drm_gem_private_object_fini()` is the matching cleanup. + unsafe { bindings::drm_gem_private_object_fini(obj.obj.get()) }; + return Err(err); + } // SAFETY: We will never move out of `Self` as `ARef` is always treated as pinned. let ptr = KBox::into_raw(unsafe { Pin::into_inner_unchecked(obj) }); -- cgit v1.2.3 From 4d8e74ad4585672489da6145b3328d415f50db82 Mon Sep 17 00:00:00 2001 From: Zhaoyang Huang Date: Thu, 30 Apr 2026 16:58:08 +0800 Subject: arm64: Reserve an extra page for early kernel mapping The final part of [data, end) segment may overflow into the next page of init_pg_end[1] which is the gap page before early_init_stack[2]: [1] crash_arm64_v9.0.1> vtop ffffffed00601000 VIRTUAL PHYSICAL ffffffed00601000 83401000 PAGE DIRECTORY: ffffffecffd62000 PGD: ffffffecffd62da0 => 10000000833fb003 PMD: ffffff80033fb018 => 10000000833fe003 PTE: ffffff80033fe008 => 68000083401f03 PAGE: 83401000 PTE PHYSICAL FLAGS 68000083401f03 83401000 (VALID|SHARED|AF|NG|PXN|UXN) PAGE PHYSICAL MAPPING INDEX CNT FLAGS fffffffec00d0040 83401000 0 0 1 4000 reserved [2] ffffffed002c8000 (r) __pi__data ffffffed0054e000 (d) __pi___bss_start ffffffed005f5000 (b) __pi_init_pg_dir ffffffed005fe000 (b) __pi_init_pg_end ffffffed005ff000 (B) early_init_stack ffffffed00608000 (b) __pi__end For 4K pages, the early kernel mapping may use 2MB block entries but the kernel segments are only 64KB aligned. Segment boundaries that fall within a 2MB block therefore require a PTE table so that different attributes can be applied on either side of the boundary. KERNEL_SEGMENT_COUNT still correctly counts the five permanent kernel VMAs registered by declare_kernel_vmas(). However, since commit 5973a62efa34 ("arm64: map [_text, _stext) virtual address range non-executable+read-only"), the early mapper also maps [_text, _stext) separately from [_stext, _etext). This adds one more early-only split and can require one more page-table page than the existing EARLY_SEGMENT_EXTRA_PAGES allowance reserves. Increase the 4K-page early mapping allowance by one page to cover that additional split. Fixes: 5973a62efa34 ("arm64: map [_text, _stext) virtual address range non-executable+read-only") Assisted-by: TRAE:GLM-5.1 Suggested-by: Ard Biesheuvel Signed-off-by: Zhaoyang Huang [catalin.marinas@arm.com: rewrote part of the commit log] [catalin.marinas@arm.com: expanded the code comment] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kernel-pgtable.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 74a4f738c5f5..229ee7976f69 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -68,7 +68,12 @@ #define KERNEL_SEGMENT_COUNT 5 #if SWAPPER_BLOCK_SIZE > SEGMENT_ALIGN -#define EARLY_SEGMENT_EXTRA_PAGES (KERNEL_SEGMENT_COUNT + 1) +/* + * KERNEL_SEGMENT_COUNT counts the permanent kernel VMAs. The early mapping + * has one additional split, [_text, _stext). Reserve one more page for the + * SWAPPER_BLOCK_SIZE-unaligned boundaries. + */ +#define EARLY_SEGMENT_EXTRA_PAGES (KERNEL_SEGMENT_COUNT + 2) /* * The initial ID map consists of the kernel image, mapped as two separate * segments, and may appear misaligned wrt the swapper block size. This means -- cgit v1.2.3 From 030e8a40fff65ca6ac1c04a4d3c08afe72438922 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Mon, 27 Apr 2026 13:03:33 +0100 Subject: arm64: signal: Preserve POR_EL0 if poe_context is missing Commit 2e8a1acea859 ("arm64: signal: Improve POR_EL0 handling to avoid uaccess failures") delayed the write to POR_EL0 in rt_sigreturn to avoid spurious uaccess failures. This change however relies on the poe_context frame record being present: on a system supporting POE, calling sigreturn without a poe_context record now results in writing arbitrary data from the kernel stack into POR_EL0. Fix this by adding a __valid_fields member to struct user_access_state, and zeroing the struct on allocation. restore_poe_context() then indicates that the por_el0 field is valid by setting the corresponding bit in __valid_fields, and restore_user_access_state() only touches POR_EL0 if there is a valid value to set it to. This is in line with how POR_EL0 was originally handled; all frame records are currently optional, except fpsimd_context. To ensure that __valid_fields is kept in sync, fields (currently just por_el0) are now accessed via accessors and prefixed with __ to discourage direct access. Fixes: 2e8a1acea859 ("arm64: signal: Improve POR_EL0 handling to avoid uaccess failures") Cc: Reported-by: Will Deacon Signed-off-by: Kevin Brodsky Signed-off-by: Catalin Marinas --- arch/arm64/kernel/signal.c | 54 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 08ffc5a5aea4..38e6fa204c17 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -67,6 +67,9 @@ struct rt_sigframe_user_layout { unsigned long end_offset; }; +#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16) +#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16) + /* * Holds any EL0-controlled state that influences unprivileged memory accesses. * This includes both accesses done in userspace and uaccess done in the kernel. @@ -74,13 +77,35 @@ struct rt_sigframe_user_layout { * This state needs to be carefully managed to ensure that it doesn't cause * uaccess to fail when setting up the signal frame, and the signal handler * itself also expects a well-defined state when entered. + * + * The struct should be zero-initialised. Its members should only be accessed + * via the accessors below. __valid_fields tracks which of the fields are valid + * (have been set to some value). */ struct user_access_state { - u64 por_el0; + unsigned int __valid_fields; + u64 __por_el0; }; -#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16) -#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16) +#define UA_STATE_HAS_POR_EL0 BIT(0) + +static void set_ua_state_por_el0(struct user_access_state *ua_state, + u64 por_el0) +{ + ua_state->__por_el0 = por_el0; + ua_state->__valid_fields |= UA_STATE_HAS_POR_EL0; +} + +static int get_ua_state_por_el0(const struct user_access_state *ua_state, + u64 *por_el0) +{ + if (ua_state->__valid_fields & UA_STATE_HAS_POR_EL0) { + *por_el0 = ua_state->__por_el0; + return 0; + } + + return -ENOENT; +} /* * Save the user access state into ua_state and reset it to disable any @@ -94,7 +119,7 @@ static void save_reset_user_access_state(struct user_access_state *ua_state) for (int pkey = 0; pkey < arch_max_pkey(); pkey++) por_enable_all |= POR_ELx_PERM_PREP(pkey, POE_RWX); - ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0); + set_ua_state_por_el0(ua_state, read_sysreg_s(SYS_POR_EL0)); write_sysreg_s(por_enable_all, SYS_POR_EL0); /* * No ISB required as we can tolerate spurious Overlay faults - @@ -122,8 +147,10 @@ static void set_handler_user_access_state(void) */ static void restore_user_access_state(const struct user_access_state *ua_state) { - if (system_supports_poe()) - write_sysreg_s(ua_state->por_el0, SYS_POR_EL0); + u64 por_el0; + + if (get_ua_state_por_el0(ua_state, &por_el0) == 0) + write_sysreg_s(por_el0, SYS_POR_EL0); } static void init_user_layout(struct rt_sigframe_user_layout *user) @@ -333,11 +360,16 @@ static int restore_fpmr_context(struct user_ctxs *user) static int preserve_poe_context(struct poe_context __user *ctx, const struct user_access_state *ua_state) { - int err = 0; + int err; + u64 por_el0; + + err = get_ua_state_por_el0(ua_state, &por_el0); + if (WARN_ON_ONCE(err)) + return err; __put_user_error(POE_MAGIC, &ctx->head.magic, err); __put_user_error(sizeof(*ctx), &ctx->head.size, err); - __put_user_error(ua_state->por_el0, &ctx->por_el0, err); + __put_user_error(por_el0, &ctx->por_el0, err); return err; } @@ -353,7 +385,7 @@ static int restore_poe_context(struct user_ctxs *user, __get_user_error(por_el0, &(user->poe->por_el0), err); if (!err) - ua_state->por_el0 = por_el0; + set_ua_state_por_el0(ua_state, por_el0); return err; } @@ -1095,7 +1127,7 @@ SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; - struct user_access_state ua_state; + struct user_access_state ua_state = {}; /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; @@ -1507,7 +1539,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, { struct rt_sigframe_user_layout user; struct rt_sigframe __user *frame; - struct user_access_state ua_state; + struct user_access_state ua_state = {}; int err = 0; fpsimd_save_and_flush_current_state(); -- cgit v1.2.3 From 41ca998fbe30755191342b58e4f642cf3052ef2b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 1 May 2026 19:07:19 +0200 Subject: parisc: Fix 64-bit kernel build when CONFIG_COMPAT=n VDSO32_SYMBOL() is used in signal.c, defining the value to zero avoids liker issues when CONFIG_COMPAT=n. Signed-off-by: Helge Deller --- arch/parisc/include/asm/vdso.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/parisc/include/asm/vdso.h b/arch/parisc/include/asm/vdso.h index 5501560f5ffe..e5cca3c9c8e7 100644 --- a/arch/parisc/include/asm/vdso.h +++ b/arch/parisc/include/asm/vdso.h @@ -6,13 +6,14 @@ #ifdef CONFIG_64BIT #include +#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name)) #endif #if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) #include -#endif - -#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name)) #define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name)) +#else +#define VDSO32_SYMBOL(tsk, name) 0UL +#endif #endif /* __ASSEMBLER__ */ -- cgit v1.2.3 From cb48828f06afa232cc330f0f4d6be101067810b3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 23 Apr 2026 20:17:45 +0100 Subject: selftests/rseq: Don't run tests with runner scripts outside of the scripts The rseq selftests include two runner scripts run_param_test.sh and run_syscall_errors_test.sh which set up the environment for test binaries and run them with various parameters. Currently we list these test binaries in TEST_GEN_PROGS but this results in the kselftest framework running them directly as well as via the runners, resulting in duplication and spurious failures when the environment is not correctly set up (eg, if glibc tries to use rseq). Move the binaries the runners invoke to TEST_GEN_PROGS_EXTENDED, binaries listed there are built but not run by the framework. The param_test benchmarks are not moved since they are not run by run_param_test.sh. Fixes: 830969e7821a ("selftests/rseq: Implement time slice extension test") Signed-off-by: Mark Brown Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260423-selftests-rseq-use-runner-v1-1-e13a133754c1@kernel.org Cc: stable@vger.kernel.org --- tools/testing/selftests/rseq/Makefile | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index 4ef90823b652..0d1947c0d623 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -14,12 +14,15 @@ LDLIBS += -lpthread -ldl # still track changes to header files and depend on shared object. OVERRIDE_TARGETS = 1 -TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \ - param_test_benchmark param_test_compare_twice param_test_mm_cid \ - param_test_mm_cid_benchmark param_test_mm_cid_compare_twice \ - syscall_errors_test slice_test - -TEST_GEN_PROGS_EXTENDED = librseq.so +TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test \ + param_test_benchmark param_test_mm_cid_benchmark slice_test + +TEST_GEN_PROGS_EXTENDED = librseq.so \ + param_test \ + param_test_compare_twice \ + param_test_mm_cid \ + param_test_mm_cid_compare_twice \ + syscall_errors_test TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh -- cgit v1.2.3 From 2cb68e45120dfc66404c7547d95b8ac6ff0b25ce Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 Apr 2026 10:10:19 +0200 Subject: rseq: Set rseq::cpu_id_start to 0 on unregistration The RSEQ rework changed that to RSEQ_CPU_UNINITILIZED, which is obviously incompatible. Revert back to the original behavior. Fixes: 0f085b41880e ("rseq: Provide and use rseq_set_ids()") Reported-by: Dmitry Vyukov Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Link: https://patch.msgid.link/20260428224427.271566313%40kernel.org Cc: stable@vger.kernel.org --- kernel/rseq.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/kernel/rseq.c b/kernel/rseq.c index 38d3ef540760..b9f11931ef78 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -236,11 +236,6 @@ static int __init rseq_debugfs_init(void) } __initcall(rseq_debugfs_init); -static bool rseq_set_ids(struct task_struct *t, struct rseq_ids *ids, u32 node_id) -{ - return rseq_set_ids_get_csaddr(t, ids, node_id, NULL); -} - static bool rseq_handle_cs(struct task_struct *t, struct pt_regs *regs) { struct rseq __user *urseq = t->rseq.usrptr; @@ -384,19 +379,22 @@ void rseq_syscall(struct pt_regs *regs) static bool rseq_reset_ids(void) { - struct rseq_ids ids = { - .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, - .mm_cid = 0, - }; + struct rseq __user *rseq = current->rseq.usrptr; /* * If this fails, terminate it because this leaves the kernel in * stupid state as exit to user space will try to fixup the ids * again. */ - if (rseq_set_ids(current, &ids, 0)) - return true; + scoped_user_rw_access(rseq, efault) { + unsafe_put_user(0, &rseq->cpu_id_start, efault); + unsafe_put_user(RSEQ_CPU_ID_UNINITIALIZED, &rseq->cpu_id, efault); + unsafe_put_user(0, &rseq->node_id, efault); + unsafe_put_user(0, &rseq->mm_cid, efault); + } + return true; +efault: force_sig(SIGSEGV); return false; } -- cgit v1.2.3 From e9766e6f7d330dce7530918d8c6e3ec96d6c6e24 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 Apr 2026 10:14:41 +0200 Subject: rseq: Protect rseq_reset() against interrupts rseq_reset() uses memset() to clear the tasks rseq data. That's racy against membarrier() and preemption. Guard it with irqsave to cure this. Fixes: faba9d250eae ("rseq: Introduce struct rseq_data") Reported-by: Dmitry Vyukov Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Link: https://patch.msgid.link/20260428224427.353887714%40kernel.org Cc: stable@vger.kernel.org --- include/linux/rseq.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/rseq.h b/include/linux/rseq.h index b9d62fc2140d..f446909551df 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -119,6 +119,8 @@ static inline void rseq_virt_userspace_exit(void) static inline void rseq_reset(struct task_struct *t) { + /* Protect against preemption and membarrier IPI */ + guard(irqsave)(); memset(&t->rseq, 0, sizeof(t->rseq)); t->rseq.ids.cpu_id = RSEQ_CPU_ID_UNINITIALIZED; } -- cgit v1.2.3 From 010b7723c0a3b9ad58f50b715dbe2e7781d29400 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 Apr 2026 09:34:45 +0200 Subject: rseq: Don't advertise time slice extensions if disabled If time slice extensions have been disabled on the kernel command line, then advertising them in RSEQ flags is wrong. Adjust the conditionals to reflect reality, fixup the misleading comments about the gap of these flags and the rseq::flags field. Fixes: d6200245c75e ("rseq: Allow registering RSEQ with slice extension") Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Link: https://patch.msgid.link/20260428224427.437059375%40kernel.org Cc: stable@vger.kernel.org --- include/uapi/linux/rseq.h | 5 ++++- kernel/rseq.c | 9 +++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index f69344fe6c08..ca6fe1f9d05e 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -28,7 +28,7 @@ enum rseq_cs_flags_bit { RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, - /* (3) Intentional gap to put new bits into a separate byte */ + /* (3) Intentional gap to keep new bits separate */ /* User read only feature flags */ RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE_BIT = 4, @@ -161,6 +161,9 @@ struct rseq { * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE + * + * It is now used for feature status advertisement by the kernel. + * See: enum rseq_cs_flags_bit for further information. */ __u32 flags; diff --git a/kernel/rseq.c b/kernel/rseq.c index b9f11931ef78..586f58f652c6 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -462,10 +462,11 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 return -EFAULT; if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION)) { - rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE; - if (rseq_slice_extension_enabled() && - (flags & RSEQ_FLAG_SLICE_EXT_DEFAULT_ON)) - rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED; + if (rseq_slice_extension_enabled()) { + rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE; + if (flags & RSEQ_FLAG_SLICE_EXT_DEFAULT_ON) + rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED; + } } scoped_user_write_access(rseq, efault) { -- cgit v1.2.3 From ee9dce44362b2d8132c32964656ab6dff7dfbc6a Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 1 May 2026 12:41:23 -0700 Subject: futex: Drop CLONE_THREAD requirement for private default hash alloc Currently need_futex_hash_allocate_default() depends on strict pthread semantics, abusing CLONE_THREAD. This breaks the non-concurrency assumptions when doing the mm->futex_ref pcpu allocations, leading to bugs[0] when sharing the mm in other ways; ie: BUG: KASAN: slab-use-after-free in futex_hash_put ... where the +1 bias can end up on a percpu counter that mm->futex_ref no longer points at. Loosen the check to cover any CLONE_VM clone, except vfork(). Excluding vfork keeps the existing paths untouched (no overhead), and we can't race in the first place: either the parent is suspended and the child runs alone, or mm->futex_ref is already allocated from an earlier CLONE_VM. Link: https://lore.kernel.org/all/CAL_bE8LsmCQ-FAtYDuwbJhOkt9p2wwYQwAbMh=PifC=VsiBM6A@mail.gmail.com/ [0] Fixes: d9b05321e21e ("futex: Move futex_hash_free() back to __mmput()") Reported-by: Yiming Qian Signed-off-by: Davidlohr Bueso Signed-off-by: Linus Torvalds --- kernel/fork.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index f1ad69c6dc2d..5f3fdfdb14c7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1951,9 +1951,11 @@ static void rv_task_fork(struct task_struct *p) static bool need_futex_hash_allocate_default(u64 clone_flags) { - if ((clone_flags & (CLONE_THREAD | CLONE_VM)) != (CLONE_THREAD | CLONE_VM)) - return false; - return true; + /* + * Allocate a default futex hash for any sibling that will + * share the parent's mm, except vfork. + */ + return (clone_flags & (CLONE_VM | CLONE_VFORK)) == CLONE_VM; } /* @@ -2380,10 +2382,6 @@ __latent_entropy struct task_struct *copy_process( if (retval) goto bad_fork_cancel_cgroup; - /* - * Allocate a default futex hash for the user process once the first - * thread spawns. - */ if (need_futex_hash_allocate_default(clone_flags)) { retval = futex_hash_allocate_default(); if (retval) -- cgit v1.2.3 From 01eb80b767430ae868c48ad106c60eb61a508c85 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 10 Apr 2026 04:20:12 -0700 Subject: accel/qaic: fix incorrect counter check in RAS message decode The UE and UE_NF cases check ce_count against UINT_MAX before incrementing their respective counters. This is logically incorrect and prevents ue_count and ue_nf_count from incrementing when ce_count reaches UINT_MAX. Fixes: c11a50b170e7 ("accel/qaic: Add Reliability, Accessibility, Serviceability (RAS)") Signed-off-by: Alok Tiwari Reviewed-by: Jeff Hugo Signed-off-by: Jeff Hugo Link: https://patch.msgid.link/20260410112015.592546-1-alok.a.tiwari@oracle.com --- drivers/accel/qaic/qaic_ras.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/accel/qaic/qaic_ras.c b/drivers/accel/qaic/qaic_ras.c index cc0b75461e1a..6791af366cba 100644 --- a/drivers/accel/qaic/qaic_ras.c +++ b/drivers/accel/qaic/qaic_ras.c @@ -497,11 +497,11 @@ static void decode_ras_msg(struct qaic_device *qdev, struct ras_data *msg) qdev->ce_count++; break; case UE: - if (qdev->ce_count != UINT_MAX) + if (qdev->ue_count != UINT_MAX) qdev->ue_count++; break; case UE_NF: - if (qdev->ce_count != UINT_MAX) + if (qdev->ue_nf_count != UINT_MAX) qdev->ue_nf_count++; break; default: -- cgit v1.2.3 From 5234094c0150338c35280a75cc7842015f76b725 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 29 Apr 2026 15:43:35 +0200 Subject: smb: smbdirect: make use of DEFAULT_SYMBOL_NAMESPACE and EXPORT_SYMBOL_GPL This is a better solution than EXPORT_SYMBOL_FOR_MODULES(__sym, "cifs,ksmbd") as it makes it possible to rebuild smbdirect.ko against a running kernel and then load the existing cifs.ko and ksmbd.ko from the running kernel. Suggested-by: Christoph Hellwig Link: https://lore.kernel.org/linux-cifs/aehrPuY60VMcYGU8@infradead.org/ Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: Christoph Hellwig Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 2 ++ fs/smb/server/transport_rdma.c | 2 ++ fs/smb/smbdirect/accept.c | 2 +- fs/smb/smbdirect/connect.c | 4 ++-- fs/smb/smbdirect/connection.c | 16 ++++++++-------- fs/smb/smbdirect/debug.c | 2 +- fs/smb/smbdirect/devices.c | 2 +- fs/smb/smbdirect/internal.h | 1 + fs/smb/smbdirect/listen.c | 2 +- fs/smb/smbdirect/mr.c | 6 +++--- fs/smb/smbdirect/public.h | 2 -- fs/smb/smbdirect/rw.c | 2 +- fs/smb/smbdirect/socket.c | 20 ++++++++++---------- 13 files changed, 33 insertions(+), 30 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 75f9f91a7ec9..b9826185de18 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -558,3 +558,5 @@ void smbd_debug_proc_show(struct TCP_Server_Info *server, struct seq_file *m) server->rdma_readwrite_threshold, m); } + +MODULE_IMPORT_NS("SMBDIRECT"); diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index a8242c00096f..346c051e31f5 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -540,3 +540,5 @@ static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { .rdma_write = smb_direct_rdma_write, .free_transport = smb_direct_free_transport, }; + +MODULE_IMPORT_NS("SMBDIRECT"); diff --git a/fs/smb/smbdirect/accept.c b/fs/smb/smbdirect/accept.c index 704b271af3a8..529740005838 100644 --- a/fs/smb/smbdirect/accept.c +++ b/fs/smb/smbdirect/accept.c @@ -854,4 +854,4 @@ struct smbdirect_socket *smbdirect_socket_accept(struct smbdirect_socket *lsc, return nsc; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_accept); +EXPORT_SYMBOL_GPL(smbdirect_socket_accept); diff --git a/fs/smb/smbdirect/connect.c b/fs/smb/smbdirect/connect.c index 8addee43a381..cd726b399afe 100644 --- a/fs/smb/smbdirect/connect.c +++ b/fs/smb/smbdirect/connect.c @@ -60,7 +60,7 @@ int smbdirect_connect(struct smbdirect_socket *sc, const struct sockaddr *dst) */ return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connect); +EXPORT_SYMBOL_GPL(smbdirect_connect); static int smbdirect_connect_setup_connection(struct smbdirect_socket *sc) { @@ -922,4 +922,4 @@ int smbdirect_connect_sync(struct smbdirect_socket *sc, return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connect_sync); +EXPORT_SYMBOL_GPL(smbdirect_connect_sync); diff --git a/fs/smb/smbdirect/connection.c b/fs/smb/smbdirect/connection.c index 822366718d45..fe9912e53da6 100644 --- a/fs/smb/smbdirect/connection.c +++ b/fs/smb/smbdirect/connection.c @@ -706,7 +706,7 @@ bool smbdirect_connection_is_connected(struct smbdirect_socket *sc) return false; return true; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_is_connected); +EXPORT_SYMBOL_GPL(smbdirect_connection_is_connected); int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc) { @@ -779,7 +779,7 @@ int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc) return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_wait_for_connected); +EXPORT_SYMBOL_GPL(smbdirect_connection_wait_for_connected); void smbdirect_connection_idle_timer_work(struct work_struct *work) { @@ -958,7 +958,7 @@ release_credit: return ret; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_batch_flush); +EXPORT_SYMBOL_GPL(smbdirect_connection_send_batch_flush); struct smbdirect_send_batch * smbdirect_init_send_batch_storage(struct smbdirect_send_batch_storage *storage, @@ -976,7 +976,7 @@ smbdirect_init_send_batch_storage(struct smbdirect_send_batch_storage *storage, return batch; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_init_send_batch_storage); +EXPORT_SYMBOL_GPL(smbdirect_init_send_batch_storage); static int smbdirect_connection_wait_for_send_bcredit(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch) @@ -1263,7 +1263,7 @@ lcredit_failed: bcredit_failed: return ret; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_single_iter); +EXPORT_SYMBOL_GPL(smbdirect_connection_send_single_iter); int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc) { @@ -1288,7 +1288,7 @@ int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc) return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_wait_zero_pending); +EXPORT_SYMBOL_GPL(smbdirect_connection_send_wait_zero_pending); int smbdirect_connection_send_iter(struct smbdirect_socket *sc, struct iov_iter *iter, @@ -1373,7 +1373,7 @@ int smbdirect_connection_send_iter(struct smbdirect_socket *sc, return total_count; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_iter); +EXPORT_SYMBOL_GPL(smbdirect_connection_send_iter); static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc) { @@ -1937,7 +1937,7 @@ read_rfc1002_done: goto again; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_recvmsg); +EXPORT_SYMBOL_GPL(smbdirect_connection_recvmsg); static bool smbdirect_map_sges_single_page(struct smbdirect_map_sges *state, struct page *page, size_t off, size_t len) diff --git a/fs/smb/smbdirect/debug.c b/fs/smb/smbdirect/debug.c index a66a19d4a463..05ba7c8d165e 100644 --- a/fs/smb/smbdirect/debug.c +++ b/fs/smb/smbdirect/debug.c @@ -85,4 +85,4 @@ void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, atomic_read(&sc->mr_io.ready.count), atomic_read(&sc->mr_io.used.count)); } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_legacy_debug_proc_show); +EXPORT_SYMBOL_GPL(smbdirect_connection_legacy_debug_proc_show); diff --git a/fs/smb/smbdirect/devices.c b/fs/smb/smbdirect/devices.c index 44962f221c35..7adacbdfe12e 100644 --- a/fs/smb/smbdirect/devices.c +++ b/fs/smb/smbdirect/devices.c @@ -238,7 +238,7 @@ u8 smbdirect_netdev_rdma_capable_node_type(struct net_device *netdev) return RDMA_NODE_UNSPECIFIED; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_netdev_rdma_capable_node_type); +EXPORT_SYMBOL_GPL(smbdirect_netdev_rdma_capable_node_type); __init int smbdirect_devices_init(void) { diff --git a/fs/smb/smbdirect/internal.h b/fs/smb/smbdirect/internal.h index 2d5acf2c21bc..82529b41708b 100644 --- a/fs/smb/smbdirect/internal.h +++ b/fs/smb/smbdirect/internal.h @@ -6,6 +6,7 @@ #ifndef __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ #define __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ +#define DEFAULT_SYMBOL_NAMESPACE "SMBDIRECT" #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "smbdirect.h" diff --git a/fs/smb/smbdirect/listen.c b/fs/smb/smbdirect/listen.c index 143a7618d95f..2f78bcaedbf8 100644 --- a/fs/smb/smbdirect/listen.c +++ b/fs/smb/smbdirect/listen.c @@ -90,7 +90,7 @@ int smbdirect_socket_listen(struct smbdirect_socket *sc, int backlog) */ return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_listen); +EXPORT_SYMBOL_GPL(smbdirect_socket_listen); static int smbdirect_new_rdma_event_handler(struct rdma_cm_id *new_id, struct rdma_cm_event *event) diff --git a/fs/smb/smbdirect/mr.c b/fs/smb/smbdirect/mr.c index 5228e699cd5d..9e6ac9d8717a 100644 --- a/fs/smb/smbdirect/mr.c +++ b/fs/smb/smbdirect/mr.c @@ -380,7 +380,7 @@ dma_map_error: mutex_unlock(&mr->mutex); return NULL; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_register_mr_io); +EXPORT_SYMBOL_GPL(smbdirect_connection_register_mr_io); void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, struct smbdirect_buffer_descriptor_v1 *v1) @@ -397,7 +397,7 @@ void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, } mutex_unlock(&mr->mutex); } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_mr_io_fill_buffer_descriptor); +EXPORT_SYMBOL_GPL(smbdirect_mr_io_fill_buffer_descriptor); /* * Deregister a MR after I/O is done @@ -490,4 +490,4 @@ put_kref: if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked)) mutex_unlock(&mr->mutex); } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_deregister_mr_io); +EXPORT_SYMBOL_GPL(smbdirect_connection_deregister_mr_io); diff --git a/fs/smb/smbdirect/public.h b/fs/smb/smbdirect/public.h index 50088155e7c3..d4fb36e65254 100644 --- a/fs/smb/smbdirect/public.h +++ b/fs/smb/smbdirect/public.h @@ -13,8 +13,6 @@ struct smbdirect_socket; struct smbdirect_send_batch; struct smbdirect_mr_io; -#define __SMBDIRECT_EXPORT_SYMBOL__(__sym) EXPORT_SYMBOL_FOR_MODULES(__sym, "cifs,ksmbd") - #include u8 smbdirect_netdev_rdma_capable_node_type(struct net_device *netdev); diff --git a/fs/smb/smbdirect/rw.c b/fs/smb/smbdirect/rw.c index c2f46b17731e..6fe38042cfb9 100644 --- a/fs/smb/smbdirect/rw.c +++ b/fs/smb/smbdirect/rw.c @@ -252,4 +252,4 @@ free_msg: kfree(msg); goto out; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_rdma_xmit); +EXPORT_SYMBOL_GPL(smbdirect_connection_rdma_xmit); diff --git a/fs/smb/smbdirect/socket.c b/fs/smb/smbdirect/socket.c index 1b4ab01b745e..39cca7219c4d 100644 --- a/fs/smb/smbdirect/socket.c +++ b/fs/smb/smbdirect/socket.c @@ -20,7 +20,7 @@ bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs) return false; return true; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_frwr_is_supported); +EXPORT_SYMBOL_GPL(smbdirect_frwr_is_supported); static void smbdirect_socket_cleanup_work(struct work_struct *work); @@ -107,7 +107,7 @@ init_failed: alloc_failed: return ret; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_create_kern); +EXPORT_SYMBOL_GPL(smbdirect_socket_create_kern); int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdirect_socket *sc) { @@ -148,7 +148,7 @@ init_failed: alloc_failed: return ret; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_create_accepting); +EXPORT_SYMBOL_GPL(smbdirect_socket_create_accepting); int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, const struct smbdirect_socket_parameters *sp) @@ -189,14 +189,14 @@ int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_initial_parameters); +EXPORT_SYMBOL_GPL(smbdirect_socket_set_initial_parameters); const struct smbdirect_socket_parameters * smbdirect_socket_get_current_parameters(struct smbdirect_socket *sc) { return &sc->parameters; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_get_current_parameters); +EXPORT_SYMBOL_GPL(smbdirect_socket_get_current_parameters); int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, enum ib_poll_context poll_ctx, @@ -220,7 +220,7 @@ int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_kernel_settings); +EXPORT_SYMBOL_GPL(smbdirect_socket_set_kernel_settings); void smbdirect_socket_set_logging(struct smbdirect_socket *sc, void *private_ptr, @@ -240,7 +240,7 @@ void smbdirect_socket_set_logging(struct smbdirect_socket *sc, sc->logging.needed = needed; sc->logging.vaprintf = vaprintf; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_logging); +EXPORT_SYMBOL_GPL(smbdirect_socket_set_logging); static void smbdirect_socket_wake_up_all(struct smbdirect_socket *sc) { @@ -663,13 +663,13 @@ int smbdirect_socket_bind(struct smbdirect_socket *sc, struct sockaddr *addr) return 0; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_bind); +EXPORT_SYMBOL_GPL(smbdirect_socket_bind); void smbdirect_socket_shutdown(struct smbdirect_socket *sc) { smbdirect_socket_schedule_cleanup_lvl(sc, SMBDIRECT_LOG_INFO, -ESHUTDOWN); } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_shutdown); +EXPORT_SYMBOL_GPL(smbdirect_socket_shutdown); static void smbdirect_socket_release_disconnect(struct kref *kref) { @@ -712,7 +712,7 @@ void smbdirect_socket_release(struct smbdirect_socket *sc) */ kref_put(&sc->refs.destroy, smbdirect_socket_release_destroy); } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_release); +EXPORT_SYMBOL_GPL(smbdirect_socket_release); int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, enum smbdirect_socket_status expected_status, -- cgit v1.2.3 From e768103cfbac30a49860aca08a7710d39dbdd470 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 29 Apr 2026 15:43:36 +0200 Subject: smb: smbdirect: introduce and use include/linux/smbdirect.h This makes it easier to rebuild cifs.ko and ksmbd.ko against a running kernel. Suggested-by: Christoph Hellwig Link: https://lore.kernel.org/linux-cifs/aehrPuY60VMcYGU8@infradead.org/ Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: Christoph Hellwig Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- MAINTAINERS | 1 + fs/smb/client/smbdirect.c | 1 - fs/smb/client/smbdirect.h | 2 +- fs/smb/server/transport_rdma.c | 1 - fs/smb/server/transport_rdma.h | 2 +- fs/smb/smbdirect/internal.h | 3 +- fs/smb/smbdirect/public.h | 146 -------------------------------- fs/smb/smbdirect/smbdirect.h | 52 ------------ include/linux/smbdirect.h | 186 +++++++++++++++++++++++++++++++++++++++++ 9 files changed, 190 insertions(+), 204 deletions(-) delete mode 100644 fs/smb/smbdirect/public.h delete mode 100644 fs/smb/smbdirect/smbdirect.h create mode 100644 include/linux/smbdirect.h diff --git a/MAINTAINERS b/MAINTAINERS index 2c67ee25ffe6..060dca38dbf7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24650,6 +24650,7 @@ S: Maintained F: fs/smb/client/smbdirect.* F: fs/smb/smbdirect/ F: fs/smb/server/transport_rdma.* +F: include/linux/smbdirect.h SMC91x ETHERNET DRIVER M: Nicolas Pitre diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index b9826185de18..563ef488a225 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -9,7 +9,6 @@ #include "cifs_debug.h" #include "cifsproto.h" #include "smb2proto.h" -#include "../smbdirect/public.h" /* Port numbers for SMBD transport */ #define SMB_PORT 445 diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 287ac849213d..be205ec02077 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -12,7 +12,7 @@ #include "cifsglob.h" -#include "../smbdirect/smbdirect.h" +#include extern int rdma_readwrite_threshold; extern int smbd_max_frmr_depth; diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 346c051e31f5..b6d63ff8a8a3 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -18,7 +18,6 @@ #include "smb_common.h" #include "../common/smb2status.h" #include "transport_rdma.h" -#include "../smbdirect/public.h" #define SMB_DIRECT_PORT_IWARP 5445 diff --git a/fs/smb/server/transport_rdma.h b/fs/smb/server/transport_rdma.h index bde3d88aecc7..8b78917a1795 100644 --- a/fs/smb/server/transport_rdma.h +++ b/fs/smb/server/transport_rdma.h @@ -25,6 +25,6 @@ static inline void init_smbd_max_io_size(unsigned int sz) { } static inline unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) { return 0; } #endif -#include "../smbdirect/smbdirect.h" +#include #endif /* __KSMBD_TRANSPORT_RDMA_H__ */ diff --git a/fs/smb/smbdirect/internal.h b/fs/smb/smbdirect/internal.h index 82529b41708b..e9959e6dc13a 100644 --- a/fs/smb/smbdirect/internal.h +++ b/fs/smb/smbdirect/internal.h @@ -9,9 +9,8 @@ #define DEFAULT_SYMBOL_NAMESPACE "SMBDIRECT" #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include "smbdirect.h" +#include #include "pdu.h" -#include "public.h" #include diff --git a/fs/smb/smbdirect/public.h b/fs/smb/smbdirect/public.h deleted file mode 100644 index d4fb36e65254..000000000000 --- a/fs/smb/smbdirect/public.h +++ /dev/null @@ -1,146 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2025, Stefan Metzmacher - */ - -#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PUBLIC_H__ -#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PUBLIC_H__ - -struct smbdirect_buffer_descriptor_v1; -struct smbdirect_socket_parameters; - -struct smbdirect_socket; -struct smbdirect_send_batch; -struct smbdirect_mr_io; - -#include - -u8 smbdirect_netdev_rdma_capable_node_type(struct net_device *netdev); - -bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs); - -int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket **_sc); - -int smbdirect_socket_create_accepting(struct rdma_cm_id *id, struct smbdirect_socket **_sc); - -int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, - const struct smbdirect_socket_parameters *sp); - -const struct smbdirect_socket_parameters * -smbdirect_socket_get_current_parameters(struct smbdirect_socket *sc); - -int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, - enum ib_poll_context poll_ctx, - gfp_t gfp_mask); - -#define SMBDIRECT_LOG_ERR 0x0 -#define SMBDIRECT_LOG_INFO 0x1 - -#define SMBDIRECT_LOG_OUTGOING 0x1 -#define SMBDIRECT_LOG_INCOMING 0x2 -#define SMBDIRECT_LOG_READ 0x4 -#define SMBDIRECT_LOG_WRITE 0x8 -#define SMBDIRECT_LOG_RDMA_SEND 0x10 -#define SMBDIRECT_LOG_RDMA_RECV 0x20 -#define SMBDIRECT_LOG_KEEP_ALIVE 0x40 -#define SMBDIRECT_LOG_RDMA_EVENT 0x80 -#define SMBDIRECT_LOG_RDMA_MR 0x100 -#define SMBDIRECT_LOG_RDMA_RW 0x200 -#define SMBDIRECT_LOG_NEGOTIATE 0x400 -void smbdirect_socket_set_logging(struct smbdirect_socket *sc, - void *private_ptr, - bool (*needed)(struct smbdirect_socket *sc, - void *private_ptr, - unsigned int lvl, - unsigned int cls), - void (*vaprintf)(struct smbdirect_socket *sc, - const char *func, - unsigned int line, - void *private_ptr, - unsigned int lvl, - unsigned int cls, - struct va_format *vaf)); - -bool smbdirect_connection_is_connected(struct smbdirect_socket *sc); - -int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc); - -int smbdirect_socket_bind(struct smbdirect_socket *sc, struct sockaddr *addr); - -void smbdirect_socket_shutdown(struct smbdirect_socket *sc); - -void smbdirect_socket_release(struct smbdirect_socket *sc); - -int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, - struct smbdirect_send_batch *batch, - bool is_last); - -/* - * This is only temporary and only needed - * as long as the client still requires - * to use smbdirect_connection_send_single_iter() - */ -struct smbdirect_send_batch_storage { - union { - struct list_head __msg_list; - __aligned_u64 __space[5]; - }; -}; - -struct smbdirect_send_batch * -smbdirect_init_send_batch_storage(struct smbdirect_send_batch_storage *storage, - bool need_invalidate_rkey, - unsigned int remote_key); - -int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, - struct smbdirect_send_batch *batch, - struct iov_iter *iter, - unsigned int flags, - u32 remaining_data_length); - -int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc); - -int smbdirect_connection_send_iter(struct smbdirect_socket *sc, - struct iov_iter *iter, - unsigned int flags, - bool need_invalidate, - unsigned int remote_key); - -int smbdirect_connection_recvmsg(struct smbdirect_socket *sc, - struct msghdr *msg, - unsigned int flags); - -int smbdirect_connect(struct smbdirect_socket *sc, - const struct sockaddr *dst); - -int smbdirect_connect_sync(struct smbdirect_socket *sc, - const struct sockaddr *dst); - -int smbdirect_socket_listen(struct smbdirect_socket *sc, int backlog); - -struct smbdirect_socket *smbdirect_socket_accept(struct smbdirect_socket *lsc, - long timeo, - struct proto_accept_arg *arg); - -int smbdirect_connection_rdma_xmit(struct smbdirect_socket *sc, - void *buf, size_t buf_len, - struct smbdirect_buffer_descriptor_v1 *desc, - size_t desc_len, - bool is_read); - -struct smbdirect_mr_io * -smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, - struct iov_iter *iter, - bool writing, - bool need_invalidate); - -void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, - struct smbdirect_buffer_descriptor_v1 *v1); - -void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr); - -void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, - unsigned int rdma_readwrite_threshold, - struct seq_file *m); - -#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PUBLIC_H__ */ diff --git a/fs/smb/smbdirect/smbdirect.h b/fs/smb/smbdirect/smbdirect.h deleted file mode 100644 index bbab5f7f7cc9..000000000000 --- a/fs/smb/smbdirect/smbdirect.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2025 Stefan Metzmacher - */ - -#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ -#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ - -#include - -/* SMB-DIRECT buffer descriptor V1 structure [MS-SMBD] 2.2.3.1 */ -struct smbdirect_buffer_descriptor_v1 { - __le64 offset; - __le32 token; - __le32 length; -} __packed; - -/* - * Connection parameters mostly from [MS-SMBD] 3.1.1.1 - * - * These are setup and negotiated at the beginning of a - * connection and remain constant unless explicitly changed. - * - * Some values are important for the upper layer. - */ -struct smbdirect_socket_parameters { - __u64 flags; -#define SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB ((__u64)0x1) -#define SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW ((__u64)0x2) - __u32 resolve_addr_timeout_msec; - __u32 resolve_route_timeout_msec; - __u32 rdma_connect_timeout_msec; - __u32 negotiate_timeout_msec; - __u16 initiator_depth; /* limited to U8_MAX */ - __u16 responder_resources; /* limited to U8_MAX */ - __u16 recv_credit_max; - __u16 send_credit_target; - __u32 max_send_size; - __u32 max_fragmented_send_size; - __u32 max_recv_size; - __u32 max_fragmented_recv_size; - __u32 max_read_write_size; - __u32 max_frmr_depth; - __u32 keepalive_interval_msec; - __u32 keepalive_timeout_msec; -} __packed; - -#define SMBDIRECT_FLAG_PORT_RANGE_MASK ( \ - SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB | \ - SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW) - -#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ */ diff --git a/include/linux/smbdirect.h b/include/linux/smbdirect.h new file mode 100644 index 000000000000..97f5ba730fa7 --- /dev/null +++ b/include/linux/smbdirect.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2025, Stefan Metzmacher + */ + +#ifndef __LINUX_SMBDIRECT_H__ +#define __LINUX_SMBDIRECT_H__ + +#include + +/* SMB-DIRECT buffer descriptor V1 structure [MS-SMBD] 2.2.3.1 */ +struct smbdirect_buffer_descriptor_v1 { + __le64 offset; + __le32 token; + __le32 length; +} __packed; + +/* + * Connection parameters mostly from [MS-SMBD] 3.1.1.1 + * + * These are setup and negotiated at the beginning of a + * connection and remain constant unless explicitly changed. + * + * Some values are important for the upper layer. + */ +struct smbdirect_socket_parameters { + __u64 flags; +#define SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB ((__u64)0x1) +#define SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW ((__u64)0x2) + __u32 resolve_addr_timeout_msec; + __u32 resolve_route_timeout_msec; + __u32 rdma_connect_timeout_msec; + __u32 negotiate_timeout_msec; + __u16 initiator_depth; /* limited to U8_MAX */ + __u16 responder_resources; /* limited to U8_MAX */ + __u16 recv_credit_max; + __u16 send_credit_target; + __u32 max_send_size; + __u32 max_fragmented_send_size; + __u32 max_recv_size; + __u32 max_fragmented_recv_size; + __u32 max_read_write_size; + __u32 max_frmr_depth; + __u32 keepalive_interval_msec; + __u32 keepalive_timeout_msec; +} __packed; + +#define SMBDIRECT_FLAG_PORT_RANGE_MASK ( \ + SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB | \ + SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW) + +struct smbdirect_socket; +struct smbdirect_send_batch; +struct smbdirect_mr_io; + +#include + +u8 smbdirect_netdev_rdma_capable_node_type(struct net_device *netdev); + +bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs); + +int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket **_sc); + +int smbdirect_socket_create_accepting(struct rdma_cm_id *id, struct smbdirect_socket **_sc); + +int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, + const struct smbdirect_socket_parameters *sp); + +const struct smbdirect_socket_parameters * +smbdirect_socket_get_current_parameters(struct smbdirect_socket *sc); + +int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, + enum ib_poll_context poll_ctx, + gfp_t gfp_mask); + +#define SMBDIRECT_LOG_ERR 0x0 +#define SMBDIRECT_LOG_INFO 0x1 + +#define SMBDIRECT_LOG_OUTGOING 0x1 +#define SMBDIRECT_LOG_INCOMING 0x2 +#define SMBDIRECT_LOG_READ 0x4 +#define SMBDIRECT_LOG_WRITE 0x8 +#define SMBDIRECT_LOG_RDMA_SEND 0x10 +#define SMBDIRECT_LOG_RDMA_RECV 0x20 +#define SMBDIRECT_LOG_KEEP_ALIVE 0x40 +#define SMBDIRECT_LOG_RDMA_EVENT 0x80 +#define SMBDIRECT_LOG_RDMA_MR 0x100 +#define SMBDIRECT_LOG_RDMA_RW 0x200 +#define SMBDIRECT_LOG_NEGOTIATE 0x400 +void smbdirect_socket_set_logging(struct smbdirect_socket *sc, + void *private_ptr, + bool (*needed)(struct smbdirect_socket *sc, + void *private_ptr, + unsigned int lvl, + unsigned int cls), + void (*vaprintf)(struct smbdirect_socket *sc, + const char *func, + unsigned int line, + void *private_ptr, + unsigned int lvl, + unsigned int cls, + struct va_format *vaf)); + +bool smbdirect_connection_is_connected(struct smbdirect_socket *sc); + +int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc); + +int smbdirect_socket_bind(struct smbdirect_socket *sc, struct sockaddr *addr); + +void smbdirect_socket_shutdown(struct smbdirect_socket *sc); + +void smbdirect_socket_release(struct smbdirect_socket *sc); + +int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, + struct smbdirect_send_batch *batch, + bool is_last); + +/* + * This is only temporary and only needed + * as long as the client still requires + * to use smbdirect_connection_send_single_iter() + */ +struct smbdirect_send_batch_storage { + union { + struct list_head __msg_list; + __aligned_u64 __space[5]; + }; +}; + +struct smbdirect_send_batch * +smbdirect_init_send_batch_storage(struct smbdirect_send_batch_storage *storage, + bool need_invalidate_rkey, + unsigned int remote_key); + +int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, + struct smbdirect_send_batch *batch, + struct iov_iter *iter, + unsigned int flags, + u32 remaining_data_length); + +int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc); + +int smbdirect_connection_send_iter(struct smbdirect_socket *sc, + struct iov_iter *iter, + unsigned int flags, + bool need_invalidate, + unsigned int remote_key); + +int smbdirect_connection_recvmsg(struct smbdirect_socket *sc, + struct msghdr *msg, + unsigned int flags); + +int smbdirect_connect(struct smbdirect_socket *sc, + const struct sockaddr *dst); + +int smbdirect_connect_sync(struct smbdirect_socket *sc, + const struct sockaddr *dst); + +int smbdirect_socket_listen(struct smbdirect_socket *sc, int backlog); + +struct smbdirect_socket *smbdirect_socket_accept(struct smbdirect_socket *lsc, + long timeo, + struct proto_accept_arg *arg); + +int smbdirect_connection_rdma_xmit(struct smbdirect_socket *sc, + void *buf, size_t buf_len, + struct smbdirect_buffer_descriptor_v1 *desc, + size_t desc_len, + bool is_read); + +struct smbdirect_mr_io * +smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, + struct iov_iter *iter, + bool writing, + bool need_invalidate); + +void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, + struct smbdirect_buffer_descriptor_v1 *v1); + +void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr); + +void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, + unsigned int rdma_readwrite_threshold, + struct seq_file *m); + +#endif /* __LINUX_SMBDIRECT_H__ */ -- cgit v1.2.3 From 9900b9fee5a0e0f72d7c744b37c7c851d5785ac6 Mon Sep 17 00:00:00 2001 From: Yi Kuo Date: Wed, 29 Apr 2026 18:00:11 +0800 Subject: smb: smbdirect: fix MR registration for coalesced SG lists ib_dma_map_sg() modifies the provided scatterlist and returns the number of mapped entries, which can be fewer than the requested mr->sgt.nents if the DMA controller coalesces contiguous memory segments. Passing the original, uncoalesced count to ib_map_mr_sg() causes memory registration failures if coalescing actually occurs. Capture the actual mapped count returned by ib_dma_map_sg() and pass it to ib_map_mr_sg() to ensure correct MR registration. Also update the ib_dma_map_sg() error logging to drop the error pointer formatting, since the return value is an integer count rather than an error code. Ensure a proper error code (-EIO) is assigned when DMA mapping or MR registration fails. Fixes: de5ef8ec3c46 ("smb: smbdirect: introduce smbdirect_mr.c with client mr code") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221408 Reviewed-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Yi Kuo Signed-off-by: Steve French --- fs/smb/smbdirect/mr.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/smb/smbdirect/mr.c b/fs/smb/smbdirect/mr.c index 9e6ac9d8717a..15c6363a2f97 100644 --- a/fs/smb/smbdirect/mr.c +++ b/fs/smb/smbdirect/mr.c @@ -269,7 +269,7 @@ smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, { const struct smbdirect_socket_parameters *sp = &sc->parameters; struct smbdirect_mr_io *mr; - int ret, num_pages; + int ret, num_pages, num_mapped; struct ib_reg_wr *reg_wr; num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1); @@ -300,19 +300,22 @@ smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, num_pages, iov_iter_count(iter), sp->max_frmr_depth); smbdirect_iter_to_sgt(iter, &mr->sgt, sp->max_frmr_depth); - ret = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); - if (!ret) { + num_mapped = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + if (!num_mapped) { smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, - "ib_dma_map_sg num_pages=%u dir=%x ret=%d (%1pe)\n", - num_pages, mr->dir, ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); + "ib_dma_map_sg num_pages=%u dir=%x num_mapped=%d\n", + num_pages, mr->dir, num_mapped); + ret = -EIO; goto dma_map_error; } - ret = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE); - if (ret != mr->sgt.nents) { + ret = ib_map_mr_sg(mr->mr, mr->sgt.sgl, num_mapped, NULL, PAGE_SIZE); + if (ret != num_mapped) { smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, - "ib_map_mr_sg failed ret = %d nents = %u\n", - ret, mr->sgt.nents); + "ib_map_mr_sg failed ret = %d num_mapped = %u\n", + ret, num_mapped); + if (ret >= 0) + ret = -EIO; goto map_mr_error; } -- cgit v1.2.3 From f93836b236773862e9ee268a82e3614caf77ea01 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Thu, 30 Apr 2026 23:34:33 +0200 Subject: net: usb: r8152: add TRENDnet TUC-ET2G v2.0 The TRENDnet TUC-ET2G V2.0 is an RTL8156B based 2.5G Ethernet controller. Add the vendor and product ID values to the driver. This makes Ethernet work with the adapter. Signed-off-by: Aleksander Jan Bajkowski Reviewed-by: Andrew Lunn Reviewed-by: Birger Koblitz Link: https://patch.msgid.link/20260430213435.21821-1-olek2@wp.pl Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7337bf1b7d6a..1ace1d2398c9 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10138,6 +10138,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_DELL, 0xb097) }, { USB_DEVICE(VENDOR_ID_ASUS, 0x1976) }, { USB_DEVICE(VENDOR_ID_TRENDNET, 0xe02b) }, + { USB_DEVICE(VENDOR_ID_TRENDNET, 0xe02c) }, {} }; -- cgit v1.2.3 From 4f34002e2e37639133693c13a2a9977fab86880d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Apr 2026 07:06:11 +0000 Subject: ipmr: prevent info-leak in pmr_cache_report() Yiming Qian reported: ipmr_cache_report()` allocates a report skb with `alloc_skb(128, GFP_ATOMIC)` and appends a `struct igmphdr` using `skb_put()`. In the non-`IGMPMSG_WHOLEPKT` path it initializes only: - `igmp->type` - `igmp->code` but does not initialize: - `igmp->csum` - `igmp->group` Later, `igmpmsg_netlink_event()` copies the bytes after `sizeof(struct igmpmsg)` into the `IPMRA_CREPORT_PKT` netlink attribute and emits `RTM_NEWCACHEREPORT` on `RTNLGRP_IPV4_MROUTE_R`. As a result, 6 bytes of stale heap data from the skb head are disclosed to userspace. Let's use skb_put_zero() instead of skb_put() to fix this bug. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Yiming Qian Signed-off-by: Eric Dumazet Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260430070611.4004529-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2058ca860294..05fb6eefe0be 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1112,11 +1112,12 @@ static int ipmr_cache_report(const struct mr_table *mrt, msg->im_vif_hi = vifi >> 8; ipv4_pktinfo_prepare(mroute_sk, pkt, false); memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); - /* Add our header */ - igmp = skb_put(skb, sizeof(struct igmphdr)); + /* Add our header. + * Note that code, csum and group fields are cleared. + */ + igmp = skb_put_zero(skb, sizeof(struct igmphdr)); igmp->type = assert; msg->im_msgtype = assert; - igmp->code = 0; ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ skb->transport_header = skb->network_header; } -- cgit v1.2.3 From 4b9e327991815e128ad3af75c3a04630a63ce3e0 Mon Sep 17 00:00:00 2001 From: Kai Zen Date: Thu, 30 Apr 2026 18:26:48 +0300 Subject: net: rtnetlink: zero ifla_vf_broadcast to avoid stack infoleak in rtnl_fill_vfinfo rtnl_fill_vfinfo() declares struct ifla_vf_broadcast on the stack without initialisation: struct ifla_vf_broadcast vf_broadcast; The struct contains a single fixed 32-byte field: /* include/uapi/linux/if_link.h */ struct ifla_vf_broadcast { __u8 broadcast[32]; }; The function then copies dev->broadcast into it using dev->addr_len as the length: memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len); On Ethernet devices (the overwhelming majority of SR-IOV NICs) dev->addr_len is 6, so only the first 6 bytes of broadcast[] are written. The remaining 26 bytes retain whatever was previously on the kernel stack. The full struct is then handed to userspace via: nla_put(skb, IFLA_VF_BROADCAST, sizeof(vf_broadcast), &vf_broadcast) leaking up to 26 bytes of uninitialised kernel stack per VF per RTM_GETLINK request, repeatable. The other vf_* structs in the same function are explicitly zeroed for exactly this reason - see the memset() calls for ivi, vf_vlan_info, node_guid and port_guid a few lines above. vf_broadcast was simply missed when it was added. Reachability: any unprivileged local process can open AF_NETLINK / NETLINK_ROUTE without capabilities and send RTM_GETLINK with an IFLA_EXT_MASK attribute carrying RTEXT_FILTER_VF. The kernel walks each VF and emits IFLA_VF_BROADCAST, leaking 26 bytes of stack per VF per request. Stack residue at this call site can include return addresses and transient sensitive data; KASAN with stack instrumentation, or KMSAN, will flag the nla_put() when reproduced. Zero the on-stack struct before the partial memcpy, matching the existing pattern used for the other vf_* structs in the same function. Fixes: 75345f888f70 ("ipoib: show VF broadcast address") Cc: stable@vger.kernel.org Signed-off-by: Kai Zen Link: https://patch.msgid.link/3c506e8f936e52b57620269b55c348af05d413a2.1777557228.git.kai.aizen.dev@gmail.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b613bb6e07df..df042da422ef 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1572,6 +1572,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, port_guid.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); + memset(&vf_broadcast, 0, sizeof(vf_broadcast)); memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; -- cgit v1.2.3 From c6bebaa744f7579eb72800a262fbfeb93e40db04 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Apr 2026 16:48:36 +0000 Subject: ipv4: igmp: annotate data-races in igmp_heard_query() Multiple cpus can run igmp_heard_query() concurrently. Add missing READ_ONCE()/WRITE_ONCE() over following in_dev fields. - mr_qrv - mr_qi - mr_qri - mr_v1_seen - mr_v2_seen Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+ae9a171f239b14485310@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/69f38675.050a0220.3cbe47.0002.GAE@google.com Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260430164836.872079-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/igmp.c | 58 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a674fb44ec25..a9ad39064f3b 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -122,16 +122,29 @@ * contradict to specs provided this delay is small enough. */ -#define IGMP_V1_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \ - IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ - ((in_dev)->mr_v1_seen && \ - time_before(jiffies, (in_dev)->mr_v1_seen))) -#define IGMP_V2_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \ - IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ - ((in_dev)->mr_v2_seen && \ - time_before(jiffies, (in_dev)->mr_v2_seen))) +static bool IGMP_V1_SEEN(const struct in_device *in_dev) +{ + unsigned long seen; + + if (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1) + return true; + if (IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1) + return true; + seen = READ_ONCE(in_dev->mr_v1_seen); + return seen && time_before(jiffies, seen); +} + +static bool IGMP_V2_SEEN(const struct in_device *in_dev) +{ + unsigned long seen; + + if (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2) + return true; + if (IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2) + return true; + seen = READ_ONCE(in_dev->mr_v2_seen); + return seen && time_before(jiffies, seen); +} static int unsolicited_report_interval(struct in_device *in_dev) { @@ -954,23 +967,21 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, int max_delay; int mark = 0; struct net *net = dev_net(in_dev->dev); - + unsigned long seen; if (len == 8) { + seen = jiffies + READ_ONCE(in_dev->mr_qrv) * READ_ONCE(in_dev->mr_qi) + + READ_ONCE(in_dev->mr_qri); if (ih->code == 0) { /* Alas, old v1 router presents here. */ max_delay = IGMP_QUERY_RESPONSE_INTERVAL; - in_dev->mr_v1_seen = jiffies + - (in_dev->mr_qrv * in_dev->mr_qi) + - in_dev->mr_qri; + WRITE_ONCE(in_dev->mr_v1_seen, seen); group = 0; } else { /* v2 router present */ max_delay = ih->code*(HZ/IGMP_TIMER_SCALE); - in_dev->mr_v2_seen = jiffies + - (in_dev->mr_qrv * in_dev->mr_qi) + - in_dev->mr_qri; + WRITE_ONCE(in_dev->mr_v2_seen, seen); } /* cancel the interface change timer */ WRITE_ONCE(in_dev->mr_ifc_count, 0); @@ -995,6 +1006,8 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (!max_delay) max_delay = 1; /* can't mod w/ 0 */ } else { /* v3 */ + unsigned long mr_qi; + if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) return true; @@ -1015,15 +1028,16 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * received value was zero, use the default or statically * configured value. */ - in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); - in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; - + WRITE_ONCE(in_dev->mr_qrv, + ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv)); + mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; + WRITE_ONCE(in_dev->mr_qi, mr_qi); /* RFC3376, 8.3. Query Response Interval: * The number of seconds represented by the [Query Response * Interval] must be less than the [Query Interval]. */ - if (in_dev->mr_qri >= in_dev->mr_qi) - in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ; + if (READ_ONCE(in_dev->mr_qri) >= mr_qi) + WRITE_ONCE(in_dev->mr_qri, (mr_qi/HZ - 1) * HZ); if (!group) { /* general query */ if (ih3->nsrcs) -- cgit v1.2.3 From a5148bc2fa27092862ac4b9e7b5c8340d60cff34 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Wed, 29 Apr 2026 18:57:39 +0100 Subject: net: usb: cdc_ncm: add Apple Mac USB-C direct networking quirk Apple Silicon Macs expose two CDC NCM "private" data interfaces over USB-C with VID:PID 0x05ac:0x1905 and product string "Mac". This is the same protocol Apple already ships on iPhone (0x05ac:0x12a8) and iPad (0x05ac:0x12ab) for RemoteXPC since iOS 17 -- both data interfaces lack an interrupt status endpoint, so they rely on the FLAG_LINK_INTR- conditional bind path introduced in commit 3ec8d7572a69 ("CDC-NCM: add support for Apple's private interface"). The id_table currently has entries for iPhone and iPad but not for the Mac. Without a match, cdc_ncm falls through to the generic CDC NCM class-match entry, which uses the FLAG_LINK_INTR-having cdc_ncm_info struct, so bind_common() fails on the missing status endpoint and no netdev appears. Add id_table entries for both interface numbers (0 and 2) of the Mac, bound to the existing apple_private_interface_info driver_info. Verified empirically on a Mac Studio M3 Ultra running macOS 26.5: when a Mac is connected via USB-C, ioreg shows VID 0x05ac, PID 0x1905, product string "Mac", with two NCM data interfaces at numbers 0 and 2. The same PID is presented by all current Apple Silicon Mac models (MacBook Pro/Air, Mac mini, Mac Studio across the M-series), mirroring Apple's single-PID-per-family pattern from iPhone/iPad. After this patch, plugging a Mac into a Linux host running the patched kernel produces two enx... interfaces (one per data interface), "ip -br link" lists them as UP, and standard userspace networking (DHCP, NetworkManager shared mode, etc.) works without any modprobe overrides or out-of-tree modules. Signed-off-by: Alex Cheema Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260429175739.34426-1-alex@exolabs.net Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ncm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index bb9929727eb9..0223a172851e 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -2012,6 +2012,14 @@ static const struct usb_device_id cdc_devs[] = { .driver_info = (unsigned long)&apple_private_interface_info, }, + /* Mac */ + { USB_DEVICE_INTERFACE_NUMBER(0x05ac, 0x1905, 0), + .driver_info = (unsigned long)&apple_private_interface_info, + }, + { USB_DEVICE_INTERFACE_NUMBER(0x05ac, 0x1905, 2), + .driver_info = (unsigned long)&apple_private_interface_info, + }, + /* Ericsson MBM devices like F5521gw */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_VENDOR, -- cgit v1.2.3 From 6d4106e8df94c0c52cf3ca6a6a0d01567fb3844e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Apr 2026 08:00:56 +0000 Subject: net/sched: sch_pie: annotate more data-races in pie_dump_stats() My prior patch missed few READ_ONCE()/WRITE_ONCE() annotations. Fixes: 5154561d9b11 ("net/sched: sch_pie: annotate data-races in pie_dump_stats()") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260430080056.35104-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_pie.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index fb53fbf0e328..b41f2def2e2c 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -219,16 +219,14 @@ void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params, * packet timestamp. */ if (!params->dq_rate_estimator) { - vars->qdelay = now - pie_get_enqueue_time(skb); + WRITE_ONCE(vars->qdelay, + backlog ? now - pie_get_enqueue_time(skb) : 0); if (vars->dq_tstamp != DTIME_INVALID) dtime = now - vars->dq_tstamp; vars->dq_tstamp = now; - if (backlog == 0) - vars->qdelay = 0; - if (dtime == 0) return; @@ -376,7 +374,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC))) delta += MAX_PROB / (100 / 2); - vars->prob += delta; + WRITE_ONCE(vars->prob, vars->prob + delta); if (delta > 0) { /* prevent overflow */ @@ -401,7 +399,7 @@ void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars, if (qdelay == 0 && qdelay_old == 0 && update_prob) /* Reduce drop probability to 98.4% */ - vars->prob -= vars->prob / 64; + WRITE_ONCE(vars->prob, vars->prob - vars->prob / 64); WRITE_ONCE(vars->qdelay, qdelay); vars->backlog_old = backlog; @@ -501,7 +499,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct pie_sched_data *q = qdisc_priv(sch); struct tc_pie_xstats st = { - .prob = q->vars.prob << BITS_PER_BYTE, + .prob = READ_ONCE(q->vars.prob) << BITS_PER_BYTE, .delay = ((u32)PSCHED_TICKS2NS(READ_ONCE(q->vars.qdelay))) / NSEC_PER_USEC, .packets_in = READ_ONCE(q->stats.packets_in), @@ -512,7 +510,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) }; /* avg_dq_rate is only valid if dq_rate_estimator is enabled */ - st.dq_rate_estimating = q->params.dq_rate_estimator; + st.dq_rate_estimating = READ_ONCE(q->params.dq_rate_estimator); /* unscale and return dq_rate in bytes per sec */ if (st.dq_rate_estimating) -- cgit v1.2.3 From 4bc852006b62eae8ea77e797192d089367e854ff Mon Sep 17 00:00:00 2001 From: Sagarika Sharma Date: Thu, 30 Apr 2026 20:09:00 +0000 Subject: ipv6: update route serial number on NETDEV_CHANGE When using IPv6 ECMP routes, if a netdev listed as a nexthop experiences a carrier change event (e.g., a bond device generating a NETDEV_CHANGE event after its slaves go linkdown), established connections utilizing that nexthop fail to fail over to other available nexthops. Instead, these connections stall or drop. This happens because the IPv6 FIB code does not invalidate the socket's cached destination when a NETDEV_CHANGE event occurs. While fib6_ifdown() correctly marks the nexthop with RTNH_F_LINKDOWN, it leaves the route's serial number unchanged. As a result, sockets with a previously cached dst do not realize the route is no longer viable and continue to try using the non-functional nexthop. This behavior contrasts with IPv4, which actively flushes cached destinations on a NETDEV_CHANGE event (see fib_netdev_event() in net/ipv4/fib_frontend.c). Fix this by updating the route serial number in fib6_ifdown() when setting RTNH_F_LINKDOWN. This invalidates stale cached destinations, forcing sockets to perform a new route lookup and fail over to a functioning nexthop. Fixes: 51ebd3181572 ("ipv6: add support of equal cost multipath (ECMP)") Signed-off-by: Sagarika Sharma Reviewed-by: Kuniyuki Iwashima Reviewed-by: Ido Schimmel Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260430200909.527827-2-sharmasagarika@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 19eb6b702227..0dc0316530ca 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4995,6 +4995,7 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg) rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) break; rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; + fib6_update_sernum(net, rt); rt6_multipath_rebalance(rt); break; } -- cgit v1.2.3 From d1ae37dc6881a6a9113c8545cdbba731393d8dcd Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 30 Apr 2026 20:09:01 +0000 Subject: selftest: net: Add test for TCP flow failover with ECMP routes. Without the previous commit, TCP failed to switch to alternative IPv6 routes immediately upon carrier loss. It would persist with the dead route until reaching the threshold net.ipv4.tcp_retries1, leading to unnecessary delays in failover. Let's add a selftest for this scenario to ensure TCP fails over immediately upon a carrier loss event. Before: TEST: TCP IPv4 failover [ OK ] TEST: TCP IPv6 failover [FAIL] After: TEST: TCP IPv4 failover [ OK ] TEST: TCP IPv6 failover [ OK ] Signed-off-by: Kuniyuki Iwashima Signed-off-by: Sagarika Sharma Link: https://patch.msgid.link/20260430200909.527827-3-sharmasagarika@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/tcp_ecmp_failover.sh | 216 +++++++++++++++++++++++ 2 files changed, 217 insertions(+) create mode 100755 tools/testing/selftests/net/tcp_ecmp_failover.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index a275ed584026..f3da38c54d27 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -96,6 +96,7 @@ TEST_PROGS := \ srv6_hl2encap_red_l2vpn_test.sh \ srv6_iptunnel_cache.sh \ stress_reuseport_listen.sh \ + tcp_ecmp_failover.sh \ tcp_fastopen_backup_key.sh \ test_bpf.sh \ test_bridge_backup_port.sh \ diff --git a/tools/testing/selftests/net/tcp_ecmp_failover.sh b/tools/testing/selftests/net/tcp_ecmp_failover.sh new file mode 100755 index 000000000000..5768aa8bff6a --- /dev/null +++ b/tools/testing/selftests/net/tcp_ecmp_failover.sh @@ -0,0 +1,216 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright 2026 Google LLC. +# +# This test verifies TCP flow failover between ECMP routes +# upon carrier loss on the active device. +# +# socat -----------------------------> socat +# | +# .-- veth-c1 -|- veth-s1 --. +# dummy0 -| | |-- dummy0 +# '-- veth-c2 -|- veth-s2 --' +# | +# + +REQUIRE_JQ=no +REQUIRE_MZ=no +NUM_NETIFS=0 + +source forwarding/lib.sh + +CLIENT_IP="10.0.59.1" +SERVER_IP="10.0.92.1" +CLIENT_IP6="2001:db8:5a9a::1" +SERVER_IP6="2001:db8:9292::1" + +setup_server() +{ + IP="ip -n $server" + NS_EXEC="ip netns exec $server" + + $IP link add dummy0 type dummy + $IP link set dummy0 up + + $IP -4 addr add $SERVER_IP/32 dev dummy0 + $IP -6 addr add $SERVER_IP6/128 dev dummy0 nodad + + $IP link set veth-s1 up + $IP link set veth-s2 up + + $IP -4 addr add 192.168.1.2/24 dev veth-s1 + $IP -4 addr add 192.168.2.2/24 dev veth-s2 + + $IP -4 route add $CLIENT_IP/32 \ + nexthop via 192.168.1.1 dev veth-s1 weight 1 \ + nexthop via 192.168.2.1 dev veth-s2 weight 1 + + $IP -6 addr add 2001:db8:1::2/64 dev veth-s1 nodad + $IP -6 addr add 2001:db8:2::2/64 dev veth-s2 nodad + + $IP -6 route add $CLIENT_IP6/128 \ + nexthop via 2001:db8:1::1 dev veth-s1 weight 1 \ + nexthop via 2001:db8:2::1 dev veth-s2 weight 1 +} + +setup_client() +{ + IP="ip -n $client" + NS_EXEC="ip netns exec $client" + + $IP link add dummy0 type dummy + $IP link set dummy0 up + + $IP -4 addr add $CLIENT_IP/32 dev dummy0 + $IP -6 addr add $CLIENT_IP6/128 dev dummy0 nodad + + $IP link set veth-c1 up + $IP link set veth-c2 up + + $IP -4 addr add 192.168.1.1/24 dev veth-c1 + $IP -4 addr add 192.168.2.1/24 dev veth-c2 + + $IP -4 route add $SERVER_IP/32 \ + nexthop via 192.168.1.2 dev veth-c1 weight 1 \ + nexthop via 192.168.2.2 dev veth-c2 weight 1 + + $IP -6 addr add 2001:db8:1::1/64 dev veth-c1 nodad + $IP -6 addr add 2001:db8:2::1/64 dev veth-c2 nodad + + $IP -6 route add $SERVER_IP6/128 \ + nexthop via 2001:db8:1::2 dev veth-c1 weight 1 \ + nexthop via 2001:db8:2::2 dev veth-c2 weight 1 + + # By default, tcp_retries1=3 triggers a route refresh + # after 3 retransmits (~5s). Ensure this never occurs + # for test stability. + $NS_EXEC sysctl -qw net.ipv4.tcp_retries1=100 + + # When NETDEV_CHANGE is issued for a dev tied to an ECMP + # route, RTNH_F_LINKDOWN is flagged and the sernum is + # bumped to invalidate the route via sk_dst_check(). + # + # Without ignore_routes_with_linkdown=1, subsequent + # lookups may still select the same RTNH_F_LINKDOWN route. + $NS_EXEC sysctl -qw net.ipv4.conf.veth-c1.ignore_routes_with_linkdown=1 + $NS_EXEC sysctl -qw net.ipv4.conf.veth-c2.ignore_routes_with_linkdown=1 + + $NS_EXEC sysctl -qw net.ipv6.conf.veth-c1.ignore_routes_with_linkdown=1 + $NS_EXEC sysctl -qw net.ipv6.conf.veth-c2.ignore_routes_with_linkdown=1 +} + +setup() +{ + setup_ns client server + + ip -n "$client" link add veth-c1 type veth peer veth-s1 netns "$server" + ip -n "$client" link add veth-c2 type veth peer veth-s2 netns "$server" + + setup_server + setup_client +} + +cleanup() +{ + cleanup_all_ns > /dev/null 2>&1 +} + +tcp_ecmp_failover() +{ + local pf=$1; shift + local server_ip=$1; shift + local client_ip=$1; shift + + RET=0 + + tcpdump_start veth-s1 "$server" + tcpdump_start veth-s2 "$server" + + ip netns exec "$server" \ + socat -u TCP-LISTEN:8080,pf="$pf",bind="$server_ip",reuseaddr /dev/null & + server_pid=$! + + # Wait for server to start listening. + # Sometimes client fails without this sleep. + sleep 1 + + ip netns exec "$client" \ + socat -u /dev/zero TCP:"$server_ip":8080,pf="$pf",bind="$client_ip" & + client_pid=$! + + # To capture enough packets. + sleep 3 + + tcpdump_stop veth-s1 + tcpdump_stop veth-s2 + + pkts_s1=$(tcpdump_show veth-s1 | wc -l) + pkts_s2=$(tcpdump_show veth-s2 | wc -l) + + tcpdump_cleanup veth-s1 + tcpdump_cleanup veth-s2 + + # Detect the device chosen by the client + if [ "$pkts_s1" -gt "$pkts_s2" ]; then + veth_down=veth-s1 + veth_up=veth-s2 + else + veth_down=veth-s2 + veth_up=veth-s1 + fi + + # Taking down $veth_down causes its peer to lose carrier, + # triggering NETDEV_CHANGE. This flags RTNH_F_LINKDOWN + # and bumps the sernum for the route associated with that + # peer, invalidating the cached dst in the TCP socket. + # + # Consequently, sk_dst_check() fails, forcing the subsequent + # lookup to select the remaining healthy route via $veth_up. + ip -n "$server" link set "$veth_down" down + + tcpdump_start "$veth_up" "$server" + + # To capture enough packets. + sleep 3 + + tcpdump_stop "$veth_up" + + kill -9 "$client_pid" > /dev/null 2>&1 + kill -9 "$server_pid" > /dev/null 2>&1 + wait 2> /dev/null + + pkts=$(tcpdump_show $veth_up | wc -l) + + tcpdump_cleanup "$veth_up" + + if [ "$pkts" -lt 1000 ]; then + RET=$ksft_fail + fi +} + +test_ipv4() +{ + setup + tcp_ecmp_failover IPv4 $SERVER_IP $CLIENT_IP + log_test "TCP IPv4 failover" + cleanup +} + +test_ipv6() +{ + setup + tcp_ecmp_failover IPv6 "[$SERVER_IP6]" "[$CLIENT_IP6]" + log_test "TCP IPv6 failover" + cleanup +} + +require_command socat +require_command tcpdump + +trap cleanup EXIT + +test_ipv4 +test_ipv6 + +exit "$EXIT_STATUS" -- cgit v1.2.3 From b1f1e80620deb49daf63c2e677046599b693dc1f Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Tue, 28 Apr 2026 23:08:54 +0900 Subject: ksmbd: centralize ksmbd_conn final release to plug transport leak ksmbd_conn_free() is one of four sites that can observe the last refcount drop of a struct ksmbd_conn. The other three fs/smb/server/connection.c ksmbd_conn_r_count_dec() fs/smb/server/oplock.c __free_opinfo() fs/smb/server/vfs_cache.c session_fd_check() end the conn with a bare kfree(), skipping ida_destroy(&conn->async_ida) and conn->transport->ops->free_transport(conn->transport). Whenever one of them is the last putter, the embedded async_ida and the entire transport struct leak -- for TCP, that is also the struct socket and the kvec iov. __free_opinfo() being a final putter is not theoretical. opinfo_put() queues the callback via call_rcu(&opinfo->rcu, free_opinfo_rcu), so ksmbd_server_terminate_conn() can deposit N opinfo releases in RCU and have ksmbd_conn_free() run in the handler thread before any of them fire. ksmbd_conn_free() then observes refcnt > 0 and short-circuits; the last RCU-delivered __free_opinfo() falls onto its bare kfree(conn) branch and the transport is lost. A/B validation in a QEMU/virtme guest, mounting //127.0.0.1/testshare: each iteration holds 8 files open via sleep processes, force-closes TCP with "ss -K sport = :445", kills the holders, lazy-umounts; repeated 10 times, then ksmbd shutdown and kmemleak scan. state conn_alloc conn_free tcp_free opi_rcu kmemleak ---------- ---------- --------- -------- ------- -------- pre-patch 20 20 10 160 7 with patch 20 20 20 160 0 Pre-patch conn_free=20 with tcp_free=10 directly demonstrates the bare-kfree paths skipping transport cleanup; kmemleak backtraces point into struct tcp_transport / iov. With this patch tcp_free matches conn_free at 20/20 and kmemleak is clean. Move the per-struct final release into __ksmbd_conn_release_work() and route the three bare-kfree final-put sites through a new ksmbd_conn_put(). Those sites now pair ida_destroy() and free_transport() with kfree(conn) regardless of which holder happens to release the last reference. stop_sessions() only triggers the transport shutdown and does not itself drop the last conn reference, so it is unaffected. The centralized release reaches sock_release() -> tcp_close() -> lock_sock_nested() (might_sleep) from every final putter, including __free_opinfo() invoked from an RCU softirq callback, which trips CONFIG_DEBUG_ATOMIC_SLEEP. Defer the release to a dedicated ksmbd_conn_wq workqueue so ksmbd_conn_put() is safe from any non-sleeping context. Make ksmbd_file own a strong connection reference while fp->conn is non-NULL so durable-preserve and final-close paths cannot dereference a stale connection. ksmbd_open_fd() and ksmbd_reopen_durable_fd() take the reference via ksmbd_conn_get() (the latter also reorders the fp->conn / fp->tcon assignments before __open_id() so the published fp is never observed with fp->conn == NULL); session_fd_check() and __ksmbd_close_fd() drop it via ksmbd_conn_put(). With that invariant, session_fd_check() can take a local conn pointer once and use it across the m_op_list and lock_list iterations even though op->conn puts may otherwise drop the last reference. At module exit the workqueue is flushed and destroyed after rcu_barrier(), so any release queued by a trailing RCU callback is drained before the inode hash and module text go away. Fixes: ee426bfb9d09 ("ksmbd: add refcnt to ksmbd_conn struct") Signed-off-by: DaeMyung Kang Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 101 ++++++++++++++++++++++++++++++++++++++------- fs/smb/server/connection.h | 6 +++ fs/smb/server/oplock.c | 7 +--- fs/smb/server/server.c | 12 ++++++ fs/smb/server/vfs_cache.c | 60 ++++++++++++++++++++++----- 5 files changed, 156 insertions(+), 30 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index c5aac4946cbe..95654a808162 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -79,6 +79,81 @@ static int create_proc_clients(void) { return 0; } static void delete_proc_clients(void) {} #endif +static struct workqueue_struct *ksmbd_conn_wq; + +int ksmbd_conn_wq_init(void) +{ + ksmbd_conn_wq = alloc_workqueue("ksmbd-conn-release", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0); + if (!ksmbd_conn_wq) + return -ENOMEM; + return 0; +} + +void ksmbd_conn_wq_destroy(void) +{ + if (ksmbd_conn_wq) { + destroy_workqueue(ksmbd_conn_wq); + ksmbd_conn_wq = NULL; + } +} + +/* + * __ksmbd_conn_release_work() - perform the final, once-per-struct cleanup + * of a ksmbd_conn whose refcount has just dropped to zero. + * + * This is the common release path used by ksmbd_conn_put() for the embedded + * state that outlives the connection thread: async_ida and the attached + * transport (which owns the socket and iov for TCP). Called from a workqueue + * so that sleep-allowed teardown (sock_release -> tcp_close -> + * lock_sock_nested) never runs from an RCU softirq callback (free_opinfo_rcu) + * or any other non-sleeping putter context. + */ +static void __ksmbd_conn_release_work(struct work_struct *work) +{ + struct ksmbd_conn *conn = + container_of(work, struct ksmbd_conn, release_work); + + ida_destroy(&conn->async_ida); + conn->transport->ops->free_transport(conn->transport); + kfree(conn); +} + +/** + * ksmbd_conn_get() - take a reference on @conn and return it. + * + * Returns @conn unchanged so callers can write + * "fp->conn = ksmbd_conn_get(work->conn);" in one expression. Returns NULL + * if @conn is NULL. + */ +struct ksmbd_conn *ksmbd_conn_get(struct ksmbd_conn *conn) +{ + if (!conn) + return NULL; + + atomic_inc(&conn->refcnt); + return conn; +} + +/** + * ksmbd_conn_put() - drop a reference and, if it was the last, queue the + * release onto ksmbd_conn_wq so it runs from process context. + * + * Callable from any context including RCU softirq callbacks and non-sleeping + * locks; the actual release is deferred to the workqueue. ksmbd_conn_wq is + * created in ksmbd_server_init() before any conn can be allocated and is + * destroyed in ksmbd_server_exit() after rcu_barrier(), so it is always + * non-NULL while a conn reference is held. + */ +void ksmbd_conn_put(struct ksmbd_conn *conn) +{ + if (!conn) + return; + + if (atomic_dec_and_test(&conn->refcnt)) + queue_work(ksmbd_conn_wq, &conn->release_work); +} + /** * ksmbd_conn_free() - free resources of the connection instance * @@ -93,23 +168,19 @@ void ksmbd_conn_free(struct ksmbd_conn *conn) hash_del(&conn->hlist); up_write(&conn_list_lock); + /* + * request_buf / preauth_info / mechToken are only ever accessed by the + * connection handler thread that owns @conn. ksmbd_conn_free() is + * called from the transport free_transport() path when that thread is + * exiting, so it is safe to release them unconditionally even when + * ksmbd_conn_put() below is not the final putter (oplock / ksmbd_file + * holders only retain the conn pointer, not these per-thread buffers). + */ xa_destroy(&conn->sessions); kvfree(conn->request_buf); kfree(conn->preauth_info); kfree(conn->mechToken); - if (atomic_dec_and_test(&conn->refcnt)) { - /* - * async_ida is embedded in struct ksmbd_conn, so pair - * ida_destroy() with the final kfree() rather than with - * the unconditional field teardown above. This keeps - * the IDA valid for the entire lifetime of the struct, - * even while other refcount holders (oplock / vfs - * durable handles) still reference the connection. - */ - ida_destroy(&conn->async_ida); - conn->transport->ops->free_transport(conn->transport); - kfree(conn); - } + ksmbd_conn_put(conn); } /** @@ -136,6 +207,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void) conn->um = ERR_PTR(-EOPNOTSUPP); if (IS_ERR(conn->um)) conn->um = NULL; + INIT_WORK(&conn->release_work, __ksmbd_conn_release_work); atomic_set(&conn->req_running, 0); atomic_set(&conn->r_count, 0); atomic_set(&conn->refcnt, 1); @@ -512,8 +584,7 @@ void ksmbd_conn_r_count_dec(struct ksmbd_conn *conn) if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q)) wake_up(&conn->r_count_q); - if (atomic_dec_and_test(&conn->refcnt)) - kfree(conn); + ksmbd_conn_put(conn); } int ksmbd_conn_transport_init(void) diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index de2d46941c93..e074be942582 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -16,6 +16,7 @@ #include #include #include +#include #include "smb_common.h" #include "ksmbd_work.h" @@ -120,6 +121,7 @@ struct ksmbd_conn { bool binding; atomic_t refcnt; bool is_aapl; + struct work_struct release_work; }; struct ksmbd_conn_ops { @@ -164,6 +166,10 @@ void ksmbd_conn_wait_idle(struct ksmbd_conn *conn); int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id); struct ksmbd_conn *ksmbd_conn_alloc(void); void ksmbd_conn_free(struct ksmbd_conn *conn); +struct ksmbd_conn *ksmbd_conn_get(struct ksmbd_conn *conn); +void ksmbd_conn_put(struct ksmbd_conn *conn); +int ksmbd_conn_wq_init(void); +void ksmbd_conn_wq_destroy(void); bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c); int ksmbd_conn_write(struct ksmbd_work *work); int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index cd3f28b0e7cb..8feca02ddbf2 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -30,7 +30,6 @@ static DEFINE_RWLOCK(lease_list_lock); static struct oplock_info *alloc_opinfo(struct ksmbd_work *work, u64 id, __u16 Tid) { - struct ksmbd_conn *conn = work->conn; struct ksmbd_session *sess = work->sess; struct oplock_info *opinfo; @@ -39,7 +38,7 @@ static struct oplock_info *alloc_opinfo(struct ksmbd_work *work, return NULL; opinfo->sess = sess; - opinfo->conn = conn; + opinfo->conn = ksmbd_conn_get(work->conn); opinfo->level = SMB2_OPLOCK_LEVEL_NONE; opinfo->op_state = OPLOCK_STATE_NONE; opinfo->pending_break = 0; @@ -50,7 +49,6 @@ static struct oplock_info *alloc_opinfo(struct ksmbd_work *work, init_waitqueue_head(&opinfo->oplock_brk); atomic_set(&opinfo->refcount, 1); atomic_set(&opinfo->breaking_cnt, 0); - atomic_inc(&opinfo->conn->refcnt); return opinfo; } @@ -132,8 +130,7 @@ static void __free_opinfo(struct oplock_info *opinfo) { if (opinfo->is_lease) free_lease(opinfo); - if (opinfo->conn && atomic_dec_and_test(&opinfo->conn->refcnt)) - kfree(opinfo->conn); + ksmbd_conn_put(opinfo->conn); kfree(opinfo); } diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index 58ef02c423fc..5d799b2d4c62 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -596,8 +596,14 @@ static int __init ksmbd_server_init(void) if (ret) goto err_crypto_destroy; + ret = ksmbd_conn_wq_init(); + if (ret) + goto err_workqueue_destroy; + return 0; +err_workqueue_destroy: + ksmbd_workqueue_destroy(); err_crypto_destroy: ksmbd_crypto_destroy(); err_release_inode_hash: @@ -623,6 +629,12 @@ static void __exit ksmbd_server_exit(void) { ksmbd_server_shutdown(); rcu_barrier(); + /* + * ksmbd_conn_put() defers the final release onto ksmbd_conn_wq, + * so drain it after rcu_barrier() has fired any pending RCU + * callbacks that may have queued a release. + */ + ksmbd_conn_wq_destroy(); ksmbd_release_inode_hash(); } diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 3551f01a3fa0..4a18107937cc 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -475,6 +475,17 @@ static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp) kfree(smb_lock); } + /* + * Drop fp's strong reference on conn (taken in ksmbd_open_fd() / + * ksmbd_reopen_durable_fd()). Durable fps that reached the + * scavenger have already had fp->conn cleared by session_fd_check(), + * in which case there is nothing to drop here. + */ + if (fp->conn) { + ksmbd_conn_put(fp->conn); + fp->conn = NULL; + } + if (ksmbd_stream_fd(fp)) kfree(fp->stream.name); kfree(fp->owner.name); @@ -752,7 +763,14 @@ struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp) atomic_set(&fp->refcount, 1); fp->filp = filp; - fp->conn = work->conn; + /* + * fp owns a strong reference on fp->conn for as long as fp->conn is + * non-NULL, so session_fd_check() and __ksmbd_close_fd() never + * dereference a dangling pointer. Paired with ksmbd_conn_put() in + * session_fd_check() (durable preserve), in __ksmbd_close_fd() + * (final close), and on the error paths below. + */ + fp->conn = ksmbd_conn_get(work->conn); fp->tcon = work->tcon; fp->volatile_id = KSMBD_NO_FID; fp->persistent_id = KSMBD_NO_FID; @@ -774,6 +792,8 @@ struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp) return fp; err_out: + /* fp->conn was set and refcounted before every branch here. */ + ksmbd_conn_put(fp->conn); kmem_cache_free(filp_cache, fp); return ERR_PTR(ret); } @@ -1062,25 +1082,32 @@ static bool session_fd_check(struct ksmbd_tree_connect *tcon, if (!is_reconnectable(fp)) return false; + if (WARN_ON_ONCE(!fp->conn)) + return false; + if (ksmbd_vfs_copy_durable_owner(fp, user)) return false; + /* + * fp owns a strong reference on fp->conn (taken in ksmbd_open_fd() + * / ksmbd_reopen_durable_fd()), so conn stays valid for the whole + * body of this function regardless of any op->conn puts below. + */ conn = fp->conn; ci = fp->f_ci; down_write(&ci->m_lock); list_for_each_entry_rcu(op, &ci->m_op_list, op_entry) { if (op->conn != conn) continue; - if (op->conn && atomic_dec_and_test(&op->conn->refcnt)) - kfree(op->conn); + ksmbd_conn_put(op->conn); op->conn = NULL; } up_write(&ci->m_lock); list_for_each_entry_safe(smb_lock, tmp_lock, &fp->lock_list, flist) { - spin_lock(&fp->conn->llist_lock); + spin_lock(&conn->llist_lock); list_del_init(&smb_lock->clist); - spin_unlock(&fp->conn->llist_lock); + spin_unlock(&conn->llist_lock); } fp->conn = NULL; @@ -1091,6 +1118,8 @@ static bool session_fd_check(struct ksmbd_tree_connect *tcon, fp->durable_scavenger_timeout = jiffies_to_msecs(jiffies) + fp->durable_timeout; + /* Drop fp's own reference on conn. */ + ksmbd_conn_put(conn); return true; } @@ -1178,15 +1207,27 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp) old_f_state = fp->f_state; fp->f_state = FP_NEW; + + /* + * Initialize fp's connection binding before publishing fp into the + * session's file table. If __open_id() is ordered first, a + * concurrent teardown that iterates the table can observe a valid + * volatile_id with fp->conn == NULL and preserve a + * partially-initialized fp. fp owns a strong reference on the new + * conn (see ksmbd_open_fd()); undo it on __open_id() failure. + */ + fp->conn = ksmbd_conn_get(conn); + fp->tcon = work->tcon; + __open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID); if (!has_file_id(fp->volatile_id)) { + fp->conn = NULL; + fp->tcon = NULL; + ksmbd_conn_put(conn); fp->f_state = old_f_state; return -EBADF; } - fp->conn = conn; - fp->tcon = work->tcon; - list_for_each_entry(smb_lock, &fp->lock_list, flist) { spin_lock(&conn->llist_lock); list_add_tail(&smb_lock->clist, &conn->lock_list); @@ -1198,8 +1239,7 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp) list_for_each_entry_rcu(op, &ci->m_op_list, op_entry) { if (op->conn) continue; - op->conn = fp->conn; - atomic_inc(&op->conn->refcnt); + op->conn = ksmbd_conn_get(fp->conn); } up_write(&ci->m_lock); -- cgit v1.2.3 From a42896bebfcc287ed1e61d820a888e33b1eb80ce Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Tue, 28 Apr 2026 23:08:55 +0900 Subject: ksmbd: harden file lifetime during session teardown __close_file_table_ids() is the per-session teardown that closes every fp belonging to a session (or to one tree connect on that session) by walking the session's volatile-id idr. The current loop has three related problems on busy or racing workloads: * Sleeping under ft->lock. The session-teardown skip callback, session_fd_check(), already sleeps in ksmbd_vfs_copy_durable_owner() -> kstrdup(GFP_KERNEL) and down_write(&fp->f_ci->m_lock) (a rw_semaphore). Running the callback inside write_lock(&ft->lock) trips CONFIG_DEBUG_ATOMIC_SLEEP / CONFIG_PROVE_LOCKING on a durable-fd workload. * Refcount accounting blind to f_state. The unconditional atomic_dec_and_test(&fp->refcount) does not distinguish FP_INITED (idr-owned reference still intact) from FP_CLOSED (an earlier ksmbd_close_fd() already consumed the idr-owned reference while leaving fp in the idr because a holder kept refcount non-zero). When the latter races with teardown the same path over-decrements into a holder reference and ksmbd_fd_put() later UAFs that holder. * FP_NEW window. Between __open_id() publishing fp into the session idr and ksmbd_update_fstate(..., FP_INITED) committing the transition at the end of smb2_open(), an fp is in FP_NEW and an intervening teardown that takes a transient reference and unpublishes the volatile id leaves the original idr-owned reference orphaned -- the opener is unaware that fp has been unpublished, returns success to the client, and the fp leaks at refcount = 1. Refactor __close_file_table_ids() to take a transient reference on fp and unpublish fp from the session idr *under ft->lock* before calling skip() outside the lock. A transient ref protects lifetime but not concurrent field mutation, so the idr_remove() is what keeps __ksmbd_lookup_fd() through this session's idr from granting a new ksmbd_fp_get() reference to an fp whose fp->conn / fp->tcon / fp->volatile_id / op->conn / lock_list links are about to be rewritten by session_fd_check(). Durable reconnect is unaffected because it reaches fp through the global durable table (ksmbd_lookup_durable_fd -> global_ft). Decide n_to_drop together with any FP_INITED -> FP_CLOSED transition under ft->lock so teardown and ksmbd_close_fd() never both consume the idr-owned reference. See ksmbd_mark_fp_closed() for the per-state accounting. For the FP_NEW path to be safe, the opener has to learn that fp was unpublished: ksmbd_update_fstate() now returns -ENOENT when an FP_NEW -> FP_INITED transition finds f_state already advanced or the volatile id cleared (both committed by teardown under ft->lock); smb2_open() propagates that as STATUS_OBJECT_NAME_INVALID and drops the original reference via ksmbd_fd_put(). The list removal cannot be left for a deferred final putter because fp->volatile_id has already been cleared and __ksmbd_remove_fd() will intentionally skip both idr_remove() and list_del_init(). Move the m_fp_list unlink in __ksmbd_remove_fd() above the volatile-id check so that an FP_NEW fp that happened to be added to m_fp_list (smb2_open() adds fp->node before ksmbd_update_fstate() runs) is still cleaned up on the deferred putter path; list_del_init() on an empty node is a no-op and remains safe for fps that were never added. Add a defensive guard in session_fd_check() that refuses non-FP_INITED fps so that even if a teardown reaches an FP_NEW fp it falls into the close branch (where the n_to_drop = 1 accounting keeps the opener's reference alive) instead of the durable-preserve branch (which mutates fp->conn / fp->tcon). Validation on a debug kernel additionally built with CONFIG_DEBUG_LIST and CONFIG_DEBUG_OBJECTS_WORK used a same-session two-tcon workload (open/write storm on one tcon, 50 tree disconnects on the other) and reported no list-corruption, work_struct ODEBUG, sleep-in-atomic, lockdep or kmemleak reports. Reverting only the __close_file_table_ids() hunk while keeping a forced-is_reconnectable() harness produced the expected sleep-in-atomic at vfs_cache.c:1095, confirming the ft->lock-out-of-sleepable-skip discipline. KASAN-enabled direct SMB2 coverage with durable handles enabled exercised ksmbd_close_tree_conn_fds(), ksmbd_close_session_fds(), the FP_NEW failure path, tree_conn_fd_check(), and a non-zero session_fd_check() durable-preserve return. This produced no KASAN, DEBUG_LIST, ODEBUG, or WARNING reports. Fixes: f44158485826 ("cifsd: add file operations") Signed-off-by: DaeMyung Kang Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 6 +- fs/smb/server/vfs_cache.c | 179 ++++++++++++++++++++++++++++++++++++++++------ fs/smb/server/vfs_cache.h | 4 +- 3 files changed, 164 insertions(+), 25 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 47b7af631f7b..62d4399a993d 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3767,8 +3767,10 @@ err_out1: err_out2: if (!rc) { - ksmbd_update_fstate(&work->sess->file_table, fp, FP_INITED); - rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len); + rc = ksmbd_update_fstate(&work->sess->file_table, fp, + FP_INITED); + if (!rc) + rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len); } if (rc) { if (rc == -EINVAL) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 4a18107937cc..dc4037ef1834 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -431,13 +431,13 @@ static void ksmbd_remove_durable_fd(struct ksmbd_file *fp) static void __ksmbd_remove_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp) { - if (!has_file_id(fp->volatile_id)) - return; - down_write(&fp->f_ci->m_lock); list_del_init(&fp->node); up_write(&fp->f_ci->m_lock); + if (!has_file_id(fp->volatile_id)) + return; + write_lock(&ft->lock); idr_remove(ft->idr, fp->volatile_id); write_unlock(&ft->lock); @@ -798,15 +798,58 @@ err_out: return ERR_PTR(ret); } -void ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp, - unsigned int state) +/** + * ksmbd_update_fstate() - update an fp state under the file-table lock + * @ft: file table that publishes @fp's volatile id + * @fp: file pointer to update + * @state: new state + * + * Return: 0 on success. The FP_NEW -> FP_INITED transition is special: + * -ENOENT if teardown already unpublished @fp by advancing the state or + * clearing the volatile id. Other state updates preserve the historical + * fire-and-forget behavior. + */ +int ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp, + unsigned int state) { + int ret; + if (!fp) - return; + return -ENOENT; write_lock(&ft->lock); - fp->f_state = state; + if (state == FP_INITED && + (fp->f_state != FP_NEW || !has_file_id(fp->volatile_id))) { + ret = -ENOENT; + } else { + fp->f_state = state; + ret = 0; + } write_unlock(&ft->lock); + + return ret; +} + +/* + * ksmbd_mark_fp_closed() - mark fp closed under ft->lock and return how many + * refs the teardown path owns. + * + * FP_INITED has a normal idr-owned reference, so teardown owns both that + * reference and the transient lookup reference. FP_NEW is still owned by the + * in-flight opener/reopener, which will drop the original reference after + * ksmbd_update_fstate(..., FP_INITED) observes the cleared volatile id. + * FP_CLOSED on entry means an earlier ksmbd_close_fd() already consumed the + * idr-owned ref. + */ +static int ksmbd_mark_fp_closed(struct ksmbd_file *fp) +{ + if (fp->f_state == FP_INITED) { + set_close_state_blocked_works(fp); + fp->f_state = FP_CLOSED; + return 2; + } + + return 1; } static int @@ -814,7 +857,8 @@ __close_file_table_ids(struct ksmbd_session *sess, struct ksmbd_tree_connect *tcon, bool (*skip)(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp, - struct ksmbd_user *user)) + struct ksmbd_user *user), + bool skip_preserves_fp) { struct ksmbd_file_table *ft = &sess->file_table; struct ksmbd_file *fp; @@ -822,32 +866,120 @@ __close_file_table_ids(struct ksmbd_session *sess, int num = 0; while (1) { + int n_to_drop; + write_lock(&ft->lock); fp = idr_get_next(ft->idr, &id); if (!fp) { write_unlock(&ft->lock); break; } - - if (skip(tcon, fp, sess->user) || - !atomic_dec_and_test(&fp->refcount)) { + if (!atomic_inc_not_zero(&fp->refcount)) { id++; write_unlock(&ft->lock); continue; } - set_close_state_blocked_works(fp); - idr_remove(ft->idr, fp->volatile_id); - fp->volatile_id = KSMBD_NO_FID; - write_unlock(&ft->lock); + if (skip_preserves_fp) { + /* + * Session teardown: skip() is session_fd_check(), + * which may sleep and mutates fp->conn / fp->tcon / + * fp->volatile_id when it chooses to preserve fp + * for durable reconnect. Unpublish fp from the + * session idr here, under ft->lock, so that + * __ksmbd_lookup_fd() through this session cannot + * grant a new ksmbd_fp_get() reference to an fp + * whose fields are about to be rewritten outside + * the lock. Durable reconnect still reaches fp via + * global_ft. + */ + idr_remove(ft->idr, id); + fp->volatile_id = KSMBD_NO_FID; + write_unlock(&ft->lock); + + if (skip(tcon, fp, sess->user)) { + /* + * session_fd_check() has converted fp to + * durable-preserve state and cleared its + * per-conn fields. fp is already unpublished + * above; the original idr-owned ref keeps it + * alive for the durable scavenger. Drop only + * the transient ref. atomic_dec() is safe -- + * atomic_inc_not_zero() succeeded on a + * positive value and we added one more, so + * refcount cannot be zero here. + */ + atomic_dec(&fp->refcount); + id++; + continue; + } + + /* + * Keep the close-state decision under the same lock + * observed by ksmbd_update_fstate(), which is how an + * in-flight FP_NEW opener learns that teardown has + * cleared its volatile id. + */ + write_lock(&ft->lock); + n_to_drop = ksmbd_mark_fp_closed(fp); + write_unlock(&ft->lock); + } else { + /* + * Tree teardown: skip() is tree_conn_fd_check(), a + * cheap pointer compare that doesn't sleep and has + * no side effects, so keep the skip decision plus + * the unpublish-and-mark-closed sequence atomic + * under ft->lock. fps belonging to other tree + * connects (skip() == true) stay fully published in + * the session idr with no lock window. + */ + if (skip(tcon, fp, sess->user)) { + atomic_dec(&fp->refcount); + write_unlock(&ft->lock); + id++; + continue; + } + idr_remove(ft->idr, id); + fp->volatile_id = KSMBD_NO_FID; + n_to_drop = ksmbd_mark_fp_closed(fp); + write_unlock(&ft->lock); + } + /* + * fp->volatile_id is already cleared to prevent stale idr + * removal from a deferred final close. Remove fp from + * m_fp_list here because __ksmbd_remove_fd() will skip the + * list unlink when volatile_id is KSMBD_NO_FID. + */ down_write(&fp->f_ci->m_lock); list_del_init(&fp->node); up_write(&fp->f_ci->m_lock); - __ksmbd_close_fd(ft, fp); - - num++; + /* + * Drop the references this iteration owns: + * + * n_to_drop == 2: we observed FP_INITED and committed + * the FP_CLOSED transition ourselves, so we own the + * transient (+1) and the still-intact idr-owned ref. + * + * n_to_drop == 1: either a prior ksmbd_close_fd() + * already consumed the idr-owned ref, or fp was still + * FP_NEW and the in-flight opener/reopener must keep + * the original reference until ksmbd_update_fstate() + * observes the cleared volatile id. + * + * If we end up as the final putter, finalize fp and + * account the open_files_count decrement via the caller's + * atomic_sub(num, ...). Otherwise the remaining user's + * ksmbd_fd_put() reaches __put_fd_final(), which does its + * own atomic_dec(&open_files_count), so we must not count + * this fp here -- doing so would double-decrement the + * connection-wide counter. + */ + if (atomic_sub_and_test(n_to_drop, &fp->refcount)) { + __ksmbd_close_fd(NULL, fp); + num++; + } id++; } @@ -1082,6 +1214,9 @@ static bool session_fd_check(struct ksmbd_tree_connect *tcon, if (!is_reconnectable(fp)) return false; + if (fp->f_state != FP_INITED) + return false; + if (WARN_ON_ONCE(!fp->conn)) return false; @@ -1127,7 +1262,8 @@ void ksmbd_close_tree_conn_fds(struct ksmbd_work *work) { int num = __close_file_table_ids(work->sess, work->tcon, - tree_conn_fd_check); + tree_conn_fd_check, + false); atomic_sub(num, &work->conn->stats.open_files_count); } @@ -1136,7 +1272,8 @@ void ksmbd_close_session_fds(struct ksmbd_work *work) { int num = __close_file_table_ids(work->sess, work->tcon, - session_fd_check); + session_fd_check, + true); atomic_sub(num, &work->conn->stats.open_files_count); } @@ -1268,7 +1405,7 @@ void ksmbd_destroy_file_table(struct ksmbd_session *sess) if (!ft->idr) return; - __close_file_table_ids(sess, NULL, session_fd_check); + __close_file_table_ids(sess, NULL, session_fd_check, true); idr_destroy(ft->idr); kfree(ft->idr); ft->idr = NULL; diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 866f32c10d4d..e6871266a94b 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -172,8 +172,8 @@ int ksmbd_close_inode_fds(struct ksmbd_work *work, struct inode *inode); int ksmbd_init_global_file_table(void); void ksmbd_free_global_file_table(void); void ksmbd_set_fd_limit(unsigned long limit); -void ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp, - unsigned int state); +int ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp, + unsigned int state); bool ksmbd_vfs_compare_durable_owner(struct ksmbd_file *fp, struct ksmbd_user *user); -- cgit v1.2.3 From bf736184d063da1a552ffeff0481813599a182cc Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Tue, 28 Apr 2026 23:08:56 +0900 Subject: ksmbd: close durable scavenger races against m_fp_list lookups ksmbd_durable_scavenger() has two related races against any walker that iterates f_ci->m_fp_list, including ksmbd_lookup_fd_inode() (used by ksmbd_vfs_rename) and the share-mode checks in fs/smb/server/smb_common.c. (1) fp->node list-head reuse. Durable-preserved handles can remain linked on f_ci->m_fp_list after session teardown so share-mode checks still see them while the handle is reconnectable. The scavenger collected expired handles by adding fp->node to a local scavenger_list after removing them from the global durable idr. Because fp->node is the same list_head used by m_fp_list, list_add(&fp->node, &scavenger_list) overwrites the m_fp_list links and corrupts both lists. CONFIG_DEBUG_LIST can report this on the share-mode walk path. (2) Refcount race against m_fp_list walkers. The scavenger qualifies an expired durable handle with atomic_read(&fp->refcount) > 1 and fp->conn under global_ft.lock, removes fp from global_ft, then drops global_ft.lock before unlinking fp from m_fp_list and freeing it. During that gap fp is still linked on m_fp_list with f_state == FP_INITED. ksmbd_lookup_fd_inode() under m_lock read calls ksmbd_fp_get() (atomic_inc_not_zero on refcount that is still 1) and takes a live reference; the scavenger then unlinks and frees fp while the holder owns a reference, leading to UAF on the holder's subsequent ksmbd_fd_put() and on any field reads performed by a concurrent share-mode walker that iterates m_fp_list without taking ksmbd_fp_get() (smb_check_perm_dleases-like paths). Fix both: * Stop reusing fp->node as a scavenger-private list node. Remove one expired handle from global_ft under global_ft.lock, take an explicit transient reference, drop the lock, unlink fp->node from m_fp_list under f_ci->m_lock, then drop both the durable lifetime and transient references with atomic_sub_and_test(2, &fp->refcount). If the scavenger is the last putter the close runs there; otherwise an in-flight holder that already raced through the m_fp_list lookup owns the final close via its ksmbd_fd_put() path. The one-at-a-time disposal can rescan the durable idr when multiple handles expire in the same pass, but durable scavenging is a background expiration path and the final full scan recomputes min_timeout before the next wait. * Clear fp->persistent_id inside __ksmbd_remove_durable_fd() right after idr_remove(), so a delayed final close from a holder that snatched fp does not re-issue idr_remove() on a persistent id that idr_alloc_cyclic() in ksmbd_open_durable_fd() may have already handed out to a brand-new durable handle. * Bypass the per-conn open_files_count decrement in __put_fd_final() when fp is detached from any session table (fp->conn cleared by session_fd_check() at durable preserve -- paired with the volatile_id clear at unpublish, so checking fp->conn alone is sufficient). The walker that owns the final close runs from an unrelated work->conn whose stats.open_files_count never tracked this durable fp; without this guard the holder would underflow that unrelated counter. The two races are folded into one patch because patch (1) alone cleans up the corrupted list but leaves a deterministic UAF window for m_fp_list walkers that the transient-reference and persistent_id discipline in (2) close; bisecting onto an intermediate state would land on a UAF that pre-patch chaos merely made less reproducible. Validation: * CONFIG_DEBUG_LIST coverage for the list_head reuse path. * KASAN-enabled direct SMB2 durable-handle coverage that exercised ksmbd_durable_scavenger() and non-NULL ksmbd_lookup_fd_inode() returns while durable handles expired under concurrent rename lookups, with no KASAN, UAF, list-corruption, ODEBUG, or WARNING reports. * checkpatch --strict * make -j$(nproc) M=fs/smb/server Fixes: d484d621d40f ("ksmbd: add durable scavenger timer") Signed-off-by: DaeMyung Kang Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs_cache.c | 102 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 26 deletions(-) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index dc4037ef1834..354c4d8a1cfb 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -418,6 +418,14 @@ static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp) return; idr_remove(global_ft.idr, fp->persistent_id); + /* + * Clear persistent_id so a later __ksmbd_close_fd() that runs from a + * delayed putter (e.g. when a concurrent ksmbd_lookup_fd_inode() + * walker held the final reference) does not re-issue idr_remove() on + * an id that idr_alloc_cyclic() may have already handed out to a new + * durable handle. + */ + fp->persistent_id = KSMBD_NO_FID; } static void ksmbd_remove_durable_fd(struct ksmbd_file *fp) @@ -521,6 +529,20 @@ static struct ksmbd_file *__ksmbd_lookup_fd(struct ksmbd_file_table *ft, static void __put_fd_final(struct ksmbd_work *work, struct ksmbd_file *fp) { + /* + * Detached durable fp -- session_fd_check() cleared fp->conn at + * preserve, so this fp is no longer tracked by any conn's + * stats.open_files_count. This happens when + * ksmbd_scavenger_dispose_dh() hands the final close off to an + * m_fp_list walker (e.g. ksmbd_lookup_fd_inode()) whose work->conn + * is unrelated to the conn that originally opened the handle; close + * via the NULL-ft path so we do not underflow that unrelated + * counter. + */ + if (!fp->conn) { + __ksmbd_close_fd(NULL, fp); + return; + } __ksmbd_close_fd(&work->sess->file_table, fp); atomic_dec(&work->conn->stats.open_files_count); } @@ -1033,24 +1055,37 @@ static bool ksmbd_durable_scavenger_alive(void) return true; } -static void ksmbd_scavenger_dispose_dh(struct list_head *head) +static void ksmbd_scavenger_dispose_dh(struct ksmbd_file *fp) { - while (!list_empty(head)) { - struct ksmbd_file *fp; + /* + * Durable-preserved fp can remain linked on f_ci->m_fp_list for + * share-mode checks. Unlink it before final close; fp->node is not + * available as a scavenger-private list node because re-adding it to + * another list corrupts m_fp_list. + */ + down_write(&fp->f_ci->m_lock); + list_del_init(&fp->node); + up_write(&fp->f_ci->m_lock); - fp = list_first_entry(head, struct ksmbd_file, node); - list_del_init(&fp->node); + /* + * Drop both the durable lifetime reference and the transient reference + * taken by the scavenger under global_ft.lock. If a concurrent + * ksmbd_lookup_fd_inode() (or any other m_fp_list walker) snatched fp + * before the unlink above, that holder owns the final close via + * ksmbd_fd_put() -> __ksmbd_close_fd(). Otherwise the scavenger is + * the last putter and finalises fp here. + */ + if (atomic_sub_and_test(2, &fp->refcount)) __ksmbd_close_fd(NULL, fp); - } } static int ksmbd_durable_scavenger(void *dummy) { struct ksmbd_file *fp = NULL; + struct ksmbd_file *expired_fp; unsigned int id; unsigned int min_timeout = 1; bool found_fp_timeout; - LIST_HEAD(scavenger_list); unsigned long remaining_jiffies; __module_get(THIS_MODULE); @@ -1060,8 +1095,6 @@ static int ksmbd_durable_scavenger(void *dummy) if (try_to_freeze()) continue; - found_fp_timeout = false; - remaining_jiffies = wait_event_timeout(dh_wq, ksmbd_durable_scavenger_alive() == false, __msecs_to_jiffies(min_timeout)); @@ -1070,23 +1103,39 @@ static int ksmbd_durable_scavenger(void *dummy) else min_timeout = DURABLE_HANDLE_MAX_TIMEOUT; - write_lock(&global_ft.lock); - idr_for_each_entry(global_ft.idr, fp, id) { - if (!fp->durable_timeout) - continue; + do { + expired_fp = NULL; + found_fp_timeout = false; - if (atomic_read(&fp->refcount) > 1 || - fp->conn) - continue; - - found_fp_timeout = true; - if (fp->durable_scavenger_timeout <= - jiffies_to_msecs(jiffies)) { - __ksmbd_remove_durable_fd(fp); - list_add(&fp->node, &scavenger_list); - } else { + write_lock(&global_ft.lock); + idr_for_each_entry(global_ft.idr, fp, id) { unsigned long durable_timeout; + if (!fp->durable_timeout) + continue; + + if (atomic_read(&fp->refcount) > 1 || + fp->conn) + continue; + + found_fp_timeout = true; + if (fp->durable_scavenger_timeout <= + jiffies_to_msecs(jiffies)) { + __ksmbd_remove_durable_fd(fp); + /* + * Take a transient reference so fp + * cannot be freed by an in-flight + * ksmbd_lookup_fd_inode() that found + * it through f_ci->m_fp_list while we + * drop global_ft.lock and reach the + * m_fp_list unlink in + * ksmbd_scavenger_dispose_dh(). + */ + atomic_inc(&fp->refcount); + expired_fp = fp; + break; + } + durable_timeout = fp->durable_scavenger_timeout - jiffies_to_msecs(jiffies); @@ -1094,10 +1143,11 @@ static int ksmbd_durable_scavenger(void *dummy) if (min_timeout > durable_timeout) min_timeout = durable_timeout; } - } - write_unlock(&global_ft.lock); + write_unlock(&global_ft.lock); - ksmbd_scavenger_dispose_dh(&scavenger_list); + if (expired_fp) + ksmbd_scavenger_dispose_dh(expired_fp); + } while (expired_fp); if (found_fp_timeout == false) break; -- cgit v1.2.3 From a74668eb2c0b866d7ac4823be6006ab2e227bc03 Mon Sep 17 00:00:00 2001 From: Shuhao Fu Date: Wed, 29 Apr 2026 16:59:56 +0800 Subject: ksmbd: fail share config requests when path allocation fails Non-pipe shares must have a duplicated backing path before they can be published. share_config_request() currently calls kstrndup() for that path, but if the allocation fails it leaves ret unchanged. If veto list parsing succeeds and share->name exists, the partially built share is still inserted into the global share table with share->path left NULL. A later share-root SMB2 create uses tree_conn->share_conf->path as the lookup root. If the share was published with path == NULL, that request passes a NULL pathname into do_getname_kernel()/strlen() and can crash the ksmbd worker. Set ret = -ENOMEM when path duplication fails so the incomplete share is destroyed before publication. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Signed-off-by: Shuhao Fu Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/mgmt/share_config.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c index 53f44ff4d376..6f97f8d39657 100644 --- a/fs/smb/server/mgmt/share_config.c +++ b/fs/smb/server/mgmt/share_config.c @@ -167,7 +167,10 @@ static struct ksmbd_share_config *share_config_request(struct ksmbd_work *work, share->path = kstrndup(ksmbd_share_config_path(resp), path_len, KSMBD_DEFAULT_GFP); - if (share->path) { + if (!share->path) { + ret = -ENOMEM; + } else { + ret = 0; share->path_sz = strlen(share->path); while (share->path_sz > 1 && share->path[share->path_sz - 1] == '/') @@ -179,9 +182,10 @@ static struct ksmbd_share_config *share_config_request(struct ksmbd_work *work, share->force_directory_mode = resp->force_directory_mode; share->force_uid = resp->force_uid; share->force_gid = resp->force_gid; - ret = parse_veto_list(share, - KSMBD_SHARE_CONFIG_VETO_LIST(resp), - resp->veto_list_sz); + if (!ret) + ret = parse_veto_list(share, + KSMBD_SHARE_CONFIG_VETO_LIST(resp), + resp->veto_list_sz); if (!ret && share->path) { if (__ksmbd_override_fsids(work, share)) { kill_share(share); -- cgit v1.2.3 From 6fd7dd4e44d7840cb1ba0c3a895e9f576af3fe5c Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 1 May 2026 08:34:55 +0900 Subject: ksmbd: fix kernel-doc warnings from ksmbd_conn_get/put() The kernel test robot reported W=1 build warnings for ksmbd_conn_get() and ksmbd_conn_put() due to missing parameter descriptions. Add the @conn description to fix these warnings. Reported-by: kernel test robot Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 95654a808162..8347495dbc62 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -122,6 +122,8 @@ static void __ksmbd_conn_release_work(struct work_struct *work) /** * ksmbd_conn_get() - take a reference on @conn and return it. * + * @conn: connection instance to get a reference to + * * Returns @conn unchanged so callers can write * "fp->conn = ksmbd_conn_get(work->conn);" in one expression. Returns NULL * if @conn is NULL. @@ -139,6 +141,8 @@ struct ksmbd_conn *ksmbd_conn_get(struct ksmbd_conn *conn) * ksmbd_conn_put() - drop a reference and, if it was the last, queue the * release onto ksmbd_conn_wq so it runs from process context. * + * @conn: connection instance to put a reference to + * * Callable from any context including RCU softirq callbacks and non-sleeping * locks; the actual release is deferred to the workqueue. ksmbd_conn_wq is * created in ksmbd_server_init() before any conn can be allocated and is -- cgit v1.2.3 From 996454bc0da84d5a1dedb1a7861823087e01a7ae Mon Sep 17 00:00:00 2001 From: Shota Zaizen Date: Tue, 28 Apr 2026 19:02:55 +0900 Subject: ksmbd: validate inherited ACE SID length smb_inherit_dacl() walks the parent directory DACL loaded from the security descriptor xattr. It verifies that each ACE contains the fixed SID header before using it, but does not verify that the variable-length SID described by sid.num_subauth is fully contained in the ACE. A malformed inheritable ACE can advertise more subauthorities than are present in the ACE. compare_sids() may then read past the ACE. smb_set_ace() also clamps the copied destination SID, but used the unchecked source SID count to compute the inherited ACE size. That could advance the temporary inherited ACE buffer pointer and nt_size accounting past the allocated buffer. Fix this by validating the parent ACE SID count and SID length before using the SID during inheritance. Compute the inherited ACE size from the copied SID so the size matches the bounded destination SID. Reject the inherited DACL if size accumulation would overflow smb_acl.size or the security descriptor allocation size. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Signed-off-by: Shota Zaizen Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smbacl.c | 66 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 14 deletions(-) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 4bbc2c27e680..c1d1f34581d6 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1068,7 +1068,26 @@ static void smb_set_ace(struct smb_ace *ace, const struct smb_sid *sid, u8 type, ace->flags = flags; ace->access_req = access_req; smb_copy_sid(&ace->sid, sid); - ace->size = cpu_to_le16(1 + 1 + 2 + 4 + 1 + 1 + 6 + (sid->num_subauth * 4)); + ace->size = cpu_to_le16(1 + 1 + 2 + 4 + 1 + 1 + 6 + + (ace->sid.num_subauth * 4)); +} + +static int smb_append_inherited_ace(struct smb_ace **ace, int *nt_size, + u16 *ace_cnt, const struct smb_sid *sid, + u8 type, u8 flags, __le32 access_req) +{ + int ace_size; + + smb_set_ace(*ace, sid, type, flags, access_req); + ace_size = le16_to_cpu((*ace)->size); + /* pdacl->size is __le16 and includes struct smb_acl. */ + if (check_add_overflow(*nt_size, ace_size, nt_size) || + *nt_size > U16_MAX - (int)sizeof(struct smb_acl)) + return -EINVAL; + + (*ace_cnt)++; + *ace = (struct smb_ace *)((char *)*ace + ace_size); + return 0; } int smb_inherit_dacl(struct ksmbd_conn *conn, @@ -1157,6 +1176,12 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, CIFS_SID_BASE_SIZE) break; + if (parent_aces->sid.num_subauth > SID_MAX_SUB_AUTHORITIES || + pace_size < offsetof(struct smb_ace, sid) + + CIFS_SID_BASE_SIZE + + sizeof(__le32) * parent_aces->sid.num_subauth) + break; + aces_size -= pace_size; flags = parent_aces->flags; @@ -1186,22 +1211,24 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, } if (is_dir && creator && flags & CONTAINER_INHERIT_ACE) { - smb_set_ace(aces, psid, parent_aces->type, inherited_flags, - parent_aces->access_req); - nt_size += le16_to_cpu(aces->size); - ace_cnt++; - aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size)); + rc = smb_append_inherited_ace(&aces, &nt_size, &ace_cnt, + psid, parent_aces->type, + inherited_flags, + parent_aces->access_req); + if (rc) + goto free_aces_base; flags |= INHERIT_ONLY_ACE; psid = creator; } else if (is_dir && !(parent_aces->flags & NO_PROPAGATE_INHERIT_ACE)) { psid = &parent_aces->sid; } - smb_set_ace(aces, psid, parent_aces->type, flags | inherited_flags, - parent_aces->access_req); - nt_size += le16_to_cpu(aces->size); - aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size)); - ace_cnt++; + rc = smb_append_inherited_ace(&aces, &nt_size, &ace_cnt, psid, + parent_aces->type, + flags | inherited_flags, + parent_aces->access_req); + if (rc) + goto free_aces_base; pass: parent_aces = (struct smb_ace *)((char *)parent_aces + pace_size); } @@ -1211,7 +1238,7 @@ pass: struct smb_acl *pdacl; struct smb_sid *powner_sid = NULL, *pgroup_sid = NULL; int powner_sid_size = 0, pgroup_sid_size = 0, pntsd_size; - int pntsd_alloc_size; + size_t pntsd_alloc_size; if (parent_pntsd->osidoffset) { powner_sid = (struct smb_sid *)((char *)parent_pntsd + @@ -1224,8 +1251,19 @@ pass: pgroup_sid_size = 1 + 1 + 6 + (pgroup_sid->num_subauth * 4); } - pntsd_alloc_size = sizeof(struct smb_ntsd) + powner_sid_size + - pgroup_sid_size + sizeof(struct smb_acl) + nt_size; + if (check_add_overflow(sizeof(struct smb_ntsd), + (size_t)powner_sid_size, + &pntsd_alloc_size) || + check_add_overflow(pntsd_alloc_size, + (size_t)pgroup_sid_size, + &pntsd_alloc_size) || + check_add_overflow(pntsd_alloc_size, sizeof(struct smb_acl), + &pntsd_alloc_size) || + check_add_overflow(pntsd_alloc_size, (size_t)nt_size, + &pntsd_alloc_size)) { + rc = -EINVAL; + goto free_aces_base; + } pntsd = kzalloc(pntsd_alloc_size, KSMBD_DEFAULT_GFP); if (!pntsd) { -- cgit v1.2.3 From 6ebcbb53fc9bc30843054ed99fd60b8e542628f4 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 1 May 2026 06:23:20 +0000 Subject: riscv: Fix register corruption from uninitialized cregs on error compat_riscv_gpr_set() calls cregs_to_regs() unconditionally, even when user_regset_copyin() fails. Since cregs is an uninitialized stack variable, a copyin failure causes uninitialized stack data to be written into the target task's pt_regs, corrupting its register state and potentially leaking kernel stack contents. compat_restore_sigcontext() has the same issue: it calls cregs_to_regs() even when __copy_from_user() fails, leading to the same corruption of the signal-returning task's register state on error. Only call cregs_to_regs() when the user copy succeeds. Fixes: 4608c159594f ("riscv: compat: ptrace: Add compat_arch_ptrace implement") Fixes: 7383ee05314b ("riscv: compat: signal: Add rt_frame implementation") Signed-off-by: Michael Neuling Assisted-by: Cursor:claude-4.6-opus-high-thinking Link: https://patch.msgid.link/20260501062320.2339562-1-mikey@neuling.org Signed-off-by: Paul Walmsley --- arch/riscv/kernel/compat_signal.c | 2 ++ arch/riscv/kernel/ptrace.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/compat_signal.c b/arch/riscv/kernel/compat_signal.c index 6ec4e34255a9..cf3eb33a11e4 100644 --- a/arch/riscv/kernel/compat_signal.c +++ b/arch/riscv/kernel/compat_signal.c @@ -107,6 +107,8 @@ static long compat_restore_sigcontext(struct pt_regs *regs, /* sc_regs is structured the same as the start of pt_regs */ err = __copy_from_user(&cregs, &sc->sc_regs, sizeof(sc->sc_regs)); + if (unlikely(err)) + return err; cregs_to_regs(&cregs, regs); diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 93de2e7a3074..793bcee46182 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -577,8 +577,8 @@ static int compat_riscv_gpr_set(struct task_struct *target, struct compat_user_regs_struct cregs; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &cregs, 0, -1); - - cregs_to_regs(&cregs, task_pt_regs(target)); + if (!ret) + cregs_to_regs(&cregs, task_pt_regs(target)); return ret; } -- cgit v1.2.3 From db909bd7986c10da074917af3dae83a60fa65093 Mon Sep 17 00:00:00 2001 From: "Guo Ren (Alibaba DAMO Academy)" Date: Sun, 25 Jan 2026 00:52:12 -0500 Subject: riscv: mm: Fixup no5lvl failure when vaddr is invalid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlike no4lvl, no5lvl still continues to detect satp, which requires va=pa mapping. When pa=0x800000000000, no5lvl would fail in Sv48 mode due to an illegal VA value of 0x800000000000. So, prevent detecting the satp flow for no5lvl, when vaddr is invalid. Add the is_vaddr_valid() function for checking. Fixes: 26e7aacb83df ("riscv: Allow to downgrade paging mode from the command line") Cc: Alexandre Ghiti Cc: Björn Töpel Signed-off-by: Guo Ren (Alibaba DAMO Academy) Tested-by: Fangyu Yu Link: https://patch.msgid.link/20260125055212.433163-1-guoren@kernel.org [pjw@kernel.org: cleaned up commit message] Signed-off-by: Paul Walmsley --- arch/riscv/mm/init.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index decd7df40fa4..fa8d2f6f554b 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -792,6 +792,27 @@ static void __init set_mmap_rnd_bits_max(void) mmap_rnd_bits_max = MMAP_VA_BITS - PAGE_SHIFT - 3; } +static bool __init is_vaddr_valid(unsigned long va) +{ + unsigned long up = 0; + + switch (satp_mode) { + case SATP_MODE_39: + up = 1UL << 38; + break; + case SATP_MODE_48: + up = 1UL << 47; + break; + case SATP_MODE_57: + up = 1UL << 56; + break; + default: + return false; + } + + return (va < up) || (va >= (ULONG_MAX - up + 1)); +} + /* * There is a simple way to determine if 4-level is supported by the * underlying hardware: establish 1:1 mapping in 4-level page table mode @@ -833,6 +854,9 @@ static __init void set_satp_mode(uintptr_t dtb_pa) set_satp_mode_pmd + PMD_SIZE, PMD_SIZE, PAGE_KERNEL_EXEC); retry: + if (!is_vaddr_valid(set_satp_mode_pmd)) + goto out; + create_pgd_mapping(early_pg_dir, set_satp_mode_pmd, pgtable_l5_enabled ? @@ -855,6 +879,7 @@ retry: disable_pgtable_l4(); } +out: memset(early_pg_dir, 0, PAGE_SIZE); memset(early_p4d, 0, PAGE_SIZE); memset(early_pud, 0, PAGE_SIZE); -- cgit v1.2.3 From 0c7ae130698e70107430254e79fbe996b4d37ab5 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Sat, 2 May 2026 12:12:40 +0200 Subject: tools/headers: Regenerate stddef.h to fix BPF selftests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With commit dacbfc167808 ("crypto: af_alg - Annotate struct af_alg_iv with __counted_by"), two selftests, test_tag and crypto_sanity, now indirectly rely on the __counted_by macro. On systems with commit dacbfc167808 in the installed UAPI headers, the selftests build fails with: In file included from tools/testing/selftests/bpf/prog_tests/crypto_sanity.c:7: /usr/include/linux/if_alg.h:45:22: error: expected ‘:’, ‘,’, ‘;’, ‘}’ or ‘__attribute__’ before ‘__counted_by’ 45 | __u8 iv[] __counted_by(ivlen); | ^~~~~~~~~~~~ This patch fixes it by regenerating stddef.h in tools/include using the instructions from commit a778f5d46b62 ("tools/headers: Pull in stddef.h to uapi to fix BPF selftests build in CI"). Fixes: dacbfc167808 ("crypto: af_alg - Annotate struct af_alg_iv with __counted_by") Signed-off-by: Paul Chaignon Reviewed-by: Alan Maguire Tested-by: Ihor Solodrai Link: https://lore.kernel.org/r/8da8ef16055aa452d940668ed5359ce54adc6b0b.1777715500.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/stddef.h | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h index c53cde425406..457498259494 100644 --- a/tools/include/uapi/linux/stddef.h +++ b/tools/include/uapi/linux/stddef.h @@ -3,7 +3,6 @@ #define _LINUX_STDDEF_H - #ifndef __always_inline #define __always_inline __inline__ #endif @@ -36,6 +35,11 @@ struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \ } ATTRS +#ifdef __cplusplus +/* sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */ +#define __DECLARE_FLEX_ARRAY(T, member) \ + T member[0] +#else /** * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union * @@ -52,3 +56,23 @@ TYPE NAME[]; \ } #endif + +#ifndef __counted_by +#define __counted_by(m) +#endif + +#ifndef __counted_by_le +#define __counted_by_le(m) +#endif + +#ifndef __counted_by_be +#define __counted_by_be(m) +#endif + +#ifndef __counted_by_ptr +#define __counted_by_ptr(m) +#endif + +#define __kernel_nonstring + +#endif /* _LINUX_STDDEF_H */ -- cgit v1.2.3 From ddca6da148b8ced3e6d3d7fb3b2e5b4ed6359dc2 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 27 Apr 2026 11:23:35 +0100 Subject: MAINTAINERS: Add self for the DEC LANCE network driver Like with the rest of DECstation and TURBOchannel hardware I have been handling the DEC LANCE network driver for some 25 years now anyway. Signed-off-by: Maciej W. Rozycki Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/alpine.DEB.2.21.2604271113520.28583@angie.orcam.me.uk Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 27a073f53cea..ec8661b446fb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7077,6 +7077,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/debugobjec F: include/linux/debugobjects.h F: lib/debugobjects.c +DEC LANCE NETWORK DRIVER +M: "Maciej W. Rozycki" +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/amd/declance.c + DECSTATION PLATFORM SUPPORT M: "Maciej W. Rozycki" L: linux-mips@vger.kernel.org -- cgit v1.2.3 From 1a57efe250a13906396c2a4792f0090f142f9844 Mon Sep 17 00:00:00 2001 From: Holger Brunck Date: Wed, 29 Apr 2026 13:42:07 +0200 Subject: net: wan: fsl_ucc_hdlc: fix uhdlc_memclean Unmapping of uf_regs is done from ucc_fast_free and doesn't need to be done explicitly. If already unmapped ucc_fast_free will crash. Fixes: c19b6d246a35 ("drivers/net: support hdlc function for QE-UCC") Signed-off-by: Holger Brunck Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/wan/fsl_ucc_hdlc.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 3bd57527b1be..8155e92af14e 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -773,11 +773,6 @@ static void uhdlc_memclean(struct ucc_hdlc_private *priv) kfree(priv->tx_skbuff); priv->tx_skbuff = NULL; - if (priv->uf_regs) { - iounmap(priv->uf_regs); - priv->uf_regs = NULL; - } - if (priv->uccf) { ucc_fast_free(priv->uccf); priv->uccf = NULL; -- cgit v1.2.3 From 851bba8068d15f5a386da544096f7ed6bc16e551 Mon Sep 17 00:00:00 2001 From: Holger Brunck Date: Wed, 29 Apr 2026 13:42:08 +0200 Subject: net: wan: fsl_ucc_hdlc: fix ucc_hdlc_remove If the driver is used in a non tdm mode priv->utdm is a NULL pointer. Therefore we need to check this pointer first before checking si_regs. Fixes: c19b6d246a35 ("drivers/net: support hdlc function for QE-UCC") Signed-off-by: Holger Brunck Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/wan/fsl_ucc_hdlc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 8155e92af14e..15bfb78381d4 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -1250,12 +1250,12 @@ static void ucc_hdlc_remove(struct platform_device *pdev) uhdlc_memclean(priv); - if (priv->utdm->si_regs) { + if (priv->utdm && priv->utdm->si_regs) { iounmap(priv->utdm->si_regs); priv->utdm->si_regs = NULL; } - if (priv->utdm->siram) { + if (priv->utdm && priv->utdm->siram) { iounmap(priv->utdm->siram); priv->utdm->siram = NULL; } -- cgit v1.2.3 From 383d0fb8946921b4914ea0f360342e221d419d40 Mon Sep 17 00:00:00 2001 From: Gregory Fuchedgi Date: Wed, 29 Apr 2026 14:54:14 -0700 Subject: amd-xgbe: fix PTP addend overflow causing frozen clock XGBE_PTP_ACT_CLK_FREQ and XGBE_V2_PTP_ACT_CLK_FREQ were 10x too large (500MHz/1GHz instead of 50MHz/100MHz), causing the computed addend to overflow the 32-bit tstamp_addend. In the general case this would result in the clock advancing at the wrong rate. For v2 (PCI), ptpclk_rate is hardcoded to 125MHz, so the addend formula (ACT_CLK_FREQ << 32) / ptpclk_rate yields exactly 8 * 2^32, and when stored to the 32-bit tstamp_addend the value is zero. With addend = 0 the hardware accumulator never overflows and the PTP clock is fully stopped. For v1 (platform), ptpclk_rate is read from ACPI/DT so the exact overflow behavior depends on the firmware-reported frequency. Define the constants as NSEC_PER_SEC / SSINC so the relationship is explicit and cannot drift out of sync. Fixes: fbd47be098b5 ("amd-xgbe: add hardware PTP timestamping support") Tested-by: Gregory Fuchedgi Signed-off-by: Gregory Fuchedgi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260429-fix-xgbe-ptp-addend-v1-1-fca5b0ca5e62@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 60b7e53206d1..3d3b09010d48 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -135,11 +135,11 @@ */ #define XGBE_TSTAMP_SSINC 20 #define XGBE_TSTAMP_SNSINC 0 -#define XGBE_PTP_ACT_CLK_FREQ 500000000 +#define XGBE_PTP_ACT_CLK_FREQ (NSEC_PER_SEC / XGBE_TSTAMP_SSINC) #define XGBE_V2_TSTAMP_SSINC 0xA #define XGBE_V2_TSTAMP_SNSINC 0 -#define XGBE_V2_PTP_ACT_CLK_FREQ 1000000000 +#define XGBE_V2_PTP_ACT_CLK_FREQ (NSEC_PER_SEC / XGBE_V2_TSTAMP_SSINC) /* Define maximum supported values */ #define XGBE_MAX_PPS_OUT 4 -- cgit v1.2.3 From 458d5615272d3de535748342eb68ca492343048c Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Thu, 30 Apr 2026 11:29:55 -0400 Subject: net/sched: sch_red: Replace direct dequeue call with peek and qdisc_dequeue_peeked When red qdisc has children (eg qfq qdisc) whose peek() callback is qdisc_peek_dequeued(), we could get a kernel panic. When the parent of such qdiscs (eg illustrated in patch #3 as tbf) wants to retrieve an skb from its child (red in this case), it will do the following: 1a. do a peek() - and when sensing there's an skb the child can offer, then - the child in this case(red) calls its child's (qfq) peek. qfq does the right thing and will return the gso_skb queue packet. Note: if there wasnt a gso_skb entry then qfq will store it there. 1b. invoke a dequeue() on the child (red). And herein lies the problem. - red will call the child's dequeue() which will essentially just try to grab something of qfq's queue. [ 78.667668][ T363] KASAN: null-ptr-deref in range [0x0000000000000048-0x000000000000004f] [ 78.667927][ T363] CPU: 1 UID: 0 PID: 363 Comm: ping Not tainted 7.1.0-rc1-00033-g46f74a3f7d57-dirty #790 PREEMPT(full) [ 78.668263][ T363] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 78.668486][ T363] RIP: 0010:qfq_dequeue+0x446/0xc90 [sch_qfq] [ 78.668718][ T363] Code: 54 c0 e8 dd 90 00 f1 48 c7 c7 e0 03 54 c0 48 89 de e8 ce 90 00 f1 48 8d 7b 48 b8 ff ff 37 00 48 89 fa 48 c1 e0 2a 48 c1 ea 03 <80> 3c 02 00 74 05 e8 ef a1 e1 f1 48 8b 7b 48 48 8d 54 24 58 48 8d [ 78.669312][ T363] RSP: 0018:ffff88810de573e0 EFLAGS: 00010216 [ 78.669533][ T363] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 78.669790][ T363] RDX: 0000000000000009 RSI: 0000000000000004 RDI: 0000000000000048 [ 78.670044][ T363] RBP: ffff888110dc4000 R08: ffffffffb1b0885a R09: fffffbfff6ba9078 [ 78.670297][ T363] R10: 0000000000000003 R11: ffff888110e31c80 R12: 0000001880000000 [ 78.670560][ T363] R13: ffff888110dc4150 R14: ffff888110dc42b8 R15: 0000000000000200 [ 78.670814][ T363] FS: 00007f66a8f09c40(0000) GS:ffff888163428000(0000) knlGS:0000000000000000 [ 78.671110][ T363] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 78.671324][ T363] CR2: 000055db4c6a30a8 CR3: 000000010da67000 CR4: 0000000000750ef0 [ 78.671585][ T363] PKRU: 55555554 [ 78.671713][ T363] Call Trace: [ 78.671843][ T363] [ 78.671936][ T363] ? __pfx_qfq_dequeue+0x10/0x10 [sch_qfq] [ 78.672148][ T363] ? __pfx__printk+0x10/0x10 [ 78.672322][ T363] ? srso_alias_return_thunk+0x5/0xfbef5 [ 78.672496][ T363] ? lockdep_hardirqs_on_prepare+0xa8/0x1a0 [ 78.672706][ T363] ? srso_alias_return_thunk+0x5/0xfbef5 [ 78.672875][ T363] ? trace_hardirqs_on+0x19/0x1a0 [ 78.673047][ T363] red_dequeue+0x65/0x270 [sch_red] [ 78.673217][ T363] ? srso_alias_return_thunk+0x5/0xfbef5 [ 78.673385][ T363] tbf_dequeue.cold+0xb0/0x70c [sch_tbf] [ 78.673566][ T363] __qdisc_run+0x169/0x1900 The right thing to do in #1b is to grab the skb off gso_skb queue. This patchset fixes that issue by changing #1b to use qdisc_dequeue_peeked() method instead. Fixes: 77be155cba4e ("pkt_sched: Add peek emulation for non-work-conserving qdiscs.") Reported-by: Manas Reported-by: Rakshit Awasthi Signed-off-by: Jamal Hadi Salim Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260430152957.194015-2-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_red.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 432b8a3000a5..4d0e44a2e7c6 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -162,7 +162,7 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; - skb = child->dequeue(child); + skb = qdisc_dequeue_peeked(child); if (skb) { qdisc_bstats_update(sch, skb); qdisc_qstats_backlog_dec(sch, skb); -- cgit v1.2.3 From 1b9bc71153b01dbde8045b9edede4240f4f5520e Mon Sep 17 00:00:00 2001 From: Victor Nogueria Date: Thu, 30 Apr 2026 11:29:56 -0400 Subject: net/sched: sch_sfb: Replace direct dequeue call with peek and qdisc_dequeue_peeked When sfb has children (eg qfq qdisc) whose peek() callback is qdisc_peek_dequeued(), we could get a kernel panic. When the parent of such qdiscs (eg illustrated in patch #3 as tbf) wants to retrieve an skb from its child (sfb in this case), it will do the following: 1a. do a peek() - and when sensing there's an skb the child can offer, then - the child in this case(sfb) calls its child's (qfq) peek. qfq does the right thing and will return the gso_skb queue packet. Note: if there wasnt a gso_skb entry then qfq will store it there. 1b. invoke a dequeue() on the child (sfb). And herein lies the problem. - sfb will call the child's dequeue() which will essentially just try to grab something of qfq's queue. [ 127.594489][ T453] KASAN: null-ptr-deref in range [0x0000000000000048-0x000000000000004f] [ 127.594741][ T453] CPU: 2 UID: 0 PID: 453 Comm: ping Not tainted 7.1.0-rc1-00035-gac961974495b-dirty #793 PREEMPT(full) [ 127.595059][ T453] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 127.595254][ T453] RIP: 0010:qfq_dequeue+0x35c/0x1650 [sch_qfq] [ 127.595461][ T453] Code: 00 fc ff df 80 3c 02 00 0f 85 17 0e 00 00 4c 8d 73 48 48 89 9d b8 02 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 f2 48 c1 ea 03 <80> 3c 02 00 0f 85 76 0c 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8b [ 127.596081][ T453] RSP: 0018:ffff88810e5af440 EFLAGS: 00010216 [ 127.596337][ T453] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: dffffc0000000000 [ 127.596623][ T453] RDX: 0000000000000009 RSI: 0000001880000000 RDI: ffff888104fd82b0 [ 127.596917][ T453] RBP: ffff888104fd8000 R08: ffff888104fd8280 R09: 1ffff110211893a3 [ 127.597165][ T453] R10: 1ffff110211893a6 R11: 1ffff110211893a7 R12: 0000001880000000 [ 127.597404][ T453] R13: ffff888104fd82b8 R14: 0000000000000048 R15: 0000000040000000 [ 127.597644][ T453] FS: 00007fc380cbfc40(0000) GS:ffff88816f2a8000(0000) knlGS:0000000000000000 [ 127.597956][ T453] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 127.598160][ T453] CR2: 00005610aa9890a8 CR3: 000000010369e000 CR4: 0000000000750ef0 [ 127.598390][ T453] PKRU: 55555554 [ 127.598509][ T453] Call Trace: [ 127.598629][ T453] [ 127.598718][ T453] ? mark_held_locks+0x40/0x70 [ 127.598890][ T453] ? srso_alias_return_thunk+0x5/0xfbef5 [ 127.599053][ T453] sfb_dequeue+0x88/0x4d0 [ 127.599174][ T453] ? ktime_get+0x137/0x230 [ 127.599328][ T453] ? srso_alias_return_thunk+0x5/0xfbef5 [ 127.599480][ T453] ? qdisc_peek_dequeued+0x7b/0x350 [sch_qfq] [ 127.599670][ T453] ? srso_alias_return_thunk+0x5/0xfbef5 [ 127.599831][ T453] tbf_dequeue+0x6b1/0x1098 [sch_tbf] [ 127.599988][ T453] __qdisc_run+0x169/0x1900 The right thing to do in #1b is to grab the skb off gso_skb queue. This patchset fixes that issue by changing #1b to use qdisc_dequeue_peeked() method instead. Fixes: e13e02a3c68d ("net_sched: SFB flow scheduler") Signed-off-by: Victor Nogueria Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260430152957.194015-3-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_sfb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index bd5ef561030f..d3ee8e5479b3 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -441,7 +441,7 @@ static struct sk_buff *sfb_dequeue(struct Qdisc *sch) struct Qdisc *child = q->qdisc; struct sk_buff *skb; - skb = child->dequeue(q->qdisc); + skb = qdisc_dequeue_peeked(child); if (skb) { qdisc_bstats_update(sch, skb); -- cgit v1.2.3 From 3a3a30c14d7f04206916824a79878cfefac6c8e2 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Thu, 30 Apr 2026 11:29:57 -0400 Subject: selftests/tc-testing: Add tests that force red and sfb to dequeue from child's gso_skb Create 4 test cases: - Force red to dequeue from its child's gso_skb with qfq leaf - Force sfb to dequeue from its child's gso_skb with qfq leaf - Force red to dequeue from its child's gso_skb with dualpi2 leaf - Force sfb to dequeue from its child's gso_skb with dualpi2 leaf All of them have tbf followed by red (or sfb) followed by qfq (or dualpi2). Since tbf calls its child's peek followed by qdisc_dequeue_peeked, it will force red/sfb to call their child's peek. In this case, since the child (qfq/dualpi2) has qdisc_peek_dequeued as its peek callback, the packet will be stored in its gso_skb queue. During the subsequent call to qdisc_dequeue_peeked, red/sfb will have to dequeue from the child's gso_skb to retrieve the packet. Not doing so will cause a NULL ptr deref which was happening before a recent fix. Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20260430152957.194015-4-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 148 +++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index eefadd0546d3..b1f856cf62c1 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -1136,5 +1136,153 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "7a5f", + "name": "Force red to dequeue from its child's gso_skb with qfq leaf", + "category": [ + "qdisc", + "tbf", + "red", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b", + "$TC qdisc add dev $DUMMY parent 1: handle 2: red limit 757 min 16 max 24 avpkt 16", + "$TC qdisc add dev $DUMMY parent 2: handle 3: qfq", + "$TC class add dev $DUMMY classid 3:1 parent 3: qfq maxpkt 512 weight 1", + "$TC filter add dev $DUMMY parent 3: protocol ip prio 1 matchall classid 3:1 action ok" + ], + "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true", + "expExitCode": "0", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:", + "matchJSON": [ + { + "kind": "red", + "handle": "2:", + "bytes": 98, + "packets": 1, + "backlog": 0, + "qlen": 0 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "cdae", + "name": "Force sfb to dequeue from its child's gso_skb with qfq leaf", + "category": [ + "qdisc", + "tbf", + "sfb", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b", + "$TC qdisc add dev $DUMMY parent 1: handle 2: sfb", + "$TC qdisc add dev $DUMMY parent 2: handle 3: qfq", + "$TC class add dev $DUMMY classid 3:1 parent 3: qfq maxpkt 512 weight 1", + "$TC filter add dev $DUMMY parent 3: protocol ip prio 1 matchall classid 3:1 action ok" + ], + "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true", + "expExitCode": "0", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:", + "matchJSON": [ + { + "kind": "sfb", + "handle": "2:", + "bytes": 98, + "packets": 1, + "backlog": 0, + "qlen": 0 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "291d", + "name": "Force red to dequeue from its child's gso_skb with dualpi2 leaf", + "category": [ + "qdisc", + "tbf", + "red", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b", + "$TC qdisc add dev $DUMMY parent 1: handle 2: red limit 757 min 16 max 24 avpkt 16", + "$TC qdisc add dev $DUMMY parent 2: handle 3: dualpi2" + ], + "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true", + "expExitCode": "0", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:", + "matchJSON": [ + { + "kind": "red", + "handle": "2:", + "bytes": 98, + "packets": 1, + "backlog": 0, + "qlen": 0 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "9c6d", + "name": "Force sfb to dequeue from its child's gso_skb with dualpi2 leaf", + "category": [ + "qdisc", + "tbf", + "sfb", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 88bit burst 1661b peakrate 2257333 minburst 1024 limit 7b", + "$TC qdisc add dev $DUMMY parent 1: handle 2: sfb", + "$TC qdisc add dev $DUMMY parent 2: handle 3: dualpi2" + ], + "cmdUnderTest": "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true", + "expExitCode": "0", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:", + "matchJSON": [ + { + "kind": "sfb", + "handle": "2:", + "bytes": 98, + "packets": 1, + "backlog": 0, + "qlen": 0 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] } ] -- cgit v1.2.3 From 1d324c2f43f70c965f25c58cc3611c779adbe47e Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Thu, 30 Apr 2026 18:33:18 +0800 Subject: ip6_gre: Use cached t->net in ip6erspan_changelink(). After commit 5e72ce3e3980 ("net: ipv6: Use link netns in newlink() of rtnl_link_ops"), ip6erspan_newlink() correctly resolves the per-netns ip6gre hash via link_net. ip6erspan_changelink() was not converted in that series and still uses dev_net(dev), which diverges from the device's creation netns after IFLA_NET_NS_FD migration. This re-inserts the tunnel into the wrong per-netns hash. The original netns keeps a stale entry. When that netns is later destroyed, ip6gre_exit_rtnl_net() walks the stale entry, producing a slab-use-after-free reported by KASAN, followed by a kernel BUG at net/core/dev.c (LIST_POISON1) in unregister_netdevice_many_notify(). Reachable from an unprivileged user namespace (unshare --user --map-root-user --net). ip6gre_changelink() earlier in the same file already uses the cached t->net; only ip6erspan_changelink() has the wrong shape. Fixes: 2d665034f239 ("net: ip6_gre: Fix ip6erspan hlen calculation") Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Maoyi Xie Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260430103318.3206018-1-maoyi.xie@ntu.edu.sg Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_gre.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 63fc8556b475..365b4059eb20 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2262,10 +2262,11 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); + struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm p; - struct ip6_tnl *t; + struct ip6gre_net *ign; + ign = net_generic(t->net, ip6gre_net_id); t = ip6gre_changelink_common(dev, tb, data, &p, extack); if (IS_ERR(t)) return PTR_ERR(t); -- cgit v1.2.3 From 70f780edcd1e86350202d8a409de026b2d2e2067 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:34 -0300 Subject: RDMA/ionic: Fix typo in format string Applying the corrupted patch by hand mangled the format string, put the s in the right place. Cc: stable@vger.kernel.org Fixes: 654a27f25530 ("RDMA/ionic: bound node_desc sysfs read with %.64s") Link: https://patch.msgid.link/r/1-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Reported-by: Brad Spengler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ionic/ionic_ibdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c index 0382a64839d2..73a616ae3502 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.c +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c @@ -185,7 +185,7 @@ static ssize_t hca_type_show(struct device *device, struct ionic_ibdev *dev = rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev); - return sysfs_emit(buf, "%s.64\n", dev->ibdev.node_desc); + return sysfs_emit(buf, "%.64s\n", dev->ibdev.node_desc); } static DEVICE_ATTR_RO(hca_type); -- cgit v1.2.3 From 641858d52f2372124d9312a407e2124915d846ee Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:35 -0300 Subject: RDMA/mlx5: Restore zero-init to mlx5_ib_modify_qp() ucmd Sashiko points out the check for inlen==0 got missed, the ={} was not redundant, put it back. Fixes: a9cd442a5347 ("RDMA: Remove redundant = {} for udata req structs") Link: https://patch.msgid.link/r/2-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 1611a704c1b3..8fd05532c09c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4697,7 +4697,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_modify_qp_resp resp = {}; struct mlx5_ib_qp *qp = to_mqp(ibqp); - struct mlx5_ib_modify_qp ucmd; + struct mlx5_ib_modify_qp ucmd = {}; enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; int err = -EINVAL; -- cgit v1.2.3 From 45e8ebc9ede73543c55d597bb53b6bbb7e8b7327 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:36 -0300 Subject: RDMA/mlx5: Add missing store/release for lock elision pattern mlx5 has a common pattern implementing a device-global singleton resource where it checks the resource pointer for !NULL and then skips obtaining the lock. This is not ordered properly as observing !NULL doesn't mean that all the data under that pointer is also visible on this CPU when the lock is not taken. Use a release/acquire pairing to explicitly manage this. Pointed out by sashiko, Codex found more cases. Fixes: 5895e70f2e6e ("IB/mlx5: Allocate resources just before first QP/SRQ is created") Fixes: 638420115cc4 ("IB/mlx5: Create UMR QP just before first reg_mr occurs") Link: https://sashiko.dev/#/patchset/SYBPR01MB7881E1E0970268BD69C0BA75AF2B2%40SYBPR01MB7881.ausprd01.prod.outlook.com Link: https://patch.msgid.link/r/3-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Assisted-by: Codex:GPT-5.5 Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 8 ++++---- drivers/infiniband/hw/mlx5/umr.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8115ae869ef2..61078281953d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3310,7 +3310,7 @@ int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev) * devr->c0 is set once, never changed until device unload. * Avoid taking the mutex if initialization is already done. */ - if (devr->c0) + if (smp_load_acquire(&devr->c0)) return 0; mutex_lock(&devr->cq_lock); @@ -3336,7 +3336,7 @@ int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev) } devr->p0 = pd; - devr->c0 = cq; + smp_store_release(&devr->c0, cq); unlock: mutex_unlock(&devr->cq_lock); @@ -3354,7 +3354,7 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev) * devr->s1 is set once, never changed until device unload. * Avoid taking the mutex if initialization is already done. */ - if (devr->s1) + if (smp_load_acquire(&devr->s1)) return 0; mutex_lock(&devr->srq_lock); @@ -3396,7 +3396,7 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev) } devr->s0 = s0; - devr->s1 = s1; + smp_store_release(&devr->s1, s1); unlock: mutex_unlock(&devr->srq_lock); diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c index 29488fba21a0..f2139474be37 100644 --- a/drivers/infiniband/hw/mlx5/umr.c +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -147,7 +147,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) * UMR qp is set once, never changed until device unload. * Avoid taking the mutex if initialization is already done. */ - if (dev->umrc.qp) + if (smp_load_acquire(&dev->umrc.qp)) return 0; mutex_lock(&dev->umrc.init_lock); @@ -185,7 +185,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) sema_init(&dev->umrc.sem, MAX_UMR_WR); mutex_init(&dev->umrc.lock); dev->umrc.state = MLX5_UMR_STATE_ACTIVE; - dev->umrc.qp = qp; + smp_store_release(&dev->umrc.qp, qp); mutex_unlock(&dev->umrc.init_lock); return 0; -- cgit v1.2.3 From 6dd2d4ad9c8429523b1c220c5132bd551c006425 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:37 -0300 Subject: RDMA/mana: Validate rx_hash_key_len Sashiko points out that rx_hash_key_len comes from a uAPI structure and is blindly passed to memcpy, allowing the userspace to trash kernel memory. Bounds check it so the memcpy cannot overflow. Cc: stable@vger.kernel.org Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1 Link: https://patch.msgid.link/r/4-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Reviewed-by: Long Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mana/qp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 645581359cee..f7bb0d1f0f80 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -21,6 +21,9 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, gc = mdev_to_gc(dev); + if (rx_hash_key_len > sizeof(req->hashkey)) + return -EINVAL; + req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE); req = kzalloc(req_buf_size, GFP_KERNEL); if (!req) -- cgit v1.2.3 From 159f2efabc89d3f931d38f2d35876535d4abf0a3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:38 -0300 Subject: RDMA/mana: Remove user triggerable WARN_ON() in mana_ib_create_qp_rss() Sashiko points out that the user can specify WQs sharing the same CQ as a part of the uAPI and this will trigger the WARN_ON() then go on to corrupt the kernel. Just reject it outright and fail the QP creation. Cc: stable@vger.kernel.org Fixes: c15d7802a424 ("RDMA/mana_ib: Add CQ interrupt support for RAW QP") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1 Link: https://patch.msgid.link/r/5-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Reviewed-by: Long Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mana/cq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c index f4cbe21763bf..2d682428ef20 100644 --- a/drivers/infiniband/hw/mana/cq.c +++ b/drivers/infiniband/hw/mana/cq.c @@ -137,8 +137,9 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq) if (cq->queue.id >= gc->max_num_cqs) return -EINVAL; - /* Create CQ table entry */ - WARN_ON(gc->cq_table[cq->queue.id]); + /* Create CQ table entry, sharing a CQ between WQs is not supported */ + if (gc->cq_table[cq->queue.id]) + return -EINVAL; if (cq->queue.kmem) gdma_cq = cq->queue.kmem; else -- cgit v1.2.3 From 34ecf795692ee57c393109f4a24ccc313091e137 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:39 -0300 Subject: RDMA/mana: Fix mana_destroy_wq_obj() cleanup in mana_ib_create_qp_rss() Sashiko points out there are two bugs here in the error unwind flow, both related to how the WQ table is unwound. First there is a double i-- on the first failure path due to the while loop having a i--, remove it. Second if mana_ib_install_cq_cb() fails then mana_create_wq_obj() is not undone due to the above i--. Cc: stable@vger.kernel.org Fixes: c15d7802a424 ("RDMA/mana_ib: Add CQ interrupt support for RAW QP") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1 Link: https://patch.msgid.link/r/6-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Reviewed-by: Long Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mana/qp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index f7bb0d1f0f80..8e1f052d0ec9 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -176,11 +176,8 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ, &wq_spec, &cq_spec, &wq->rx_object); - if (ret) { - /* Do cleanup starting with index i-1 */ - i--; + if (ret) goto fail; - } /* The GDMA regions are now owned by the WQ object */ wq->queue.gdma_region = GDMA_INVALID_DMA_REGION; @@ -200,8 +197,10 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, /* Create CQ table entry */ ret = mana_ib_install_cq_cb(mdev, cq); - if (ret) + if (ret) { + mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object); goto fail; + } } resp.num_entries = i; -- cgit v1.2.3 From 6aaa978c6b6218cfac15fe1dab17c76fe229ce3f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:40 -0300 Subject: RDMA/mana: Fix error unwind in mana_ib_create_qp_rss() Sashiko points out that mana_ib_cfg_vport_steering() is leaked, the normal destroy path cleans it up. Cc: stable@vger.kernel.org Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4 Link: https://patch.msgid.link/r/7-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Reviewed-by: Long Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mana/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 8e1f052d0ec9..0fbcf449c134 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -217,13 +217,15 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata create rss-qp, %d\n", ret); - goto fail; + goto err_disable_vport_rx; } kfree(mana_ind_table); return 0; +err_disable_vport_rx: + mana_disable_vport_rx(mpc); fail: while (i-- > 0) { ibwq = ind_tbl->ind_tbl[i]; -- cgit v1.2.3 From ea4e4b168a531c522d2850719816b6f583b1738b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:41 -0300 Subject: RDMA/ocrdma: Clarify the mm_head searching The intention of this code is to find matching entries exactly, the driver never creates phys_addr's with different lens so the current expression is not a bug, but it doesn't make sense and confuses review tooling. Search for exact match instead. Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4 Link: https://patch.msgid.link/r/8-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index c17e2a54dbca..463c9a5703fc 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -215,7 +215,7 @@ static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, mutex_lock(&uctx->mm_list_lock); list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { - if (len != mm->key.len && phy_addr != mm->key.phy_addr) + if (len != mm->key.len || phy_addr != mm->key.phy_addr) continue; list_del(&mm->entry); @@ -233,7 +233,7 @@ static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, mutex_lock(&uctx->mm_list_lock); list_for_each_entry(mm, &uctx->mm_head, entry) { - if (len != mm->key.len && phy_addr != mm->key.phy_addr) + if (len != mm->key.len || phy_addr != mm->key.phy_addr) continue; found = true; -- cgit v1.2.3 From 34fbf48cf3b410d2a6e8c586fa952a36331ca5ba Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:42 -0300 Subject: RDMA/ocrdma: Don't NULL deref uctx on errors in ocrdma_copy_pd_uresp() Sashiko points out that pd->uctx isn't initialized until late in the function so all these error flow references are NULL and will crash. Use the uctx that isn't NULL. Cc: stable@vger.kernel.org Fixes: fe2caefcdf58 ("RDMA/ocrdma: Add driver for Emulex OneConnect IBoE RDMA adapter") Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4 Link: https://patch.msgid.link/r/9-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 463c9a5703fc..a88cc5d84af8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -620,9 +620,9 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, ucopy_err: if (pd->dpp_enabled) - ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE); + ocrdma_del_mmap(uctx, dpp_page_addr, PAGE_SIZE); dpp_map_err: - ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size); + ocrdma_del_mmap(uctx, db_page_addr, db_page_size); return status; } -- cgit v1.2.3 From e38e86995df27f1f854063dab1f0c6a513db3faf Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:43 -0300 Subject: RDMA/vmw_pvrdma: Fix double free on pvrdma_alloc_ucontext() error path Sashiko points out that pvrdma_uar_free() is already called within pvrdma_dealloc_ucontext(), so calling it before triggers a double free. Cc: stable@vger.kernel.org Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4 Link: https://patch.msgid.link/r/10-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index bcd43dc30e21..c7c2b41060e5 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -322,7 +322,7 @@ int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) uresp.qp_tab_size = vdev->dsr->caps.max_qp; ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (ret) { - pvrdma_uar_free(vdev, &context->uar); + /* pvrdma_dealloc_ucontext() also frees the UAR */ pvrdma_dealloc_ucontext(&context->ibucontext); return -EFAULT; } -- cgit v1.2.3 From c54c7e4cb679c0aaa1cb489b9c3f2cd98e63a44c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:44 -0300 Subject: RDMA/mlx4: Fix resource leak on error in mlx4_ib_create_srq() Sashiko points out that mlx4_srq_alloc() was not undone during error unwind, add the missing call to mlx4_srq_free(). Cc: stable@vger.kernel.org Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=8 Link: https://patch.msgid.link/r/11-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/srq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 5b23e5f8b84a..767840736d58 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -194,13 +194,15 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, if (udata) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { err = -EFAULT; - goto err_wrid; + goto err_srq; } init_attr->attr.max_wr = srq->msrq.max - 1; return 0; +err_srq: + mlx4_srq_free(dev->dev, &srq->msrq); err_wrid: if (udata) mlx4_ib_db_unmap_user(ucontext, &srq->db); -- cgit v1.2.3 From c9341307ea16b9395c2e4c9c94d8499d91fe31d0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:45 -0300 Subject: RDMA/mlx4: Fix mis-use of RCU in mlx4_srq_event() Sashiko points out the radix_tree itself is RCU safe, but nothing ever frees the mlx4_srq struct with RCU, and it isn't even accessed within the RCU critical section. It also will crash if an event is delivered before the srq object is finished initializing. Use the spinlock since it isn't easy to make RCU work, use refcount_inc_not_zero() to protect against partially initialized objects, and order the refcount_set() to be after the srq is fully initialized. Cc: stable@vger.kernel.org Fixes: 30353bfc43a1 ("net/mlx4_core: Use RCU to perform radix tree lookup for SRQ") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=5 Link: https://patch.msgid.link/r/12-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/net/ethernet/mellanox/mlx4/srq.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index dd890f5d7b72..8711689120f3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -44,13 +44,14 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; + unsigned long flags; - rcu_read_lock(); + spin_lock_irqsave(&srq_table->lock, flags); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); - rcu_read_unlock(); - if (srq) - refcount_inc(&srq->refcount); - else { + if (!srq || !refcount_inc_not_zero(&srq->refcount)) + srq = NULL; + spin_unlock_irqrestore(&srq_table->lock, flags); + if (!srq) { mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); return; } @@ -203,8 +204,8 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd, if (err) goto err_radix; - refcount_set(&srq->refcount, 1); init_completion(&srq->free); + refcount_set_release(&srq->refcount, 1); return 0; -- cgit v1.2.3 From 48973c6c938737bb900d15dc82b91dfe3586cb0f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:46 -0300 Subject: RDMA/hns: Fix xarray race in hns_roce_create_srq() Sashiko points out that once the srq memory is stored into the xarray by alloc_srqc() it can immediately be looked up by: xa_lock(&srq_table->xa); srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1)); if (srq) refcount_inc(&srq->refcount); xa_unlock(&srq_table->xa); Which will fail refcount debug because the refcount is 0 and then crash: srq->event(srq, event_type); Because event is NULL. Use refcount_inc_not_zero() instead to ensure a partially prepared srq is never retrieved from the event handler and fix the ordering of the initialization so refcount becomes 1 only after it is fully ready. All the initialization must be done before calling free_srqc() since it depends on the completion and refcount. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=3 Link: https://patch.msgid.link/r/13-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Reviewed-by: Junxian Huang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_srq.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index cb848e8e6bbd..8b94cbdfa54d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -16,8 +16,8 @@ void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type) xa_lock(&srq_table->xa); srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1)); - if (srq) - refcount_inc(&srq->refcount); + if (srq && !refcount_inc_not_zero(&srq->refcount)) + srq = NULL; xa_unlock(&srq_table->xa); if (!srq) { @@ -470,6 +470,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, if (ret) goto err_srqn; + srq->event = hns_roce_ib_srq_event; + init_completion(&srq->free); + refcount_set_release(&srq->refcount, 1); + if (udata) { resp.cap_flags = srq->cap_flags; resp.srqn = srq->srqn; @@ -480,10 +484,6 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, } } - srq->event = hns_roce_ib_srq_event; - refcount_set(&srq->refcount, 1); - init_completion(&srq->free); - return 0; err_srqc: -- cgit v1.2.3 From 7d51783d82fea000a9ce96fa1dcf3e0a8cedc4fb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:47 -0300 Subject: RDMA/hns: Fix xarray race in hns_roce_create_qp_common() Similar to the SRQ case the hr_qp is stored in the xarray before it is fully initialized. Unlike the SRQ case the error unwinds do not wait for the completion so keep the refcount 0 until the function succeeds. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Link: https://patch.msgid.link/r/14-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Suggested-by: Junxian Huang Reviewed-by: Junxian Huang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_qp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index a27ea85bb063..f94ba98871f0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -47,8 +47,8 @@ static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, xa_lock_irqsave(&hr_dev->qp_table_xa, flags); qp = __hns_roce_qp_lookup(hr_dev, qpn); - if (qp) - refcount_inc(&qp->refcount); + if (qp && !refcount_inc_not_zero(&qp->refcount)) + qp = NULL; xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags); if (!qp) @@ -1251,8 +1251,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp->ibqp.qp_num = hr_qp->qpn; hr_qp->event = hns_roce_ib_qp_event; - refcount_set(&hr_qp->refcount, 1); init_completion(&hr_qp->free); + refcount_set_release(&hr_qp->refcount, 1); return 0; -- cgit v1.2.3 From 0c99acbc8b6c6dd526ae475a48ee1897b61072fb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 28 Apr 2026 13:17:48 -0300 Subject: RDMA/hns: Fix unlocked call to hns_roce_qp_remove() Sashiko points out that hns_roce_qp_remove() requires the caller to hold locks. The error flow in hns_roce_create_qp_common() doesn't hold those locks for the error unwind so it risks corrupting memory. Grab the same locks the other two callers use. Cc: stable@vger.kernel.org Fixes: e088a685eae9 ("RDMA/hns: Support rq record doorbell for the user space") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=9 Link: https://patch.msgid.link/r/15-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com Reviewed-by: Junxian Huang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_qp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index f94ba98871f0..bf04ee84a943 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1171,6 +1171,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct hns_roce_ib_create_qp_resp resp = {}; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_ib_create_qp ucmd = {}; + unsigned long flags; int ret; mutex_init(&hr_qp->mutex); @@ -1257,7 +1258,13 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, return 0; err_flow_ctrl: + spin_lock_irqsave(&hr_dev->qp_list_lock, flags); + hns_roce_lock_cqs(init_attr->send_cq ? to_hr_cq(init_attr->send_cq) : NULL, + init_attr->recv_cq ? to_hr_cq(init_attr->recv_cq) : NULL); hns_roce_qp_remove(hr_dev, hr_qp); + hns_roce_unlock_cqs(init_attr->send_cq ? to_hr_cq(init_attr->send_cq) : NULL, + init_attr->recv_cq ? to_hr_cq(init_attr->recv_cq) : NULL); + spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags); err_store: free_qpc(hr_dev, hr_qp); err_qpc: -- cgit v1.2.3 From 0799e5943611006b346b8813c7daf7dd5aa26bfd Mon Sep 17 00:00:00 2001 From: Lyes Bourennani Date: Wed, 22 Apr 2026 00:20:22 +0200 Subject: batman-adv: fix integer overflow on buff_pos Fixing an integer overflow present in batadv_iv_ogm_send_to_if. The size check is done using the int type in batadv_iv_ogm_aggr_packet whereas the buff_pos variable uses the s16 type. This could lead to an out-of-bound read. Cc: stable@vger.kernel.org Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Lyes Bourennani Signed-off-by: Alexis Pinson Signed-off-by: Sven Eckelmann --- net/batman-adv/bat_iv_ogm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index f28e9cbf8ad5..618d1889c04e 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -335,7 +335,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); const char *fwd_str; u8 packet_num; - s16 buff_pos; + int buff_pos; struct batadv_ogm_packet *batadv_ogm_packet; struct sk_buff *skb; u8 *packet_pos; -- cgit v1.2.3 From 3243543592425beec83d453793e9d27caa0d8e66 Mon Sep 17 00:00:00 2001 From: Jiexun Wang Date: Mon, 27 Apr 2026 14:43:33 +0800 Subject: batman-adv: reject new tp_meter sessions during teardown Prevent tp_meter from starting new sender or receiver sessions after mesh_state has left BATADV_MESH_ACTIVE. Fixes: 33a3bb4a3345 ("batman-adv: throughput meter implementation") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Co-developed-by: Luxing Yin Signed-off-by: Luxing Yin Signed-off-by: Jiexun Wang Signed-off-by: Ren Wei Signed-off-by: Sven Eckelmann --- net/batman-adv/tp_meter.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 2e42f6b348c8..d9a80e459c2e 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -947,6 +947,13 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, /* look for an already existing test towards this node */ spin_lock_bh(&bat_priv->tp_list_lock); + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) { + spin_unlock_bh(&bat_priv->tp_list_lock); + batadv_tp_batctl_error_notify(BATADV_TP_REASON_DST_UNREACHABLE, + dst, bat_priv, session_cookie); + return; + } + tp_vars = batadv_tp_list_find(bat_priv, dst); if (tp_vars) { spin_unlock_bh(&bat_priv->tp_list_lock); @@ -1329,9 +1336,12 @@ static struct batadv_tp_vars * batadv_tp_init_recv(struct batadv_priv *bat_priv, const struct batadv_icmp_tp_packet *icmp) { - struct batadv_tp_vars *tp_vars; + struct batadv_tp_vars *tp_vars = NULL; spin_lock_bh(&bat_priv->tp_list_lock); + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + goto out_unlock; + tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, icmp->session); if (tp_vars) @@ -1464,6 +1474,9 @@ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb) { struct batadv_icmp_tp_packet *icmp; + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + goto out; + icmp = (struct batadv_icmp_tp_packet *)skb->data; switch (icmp->subtype) { @@ -1478,6 +1491,8 @@ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb) "Received unknown TP Metric packet type %u\n", icmp->subtype); } + +out: consume_skb(skb); } -- cgit v1.2.3 From 3d3cf6a7314aca4df0a6dde28ce784a2a30d0166 Mon Sep 17 00:00:00 2001 From: Jiexun Wang Date: Mon, 27 Apr 2026 14:43:34 +0800 Subject: batman-adv: stop tp_meter sessions during mesh teardown TP meter sessions remain linked on bat_priv->tp_list after the netlink request has already finished. When the mesh interface is removed, batadv_mesh_free() currently tears down the mesh without first draining these sessions. A running sender thread or a late incoming tp_meter packet can then keep processing against a mesh instance which is already shutting down. Synchronize tp_meter with the mesh lifetime by stopping all active sessions from batadv_mesh_free() and waiting for sender threads to exit before teardown continues. Fixes: 33a3bb4a3345 ("batman-adv: throughput meter implementation") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Co-developed-by: Luxing Yin Signed-off-by: Luxing Yin Signed-off-by: Jiexun Wang Signed-off-by: Ren Wei Signed-off-by: Sven Eckelmann --- net/batman-adv/main.c | 1 + net/batman-adv/tp_meter.c | 94 ++++++++++++++++++++++++++++++++++++++--------- net/batman-adv/tp_meter.h | 1 + net/batman-adv/types.h | 4 ++ 4 files changed, 82 insertions(+), 18 deletions(-) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 3a35aadd8b41..a4d33ee0fda5 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -249,6 +249,7 @@ void batadv_mesh_free(struct net_device *mesh_iface) atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); batadv_purge_outstanding_packets(bat_priv, NULL); + batadv_tp_stop_all(bat_priv); batadv_gw_node_free(bat_priv); diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index d9a80e459c2e..58ca59a2799e 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -365,23 +366,38 @@ static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars) } /** - * batadv_tp_sender_cleanup() - cleanup sender data and drop and timer - * @bat_priv: the bat priv with all the mesh interface information - * @tp_vars: the private data of the current TP meter session to cleanup + * batadv_tp_list_detach() - remove tp session from mesh session list once + * @tp_vars: the private data of the current TP meter session */ -static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv, - struct batadv_tp_vars *tp_vars) +static void batadv_tp_list_detach(struct batadv_tp_vars *tp_vars) { - cancel_delayed_work(&tp_vars->finish_work); + bool detached = false; spin_lock_bh(&tp_vars->bat_priv->tp_list_lock); - hlist_del_rcu(&tp_vars->list); + if (!hlist_unhashed(&tp_vars->list)) { + hlist_del_init_rcu(&tp_vars->list); + detached = true; + } spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock); + if (!detached) + return; + + atomic_dec(&tp_vars->bat_priv->tp_num); + /* drop list reference */ batadv_tp_vars_put(tp_vars); +} - atomic_dec(&tp_vars->bat_priv->tp_num); +/** + * batadv_tp_sender_cleanup() - cleanup sender data and drop and timer + * @tp_vars: the private data of the current TP meter session to cleanup + */ +static void batadv_tp_sender_cleanup(struct batadv_tp_vars *tp_vars) +{ + cancel_delayed_work_sync(&tp_vars->finish_work); + + batadv_tp_list_detach(tp_vars); /* kill the timer and remove its reference */ timer_delete_sync(&tp_vars->timer); @@ -886,7 +902,8 @@ out: batadv_orig_node_put(orig_node); batadv_tp_sender_end(bat_priv, tp_vars); - batadv_tp_sender_cleanup(bat_priv, tp_vars); + batadv_tp_sender_cleanup(tp_vars); + complete(&tp_vars->finished); batadv_tp_vars_put(tp_vars); @@ -918,7 +935,8 @@ static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars) batadv_tp_vars_put(tp_vars); /* cleanup of failed tp meter variables */ - batadv_tp_sender_cleanup(bat_priv, tp_vars); + batadv_tp_sender_cleanup(tp_vars); + complete(&tp_vars->finished); return; } @@ -1024,6 +1042,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, tp_vars->start_time = jiffies; init_waitqueue_head(&tp_vars->more_bytes); + init_completion(&tp_vars->finished); spin_lock_init(&tp_vars->unacked_lock); INIT_LIST_HEAD(&tp_vars->unacked_list); @@ -1126,14 +1145,7 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t) "Shutting down for inactivity (more than %dms) from %pM\n", BATADV_TP_RECV_TIMEOUT, tp_vars->other_end); - spin_lock_bh(&tp_vars->bat_priv->tp_list_lock); - hlist_del_rcu(&tp_vars->list); - spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock); - - /* drop list reference */ - batadv_tp_vars_put(tp_vars); - - atomic_dec(&bat_priv->tp_num); + batadv_tp_list_detach(tp_vars); spin_lock_bh(&tp_vars->unacked_lock); list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) { @@ -1496,6 +1508,52 @@ out: consume_skb(skb); } +/** + * batadv_tp_stop_all() - stop all currently running tp meter sessions + * @bat_priv: the bat priv with all the mesh interface information + */ +void batadv_tp_stop_all(struct batadv_priv *bat_priv) +{ + struct batadv_tp_vars *tp_vars[BATADV_TP_MAX_NUM]; + struct batadv_tp_vars *tp_var; + size_t count = 0; + size_t i; + + spin_lock_bh(&bat_priv->tp_list_lock); + hlist_for_each_entry(tp_var, &bat_priv->tp_list, list) { + if (WARN_ON_ONCE(count >= BATADV_TP_MAX_NUM)) + break; + + if (!kref_get_unless_zero(&tp_var->refcount)) + continue; + + tp_vars[count++] = tp_var; + } + spin_unlock_bh(&bat_priv->tp_list_lock); + + for (i = 0; i < count; i++) { + tp_var = tp_vars[i]; + + switch (tp_var->role) { + case BATADV_TP_SENDER: + batadv_tp_sender_shutdown(tp_var, + BATADV_TP_REASON_CANCEL); + wake_up(&tp_var->more_bytes); + wait_for_completion(&tp_var->finished); + break; + case BATADV_TP_RECEIVER: + batadv_tp_list_detach(tp_var); + if (timer_shutdown_sync(&tp_var->timer)) + batadv_tp_vars_put(tp_var); + break; + } + + batadv_tp_vars_put(tp_var); + } + + synchronize_net(); +} + /** * batadv_tp_meter_init() - initialize global tp_meter structures */ diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h index f0046d366eac..4e97cd10cd02 100644 --- a/net/batman-adv/tp_meter.h +++ b/net/batman-adv/tp_meter.h @@ -17,6 +17,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, u32 test_length, u32 *cookie); void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst, u8 return_value); +void batadv_tp_stop_all(struct batadv_priv *bat_priv); void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb); #endif /* _NET_BATMAN_ADV_TP_METER_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 8fc5fe0e9b05..daa06f421154 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -1328,6 +1329,9 @@ struct batadv_tp_vars { /** @finish_work: work item for the finishing procedure */ struct delayed_work finish_work; + /** @finished: completion signaled when a sender thread exits */ + struct completion finished; + /** @test_length: test length in milliseconds */ u32 test_length; -- cgit v1.2.3 From 046111a1a35a1720748f254377d3d1663664ea61 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Apr 2026 06:16:09 +0000 Subject: net/sched: sch_cake: annotate data-races in cake_dump_class_stats (I) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cake_dump_class_stats() runs without qdisc spinlock being held. In this first patch, I add READ_ONCE()/WRITE_ONCE() annotations for: - flow->head - flow->dropped - b->backlogs[] Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: Eric Dumazet Acked-by: Toke Høiland-Jørgensen Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260430061610.3503483-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 13c6d1869a14..806eb73d6a05 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -914,7 +914,7 @@ static struct sk_buff *dequeue_head(struct cake_flow *flow) struct sk_buff *skb = flow->head; if (skb) { - flow->head = skb->next; + WRITE_ONCE(flow->head, skb->next); skb_mark_not_on_list(skb); } @@ -926,7 +926,7 @@ static struct sk_buff *dequeue_head(struct cake_flow *flow) static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb) { if (!flow->head) - flow->head = skb; + WRITE_ONCE(flow->head, skb); else flow->tail->next = skb; flow->tail = skb; @@ -1357,7 +1357,7 @@ found: if (elig_ack_prev) elig_ack_prev->next = elig_ack->next; else - flow->head = elig_ack->next; + WRITE_ONCE(flow->head, elig_ack->next); skb_mark_not_on_list(elig_ack); @@ -1595,11 +1595,11 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) len = qdisc_pkt_len(skb); q->buffer_used -= skb->truesize; - b->backlogs[idx] -= len; WRITE_ONCE(b->tin_backlog, b->tin_backlog - len); + WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] - len); sch->qstats.backlog -= len; - flow->dropped++; + WRITE_ONCE(flow->dropped, flow->dropped + 1); WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1); if (q->config->rate_flags & CAKE_FLAG_INGRESS) @@ -1824,11 +1824,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, } /* stats */ - b->backlogs[idx] += slen; sch->qstats.backlog += slen; q->avg_window_bytes += slen; WRITE_ONCE(b->bytes, b->bytes + slen); WRITE_ONCE(b->tin_backlog, b->tin_backlog + slen); + WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + slen); qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen); consume_skb(skb); @@ -1861,11 +1861,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* stats */ WRITE_ONCE(b->packets, b->packets + 1); - b->backlogs[idx] += len - ack_pkt_len; sch->qstats.backlog += len - ack_pkt_len; q->avg_window_bytes += len - ack_pkt_len; WRITE_ONCE(b->bytes, b->bytes + len - ack_pkt_len); WRITE_ONCE(b->tin_backlog, b->tin_backlog + len - ack_pkt_len); + WRITE_ONCE(b->backlogs[idx], b->backlogs[idx] + len - ack_pkt_len); } if (q->overflow_timeout) @@ -1977,7 +1977,7 @@ static struct sk_buff *cake_dequeue_one(struct Qdisc *sch) if (flow->head) { skb = dequeue_head(flow); len = qdisc_pkt_len(skb); - b->backlogs[q->cur_flow] -= len; + WRITE_ONCE(b->backlogs[q->cur_flow], b->backlogs[q->cur_flow] - len); WRITE_ONCE(b->tin_backlog, b->tin_backlog - len); sch->qstats.backlog -= len; q->buffer_used -= skb->truesize; @@ -2235,7 +2235,7 @@ retry: flow->deficit -= len; b->tin_deficit -= len; } - flow->dropped++; + WRITE_ONCE(flow->dropped, flow->dropped + 1); WRITE_ONCE(b->tin_dropped, b->tin_dropped + 1); qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_qstats_drop(sch); @@ -3137,7 +3137,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, flow = &b->flows[idx % CAKE_QUEUES]; - if (flow->head) { + if (READ_ONCE(flow->head)) { sch_tree_lock(sch); skb = flow->head; while (skb) { @@ -3146,8 +3146,8 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, } sch_tree_unlock(sch); } - qs.backlog = b->backlogs[idx % CAKE_QUEUES]; - qs.drops = flow->dropped; + qs.backlog = READ_ONCE(b->backlogs[idx % CAKE_QUEUES]); + qs.drops = READ_ONCE(flow->dropped); } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; -- cgit v1.2.3 From 67dc6c56b871617deac85b9f72500b69b1fdf835 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Apr 2026 06:16:10 +0000 Subject: net/sched: sch_cake: annotate data-races in cake_dump_class_stats (II) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cake_dump_class_stats() runs without qdisc spinlock being held. In this second patch, I add READ_ONCE()/WRITE_ONCE() annotations for: - flow->deficit - flow->cvars.dropping - flow->cvars.count - flow->cvars.p_drop - flow->cvars.blue_timer - flow->cvars.drop_next Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: Eric Dumazet Acked-by: Toke Høiland-Jørgensen Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260430061610.3503483-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 131 ++++++++++++++++++++++++++++----------------------- 1 file changed, 71 insertions(+), 60 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 806eb73d6a05..5862933be8d7 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -399,14 +399,14 @@ static void cake_configure_rates(struct Qdisc *sch, u64 rate, bool rate_adjust); * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 */ -static void cobalt_newton_step(struct cobalt_vars *vars) +static void cobalt_newton_step(struct cobalt_vars *vars, u32 count) { u32 invsqrt, invsqrt2; u64 val; invsqrt = vars->rec_inv_sqrt; invsqrt2 = ((u64)invsqrt * invsqrt) >> 32; - val = (3LL << 32) - ((u64)vars->count * invsqrt2); + val = (3LL << 32) - ((u64)count * invsqrt2); val >>= 2; /* avoid overflow in following multiply */ val = (val * invsqrt) >> (32 - 2 + 1); @@ -414,12 +414,12 @@ static void cobalt_newton_step(struct cobalt_vars *vars) vars->rec_inv_sqrt = val; } -static void cobalt_invsqrt(struct cobalt_vars *vars) +static void cobalt_invsqrt(struct cobalt_vars *vars, u32 count) { - if (vars->count < REC_INV_SQRT_CACHE) - vars->rec_inv_sqrt = inv_sqrt_cache[vars->count]; + if (count < REC_INV_SQRT_CACHE) + vars->rec_inv_sqrt = inv_sqrt_cache[count]; else - cobalt_newton_step(vars); + cobalt_newton_step(vars, count); } static void cobalt_vars_init(struct cobalt_vars *vars) @@ -449,16 +449,19 @@ static bool cobalt_queue_full(struct cobalt_vars *vars, bool up = false; if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) { - up = !vars->p_drop; - vars->p_drop += p->p_inc; - if (vars->p_drop < p->p_inc) - vars->p_drop = ~0; - vars->blue_timer = now; - } - vars->dropping = true; - vars->drop_next = now; + u32 p_drop = vars->p_drop; + + up = !p_drop; + p_drop += p->p_inc; + if (p_drop < p->p_inc) + p_drop = ~0; + WRITE_ONCE(vars->p_drop, p_drop); + WRITE_ONCE(vars->blue_timer, now); + } + WRITE_ONCE(vars->dropping, true); + WRITE_ONCE(vars->drop_next, now); if (!vars->count) - vars->count = 1; + WRITE_ONCE(vars->count, 1); return up; } @@ -475,20 +478,20 @@ static bool cobalt_queue_empty(struct cobalt_vars *vars, if (vars->p_drop && ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) { if (vars->p_drop < p->p_dec) - vars->p_drop = 0; + WRITE_ONCE(vars->p_drop, 0); else - vars->p_drop -= p->p_dec; - vars->blue_timer = now; + WRITE_ONCE(vars->p_drop, vars->p_drop - p->p_dec); + WRITE_ONCE(vars->blue_timer, now); down = !vars->p_drop; } - vars->dropping = false; + WRITE_ONCE(vars->dropping, false); if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) { - vars->count--; - cobalt_invsqrt(vars); - vars->drop_next = cobalt_control(vars->drop_next, - p->interval, - vars->rec_inv_sqrt); + WRITE_ONCE(vars->count, vars->count - 1); + cobalt_invsqrt(vars, vars->count); + WRITE_ONCE(vars->drop_next, + cobalt_control(vars->drop_next, p->interval, + vars->rec_inv_sqrt)); } return down; @@ -507,6 +510,7 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, bool next_due, over_target; ktime_t schedule; u64 sojourn; + u32 count; /* The 'schedule' variable records, in its sign, whether 'now' is before or * after 'drop_next'. This allows 'drop_next' to be updated before the next @@ -528,21 +532,22 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, over_target = sojourn > p->target && sojourn > p->mtu_time * bulk_flows * 2 && sojourn > p->mtu_time * 4; - next_due = vars->count && ktime_to_ns(schedule) >= 0; + count = vars->count; + next_due = count && ktime_to_ns(schedule) >= 0; vars->ecn_marked = false; if (over_target) { if (!vars->dropping) { - vars->dropping = true; - vars->drop_next = cobalt_control(now, - p->interval, - vars->rec_inv_sqrt); + WRITE_ONCE(vars->dropping, true); + WRITE_ONCE(vars->drop_next, + cobalt_control(now, p->interval, + vars->rec_inv_sqrt)); } - if (!vars->count) - vars->count = 1; + if (!count) + count = 1; } else if (vars->dropping) { - vars->dropping = false; + WRITE_ONCE(vars->dropping, false); } if (next_due && vars->dropping) { @@ -550,23 +555,23 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, if (!(vars->ecn_marked = INET_ECN_set_ce(skb))) reason = QDISC_DROP_CONGESTED; - vars->count++; - if (!vars->count) - vars->count--; - cobalt_invsqrt(vars); - vars->drop_next = cobalt_control(vars->drop_next, - p->interval, - vars->rec_inv_sqrt); + count++; + if (!count) + count--; + cobalt_invsqrt(vars, count); + WRITE_ONCE(vars->drop_next, + cobalt_control(vars->drop_next, p->interval, + vars->rec_inv_sqrt)); schedule = ktime_sub(now, vars->drop_next); } else { while (next_due) { - vars->count--; - cobalt_invsqrt(vars); - vars->drop_next = cobalt_control(vars->drop_next, - p->interval, - vars->rec_inv_sqrt); + count--; + cobalt_invsqrt(vars, count); + WRITE_ONCE(vars->drop_next, + cobalt_control(vars->drop_next, p->interval, + vars->rec_inv_sqrt)); schedule = ktime_sub(now, vars->drop_next); - next_due = vars->count && ktime_to_ns(schedule) >= 0; + next_due = count && ktime_to_ns(schedule) >= 0; } } @@ -575,11 +580,12 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, get_random_u32() < vars->p_drop) reason = QDISC_DROP_FLOOD_PROTECTION; + WRITE_ONCE(vars->count, count); /* Overload the drop_next field as an activity timeout */ - if (!vars->count) - vars->drop_next = ktime_add_ns(now, p->interval); + if (!count) + WRITE_ONCE(vars->drop_next, ktime_add_ns(now, p->interval)); else if (ktime_to_ns(schedule) > 0 && reason == QDISC_DROP_UNSPEC) - vars->drop_next = now; + WRITE_ONCE(vars->drop_next, now); return reason; } @@ -1924,7 +1930,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, flow->set = CAKE_SET_SPARSE; WRITE_ONCE(b->sparse_flow_count, b->sparse_flow_count + 1); - flow->deficit = cake_get_flow_quantum(b, flow, q->config->flow_mode); + WRITE_ONCE(flow->deficit, cake_get_flow_quantum(b, flow, q->config->flow_mode)); } else if (flow->set == CAKE_SET_SPARSE_WAIT) { /* this flow was empty, accounted as a sparse flow, but actually * in the bulk rotation. @@ -2166,7 +2172,8 @@ retry: } } - flow->deficit += cake_get_flow_quantum(b, flow, q->config->flow_mode); + WRITE_ONCE(flow->deficit, + flow->deficit + cake_get_flow_quantum(b, flow, q->config->flow_mode)); list_move_tail(&flow->flowchain, &b->old_flows); goto retry; @@ -2232,7 +2239,7 @@ retry: if (q->config->rate_flags & CAKE_FLAG_INGRESS) { len = cake_advance_shaper(q, b, skb, now, true); - flow->deficit -= len; + WRITE_ONCE(flow->deficit, flow->deficit - len); b->tin_deficit -= len; } WRITE_ONCE(flow->dropped, flow->dropped + 1); @@ -2259,7 +2266,7 @@ retry: delay < b->base_delay ? 2 : 8)); len = cake_advance_shaper(q, b, skb, now, false); - flow->deficit -= len; + WRITE_ONCE(flow->deficit, flow->deficit - len); b->tin_deficit -= len; if (ktime_after(q->time_next_packet, now) && sch->q.qlen) { @@ -3153,6 +3160,8 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, return -1; if (flow) { ktime_t now = ktime_get(); + bool dropping; + u32 p_drop; stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP); if (!stats) @@ -3167,21 +3176,23 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, goto nla_put_failure; \ } while (0) - PUT_STAT_S32(DEFICIT, flow->deficit); - PUT_STAT_U32(DROPPING, flow->cvars.dropping); - PUT_STAT_U32(COBALT_COUNT, flow->cvars.count); - PUT_STAT_U32(P_DROP, flow->cvars.p_drop); - if (flow->cvars.p_drop) { + PUT_STAT_S32(DEFICIT, READ_ONCE(flow->deficit)); + dropping = READ_ONCE(flow->cvars.dropping); + PUT_STAT_U32(DROPPING, dropping); + PUT_STAT_U32(COBALT_COUNT, READ_ONCE(flow->cvars.count)); + p_drop = READ_ONCE(flow->cvars.p_drop); + PUT_STAT_U32(P_DROP, p_drop); + if (p_drop) { PUT_STAT_S32(BLUE_TIMER_US, ktime_to_us( ktime_sub(now, - flow->cvars.blue_timer))); + READ_ONCE(flow->cvars.blue_timer)))); } - if (flow->cvars.dropping) { + if (dropping) { PUT_STAT_S32(DROP_NEXT_US, ktime_to_us( ktime_sub(now, - flow->cvars.drop_next))); + READ_ONCE(flow->cvars.drop_next)))); } if (nla_nest_end(d->skb, stats) < 0) -- cgit v1.2.3 From 7e7be31bfdb066c1c780dcd6b1224078fc54063f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Apr 2026 15:29:38 -0700 Subject: net: tls: fix silent data drop under pipe back-pressure tls_sw_splice_read() uses len when advancing rxm->offset / rxm->full_len after skb_splice_bits(), rather than copied (the actual number of bytes successfully spliced into the pipe). When the destination pipe cannot accept all the requested bytes, splice_to_pipe() returns fewer bytes than len, and 'len - copied' of data is effectively skipped over. Fixes: e062fe99cccd ("tls: splice_read: fix accessing pre-processed records") Link: https://patch.msgid.link/20260429222944.2139041-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 798243eabb1f..2590e855f6a5 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2317,9 +2317,9 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, if (copied < 0) goto splice_requeue; - if (chunk < rxm->full_len) { - rxm->offset += len; - rxm->full_len -= len; + if (copied < rxm->full_len) { + rxm->offset += copied; + rxm->full_len -= copied; goto splice_requeue; } -- cgit v1.2.3 From bd3a4795d5744f59a1f485379f1303e5e606f377 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Apr 2026 15:29:39 -0700 Subject: selftests: tls: add test for data loss on small pipe Add selftest for data loss on short splice. Link: https://patch.msgid.link/20260429222944.2139041-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 43 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 9e2ccea13d70..30a236b8e9f7 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -946,6 +946,49 @@ TEST_F(tls, peek_and_splice) EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } +TEST_F(tls, splice_to_pipe_small) +{ + int send_len = TLS_PAYLOAD_MAX_LEN; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + size_t total = 0; + int p[2]; + + memrnd(mem_send, sizeof(mem_send)); + + ASSERT_GE(pipe(p), 0); + + /* Shrink pipe to 1 page (typically 4096 bytes) to force multiple + * splice iterations for a 16384-byte TLS record. + */ + EXPECT_GE(fcntl(p[1], F_SETPIPE_SZ, 4096), 4096); + + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + while (total < (size_t)send_len) { + ssize_t spliced, drained; + + spliced = splice(self->cfd, NULL, p[1], NULL, + send_len - total, 0); + EXPECT_GT(spliced, 0); + if (spliced <= 0) + break; + + drained = read(p[0], mem_recv + total, spliced); + EXPECT_EQ(drained, spliced); + if (drained <= 0) + break; + + total += drained; + } + + EXPECT_EQ(total, (size_t)send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); + + close(p[0]); + close(p[1]); +} + #define MAX_FRAGS 48 TEST_F(tls, splice_short) { -- cgit v1.2.3 From d3894e4e09085bc6450aae6e3d30d13f1b1c8691 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 1 May 2026 21:10:38 +0900 Subject: ntfs: fix variable dereferenced before check ni and attr in ntfs_attrlist_entry_add() Smatch warnings: ntfs_attrlist_entry_add() warn: variable dereferenced before check 'ni' ntfs_attrlist_entry_add() warn: variable dereferenced before check 'attr' Moves the ntfs_debug() call after the NULL pointer checks to ensure safe access to the structure members. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon --- fs/ntfs/attrlist.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ntfs/attrlist.c b/fs/ntfs/attrlist.c index bd501e8a628c..c2594d4c83b0 100644 --- a/fs/ntfs/attrlist.c +++ b/fs/ntfs/attrlist.c @@ -119,15 +119,14 @@ int ntfs_attrlist_entry_add(struct ntfs_inode *ni, struct attr_record *attr) struct mft_record *ni_mrec; u8 *old_al; - ntfs_debug("Entering for inode 0x%llx, attr 0x%x.\n", - (long long) ni->mft_no, - (unsigned int) le32_to_cpu(attr->type)); - if (!ni || !attr) { ntfs_debug("Invalid arguments.\n"); return -EINVAL; } + ntfs_debug("Entering for inode 0x%llx, attr 0x%x.\n", + ni->mft_no, (unsigned int) le32_to_cpu(attr->type)); + ni_mrec = map_mft_record(ni); if (IS_ERR(ni_mrec)) { ntfs_debug("Invalid arguments.\n"); -- cgit v1.2.3 From d9eeb0ea0d2de658663bfaa9c26eccdd8fd64440 Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Mon, 13 Apr 2026 21:46:04 +0800 Subject: counter: Fix refcount leak in counter_alloc() error path After device_initialize(), the lifetime of the embedded struct device is expected to be managed through the device core reference counting. In counter_alloc(), if dev_set_name() fails after device_initialize(), the error path removes the chrdev, frees the ID, and frees the backing allocation directly instead of releasing the device reference with put_device(). This bypasses the normal device lifetime rules and may leave the reference count of the embedded struct device unbalanced, resulting in a refcount leak. The issue was identified by a static analysis tool I developed and confirmed by manual review. Fix this by using put_device() in the dev_set_name() failure path and let counter_device_release() handle the final cleanup. Fixes: 4da08477ea1f ("counter: Set counter device name") Cc: stable@vger.kernel.org Signed-off-by: Guangshuo Li Link: https://lore.kernel.org/r/20260413134604.2861772-1-lgs201920130244@gmail.com Signed-off-by: William Breathitt Gray --- drivers/counter/counter-core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/counter/counter-core.c b/drivers/counter/counter-core.c index 50bd30ba3d03..0b1dac61b7b5 100644 --- a/drivers/counter/counter-core.c +++ b/drivers/counter/counter-core.c @@ -124,7 +124,8 @@ struct counter_device *counter_alloc(size_t sizeof_priv) err_dev_set_name: - counter_chrdev_remove(counter); + put_device(dev); + return NULL; err_chrdev_add: ida_free(&counter_ida, dev->id); -- cgit v1.2.3 From 0a69ac25bd596d50823d530d0a2004336668c0df Mon Sep 17 00:00:00 2001 From: Eliot Courtney Date: Fri, 1 May 2026 19:49:37 +0900 Subject: rust: drm: fix unsound initialization in drm::Device::new If pinned initialization of drm::Device::Data fails, it calls drm::Device::release via drm_dev_put. This materializes a reference to &drm::Device, but it's not fully constructed yet, because initializing `data` failed. It should not be dropped either. Instead, if pinned initialization fails, make sure drm::Device::release isn't called. Fixes: 2e9fdbe5ec7a ("rust: drm: device: drop_in_place() the drm::Device in release()") Signed-off-by: Eliot Courtney Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260501-fix-drm-1-v2-1-5c4f681837bc@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/drm/device.rs | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index adbafe8db54d..403fc35353c7 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -119,13 +119,20 @@ impl Device { // compatible `Layout`. let layout = Kmalloc::aligned_layout(Layout::new::()); + // Use a temporary vtable without a `release` callback until `data` is initialized, so + // init failure can release the DRM device without dropping uninitialized fields. + let alloc_vtable = bindings::drm_driver { + release: None, + ..Self::VTABLE + }; + // SAFETY: - // - `VTABLE`, as a `const` is pinned to the read-only section of the compilation, + // - `alloc_vtable` reference remains valid until no longer used, // - `dev` is valid by its type invarants, let raw_drm: *mut Self = unsafe { bindings::__drm_dev_alloc( dev.as_raw(), - &Self::VTABLE, + &alloc_vtable, layout.size(), mem::offset_of!(Self, dev), ) @@ -133,6 +140,10 @@ impl Device { .cast(); let raw_drm = NonNull::new(from_err_ptr(raw_drm)?).ok_or(ENOMEM)?; + // SAFETY: `raw_drm` is a valid pointer to `Self`, given that `__drm_dev_alloc` was + // successful. + let drm_dev = unsafe { Self::into_drm_device(raw_drm) }; + // SAFETY: `raw_drm` is a valid pointer to `Self`. let raw_data = unsafe { ptr::addr_of_mut!((*raw_drm.as_ptr()).data) }; @@ -140,15 +151,14 @@ impl Device { // - `raw_data` is a valid pointer to uninitialized memory. // - `raw_data` will not move until it is dropped. unsafe { data.__pinned_init(raw_data) }.inspect_err(|_| { - // SAFETY: `raw_drm` is a valid pointer to `Self`, given that `__drm_dev_alloc` was - // successful. - let drm_dev = unsafe { Self::into_drm_device(raw_drm) }; - // SAFETY: `__drm_dev_alloc()` was successful, hence `drm_dev` must be valid and the // refcount must be non-zero. unsafe { bindings::drm_dev_put(drm_dev) }; })?; + // SAFETY: `drm_dev` is still private to this function. + unsafe { (*drm_dev).driver = const { &Self::VTABLE } }; + // SAFETY: The reference count is one, and now we take ownership of that reference as a // `drm::Device`. Ok(unsafe { ARef::from_raw(raw_drm) }) -- cgit v1.2.3 From b0aa5e4b087b686575f1b31ce54048b4d059b7b8 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Fri, 17 Apr 2026 13:32:07 +0300 Subject: sh: Fix fallout from ZERO_PAGE consolidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidation of empty_zero_page declarations broke boot on sh. sh stores its initial boot parameters in a page reserved in arch/sh/kernel/head_32.S. Before commit 6215d9f4470f ("arch, mm: consolidate empty_zero_page") this page was referenced in C code as an array and after that commit it is referenced as a pointer. This causes wrong code generation and boot hang. Declare boot_params_page as an array to fix the issue. Reported-by: Thomas Weißschuh Tested-by: Thomas Weißschuh Fixes: 6215d9f4470f ("arch, mm: consolidate empty_zero_page") Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: John Paul Adrian Glaubitz Tested-by: Geert Uytterhoeven Tested-by: Artur Rojek Signed-off-by: John Paul Adrian Glaubitz --- arch/sh/include/asm/setup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/include/asm/setup.h b/arch/sh/include/asm/setup.h index 63c9efc06348..8488f76b48b4 100644 --- a/arch/sh/include/asm/setup.h +++ b/arch/sh/include/asm/setup.h @@ -7,7 +7,7 @@ /* * This is set up by the setup-routine at boot-time */ -extern unsigned char *boot_params_page; +extern unsigned char boot_params_page[]; #define PARAM boot_params_page #define MOUNT_ROOT_RDONLY (*(unsigned long *) (PARAM+0x000)) -- cgit v1.2.3 From fd672888cccd6b855154efe0ac78e7ce3e8ab088 Mon Sep 17 00:00:00 2001 From: Yongxing Mou Date: Mon, 27 Apr 2026 14:35:19 +0800 Subject: phy: qcom: edp: Unify generic DP/eDP swing and pre-emphasis tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current eDP and DP swing/pre-emphasis tables do not match the HPG requirements for the supported platforms, correct the table accordingly. The generic tables which can be shared as follows: DP mode: -sa8775p/sc7280/sc8280xp/x1e80100 -glymur -sc8180x eDP mode(low vdiff): -glymur/sa8775p/sc8280xp/x1e80100 -sc7280 -sc8180x The proper tables for SC8180X and SC7280 will be added in a later patch, since they need separate table. Cc: stable@vger.kernel.org Fixes: f199223cb490 ("phy: qcom: Introduce new eDP PHY driver") Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Signed-off-by: Yongxing Mou Link: https://patch.msgid.link/20260427-edp_phy-v5-1-3bb876824475@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-edp.c | 41 +++++++++---------------------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-edp.c b/drivers/phy/qualcomm/phy-qcom-edp.c index 7372de05a0b8..2af3fd63832f 100644 --- a/drivers/phy/qualcomm/phy-qcom-edp.c +++ b/drivers/phy/qualcomm/phy-qcom-edp.c @@ -116,17 +116,17 @@ struct qcom_edp { }; static const u8 dp_swing_hbr_rbr[4][4] = { - { 0x08, 0x0f, 0x16, 0x1f }, + { 0x07, 0x0f, 0x16, 0x1f }, { 0x11, 0x1e, 0x1f, 0xff }, { 0x16, 0x1f, 0xff, 0xff }, { 0x1f, 0xff, 0xff, 0xff } }; static const u8 dp_pre_emp_hbr_rbr[4][4] = { - { 0x00, 0x0d, 0x14, 0x1a }, + { 0x00, 0x0e, 0x15, 0x1a }, { 0x00, 0x0e, 0x15, 0xff }, { 0x00, 0x0e, 0xff, 0xff }, - { 0x03, 0xff, 0xff, 0xff } + { 0x04, 0xff, 0xff, 0xff } }; static const u8 dp_swing_hbr2_hbr3[4][4] = { @@ -158,7 +158,7 @@ static const u8 edp_swing_hbr_rbr[4][4] = { }; static const u8 edp_pre_emp_hbr_rbr[4][4] = { - { 0x05, 0x12, 0x17, 0x1d }, + { 0x05, 0x11, 0x17, 0x1d }, { 0x05, 0x11, 0x18, 0xff }, { 0x06, 0x11, 0xff, 0xff }, { 0x00, 0xff, 0xff, 0xff } @@ -172,10 +172,10 @@ static const u8 edp_swing_hbr2_hbr3[4][4] = { }; static const u8 edp_pre_emp_hbr2_hbr3[4][4] = { - { 0x08, 0x11, 0x17, 0x1b }, - { 0x00, 0x0c, 0x13, 0xff }, - { 0x05, 0x10, 0xff, 0xff }, - { 0x00, 0xff, 0xff, 0xff } + { 0x0c, 0x15, 0x19, 0x1e }, + { 0x0b, 0x15, 0x19, 0xff }, + { 0x0e, 0x14, 0xff, 0xff }, + { 0x0d, 0xff, 0xff, 0xff } }; static const struct qcom_edp_swing_pre_emph_cfg edp_phy_swing_pre_emph_cfg = { @@ -193,27 +193,6 @@ static const u8 edp_phy_vco_div_cfg_v4[4] = { 0x01, 0x01, 0x02, 0x00, }; -static const u8 edp_pre_emp_hbr_rbr_v5[4][4] = { - { 0x05, 0x11, 0x17, 0x1d }, - { 0x05, 0x11, 0x18, 0xff }, - { 0x06, 0x11, 0xff, 0xff }, - { 0x00, 0xff, 0xff, 0xff } -}; - -static const u8 edp_pre_emp_hbr2_hbr3_v5[4][4] = { - { 0x0c, 0x15, 0x19, 0x1e }, - { 0x0b, 0x15, 0x19, 0xff }, - { 0x0e, 0x14, 0xff, 0xff }, - { 0x0d, 0xff, 0xff, 0xff } -}; - -static const struct qcom_edp_swing_pre_emph_cfg edp_phy_swing_pre_emph_cfg_v5 = { - .swing_hbr_rbr = &edp_swing_hbr_rbr, - .swing_hbr3_hbr2 = &edp_swing_hbr2_hbr3, - .pre_emphasis_hbr_rbr = &edp_pre_emp_hbr_rbr_v5, - .pre_emphasis_hbr3_hbr2 = &edp_pre_emp_hbr2_hbr3_v5, -}; - static const u8 edp_phy_aux_cfg_v5[DP_AUX_CFG_SIZE] = { 0x00, 0x13, 0xa4, 0x00, 0x0a, 0x26, 0x0a, 0x03, 0x37, 0x03, 0x02, 0x02, 0x00, }; @@ -564,7 +543,7 @@ static const struct qcom_edp_phy_cfg sa8775p_dp_phy_cfg = { .is_edp = false, .aux_cfg = edp_phy_aux_cfg_v5, .vco_div_cfg = edp_phy_vco_div_cfg_v4, - .swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg_v5, + .swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg, .ver_ops = &qcom_edp_phy_ops_v4, }; @@ -945,7 +924,7 @@ static const struct phy_ver_ops qcom_edp_phy_ops_v8 = { static struct qcom_edp_phy_cfg glymur_phy_cfg = { .aux_cfg = edp_phy_aux_cfg_v8, .vco_div_cfg = edp_phy_vco_div_cfg_v8, - .swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg_v5, + .swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg, .ver_ops = &qcom_edp_phy_ops_v8, }; -- cgit v1.2.3 From 3011c365a329cf2db6d55e8d684550dc88350436 Mon Sep 17 00:00:00 2001 From: Yongxing Mou Date: Mon, 27 Apr 2026 14:35:20 +0800 Subject: phy: qcom: edp: Add eDP/DP mode switch support The eDP PHY supports both eDP/DP modes, each requiring a different swing/pre-emphasis table. However, the driver currently uses a fixed static table for eDP programming rather than selecting the appropriate table based on the current mode. Add separate tables for eDP and DP modes, and select the appropriate table dynamically based on the current mode. Glymur's DP mode table differs from the other platforms, add a dedicated table for it. This also fixes the table mismatch for X1E80100 (eDP) and SA8775P (DP). Cc: stable@vger.kernel.org Fixes: 3f12bf16213c ("phy: qcom: edp: Add support for eDP PHY on SA8775P") Reviewed-by: Konrad Dybcio Signed-off-by: Yongxing Mou Link: https://patch.msgid.link/20260427-edp_phy-v5-2-3bb876824475@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-edp.c | 46 +++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-edp.c b/drivers/phy/qualcomm/phy-qcom-edp.c index 2af3fd63832f..3266026cfe37 100644 --- a/drivers/phy/qualcomm/phy-qcom-edp.c +++ b/drivers/phy/qualcomm/phy-qcom-edp.c @@ -87,7 +87,8 @@ struct qcom_edp_phy_cfg { bool is_edp; const u8 *aux_cfg; const u8 *vco_div_cfg; - const struct qcom_edp_swing_pre_emph_cfg *swing_pre_emph_cfg; + const struct qcom_edp_swing_pre_emph_cfg *dp_swing_pre_emph_cfg; + const struct qcom_edp_swing_pre_emph_cfg *edp_swing_pre_emph_cfg; const struct phy_ver_ops *ver_ops; }; @@ -150,6 +151,20 @@ static const struct qcom_edp_swing_pre_emph_cfg dp_phy_swing_pre_emph_cfg = { .pre_emphasis_hbr3_hbr2 = &dp_pre_emp_hbr2_hbr3, }; +static const u8 dp_pre_emp_hbr_rbr_v8[4][4] = { + { 0x00, 0x0e, 0x15, 0x1a }, + { 0x00, 0x0e, 0x15, 0xff }, + { 0x00, 0x0e, 0xff, 0xff }, + { 0x00, 0xff, 0xff, 0xff } +}; + +static const struct qcom_edp_swing_pre_emph_cfg dp_phy_swing_pre_emph_cfg_v8 = { + .swing_hbr_rbr = &dp_swing_hbr_rbr, + .swing_hbr3_hbr2 = &dp_swing_hbr2_hbr3, + .pre_emphasis_hbr_rbr = &dp_pre_emp_hbr_rbr_v8, + .pre_emphasis_hbr3_hbr2 = &dp_pre_emp_hbr2_hbr3, +}; + static const u8 edp_swing_hbr_rbr[4][4] = { { 0x07, 0x0f, 0x16, 0x1f }, { 0x0d, 0x16, 0x1e, 0xff }, @@ -246,7 +261,7 @@ static int qcom_edp_phy_init(struct phy *phy) * when more information becomes available about why this is * even needed. */ - if (edp->cfg->swing_pre_emph_cfg && !edp->is_edp) + if (edp->cfg->dp_swing_pre_emph_cfg && !edp->is_edp) aux_cfg[8] = 0xb7; writel(0xfc, edp->edp + DP_PHY_MODE); @@ -270,7 +285,7 @@ out_disable_supplies: static int qcom_edp_set_voltages(struct qcom_edp *edp, const struct phy_configure_opts_dp *dp_opts) { - const struct qcom_edp_swing_pre_emph_cfg *cfg = edp->cfg->swing_pre_emph_cfg; + const struct qcom_edp_swing_pre_emph_cfg *cfg; unsigned int v_level = 0; unsigned int p_level = 0; u8 ldo_config; @@ -278,12 +293,14 @@ static int qcom_edp_set_voltages(struct qcom_edp *edp, const struct phy_configur u8 emph; int i; + if (edp->is_edp) + cfg = edp->cfg->edp_swing_pre_emph_cfg; + else + cfg = edp->cfg->dp_swing_pre_emph_cfg; + if (!cfg) return 0; - if (edp->is_edp) - cfg = &edp_phy_swing_pre_emph_cfg; - for (i = 0; i < dp_opts->lanes; i++) { v_level = max(v_level, dp_opts->voltage[i]); p_level = max(p_level, dp_opts->pre[i]); @@ -543,7 +560,8 @@ static const struct qcom_edp_phy_cfg sa8775p_dp_phy_cfg = { .is_edp = false, .aux_cfg = edp_phy_aux_cfg_v5, .vco_div_cfg = edp_phy_vco_div_cfg_v4, - .swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg, + .dp_swing_pre_emph_cfg = &dp_phy_swing_pre_emph_cfg, + .edp_swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg, .ver_ops = &qcom_edp_phy_ops_v4, }; @@ -556,7 +574,8 @@ static const struct qcom_edp_phy_cfg sc7280_dp_phy_cfg = { static const struct qcom_edp_phy_cfg sc8280xp_dp_phy_cfg = { .aux_cfg = edp_phy_aux_cfg_v4, .vco_div_cfg = edp_phy_vco_div_cfg_v4, - .swing_pre_emph_cfg = &dp_phy_swing_pre_emph_cfg, + .dp_swing_pre_emph_cfg = &dp_phy_swing_pre_emph_cfg, + .edp_swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg, .ver_ops = &qcom_edp_phy_ops_v4, }; @@ -564,7 +583,8 @@ static const struct qcom_edp_phy_cfg sc8280xp_edp_phy_cfg = { .is_edp = true, .aux_cfg = edp_phy_aux_cfg_v4, .vco_div_cfg = edp_phy_vco_div_cfg_v4, - .swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg, + .dp_swing_pre_emph_cfg = &dp_phy_swing_pre_emph_cfg, + .edp_swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg, .ver_ops = &qcom_edp_phy_ops_v4, }; @@ -745,7 +765,8 @@ static const struct phy_ver_ops qcom_edp_phy_ops_v6 = { static struct qcom_edp_phy_cfg x1e80100_phy_cfg = { .aux_cfg = edp_phy_aux_cfg_v4, .vco_div_cfg = edp_phy_vco_div_cfg_v4, - .swing_pre_emph_cfg = &dp_phy_swing_pre_emph_cfg, + .dp_swing_pre_emph_cfg = &dp_phy_swing_pre_emph_cfg, + .edp_swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg, .ver_ops = &qcom_edp_phy_ops_v6, }; @@ -924,7 +945,8 @@ static const struct phy_ver_ops qcom_edp_phy_ops_v8 = { static struct qcom_edp_phy_cfg glymur_phy_cfg = { .aux_cfg = edp_phy_aux_cfg_v8, .vco_div_cfg = edp_phy_vco_div_cfg_v8, - .swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg, + .dp_swing_pre_emph_cfg = &dp_phy_swing_pre_emph_cfg_v8, + .edp_swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg, .ver_ops = &qcom_edp_phy_ops_v8, }; @@ -942,7 +964,7 @@ static int qcom_edp_phy_power_on(struct phy *phy) if (ret) return ret; - if (edp->cfg->swing_pre_emph_cfg && !edp->is_edp) + if (edp->cfg->edp_swing_pre_emph_cfg && !edp->is_edp) ldo_config = 0x1; writel(ldo_config, edp->tx0 + TXn_LDO_CONFIG); -- cgit v1.2.3 From 3d22594d6f842814b7718600486fe3ce9453abf0 Mon Sep 17 00:00:00 2001 From: Yongxing Mou Date: Mon, 27 Apr 2026 14:35:21 +0800 Subject: phy: qcom: edp: Add SC7280/SC8180X swing/pre-emphasis tables SC7280 and SC8180X previously shared the same cfg because they did not use swing/pre-emphasis tables. Add the corresponding tables for these platforms. Since they have different PHY sub-versions, their eDP/DP mode tables also differ, so move SC8180X to its own cfg instead of reusing the SC7280 one. Signed-off-by: Yongxing Mou Reviewed-by: Konrad Dybcio Link: https://patch.msgid.link/20260427-edp_phy-v5-3-3bb876824475@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-edp.c | 84 +++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 4 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-edp.c b/drivers/phy/qualcomm/phy-qcom-edp.c index 3266026cfe37..3e613b374032 100644 --- a/drivers/phy/qualcomm/phy-qcom-edp.c +++ b/drivers/phy/qualcomm/phy-qcom-edp.c @@ -165,6 +165,33 @@ static const struct qcom_edp_swing_pre_emph_cfg dp_phy_swing_pre_emph_cfg_v8 = { .pre_emphasis_hbr3_hbr2 = &dp_pre_emp_hbr2_hbr3, }; +static const u8 dp_swing_hbr2_hbr3_v2[4][4] = { + { 0x27, 0x2f, 0x36, 0xff }, + { 0x31, 0x3e, 0x3f, 0xff }, + { 0x3a, 0x3f, 0xff, 0xff }, + { 0xff, 0xff, 0xff, 0xff } +}; + +static const u8 dp_pre_emp_hbr2_hbr3_v2[4][4] = { + { 0x20, 0x2e, 0x35, 0xff }, + { 0x20, 0x2e, 0x35, 0xff }, + { 0x20, 0x2e, 0xff, 0xff }, + { 0xff, 0xff, 0xff, 0xff } +}; + +static const struct qcom_edp_swing_pre_emph_cfg dp_phy_swing_pre_emph_cfg_v2 = { + /* + * NOTE: The HPG does not specify a separate swing_hbr_rbr table. + * Reuse the HBR2/HBR3 table for now. + * + * TODO: Update this once the HPG explicitly defines RBR/HBR swing values. + */ + .swing_hbr_rbr = &dp_swing_hbr2_hbr3_v2, + .swing_hbr3_hbr2 = &dp_swing_hbr2_hbr3_v2, + .pre_emphasis_hbr_rbr = &dp_pre_emp_hbr2_hbr3_v2, + .pre_emphasis_hbr3_hbr2 = &dp_pre_emp_hbr2_hbr3_v2, +}; + static const u8 edp_swing_hbr_rbr[4][4] = { { 0x07, 0x0f, 0x16, 0x1f }, { 0x0d, 0x16, 0x1e, 0xff }, @@ -208,6 +235,48 @@ static const u8 edp_phy_vco_div_cfg_v4[4] = { 0x01, 0x01, 0x02, 0x00, }; +static const u8 edp_pre_emp_hbr_rbr_v2[4][4] = { + { 0x05, 0x12, 0x17, 0x1d }, + { 0x05, 0x11, 0x18, 0xff }, + { 0x06, 0x11, 0xff, 0xff }, + { 0x00, 0xff, 0xff, 0xff } +}; + +static const u8 edp_pre_emp_hbr2_hbr3_v2[4][4] = { + { 0x0c, 0x15, 0x19, 0x1e }, + { 0x08, 0x15, 0x19, 0xff }, + { 0x0e, 0x14, 0xff, 0xff }, + { 0x0d, 0xff, 0xff, 0xff } +}; + +static const struct qcom_edp_swing_pre_emph_cfg edp_phy_swing_pre_emph_cfg_v2 = { + .swing_hbr_rbr = &edp_swing_hbr_rbr, + .swing_hbr3_hbr2 = &edp_swing_hbr2_hbr3, + .pre_emphasis_hbr_rbr = &edp_pre_emp_hbr_rbr_v2, + .pre_emphasis_hbr3_hbr2 = &edp_pre_emp_hbr2_hbr3_v2, +}; + +static const u8 edp_swing_hbr2_hbr3_v3[4][4] = { + { 0x06, 0x11, 0x16, 0x1b }, + { 0x0b, 0x19, 0x1f, 0xff }, + { 0x18, 0x1f, 0xff, 0xff }, + { 0x1f, 0xff, 0xff, 0xff } +}; + +static const u8 edp_pre_emp_hbr2_hbr3_v3[4][4] = { + { 0x0c, 0x15, 0x19, 0x1e }, + { 0x09, 0x14, 0x19, 0xff }, + { 0x0f, 0x14, 0xff, 0xff }, + { 0x0d, 0xff, 0xff, 0xff } +}; + +static const struct qcom_edp_swing_pre_emph_cfg edp_phy_swing_pre_emph_cfg_v3 = { + .swing_hbr_rbr = &edp_swing_hbr_rbr, + .swing_hbr3_hbr2 = &edp_swing_hbr2_hbr3_v3, + .pre_emphasis_hbr_rbr = &edp_pre_emp_hbr_rbr, + .pre_emphasis_hbr3_hbr2 = &edp_pre_emp_hbr2_hbr3_v3, +}; + static const u8 edp_phy_aux_cfg_v5[DP_AUX_CFG_SIZE] = { 0x00, 0x13, 0xa4, 0x00, 0x0a, 0x26, 0x0a, 0x03, 0x37, 0x03, 0x02, 0x02, 0x00, }; @@ -298,9 +367,6 @@ static int qcom_edp_set_voltages(struct qcom_edp *edp, const struct phy_configur else cfg = edp->cfg->dp_swing_pre_emph_cfg; - if (!cfg) - return 0; - for (i = 0; i < dp_opts->lanes; i++) { v_level = max(v_level, dp_opts->voltage[i]); p_level = max(p_level, dp_opts->pre[i]); @@ -568,6 +634,16 @@ static const struct qcom_edp_phy_cfg sa8775p_dp_phy_cfg = { static const struct qcom_edp_phy_cfg sc7280_dp_phy_cfg = { .aux_cfg = edp_phy_aux_cfg_v4, .vco_div_cfg = edp_phy_vco_div_cfg_v4, + .dp_swing_pre_emph_cfg = &dp_phy_swing_pre_emph_cfg, + .edp_swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg_v3, + .ver_ops = &qcom_edp_phy_ops_v4, +}; + +static const struct qcom_edp_phy_cfg sc8180x_dp_phy_cfg = { + .aux_cfg = edp_phy_aux_cfg_v4, + .vco_div_cfg = edp_phy_vco_div_cfg_v4, + .dp_swing_pre_emph_cfg = &dp_phy_swing_pre_emph_cfg_v2, + .edp_swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg_v2, .ver_ops = &qcom_edp_phy_ops_v4, }; @@ -1348,7 +1424,7 @@ static const struct of_device_id qcom_edp_phy_match_table[] = { { .compatible = "qcom,glymur-dp-phy", .data = &glymur_phy_cfg, }, { .compatible = "qcom,sa8775p-edp-phy", .data = &sa8775p_dp_phy_cfg, }, { .compatible = "qcom,sc7280-edp-phy", .data = &sc7280_dp_phy_cfg, }, - { .compatible = "qcom,sc8180x-edp-phy", .data = &sc7280_dp_phy_cfg, }, + { .compatible = "qcom,sc8180x-edp-phy", .data = &sc8180x_dp_phy_cfg, }, { .compatible = "qcom,sc8280xp-dp-phy", .data = &sc8280xp_dp_phy_cfg, }, { .compatible = "qcom,sc8280xp-edp-phy", .data = &sc8280xp_edp_phy_cfg, }, { .compatible = "qcom,x1e80100-dp-phy", .data = &x1e80100_phy_cfg, }, -- cgit v1.2.3 From bf237a9fcbbf9d658522f7315ffc04bf2d49be42 Mon Sep 17 00:00:00 2001 From: Yongxing Mou Date: Mon, 27 Apr 2026 14:35:22 +0800 Subject: phy: qcom: edp: Fix AUX_CFG8 programming for DP mode AUX_CFG8 depends on whether the PHY is operating in eDP or DP mode, not the selected swing/pre-emphasis table. All supported platforms already have the proper tables, so remove the unnecessary check. Cc: stable@vger.kernel.org Fixes: 6078b8ce070c ("phy: qcom: edp: Add set_mode op for configuring eDP/DP submode") Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Signed-off-by: Yongxing Mou Link: https://patch.msgid.link/20260427-edp_phy-v5-4-3bb876824475@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-edp.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-edp.c b/drivers/phy/qualcomm/phy-qcom-edp.c index 3e613b374032..3a848f18a8d6 100644 --- a/drivers/phy/qualcomm/phy-qcom-edp.c +++ b/drivers/phy/qualcomm/phy-qcom-edp.c @@ -325,12 +325,7 @@ static int qcom_edp_phy_init(struct phy *phy) DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN, edp->edp + DP_PHY_PD_CTL); - /* - * TODO: Re-work the conditions around setting the cfg8 value - * when more information becomes available about why this is - * even needed. - */ - if (edp->cfg->dp_swing_pre_emph_cfg && !edp->is_edp) + if (!edp->is_edp) aux_cfg[8] = 0xb7; writel(0xfc, edp->edp + DP_PHY_MODE); -- cgit v1.2.3 From 519a228ee40d1be3453d1da339b4577c3785e333 Mon Sep 17 00:00:00 2001 From: Yongxing Mou Date: Mon, 27 Apr 2026 14:35:23 +0800 Subject: phy: qcom: edp: Add PHY-specific LDO config for eDP low vdiff For eDP low vdiff, the LDO setting depends on the PHY version rather than being a simple 0x0 or 0x1 value. Introduce a PHY callback to program the correct LDO setting according to the HPG. Since SC7280/SC8180X uses different LDO settings from SA8775P/SC8280XP, introduce qcom_edp_phy_ops_v3 to keep the LDO setting correct. Cc: stable@vger.kernel.org Fixes: f199223cb490 ("phy: qcom: Introduce new eDP PHY driver") Signed-off-by: Yongxing Mou Reviewed-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Tested-by: Konrad Dybcio # SC8280XP X13s Link: https://patch.msgid.link/20260427-edp_phy-v5-5-3bb876824475@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-edp.c | 88 ++++++++++++++++++++++++++++++++----- 1 file changed, 77 insertions(+), 11 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-edp.c b/drivers/phy/qualcomm/phy-qcom-edp.c index 3a848f18a8d6..a3c893f72908 100644 --- a/drivers/phy/qualcomm/phy-qcom-edp.c +++ b/drivers/phy/qualcomm/phy-qcom-edp.c @@ -81,6 +81,7 @@ struct phy_ver_ops { int (*com_clk_fwd_cfg)(const struct qcom_edp *edp); int (*com_configure_pll)(const struct qcom_edp *edp); int (*com_configure_ssc)(const struct qcom_edp *edp); + int (*com_ldo_config)(const struct qcom_edp *edp); }; struct qcom_edp_phy_cfg { @@ -352,7 +353,7 @@ static int qcom_edp_set_voltages(struct qcom_edp *edp, const struct phy_configur const struct qcom_edp_swing_pre_emph_cfg *cfg; unsigned int v_level = 0; unsigned int p_level = 0; - u8 ldo_config; + int ret; u8 swing; u8 emph; int i; @@ -378,13 +379,13 @@ static int qcom_edp_set_voltages(struct qcom_edp *edp, const struct phy_configur if (swing == 0xff || emph == 0xff) return -EINVAL; - ldo_config = edp->is_edp ? 0x0 : 0x1; + ret = edp->cfg->ver_ops->com_ldo_config(edp); + if (ret) + return ret; - writel(ldo_config, edp->tx0 + TXn_LDO_CONFIG); writel(swing, edp->tx0 + TXn_TX_DRV_LVL); writel(emph, edp->tx0 + TXn_TX_EMP_POST1_LVL); - writel(ldo_config, edp->tx1 + TXn_LDO_CONFIG); writel(swing, edp->tx1 + TXn_TX_DRV_LVL); writel(emph, edp->tx1 + TXn_TX_EMP_POST1_LVL); @@ -608,6 +609,52 @@ static int qcom_edp_com_configure_pll_v4(const struct qcom_edp *edp) return 0; } +static int qcom_edp_ldo_config_v3(const struct qcom_edp *edp) +{ + const struct phy_configure_opts_dp *dp_opts = &edp->dp_opts; + u32 ldo_config; + + if (!edp->is_edp) + ldo_config = 0x0; + else if (dp_opts->link_rate <= 2700) + ldo_config = 0x81; + else + ldo_config = 0x41; + + writel(ldo_config, edp->tx0 + TXn_LDO_CONFIG); + writel(dp_opts->lanes > 2 ? ldo_config : 0x00, edp->tx1 + TXn_LDO_CONFIG); + + return 0; +} + +static int qcom_edp_ldo_config_v4(const struct qcom_edp *edp) +{ + const struct phy_configure_opts_dp *dp_opts = &edp->dp_opts; + u32 ldo_config; + + if (!edp->is_edp) + ldo_config = 0x0; + else if (dp_opts->link_rate <= 2700) + ldo_config = 0xc1; + else + ldo_config = 0x81; + + writel(ldo_config, edp->tx0 + TXn_LDO_CONFIG); + writel(dp_opts->lanes > 2 ? ldo_config : 0x00, edp->tx1 + TXn_LDO_CONFIG); + + return 0; +} + +static const struct phy_ver_ops qcom_edp_phy_ops_v3 = { + .com_power_on = qcom_edp_phy_power_on_v4, + .com_resetsm_cntrl = qcom_edp_phy_com_resetsm_cntrl_v4, + .com_bias_en_clkbuflr = qcom_edp_com_bias_en_clkbuflr_v4, + .com_clk_fwd_cfg = qcom_edp_com_clk_fwd_cfg_v4, + .com_configure_pll = qcom_edp_com_configure_pll_v4, + .com_configure_ssc = qcom_edp_com_configure_ssc_v4, + .com_ldo_config = qcom_edp_ldo_config_v3, +}; + static const struct phy_ver_ops qcom_edp_phy_ops_v4 = { .com_power_on = qcom_edp_phy_power_on_v4, .com_resetsm_cntrl = qcom_edp_phy_com_resetsm_cntrl_v4, @@ -615,6 +662,7 @@ static const struct phy_ver_ops qcom_edp_phy_ops_v4 = { .com_clk_fwd_cfg = qcom_edp_com_clk_fwd_cfg_v4, .com_configure_pll = qcom_edp_com_configure_pll_v4, .com_configure_ssc = qcom_edp_com_configure_ssc_v4, + .com_ldo_config = qcom_edp_ldo_config_v4, }; static const struct qcom_edp_phy_cfg sa8775p_dp_phy_cfg = { @@ -631,7 +679,7 @@ static const struct qcom_edp_phy_cfg sc7280_dp_phy_cfg = { .vco_div_cfg = edp_phy_vco_div_cfg_v4, .dp_swing_pre_emph_cfg = &dp_phy_swing_pre_emph_cfg, .edp_swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg_v3, - .ver_ops = &qcom_edp_phy_ops_v4, + .ver_ops = &qcom_edp_phy_ops_v3, }; static const struct qcom_edp_phy_cfg sc8180x_dp_phy_cfg = { @@ -639,7 +687,7 @@ static const struct qcom_edp_phy_cfg sc8180x_dp_phy_cfg = { .vco_div_cfg = edp_phy_vco_div_cfg_v4, .dp_swing_pre_emph_cfg = &dp_phy_swing_pre_emph_cfg_v2, .edp_swing_pre_emph_cfg = &edp_phy_swing_pre_emph_cfg_v2, - .ver_ops = &qcom_edp_phy_ops_v4, + .ver_ops = &qcom_edp_phy_ops_v3, }; static const struct qcom_edp_phy_cfg sc8280xp_dp_phy_cfg = { @@ -824,6 +872,24 @@ static int qcom_edp_com_configure_pll_v6(const struct qcom_edp *edp) return 0; } +static int qcom_edp_ldo_config_v6(const struct qcom_edp *edp) +{ + const struct phy_configure_opts_dp *dp_opts = &edp->dp_opts; + u32 ldo_config; + + if (!edp->is_edp) + ldo_config = 0x0; + else if (dp_opts->link_rate <= 2700) + ldo_config = 0x51; + else + ldo_config = 0x91; + + writel(ldo_config, edp->tx0 + TXn_LDO_CONFIG); + writel(dp_opts->lanes > 2 ? ldo_config : 0x00, edp->tx1 + TXn_LDO_CONFIG); + + return 0; +} + static const struct phy_ver_ops qcom_edp_phy_ops_v6 = { .com_power_on = qcom_edp_phy_power_on_v6, .com_resetsm_cntrl = qcom_edp_phy_com_resetsm_cntrl_v6, @@ -831,6 +897,7 @@ static const struct phy_ver_ops qcom_edp_phy_ops_v6 = { .com_clk_fwd_cfg = qcom_edp_com_clk_fwd_cfg_v4, .com_configure_pll = qcom_edp_com_configure_pll_v6, .com_configure_ssc = qcom_edp_com_configure_ssc_v6, + .com_ldo_config = qcom_edp_ldo_config_v6, }; static struct qcom_edp_phy_cfg x1e80100_phy_cfg = { @@ -1011,6 +1078,7 @@ static const struct phy_ver_ops qcom_edp_phy_ops_v8 = { .com_clk_fwd_cfg = qcom_edp_com_clk_fwd_cfg_v8, .com_configure_pll = qcom_edp_com_configure_pll_v8, .com_configure_ssc = qcom_edp_com_configure_ssc_v8, + .com_ldo_config = qcom_edp_ldo_config_v6, }; static struct qcom_edp_phy_cfg glymur_phy_cfg = { @@ -1026,7 +1094,6 @@ static int qcom_edp_phy_power_on(struct phy *phy) const struct qcom_edp *edp = phy_get_drvdata(phy); u32 bias0_en, drvr0_en, bias1_en, drvr1_en; unsigned long pixel_freq; - u8 ldo_config = 0x0; int ret; u32 val; u8 cfg1; @@ -1035,11 +1102,10 @@ static int qcom_edp_phy_power_on(struct phy *phy) if (ret) return ret; - if (edp->cfg->edp_swing_pre_emph_cfg && !edp->is_edp) - ldo_config = 0x1; + ret = edp->cfg->ver_ops->com_ldo_config(edp); + if (ret) + return ret; - writel(ldo_config, edp->tx0 + TXn_LDO_CONFIG); - writel(ldo_config, edp->tx1 + TXn_LDO_CONFIG); writel(0x00, edp->tx0 + TXn_LANE_MODE_1); writel(0x00, edp->tx1 + TXn_LANE_MODE_1); -- cgit v1.2.3 From 464af6fc2b1dcc74005b7f58ee3812b17777efee Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 27 Apr 2026 14:25:40 +0200 Subject: KVM: x86: check for nEPT/nNPT in slow flush hypercalls Checking is_guest_mode(vcpu) is incorrect, because translate_nested_gpa() is only valid if an L2 guest is running *with nested EPT/NPT enabled*. Instead use the same condition as translate_nested_gpa() itself. Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Fixes: aee738236dca ("KVM: x86: Prepare kvm_hv_flush_tlb() to handle L2's GPAs", 2022-11-18) Link: https://patch.msgid.link/20260503200905.106077-1-pbonzini@redhat.com/ Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 9b140bbdc1d8..4438ecac9a89 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2040,7 +2040,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) * flush). Translate the address here so the memory can be uniformly * read with kvm_read_guest(). */ - if (!hc->fast && is_guest_mode(vcpu)) { + if (!hc->fast && mmu_is_nested(vcpu)) { hc->ingpa = translate_nested_gpa(vcpu, hc->ingpa, 0, NULL); if (unlikely(hc->ingpa == INVALID_GPA)) return HV_STATUS_INVALID_HYPERCALL_INPUT; -- cgit v1.2.3 From 33fd0ccd2590b470b65adcca288615ad3b5e3e06 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 3 May 2026 19:19:32 +0200 Subject: KVM: x86: Do IRR scan in __kvm_apic_update_irr even if PIR is empty Fall back to apic_find_highest_vector() when PID.ON is set but PIR turns out to be empty, to correctly report the highest pending interrupt from the existing IRR. In a nested VM stress test, the following WARNING fires in vmx_check_nested_events() when kvm_cpu_has_interrupt() reports a pending interrupt but the subsequent kvm_apic_has_interrupt() (which invokes vmx_sync_pir_to_irr() again) returns -1: WARNING: CPU: 99 PID: 57767 at arch/x86/kvm/vmx/nested.c:4449 vmx_check_nested_events+0x6bf/0x6e0 [kvm_intel] Call Trace: kvm_check_and_inject_events vcpu_enter_guest.constprop.0 vcpu_run kvm_arch_vcpu_ioctl_run kvm_vcpu_ioctl __x64_sys_ioctl do_syscall_64 entry_SYSCALL_64_after_hwframe The root cause is a race between vmx_sync_pir_to_irr() on the target vCPU and __vmx_deliver_posted_interrupt() on a sender vCPU. The sender performs two individually-atomic operations that are not a single transaction: 1. pi_test_and_set_pir(vector) -- sets the PIR bit 2. pi_test_and_set_on() -- sets PID.ON The following interleaving triggers the bug: Sender vCPU (IPI): Target vCPU (1st sync_pir_to_irr): B1: set PIR[vector] A1: pi_clear_on() A2: pi_harvest_pir() -> sees B1 bit A3: xchg() -> consumes bit, PIR=0 (1st sync returns correct max_irr) B2: set PID.ON = 1 Target vCPU (2nd sync_pir_to_irr): C1: pi_test_on() -> TRUE (from B2) C2: pi_clear_on() -> ON=0 C3: pi_harvest_pir() -> PIR empty C4: *max_irr = -1, early return IRR NOT SCANNED The interrupt is not lost (it resides in the IRR from the first sync and is recovered on the next vcpu_enter_guest() iteration), but the incorrect max_irr causes a spurious WARNING and a wasted L2 VM-Enter/VM-Exit cycle. Fixes: b41f8638b9d3 ("KVM: VMX: Isolate pure loads from atomic XCHG when processing PIR") Reported-by: Farrah Chen Analyzed-by: Chenyi Qiang Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/kvm/20260428070349.1633238-1-chenyi.qiang@intel.com/T/ Link: https://patch.msgid.link/20260503201703.108231-2-pbonzini@redhat.com/ Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e3ec4d8607c1..5ee14d6bc288 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -669,12 +669,14 @@ bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr) u32 irr_val, prev_irr_val; int max_updated_irr; + if (!pi_harvest_pir(pir, pir_vals)) { + *max_irr = apic_find_highest_vector(regs + APIC_IRR); + return false; + } + max_updated_irr = -1; *max_irr = -1; - if (!pi_harvest_pir(pir, pir_vals)) - return false; - for (i = vec = 0; i <= 7; i++, vec += 32) { u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10); -- cgit v1.2.3 From 0aec99f9bf0213f7910688472e1ccf517c0e8b5a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 28 Apr 2026 08:50:43 -0700 Subject: KVM: x86: Fix misleading variable names and add more comments for PIR=>IRR flow Rename kvm_apic_update_irr()'s "irr_updated" and vmx_sync_pir_to_irr()'s "got_posted_interrupt" to a more accurate "max_irr_is_from_pir", as neither "irr_updated" nor "got_posted_interrupt" is accurate. __kvm_apic_update_irr() and thus kvm_apic_update_irr() specifically return true if and only if the highest priority IRQ, i.e. max_irr, is a "new" pending IRQ from the PIR. I.e. it's possible for the IRR to be updated, i.e. for a posted IRQ to be "got", *without* the APIs returning true. Expand vmx_sync_pir_to_irr()'s comment to explain why it's necessary to set KVM_REQ_EVENT only if a "new" IRQ was found, and to explain why it's safe to do so only if a new IRQ is also the highest priority pending IRQ. No functional change intended. Signed-off-by: Sean Christopherson Link: https://patch.msgid.link/20260503201703.108231-3-pbonzini@redhat.com/ Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 16 ++++++++-------- arch/x86/kvm/vmx/vmx.c | 40 ++++++++++++++++++++++++++++++++-------- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5ee14d6bc288..4078e624ca66 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -667,14 +667,14 @@ bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr) u32 *__pir = (void *)pir_vals; u32 i, vec; u32 irr_val, prev_irr_val; - int max_updated_irr; + int max_new_irr; if (!pi_harvest_pir(pir, pir_vals)) { *max_irr = apic_find_highest_vector(regs + APIC_IRR); return false; } - max_updated_irr = -1; + max_new_irr = -1; *max_irr = -1; for (i = vec = 0; i <= 7; i++, vec += 32) { @@ -690,25 +690,25 @@ bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr) !try_cmpxchg(p_irr, &prev_irr_val, irr_val)); if (prev_irr_val != irr_val) - max_updated_irr = __fls(irr_val ^ prev_irr_val) + vec; + max_new_irr = __fls(irr_val ^ prev_irr_val) + vec; } if (irr_val) *max_irr = __fls(irr_val) + vec; } - return ((max_updated_irr != -1) && - (max_updated_irr == *max_irr)); + return max_new_irr != -1 && max_new_irr == *max_irr; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_apic_update_irr); bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr) { struct kvm_lapic *apic = vcpu->arch.apic; - bool irr_updated = __kvm_apic_update_irr(pir, apic->regs, max_irr); + bool max_irr_is_from_pir; - if (unlikely(!apic->apicv_active && irr_updated)) + max_irr_is_from_pir = __kvm_apic_update_irr(pir, apic->regs, max_irr); + if (unlikely(!apic->apicv_active && max_irr_is_from_pir)) apic->irr_pending = true; - return irr_updated; + return max_irr_is_from_pir; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_update_irr); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a29896a9ef14..5c2c33a5f7dc 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7029,8 +7029,8 @@ static void vmx_set_rvi(int vector) int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) { struct vcpu_vt *vt = to_vt(vcpu); + bool max_irr_is_from_pir; int max_irr; - bool got_posted_interrupt; if (KVM_BUG_ON(!enable_apicv, vcpu->kvm)) return -EIO; @@ -7042,17 +7042,22 @@ int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) * But on x86 this is just a compiler barrier anyway. */ smp_mb__after_atomic(); - got_posted_interrupt = - kvm_apic_update_irr(vcpu, vt->pi_desc.pir, &max_irr); + max_irr_is_from_pir = kvm_apic_update_irr(vcpu, vt->pi_desc.pir, + &max_irr); } else { max_irr = kvm_lapic_find_highest_irr(vcpu); - got_posted_interrupt = false; + max_irr_is_from_pir = false; } /* - * Newly recognized interrupts are injected via either virtual interrupt - * delivery (RVI) or KVM_REQ_EVENT. Virtual interrupt delivery is - * disabled in two cases: + * If APICv is enabled and L2 is not active, then update the Requesting + * Virtual Interrupt (RVI) portion of vmcs01.GUEST_INTR_STATUS with the + * highest priority IRR to deliver the IRQ via Virtual Interrupt + * Delivery. Note, this is required even if the highest priority IRQ + * was already pending in the IRR, as RVI isn't updated in lockstep with + * the IRR (unlike apic->irr_pending). + * + * For the cases where Virtual Interrupt Delivery can't be used: * * 1) If L2 is running and the vCPU has a new pending interrupt. If L1 * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a @@ -7063,10 +7068,29 @@ int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) * 2) If APICv is disabled for this vCPU, assigned devices may still * attempt to post interrupts. The posted interrupt vector will cause * a VM-Exit and the subsequent entry will call sync_pir_to_irr. + * + * In both cases, set KVM_REQ_EVENT if and only if the highest priority + * pending IRQ came from the PIR, as setting KVM_REQ_EVENT if any IRQ + * is pending may put the vCPU into an infinite loop, e.g. if the IRQ + * is blocked, then it will stay pending until an IRQ window is opened. + * + * Note! It's possible that one or more IRQs were moved from the PIR + * to the IRR _without_ max_irr_is_from_pir being true! I.e. if there + * was a higher priority IRQ already pending in the IRR. Not setting + * KVM_REQ_EVENT in this case is intentional and safe. If APICv is + * inactive, or L2 is running with exit-on-interrupt off (in vmcs12), + * i.e. without nested virtual interrupt delivery, then there's no need + * to request an IRQ window as the lower priority IRQ only needs to be + * delivered when the higher priority IRQ is dismissed from the ISR, + * i.e. on the next EOI, and EOIs are always intercepted if APICv is + * disabled or if L2 is running without nested VID. If L2 is running + * exit-on-interrupt on (in vmcs12), then the higher priority IRQ will + * trigger a nested VM-Exit, at which point KVM will re-evaluate L1's + * pending IRQs. */ if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu)) vmx_set_rvi(max_irr); - else if (got_posted_interrupt) + else if (max_irr_is_from_pir) kvm_make_request(KVM_REQ_EVENT, vcpu); return max_irr; -- cgit v1.2.3 From 0cb2af2ea66ad8ff195c156ea690f11216285bdf Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 15 Apr 2026 00:52:34 +0000 Subject: KVM: x86: Fix shadow paging use-after-free due to unexpected GFN The shadow MMU computes GFNs for direct shadow pages using sp->gfn plus the SPTE index. This assumption breaks for shadow paging if the guest page tables are modified between VM entries (similar to commit aad885e77496, "KVM: x86/mmu: Drop/zap existing present SPTE even when creating an MMIO SPTE", 2026-03-27). The flow is as follows: - a PDE is installed for a 2MB mapping, and a page in that area is accessed. KVM creates a kvm_mmu_page consisting of 512 4KB pages; the kvm_mmu_page is marked by FNAME(fetch) as direct-mapped because the guest's mapping is a huge page (and thus contiguous). - the PDE mapping is changed from outside the guest. - the guest accesses another page in the same 2MB area. KVM installs a new leaf SPTE and rmap entry; the SPTE uses the "correct" GFN (i.e. based on the new mapping, as changed in the previous step) but that GFN is outside of the [sp->gfn, sp->gfn + 511] range; therefore the rmap entry cannot be found and removed when the kvm_mmu_page is zapped. - the memslot that covers the first 2MB mapping is deleted, and the kvm_mmu_page for the now-invalid GPA is zapped. However, rmap_remove() only looks at the [sp->gfn, sp->gfn + 511] range established in step 1, and fails to find the rmap entry that was recorded by step 3. - any operation that causes an rmap walk for the same page accessed by step 3 then walks a stale rmap and dereferences a freed kvm_mmu_page. This includes dirty logging or MMU notifier invalidations (e.g., from MADV_DONTNEED). The underlying issue is that KVM's walking of shadow PTEs assumes that if a SPTE is present when KVM wants to install a non-leaf SPTE, then the existing kvm_mmu_page must be for the correct gfn. Because the only way for the gfn to be wrong is if KVM messed up and failed to zap a SPTE... which shouldn't happen, but *actually* only happens in response to a guest write. That bug dates back literally forever, as even the first version of KVM assumes that the GFN matches and walks into the "wrong" shadow page. However, that was only an imprecision until 2032a93d66fa ("KVM: MMU: Don't allocate gfns page for direct mmu pages") came along. Fix it by checking for a target gfn mismatch and zapping the existing SPTE. That way the old SP and rmap entries are gone, KVM installs the rmap in the right location, and everyone is happy. Fixes: 2032a93d66fa ("KVM: MMU: Don't allocate gfns page for direct mmu pages") Fixes: 6aa8b732ca01 ("kvm: userspace interface") Reported-by: Alexander Bulekov Reported-by: Fred Griffoul Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Link: https://patch.msgid.link/20260503201029.106481-1-pbonzini@redhat.com/ Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 24fbc9ea502a..892246204435 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -182,6 +182,8 @@ static struct kmem_cache *pte_list_desc_cache; struct kmem_cache *mmu_page_header_cache; static void mmu_spte_set(u64 *sptep, u64 spte); +static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, + u64 *spte, struct list_head *invalid_list); struct kvm_mmu_role_regs { const unsigned long cr0; @@ -1287,19 +1289,6 @@ static void drop_spte(struct kvm *kvm, u64 *sptep) rmap_remove(kvm, sptep); } -static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush) -{ - struct kvm_mmu_page *sp; - - sp = sptep_to_sp(sptep); - WARN_ON_ONCE(sp->role.level == PG_LEVEL_4K); - - drop_spte(kvm, sptep); - - if (flush) - kvm_flush_remote_tlbs_sptep(kvm, sptep); -} - /* * Write-protect on the specified @sptep, @pt_protect indicates whether * spte write-protection is caused by protecting shadow page table. @@ -2466,7 +2455,8 @@ static struct kvm_mmu_page *kvm_mmu_get_child_sp(struct kvm_vcpu *vcpu, { union kvm_mmu_page_role role; - if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) + if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep) && + spte_to_child_sp(*sptep) && spte_to_child_sp(*sptep)->gfn == gfn) return ERR_PTR(-EEXIST); role = kvm_mmu_child_role(sptep, direct, access); @@ -2544,13 +2534,16 @@ static void __link_shadow_page(struct kvm *kvm, BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); - /* - * If an SPTE is present already, it must be a leaf and therefore - * a large one. Drop it, and flush the TLB if needed, before - * installing sp. - */ - if (is_shadow_present_pte(*sptep)) - drop_large_spte(kvm, sptep, flush); + if (is_shadow_present_pte(*sptep)) { + struct kvm_mmu_page *parent_sp; + LIST_HEAD(invalid_list); + + parent_sp = sptep_to_sp(sptep); + WARN_ON_ONCE(parent_sp->role.level == PG_LEVEL_4K); + + mmu_page_zap_pte(kvm, parent_sp, sptep, &invalid_list); + kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, true); + } spte = make_nonleaf_spte(sp->spt, sp_ad_disabled(sp)); -- cgit v1.2.3 From 7fd2df204f342fc17d1a0bfcd474b24232fb0f32 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 May 2026 14:21:25 -0700 Subject: Linux 7.1-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e27c91ea56fc..9f88dcaae382 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 7 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* -- cgit v1.2.3 From 4808e5cc4fe1f40c4d690d0ce5df0f413fb02938 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 4 May 2026 09:00:00 +0800 Subject: LoongArch: Make CONFIG_64BIT as the default option CONFIG_64BIT is the mandatory option before v7.0, but in v7.1-rc1 both CONFIG_32BIT and CONFIG_64BIT are selectable and CONFIG_32BIT became the default option. This breaks existing configurations, so explicitly make CONFIG_64BIT as the default option to keep existing behavior. Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 3b042dbb2c41..606597da46b8 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -220,6 +220,7 @@ menu "Kernel type and options" choice prompt "Kernel type" + default 64BIT # Keep existing behavior config 32BIT bool "32-bit kernel" -- cgit v1.2.3 From 5643c6b2c8308b206cb01cbfd0e6ac80f9f1bc9a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 4 May 2026 09:00:01 +0800 Subject: LoongArch: Specify -m32/-m64 explicitly for 32BIT/64BIT Clang/LLVM build needs -m32/-m64 to switch triple variants (i.e. the --target=xxx parameter). Otherwise we get build errors for CONFIG_32BIT. GCC doesn't support -m32/-m64 now, but maybe support in future, so use cc-option to specify them. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202604232041.ESJDwVG4-lkp@intel.com/ Suggested-by: Nathan Chancellor Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 2 ++ arch/loongarch/vdso/Makefile | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 47516aeea9d2..54fcfa1eac1f 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -55,9 +55,11 @@ endif ifdef CONFIG_32BIT tool-archpref = $(32bit-tool-archpref) UTS_MACHINE := loongarch32 +cflags-y += $(call cc-option,-m32) else tool-archpref = $(64bit-tool-archpref) UTS_MACHINE := loongarch64 +cflags-y += $(call cc-option,-m64) endif ifneq ($(SUBARCH),$(ARCH)) diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 42aa96249828..9c9181bb4071 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -12,6 +12,8 @@ obj-vdso-$(CONFIG_GENERIC_GETTIMEOFDAY) += vgettimeofday.o ccflags-vdso := \ $(filter -I%,$(KBUILD_CFLAGS)) \ $(filter -E%,$(KBUILD_CFLAGS)) \ + $(filter -m32,$(KBUILD_CFLAGS)) \ + $(filter -m64,$(KBUILD_CFLAGS)) \ $(filter -march=%,$(KBUILD_CFLAGS)) \ $(filter -m%-float,$(KBUILD_CFLAGS)) \ $(CLANG_FLAGS) \ -- cgit v1.2.3 From 98b8aebb14fdc0133939fd8fe07d0d98333dc976 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 4 May 2026 09:00:01 +0800 Subject: LoongArch: Fix SYM_SIGFUNC_START definition for 32BIT The SYM_SIGFUNC_START definition should match sigcontext that the length of GPRs are 8 bytes for both 32BIT and 64BIT. So replace SZREG with 8 to fix it. Cc: stable@vger.kernel.org Fixes: e4878c37f6679fde ("LoongArch: vDSO: Emit GNU_EH_FRAME correctly") Suggested-by: Xi Ruoyao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/linkage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/linkage.h b/arch/loongarch/include/asm/linkage.h index a1bd6a3ee03a..ae937d1708b2 100644 --- a/arch/loongarch/include/asm/linkage.h +++ b/arch/loongarch/include/asm/linkage.h @@ -69,7 +69,7 @@ 9, 10, 11, 12, 13, 14, 15, 16, \ 17, 18, 19, 20, 21, 22, 23, 24, \ 25, 26, 27, 28, 29, 30, 31; \ - .cfi_offset \num, SC_REGS + \num * SZREG; \ + .cfi_offset \num, SC_REGS + \num * 8; \ .endr; \ \ nop; \ -- cgit v1.2.3 From 49f33840dcc907d21313d369e34872880846b61c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 4 May 2026 09:00:20 +0800 Subject: LoongArch: Use per-root-bridge PCIH flag to skip mem resource fixup When firmware enables 64-bit PCI host bridge support, some root bridges already provide valid 64-bit mem resource windows through ACPI. In this case, the LoongArch-specific mem resource high-bits fixup in acpi_prepare_root_resources() should not be applied unconditionally. Otherwise, the kernel may override the native resource layout derived from firmware, and later BAR assignment can fail to place device BARs into the intended 64-bit address space correctly. Add a per-root-bridge ACPI flag, PCIH, and evaluate it from the current root bridge device scope. When PCIH is set, skip the mem resource high- bits fixup path and let the kernel use the firmware-provided resource description directly. When PCIH is absent or cleared, keep the existing behavior and continue filling the high address bits from the host bridge address. This makes the behavior per-root-bridge configurable and avoids breaking valid 64-bit BAR space allocation on bridges whose 64-bit windows have already been fully described by firmware. Cc: stable@vger.kernel.org Suggested-by: Chao Li Tested-by: Dongyan Qian Signed-off-by: Dongyan Qian Signed-off-by: Huacai Chen --- arch/loongarch/pci/acpi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index 0dde3ddcd544..b02698a338ee 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -61,11 +61,16 @@ static void acpi_release_root_info(struct acpi_pci_root_info *ci) static int acpi_prepare_root_resources(struct acpi_pci_root_info *ci) { int status; + unsigned long long pci_h = 0; struct resource_entry *entry, *tmp; struct acpi_device *device = ci->bridge; status = acpi_pci_probe_root_resources(ci); if (status > 0) { + acpi_evaluate_integer(device->handle, "PCIH", NULL, &pci_h); + if (pci_h) + return status; + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { if (entry->res->flags & IORESOURCE_MEM) { entry->offset = ci->root->mcfg_addr & GENMASK_ULL(63, 40); -- cgit v1.2.3 From 8dfa2f8780e486d05b9a0ffce70b8f5fbd62053e Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Mon, 4 May 2026 09:00:20 +0800 Subject: LoongArch: Fix potential ADE in loongson_gpu_fixup_dma_hang() The switch case in loongson_gpu_fixup_dma_hang() may not DC2 or DC3, and readl(crtc_reg) will access with random address, because the "device" is from "base+PCI_DEVICE_ID", "base" is from "pdev->devfn+1". This is wrong when my platform inserts a discrete GPU: lspci -tv -[0000:00]-+-00.0 Loongson Technology LLC Hyper Transport Bridge Controller ... +-06.0 Loongson Technology LLC LG100 GPU +-06.2 Loongson Technology LLC Device 7a37 ... Add a default switch case to fix the panic as below: Kernel ade access[#1]: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.136-loong64-desktop-hwe+ #4 pc 90000000017e5534 ra 90000000017e54c0 tp 90000001002f8000 sp 90000001002fb6c0 a0 80000efe00003100 a1 0000000000003100 a2 0000000000000000 a3 0000000000000002 a4 90000001002fb6b4 a5 900000087cdb58fd a6 90000000027af000 a7 0000000000000001 t0 00000000000085b9 t1 000000000000ffff t2 0000000000000000 t3 0000000000000000 t4 fffffffffffffffd t5 00000000fffb6d9c t6 0000000000083b00 t7 00000000000070c0 t8 900000087cdb4d94 u0 900000087cdb58fd s9 90000001002fb826 s0 90000000031c12c8 s1 7fffffffffffff00 s2 90000000031c12d0 s3 0000000000002710 s4 0000000000000000 s5 0000000000000000 s6 9000000100053000 s7 7fffffffffffff00 s8 90000000030d4000 ra: 90000000017e54c0 loongson_gpu_fixup_dma_hang+0x40/0x210 ERA: 90000000017e5534 loongson_gpu_fixup_dma_hang+0xb4/0x210 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) ESTAT: 00480000 [ADEM] (IS= ECode=8 EsubCode=1) BADV: 7fffffffffffff00 PRID: 0014d000 (Loongson-64bit, Loongson-3A6000-HV) Modules linked in: Process swapper/0 (pid: 1, threadinfo=(____ptrval____), task=(____ptrval____)) Stack : 0000000000000006 90000001002fb778 90000001002fb704 0000000000000007 0000000016a65700 90000000017e5690 000000000000ffff ffffffffffffffff 900000000209f7c0 9000000100053000 900000000209f7a8 9000000000eebc08 0000000000000000 0000000000000000 0000000000000006 90000001002fb778 90000001000530b8 90000000027af000 0000000000000000 9000000100054000 9000000100053000 9000000000ebb70c 9000000100004c00 9000000004000001 90000001002fb7e4 bae765461f31cb12 0000000000000000 0000000000000000 0000000000000006 90000000027af000 0000000000000030 90000000027af000 900000087cd6f800 9000000100053000 0000000000000000 9000000000ebc560 7a2500147cdaf720 bae765461f31cb12 0000000000000001 0000000000000030 ... Call Trace: [<90000000017e5534>] loongson_gpu_fixup_dma_hang+0xb4/0x210 [<9000000000eebc08>] pci_fixup_device+0x108/0x280 [<9000000000ebb70c>] pci_setup_device+0x24c/0x690 [<9000000000ebc560>] pci_scan_single_device+0xe0/0x140 [<9000000000ebc684>] pci_scan_slot+0xc4/0x280 [<9000000000ebdd00>] pci_scan_child_bus_extend+0x60/0x3f0 [<9000000000f5bc94>] acpi_pci_root_create+0x2b4/0x420 [<90000000017e5e74>] pci_acpi_scan_root+0x2d4/0x440 [<9000000000f5b02c>] acpi_pci_root_add+0x21c/0x3a0 [<9000000000f4ee54>] acpi_bus_attach+0x1a4/0x3c0 [<90000000010e200c>] device_for_each_child+0x6c/0xe0 [<9000000000f4bbf4>] acpi_dev_for_each_child+0x44/0x70 [<9000000000f4ef40>] acpi_bus_attach+0x290/0x3c0 [<90000000010e200c>] device_for_each_child+0x6c/0xe0 [<9000000000f4bbf4>] acpi_dev_for_each_child+0x44/0x70 [<9000000000f4ef40>] acpi_bus_attach+0x290/0x3c0 [<9000000000f5211c>] acpi_bus_scan+0x6c/0x280 [<900000000189c028>] acpi_scan_init+0x194/0x310 [<900000000189bc6c>] acpi_init+0xcc/0x140 [<9000000000220cdc>] do_one_initcall+0x4c/0x310 [<90000000018618fc>] kernel_init_freeable+0x258/0x2d4 [<900000000184326c>] kernel_init+0x28/0x13c [<9000000000222008>] ret_from_kernel_thread+0xc/0xa4 Cc: stable@vger.kernel.org Fixes: 95db0c9f526d ("LoongArch: Workaround LS2K/LS7A GPU DMA hang bug") Link: https://gist.github.com/opsiff/ebf2dac51b4013d22462f2124c55f807 Link: https://gist.github.com/opsiff/a62f2a73db0492b3c49bf223a339b133 Signed-off-by: Wentao Guan Signed-off-by: Huacai Chen --- arch/loongarch/pci/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index d233ea2218fe..f33c7ea1443d 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c @@ -132,6 +132,9 @@ static void loongson_gpu_fixup_dma_hang(struct pci_dev *pdev, bool on) crtc_reg = regbase; crtc_offset = 0x400; break; + default: + iounmap(regbase); + return; } for (i = 0; i < CRTC_NUM_MAX; i++, crtc_reg += crtc_offset) { -- cgit v1.2.3 From 7e2c41bc62e436f465ee1ff7ebc14e35c99d95fb Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 4 May 2026 09:00:20 +0800 Subject: LoongArch: vDSO: Drop custom __arch_vdso_hres_capable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The custom definition is identical to the generic fallback one. So remove it. Signed-off-by: Thomas Weißschuh Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/vdso/gettimeofday.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/loongarch/include/asm/vdso/gettimeofday.h b/arch/loongarch/include/asm/vdso/gettimeofday.h index bae76767c693..18ba403e1ed9 100644 --- a/arch/loongarch/include/asm/vdso/gettimeofday.h +++ b/arch/loongarch/include/asm/vdso/gettimeofday.h @@ -85,12 +85,6 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return count; } -static inline bool loongarch_vdso_hres_capable(void) -{ - return true; -} -#define __arch_vdso_hres_capable loongarch_vdso_hres_capable - #endif /* CONFIG_GENERIC_GETTIMEOFDAY */ #endif /* !__ASSEMBLER__ */ -- cgit v1.2.3 From 5203012fa6045aac4b69d4e7c212e16dcf38ef10 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Mon, 4 May 2026 09:00:37 +0800 Subject: LoongArch: KVM: Compile switch.S directly into the kernel If we directly compile the switch.S file into the kernel, the address of the kvm_exc_entry function will definitely be within the DMW memory area. Therefore, we will no longer need to perform a copy relocation of the kvm_exc_entry. So this patch compiles switch.S directly into the kernel, and then remove the copy relocation execution logic for the kvm_exc_entry function. Cc: stable@vger.kernel.org Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/Kbuild | 2 +- arch/loongarch/include/asm/asm-prototypes.h | 20 +++++++++++++++++ arch/loongarch/include/asm/kvm_host.h | 3 --- arch/loongarch/kvm/Makefile | 3 ++- arch/loongarch/kvm/main.c | 35 +++-------------------------- arch/loongarch/kvm/switch.S | 20 ++++++++++++----- 6 files changed, 41 insertions(+), 42 deletions(-) diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild index beb8499dd8ed..1c7a0dbe5e72 100644 --- a/arch/loongarch/Kbuild +++ b/arch/loongarch/Kbuild @@ -3,7 +3,7 @@ obj-y += mm/ obj-y += net/ obj-y += vdso/ -obj-$(CONFIG_KVM) += kvm/ +obj-$(subst m,y,$(CONFIG_KVM)) += kvm/ # for cleaning subdir- += boot diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h index 704066b4f736..de0c17f3f49c 100644 --- a/arch/loongarch/include/asm/asm-prototypes.h +++ b/arch/loongarch/include/asm/asm-prototypes.h @@ -20,3 +20,23 @@ asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_ struct pt_regs *regs, int (*fn)(void *), void *fn_arg); + +struct kvm_run; +struct kvm_vcpu; +struct loongarch_fpu; + +void kvm_exc_entry(void); +int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu); + +void kvm_save_fpu(struct loongarch_fpu *fpu); +void kvm_restore_fpu(struct loongarch_fpu *fpu); + +#ifdef CONFIG_CPU_HAS_LSX +void kvm_save_lsx(struct loongarch_fpu *fpu); +void kvm_restore_lsx(struct loongarch_fpu *fpu); +#endif + +#ifdef CONFIG_CPU_HAS_LASX +void kvm_save_lasx(struct loongarch_fpu *fpu); +void kvm_restore_lasx(struct loongarch_fpu *fpu); +#endif diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 130cedbb6b39..776bc487a705 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -87,7 +87,6 @@ struct kvm_context { struct kvm_world_switch { int (*exc_entry)(void); int (*enter_guest)(struct kvm_run *run, struct kvm_vcpu *vcpu); - unsigned long page_order; }; #define MAX_PGTABLE_LEVELS 4 @@ -359,8 +358,6 @@ void kvm_exc_entry(void); int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu); extern unsigned long vpid_mask; -extern const unsigned long kvm_exception_size; -extern const unsigned long kvm_enter_guest_size; extern struct kvm_world_switch *kvm_loongarch_ops; #define SW_GCSR (1 << 0) diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index ae469edec99c..a4d044da3aa7 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -7,11 +7,12 @@ include $(srctree)/virt/kvm/Makefile.kvm obj-$(CONFIG_KVM) += kvm.o +obj-y += switch.o + kvm-y += exit.o kvm-y += interrupt.o kvm-y += main.o kvm-y += mmu.o -kvm-y += switch.o kvm-y += timer.o kvm-y += tlb.o kvm-y += vcpu.o diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 76ebff2faedd..f105a86143f5 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -348,8 +348,7 @@ void kvm_arch_disable_virtualization_cpu(void) static int kvm_loongarch_env_init(void) { - int cpu, order, ret; - void *addr; + int cpu, ret; struct kvm_context *context; vmcs = alloc_percpu(struct kvm_context); @@ -365,30 +364,8 @@ static int kvm_loongarch_env_init(void) return -ENOMEM; } - /* - * PGD register is shared between root kernel and kvm hypervisor. - * So world switch entry should be in DMW area rather than TLB area - * to avoid page fault reenter. - * - * In future if hardware pagetable walking is supported, we won't - * need to copy world switch code to DMW area. - */ - order = get_order(kvm_exception_size + kvm_enter_guest_size); - addr = (void *)__get_free_pages(GFP_KERNEL, order); - if (!addr) { - free_percpu(vmcs); - vmcs = NULL; - kfree(kvm_loongarch_ops); - kvm_loongarch_ops = NULL; - return -ENOMEM; - } - - memcpy(addr, kvm_exc_entry, kvm_exception_size); - memcpy(addr + kvm_exception_size, kvm_enter_guest, kvm_enter_guest_size); - flush_icache_range((unsigned long)addr, (unsigned long)addr + kvm_exception_size + kvm_enter_guest_size); - kvm_loongarch_ops->exc_entry = addr; - kvm_loongarch_ops->enter_guest = addr + kvm_exception_size; - kvm_loongarch_ops->page_order = order; + kvm_loongarch_ops->exc_entry = (void *)kvm_exc_entry; + kvm_loongarch_ops->enter_guest = (void *)kvm_enter_guest; vpid_mask = read_csr_gstat(); vpid_mask = (vpid_mask & CSR_GSTAT_GIDBIT) >> CSR_GSTAT_GIDBIT_SHIFT; @@ -428,16 +405,10 @@ static int kvm_loongarch_env_init(void) static void kvm_loongarch_env_exit(void) { - unsigned long addr; - if (vmcs) free_percpu(vmcs); if (kvm_loongarch_ops) { - if (kvm_loongarch_ops->exc_entry) { - addr = (unsigned long)kvm_loongarch_ops->exc_entry; - free_pages(addr, kvm_loongarch_ops->page_order); - } kfree(kvm_loongarch_ops); } diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index f1768b7a6194..1d3ba7190154 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -4,9 +4,11 @@ */ #include +#include #include #include #include +#include #include #include @@ -100,8 +102,13 @@ * - is still in guest mode, such as pgd table/vmid registers etc, * - will fix with hw page walk enabled in future * load kvm_vcpu from reserved CSR KVM_VCPU_KS, and save a2 to KVM_TEMP_KS + * + * PGD register is shared between root kernel and kvm hypervisor. + * So world switch entry should be in DMW area rather than TLB area + * to avoid page fault re-enter. */ .text + .p2align PAGE_SHIFT .cfi_sections .debug_frame SYM_CODE_START(kvm_exc_entry) UNWIND_HINT_UNDEFINED @@ -190,8 +197,8 @@ ret_to_host: kvm_restore_host_gpr a2 jr ra -SYM_INNER_LABEL(kvm_exc_entry_end, SYM_L_LOCAL) SYM_CODE_END(kvm_exc_entry) +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry) /* * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu) @@ -215,8 +222,8 @@ SYM_FUNC_START(kvm_enter_guest) /* Save kvm_vcpu to kscratch */ csrwr a1, KVM_VCPU_KS kvm_switch_to_guest -SYM_INNER_LABEL(kvm_enter_guest_end, SYM_L_LOCAL) SYM_FUNC_END(kvm_enter_guest) +EXPORT_SYMBOL_FOR_KVM(kvm_enter_guest) SYM_FUNC_START(kvm_save_fpu) fpu_save_csr a0 t1 @@ -224,6 +231,7 @@ SYM_FUNC_START(kvm_save_fpu) fpu_save_cc a0 t1 t2 jr ra SYM_FUNC_END(kvm_save_fpu) +EXPORT_SYMBOL_FOR_KVM(kvm_save_fpu) SYM_FUNC_START(kvm_restore_fpu) fpu_restore_double a0 t1 @@ -231,6 +239,7 @@ SYM_FUNC_START(kvm_restore_fpu) fpu_restore_cc a0 t1 t2 jr ra SYM_FUNC_END(kvm_restore_fpu) +EXPORT_SYMBOL_FOR_KVM(kvm_restore_fpu) #ifdef CONFIG_CPU_HAS_LSX SYM_FUNC_START(kvm_save_lsx) @@ -239,6 +248,7 @@ SYM_FUNC_START(kvm_save_lsx) lsx_save_data a0 t1 jr ra SYM_FUNC_END(kvm_save_lsx) +EXPORT_SYMBOL_FOR_KVM(kvm_save_lsx) SYM_FUNC_START(kvm_restore_lsx) lsx_restore_data a0 t1 @@ -246,6 +256,7 @@ SYM_FUNC_START(kvm_restore_lsx) fpu_restore_csr a0 t1 t2 jr ra SYM_FUNC_END(kvm_restore_lsx) +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lsx) #endif #ifdef CONFIG_CPU_HAS_LASX @@ -255,6 +266,7 @@ SYM_FUNC_START(kvm_save_lasx) lasx_save_data a0 t1 jr ra SYM_FUNC_END(kvm_save_lasx) +EXPORT_SYMBOL_FOR_KVM(kvm_save_lasx) SYM_FUNC_START(kvm_restore_lasx) lasx_restore_data a0 t1 @@ -262,10 +274,8 @@ SYM_FUNC_START(kvm_restore_lasx) fpu_restore_csr a0 t1 t2 jr ra SYM_FUNC_END(kvm_restore_lasx) +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lasx) #endif - .section ".rodata" -SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry) -SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest) #ifdef CONFIG_CPU_HAS_LBT STACK_FRAME_NON_STANDARD kvm_restore_fpu -- cgit v1.2.3 From b323a441da602dfdfc24f30d3190cac786ffebf2 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Mon, 4 May 2026 09:00:37 +0800 Subject: LoongArch: KVM: Fix "unreliable stack" for kvm_exc_entry Insert the appropriate UNWIND hint into the kvm_exc_entry assembly function to guide the generation of correct ORC table entries, thereby solving the timeout problem ("unreliable stack") while loading the livepatch-sample module on a physical machine running virtual machines with multiple vcpus. Cc: stable@vger.kernel.org Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/kvm/switch.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index 1d3ba7190154..936e4ae3e408 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -111,7 +111,7 @@ .p2align PAGE_SHIFT .cfi_sections .debug_frame SYM_CODE_START(kvm_exc_entry) - UNWIND_HINT_UNDEFINED + UNWIND_HINT_END_OF_STACK csrwr a2, KVM_TEMP_KS csrrd a2, KVM_VCPU_KS addi.d a2, a2, KVM_VCPU_ARCH -- cgit v1.2.3 From b3e31a6650d4cab63f0814c37c0b360372c6ee9e Mon Sep 17 00:00:00 2001 From: Qiang Ma Date: Mon, 4 May 2026 09:00:37 +0800 Subject: LoongArch: KVM: Cap KVM_CAP_NR_VCPUS by KVM_CAP_MAX_VCPUS It doesn't make sense to return the recommended maximum number of vCPUs which exceeds the maximum possible number of vCPUs. Other architectures have already done this, such as commit 57a2e13ebdda ("KVM: MIPS: Cap KVM_CAP_NR_VCPUS by KVM_CAP_MAX_VCPUS") Cc: stable@vger.kernel.org Reviewed-by: Bibo Mao Signed-off-by: Qiang Ma Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index 8cc5ee1c53ef..1317c718f896 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -125,7 +125,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; case KVM_CAP_NR_VCPUS: - r = num_online_cpus(); + r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS); break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; -- cgit v1.2.3 From f26faae96c411a70641e4d21b759475caa6122d5 Mon Sep 17 00:00:00 2001 From: Tao Cui Date: Mon, 4 May 2026 09:00:38 +0800 Subject: LoongArch: KVM: Fix missing EMULATE_FAIL in kvm_emu_mmio_read() In the ldptr (0x24...0x27) opcode decoding path, the default case only breaks out but without setting "ret" value to EMULATE_FAIL. This leaves run->mmio.len uninitialized (stale from a previous MMIO operation) while "ret" value remains EMULATE_DO_MMIO, causing the code to proceed with an incorrect MMIO length. Add "ret = EMULATE_FAIL" to match the other default branches in the same function (e.g. the 0x28...0x2e and 0x38 cases). Cc: stable@vger.kernel.org Reviewed-by: Bibo Mao Signed-off-by: Tao Cui Signed-off-by: Huacai Chen --- arch/loongarch/kvm/exit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index da0ad89f2eb7..3b95cd0f989b 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -390,6 +390,7 @@ int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst) run->mmio.len = 8; break; default: + ret = EMULATE_FAIL; break; } break; -- cgit v1.2.3 From 81e18777d61440511451866c7c80b34a8bdd6b33 Mon Sep 17 00:00:00 2001 From: Tao Cui Date: Mon, 4 May 2026 09:00:38 +0800 Subject: LoongArch: KVM: Use kvm_set_pte() in kvm_flush_pte() kvm_flush_pte() is the only caller that directly assigns *pte instead of using the kvm_set_pte() wrapper. Use the wrapper for consistency with the rest of the file. No functional change intended. Cc: stable@vger.kernel.org Reviewed-by: Bibo Mao Signed-off-by: Tao Cui Signed-off-by: Huacai Chen --- arch/loongarch/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index a7fa458e3360..e104897aa532 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -95,7 +95,7 @@ static int kvm_flush_pte(kvm_pte_t *pte, phys_addr_t addr, kvm_ptw_ctx *ctx) else kvm->stat.pages--; - *pte = ctx->invalid_entry; + kvm_set_pte(pte, ctx->invalid_entry); return 1; } -- cgit v1.2.3 From 6debfff78584f0adedf7355fe5263198a3fc6b19 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 4 May 2026 09:00:48 +0800 Subject: LoongArch: KVM: Move AVEC interrupt injection into switch loop When AVEC interrupt controller is emulated in user space, AVEC interrupt is injected by software like SIP0/SIP1/TI/IPI interrupts. Here also move the AVEC interrupt injection in switch loop. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/interrupt.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/loongarch/kvm/interrupt.c b/arch/loongarch/kvm/interrupt.c index 32930959f7c2..53dac8ab8fb1 100644 --- a/arch/loongarch/kvm/interrupt.c +++ b/arch/loongarch/kvm/interrupt.c @@ -33,13 +33,12 @@ static int kvm_irq_deliver(struct kvm_vcpu *vcpu, unsigned int priority) if (priority < EXCCODE_INT_NUM) irq = priority_to_irq[priority]; - if (kvm_guest_has_msgint(&vcpu->arch) && (priority == INT_AVEC)) { - dmsintc_inject_irq(vcpu); - set_gcsr_estat(irq); - return 1; - } - switch (priority) { + case INT_AVEC: + if (!kvm_guest_has_msgint(&vcpu->arch)) + break; + dmsintc_inject_irq(vcpu); + fallthrough; case INT_TI: case INT_IPI: case INT_SWI0: @@ -66,12 +65,11 @@ static int kvm_irq_clear(struct kvm_vcpu *vcpu, unsigned int priority) if (priority < EXCCODE_INT_NUM) irq = priority_to_irq[priority]; - if (kvm_guest_has_msgint(&vcpu->arch) && (priority == INT_AVEC)) { - clear_gcsr_estat(irq); - return 1; - } - switch (priority) { + case INT_AVEC: + if (!kvm_guest_has_msgint(&vcpu->arch)) + break; + fallthrough; case INT_TI: case INT_IPI: case INT_SWI0: -- cgit v1.2.3 From 2433f3f5724b3af569d9fb411ba728629524738b Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 4 May 2026 09:00:48 +0800 Subject: LoongArch: KVM: Fix HW timer interrupt lost when inject interrupt by software With passthrough HW timer, timer interrupt is injected by HW. When inject emulated CPU interrupt by software such SIP0/SIP1/IPI, HW timer interrupt may be lost. Here check whether there is timer tick value inversion before and after injecting emulated CPU interrupt by software, timer enabling by reading timer cfg register is skipped. If the timer tick value is detected with changing, then timer should be enabled. And inject a timer interrupt by software if there is. Cc: Fixes: f45ad5b8aa93 ("LoongArch: KVM: Implement vcpu interrupt operations"). Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/interrupt.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/loongarch/kvm/interrupt.c b/arch/loongarch/kvm/interrupt.c index 53dac8ab8fb1..a18c60dffbba 100644 --- a/arch/loongarch/kvm/interrupt.c +++ b/arch/loongarch/kvm/interrupt.c @@ -28,6 +28,7 @@ static unsigned int priority_to_irq[EXCCODE_INT_NUM] = { static int kvm_irq_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { unsigned int irq = 0; + unsigned long old, new; clear_bit(priority, &vcpu->arch.irq_pending); if (priority < EXCCODE_INT_NUM) @@ -43,7 +44,13 @@ static int kvm_irq_deliver(struct kvm_vcpu *vcpu, unsigned int priority) case INT_IPI: case INT_SWI0: case INT_SWI1: + old = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); set_gcsr_estat(irq); + new = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); + + /* Inject TI if TVAL inverted */ + if (new > old) + set_gcsr_estat(CPU_TIMER); break; case INT_HWI0 ... INT_HWI7: @@ -60,6 +67,7 @@ static int kvm_irq_deliver(struct kvm_vcpu *vcpu, unsigned int priority) static int kvm_irq_clear(struct kvm_vcpu *vcpu, unsigned int priority) { unsigned int irq = 0; + unsigned long old, new; clear_bit(priority, &vcpu->arch.irq_clear); if (priority < EXCCODE_INT_NUM) @@ -74,7 +82,13 @@ static int kvm_irq_clear(struct kvm_vcpu *vcpu, unsigned int priority) case INT_IPI: case INT_SWI0: case INT_SWI1: + old = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); clear_gcsr_estat(irq); + new = kvm_read_hw_gcsr(LOONGARCH_CSR_TVAL); + + /* Inject TI if TVAL inverted */ + if (new > old) + set_gcsr_estat(CPU_TIMER); break; case INT_HWI0 ... INT_HWI7: -- cgit v1.2.3 From 5a873d77ba792410a796595a917be6a440f9b7d2 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Mon, 4 May 2026 09:00:48 +0800 Subject: LoongArch: KVM: Move unconditional delay into timer clear scenery When timer interrupt arrives in guest kernel, guest kernel clears the timer interrupt and program timer with the next incoming event. During this stage, timer tick is -1 and timer interrupt status is disabled in ESTAT register. KVM hypervisor need write zero with timer tick register and wait timer interrupt injection from HW side, and then clear timer interrupt. So there is 2 cycle delay in KVM hypervisor to emulate such scenery, and the delay is unnecessary if there is no need to clear the timer interrupt. Here move 2 cycle delay into timer clear scenery and add timer ESTAT checking after delay, and set max timer expire value if timer interrupt does not arrive still. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/timer.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 29c2aaba63c3..8356fce0043f 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -96,15 +96,21 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) * and set CSR TVAL with -1 */ write_gcsr_timertick(0); - __delay(2); /* Wait cycles until timer interrupt injected */ /* * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear * timer interrupt, and CSR TVAL keeps unchanged with -1, it * avoids spurious timer interrupt */ - if (!(estat & CPU_TIMER)) + if (!(estat & CPU_TIMER)) { + __delay(2); /* Wait cycles until timer interrupt injected */ + + /* Write TVAL with max value if no TI shot */ + estat = kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT); + if (!(estat & CPU_TIMER)) + write_gcsr_timertick(CSR_TCFG_VAL); gcsr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + } return; } -- cgit v1.2.3 From d68ce834f8cf6cb2e77f3331df65166b35466b53 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 28 Apr 2026 21:37:47 +0530 Subject: cifs: abort open_cached_dir if we don't request leases It is possible that SMB2_open_init may not set lease context based on the requested oplock level. This can happen when leases have been temporarily or permanently disabled. When this happens, we will have open_cached_dir making an open without lease context and the response will anyway be rejected by open_cached_dir (thereby forcing a close to discard this open). That's unnecessary two round-trips to the server. This change adds a check before making the open request to the server to make sure that SMB2_open_init did add the expected lease context to the open in open_cached_dir. Cc: Reviewed-by: Bharath SM Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 02791ec3c5a1..88d5e9a32f28 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -286,6 +286,14 @@ replay_again: &rqst[0], &oplock, &oparms, utf16_path); if (rc) goto oshr_free; + + if (oplock != SMB2_OPLOCK_LEVEL_II) { + rc = -EINVAL; + cifs_dbg(FYI, "%s: Oplock level %d not suitable for cached directory\n", + __func__, oplock); + goto oshr_free; + } + smb2_set_next_command(tcon, &rqst[0]); memset(&qi_iov, 0, sizeof(qi_iov)); -- cgit v1.2.3 From 5e489c6c47a2ac15edbaca153b9348e42c1eacab Mon Sep 17 00:00:00 2001 From: Bjoern Doebel Date: Thu, 30 Apr 2026 08:57:17 +0000 Subject: smb: client: use kzalloc to zero-initialize security descriptor buffer Commit 62e7dd0a39c2d ("smb: common: change the data type of num_aces to le16") split struct smb_acl's __le32 num_aces field into __le16 num_aces and __le16 reserved. The reserved field corresponds to Sbz2 in the MS-DTYP ACL wire format, which must be zero [1]. When building an ACL descriptor in build_sec_desc(), we are using a kmalloc()'ed descriptor buffer and writing the fields explicitly using le16() writes now. This never writes to the 2 byte reserved field, leaving it as uninitialized heap data. When the reserved field happens to contain non-zero slab garbage, Samba rejects the security descriptor with "ndr_pull_security_descriptor failed: Range Error", causing chmod to fail with EINVAL. Change kmalloc() to kzalloc() to ensure the entire buffer is zero-initialized. Fixes: 62e7dd0a39c2d ("smb: common: change the data type of num_aces to le16") Cc: stable@vger.kernel.org Signed-off-by: Bjoern Doebel Assisted-by: Kiro:claude-opus-4.6 [1] https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-dtyp/20233ed8-a6c6-4097-aafa-dd545ed24428 Signed-off-by: Steve French --- fs/smb/client/cifsacl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index ec5d47779304..a2750f1e3d90 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -1732,7 +1732,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, * descriptor parameters, and security descriptor itself */ nsecdesclen = max_t(u32, nsecdesclen, DEFAULT_SEC_DESC_LEN); - pnntsd = kmalloc(nsecdesclen, GFP_KERNEL); + pnntsd = kzalloc(nsecdesclen, GFP_KERNEL); if (!pnntsd) { kfree(pntsd); cifs_put_tlink(tlink); -- cgit v1.2.3 From 84d5d76c4e8e2750fa17869b7272f189d2bdd40b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 1 May 2026 23:53:38 -0700 Subject: drm/ttm: Fix GPU MM stats during pool shrinking TTM pool shrinking frees pages by calling __free_pages() directly, which bypasses updates to NR_GPU_ACTIVE and leaves GPU MM accounting out of sync. Introduce a helper, __free_pages_gpu_account(), and use it for all page frees in ttm_pool.c so GPU MM statistics are updated consistently. Reported-by: Kenneth Crudup Fixes: ae80122f3896 ("drm/ttm: use gpu mm stats to track gpu memory allocations. (v4)") Cc: Christian Koenig Cc: Huang Rui Cc: Matthew Auld Cc: David Airlie Cc: dri-devel@lists.freedesktop.org Signed-off-by: Matthew Brost Tested-by: Kenneth Crudup Reviewed-by: Dave Airlie Link: https://patch.msgid.link/20260502065338.2720646-1-matthew.brost@intel.com --- drivers/gpu/drm/ttm/ttm_pool.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 26a3689e5fd9..278bbe7a11ad 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -206,6 +206,14 @@ error_free: return NULL; } +static void __free_pages_gpu_account(struct page *p, unsigned int order, + bool reclaim) +{ + mod_lruvec_page_state(p, reclaim ? NR_GPU_RECLAIM : NR_GPU_ACTIVE, + -(1 << order)); + __free_pages(p, order); +} + /* Reset the caching and pages of size 1 << order */ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching, unsigned int order, struct page *p, bool reclaim) @@ -223,9 +231,7 @@ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching, #endif if (!pool || !ttm_pool_uses_dma_alloc(pool)) { - mod_lruvec_page_state(p, reclaim ? NR_GPU_RECLAIM : NR_GPU_ACTIVE, - -(1 << order)); - __free_pages(p, order); + __free_pages_gpu_account(p, order, reclaim); return; } @@ -606,7 +612,7 @@ static int ttm_pool_restore_commit(struct ttm_pool_tt_restore *restore, */ ttm_pool_split_for_swap(restore->pool, p); copy_highpage(restore->alloced_page + i, p); - __free_pages(p, 0); + __free_pages_gpu_account(p, 0, false); } restore->restored_pages++; @@ -1068,7 +1074,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, if (flags->purge) { shrunken += num_pages; page->private = 0; - __free_pages(page, order); + __free_pages_gpu_account(page, order, false); memset(tt->pages + i, 0, num_pages * sizeof(*tt->pages)); } @@ -1109,7 +1115,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, } handle = shandle; tt->pages[i] = ttm_backup_handle_to_page_ptr(handle); - put_page(page); + __free_pages_gpu_account(page, 0, false); shrunken++; } -- cgit v1.2.3 From b8c2e9e27636b92dc96c12f16894cbc60c58a306 Mon Sep 17 00:00:00 2001 From: Yufan Chen Date: Mon, 4 May 2026 01:56:10 +0800 Subject: io_uring/napi: clear tracked NAPI entries on unregister IORING_UNREGISTER_NAPI disables NAPI busy polling, but it currently leaves any previously tracked NAPI IDs on the ring context. The normal wait path only checks whether the list is empty before entering the busy poll helper, so an unregistered ring can still observe stale entries and run an unexpected busy poll pass. Make unregister switch the context to inactive and free the tracked entries. Do the same inactive transition while changing the tracking strategy, and recheck the expected tracking mode under napi_lock before inserting a newly learned NAPI ID. This prevents a racing poll path from repopulating the list after unregister or reconfiguration. Also make the busy poll dispatcher ignore inactive mode explicitly. Signed-off-by: Yufan Chen Fixes: 6bf90bd8c58a ("io_uring/napi: add static napi tracking strategy") Link: https://patch.msgid.link/20260503175610.35521-1-yufan.chen@linux.dev Signed-off-by: Jens Axboe --- io_uring/napi.c | 27 ++++++++++++++++++++------- io_uring/napi.h | 8 +++++--- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/io_uring/napi.c b/io_uring/napi.c index 8d68366a4b90..bfc771445912 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -38,7 +38,8 @@ static inline ktime_t net_to_ktime(unsigned long t) return ns_to_ktime(t << 10); } -int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id) +int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id, + unsigned int mode) { struct hlist_head *hash_list; struct io_napi_entry *e; @@ -69,6 +70,11 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id) * kfree() */ spin_lock(&ctx->napi_lock); + if (unlikely(READ_ONCE(ctx->napi_track_mode) != mode)) { + spin_unlock(&ctx->napi_lock); + kfree(e); + return -EINVAL; + } if (unlikely(io_napi_hash_find(hash_list, napi_id))) { spin_unlock(&ctx->napi_lock); kfree(e); @@ -196,9 +202,14 @@ __io_napi_do_busy_loop(struct io_ring_ctx *ctx, bool (*loop_end)(void *, unsigned long), void *loop_end_arg) { - if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC) + switch (READ_ONCE(ctx->napi_track_mode)) { + case IO_URING_NAPI_TRACKING_STATIC: return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); - return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); + case IO_URING_NAPI_TRACKING_DYNAMIC: + return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); + default: + return false; + } } static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, @@ -273,13 +284,13 @@ static int io_napi_register_napi(struct io_ring_ctx *ctx, default: return -EINVAL; } - /* clean the napi list for new settings */ + WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE); io_napi_free(ctx); - WRITE_ONCE(ctx->napi_track_mode, napi->op_param); /* cap NAPI at 10 msec of spin time */ napi->busy_poll_to = min(10000, napi->busy_poll_to); WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC); WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll); + WRITE_ONCE(ctx->napi_track_mode, napi->op_param); return 0; } @@ -315,7 +326,8 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) case IO_URING_NAPI_STATIC_ADD_ID: if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC) return -EINVAL; - return __io_napi_add_id(ctx, napi.op_param); + return __io_napi_add_id(ctx, napi.op_param, + IO_URING_NAPI_TRACKING_STATIC); case IO_URING_NAPI_STATIC_DEL_ID: if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC) return -EINVAL; @@ -343,9 +355,10 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) if (arg && copy_to_user(arg, &curr, sizeof(curr))) return -EFAULT; + WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE); WRITE_ONCE(ctx->napi_busy_poll_dt, 0); WRITE_ONCE(ctx->napi_prefer_busy_poll, false); - WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE); + io_napi_free(ctx); return 0; } diff --git a/io_uring/napi.h b/io_uring/napi.h index fa742f42e09b..e0aecccc5065 100644 --- a/io_uring/napi.h +++ b/io_uring/napi.h @@ -15,7 +15,8 @@ void io_napi_free(struct io_ring_ctx *ctx); int io_register_napi(struct io_ring_ctx *ctx, void __user *arg); int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg); -int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id); +int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id, + unsigned int mode); void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq); int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx); @@ -43,13 +44,14 @@ static inline void io_napi_add(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct socket *sock; + unsigned int mode = IO_URING_NAPI_TRACKING_DYNAMIC; - if (READ_ONCE(ctx->napi_track_mode) != IO_URING_NAPI_TRACKING_DYNAMIC) + if (READ_ONCE(ctx->napi_track_mode) != mode) return; sock = sock_from_file(req->file); if (sock && sock->sk) - __io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id)); + __io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id), mode); } #else -- cgit v1.2.3 From 04fe9aeb4f3c0999e6715385664c677469dfd8f4 Mon Sep 17 00:00:00 2001 From: Yufan Chen Date: Mon, 4 May 2026 01:57:10 +0800 Subject: io_uring/eventfd: reset deferred signal state Recursive eventfd wakeups must defer io_uring eventfd signaling because eventfd_signal_mask() rejects reentry from eventfd wakeup handlers. The io_ev_fd ops bit tracks an outstanding deferred signal so that the same rcu_head is not queued twice. That bit is only set today. Once the first deferred callback runs, later recursive notifications still see the bit set and skip queueing another deferred signal. This can leave new completions without a matching eventfd wake after the first recursive deferral. Clear the pending bit before issuing the deferred signal. If the wakeup path recurses while the callback runs, a new signal can be queued for the next RCU grace period while the current callback keeps its reference until it returns. Signed-off-by: Yufan Chen Fixes: 60b6c075e8eb ("io_uring/eventfd: move to more idiomatic RCU free usage") Link: https://patch.msgid.link/20260503175710.37209-1-yufan.chen@linux.dev Signed-off-by: Jens Axboe --- io_uring/eventfd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c index 3da028500f76..d656cc2a0b9b 100644 --- a/io_uring/eventfd.c +++ b/io_uring/eventfd.c @@ -43,6 +43,7 @@ static void io_eventfd_do_signal(struct rcu_head *rcu) { struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); + atomic_andnot(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops); eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE); io_eventfd_put(ev_fd); } -- cgit v1.2.3 From 646ebdd3105809d84ed04aa9e92e47e89cc44502 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Fri, 10 Apr 2026 23:03:09 +0200 Subject: media: rc: ttusbir: fix inverted error logic We have to report ENOMEM if no buffer is allocated. Typo dropped a "!". Restore it. Fixes: 50acaad3d202 ("media: rc: ttusbir: respect DMA coherency rules") Cc: stable@vger.kernel.org Signed-off-by: Oliver Neukum Signed-off-by: Sean Young Signed-off-by: Hans Verkuil --- drivers/media/rc/ttusbir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/rc/ttusbir.c b/drivers/media/rc/ttusbir.c index 3848ad3a6b85..db2f6698a6c0 100644 --- a/drivers/media/rc/ttusbir.c +++ b/drivers/media/rc/ttusbir.c @@ -191,7 +191,7 @@ static int ttusbir_probe(struct usb_interface *intf, tt = kzalloc_obj(*tt); buffer = kzalloc(5, GFP_KERNEL); rc = rc_allocate_device(RC_DRIVER_IR_RAW); - if (!tt || !rc || buffer) { + if (!tt || !rc || !buffer) { ret = -ENOMEM; goto out; } -- cgit v1.2.3 From 0cfff13c94cb5fa818bb374945ff280e08dc1bb9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 4 May 2026 08:54:27 +0200 Subject: wifi: mac80211: tests: mark HT check strict The HT check now only applies in strict mode since APs were found to be broken. Mark it as such. Fixes: 711a9c018ad2 ("wifi: mac80211: skip ieee80211_verify_sta_ht_mcs_support check in non-strict mode") Signed-off-by: Johannes Berg --- net/mac80211/tests/chan-mode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/tests/chan-mode.c b/net/mac80211/tests/chan-mode.c index adc069065e73..fa370831d617 100644 --- a/net/mac80211/tests/chan-mode.c +++ b/net/mac80211/tests/chan-mode.c @@ -65,6 +65,7 @@ static const struct determine_chan_mode_case { .ht_capa_mask = { .mcs.rx_mask[0] = 0xf7, }, + .strict = true, }, { .desc = "Masking out a RX rate in VHT capabilities", .conn_mode = IEEE80211_CONN_MODE_EHT, -- cgit v1.2.3 From 65493f27a6008bf84bd11bd41c5e1ea6b0bf3c3d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 30 Apr 2026 10:44:15 -0700 Subject: wifi: cw1200: Revert "Fix locking in error paths" Revert commit d98c24617a83 ("wifi: cw1200: Fix locking in error paths") because it introduces a locking bug instead of fixing a locking bug. cw1200_wow_resume() unlocks priv->conf_mutex. Hence, adding mutex_unlock(&priv->conf_mutex) just after cw1200_wow_resume() is wrong. Reported-by: Ben Hutchings Closes: https://lore.kernel.org/all/408661f69f263266b028713e1412ba36d457e63d.camel@decadent.org.uk/ Fixes: d98c24617a83 ("wifi: cw1200: Fix locking in error paths") Signed-off-by: Bart Van Assche Link: https://patch.msgid.link/20260430174418.1845431-1-bvanassche@acm.org Signed-off-by: Johannes Berg --- drivers/net/wireless/st/cw1200/pm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/st/cw1200/pm.c b/drivers/net/wireless/st/cw1200/pm.c index 84eb15d729c7..120f0379f81d 100644 --- a/drivers/net/wireless/st/cw1200/pm.c +++ b/drivers/net/wireless/st/cw1200/pm.c @@ -264,14 +264,12 @@ int cw1200_wow_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) wiphy_err(priv->hw->wiphy, "PM request failed: %d. WoW is disabled.\n", ret); cw1200_wow_resume(hw); - mutex_unlock(&priv->conf_mutex); return -EBUSY; } /* Force resume if event is coming from the device. */ if (atomic_read(&priv->bh_rx)) { cw1200_wow_resume(hw); - mutex_unlock(&priv->conf_mutex); return -EAGAIN; } -- cgit v1.2.3 From 8bc6b14aab669f0c301febcf4aa5249b9393bf51 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Thu, 30 Apr 2026 11:08:10 +0200 Subject: i2c: testunit: Replace system_long_wq with system_dfl_long_wq Currently the code enqueue work items using {queue|mod}_delayed_work(), using system_long_wq. This workqueue should be used when long works are expected, but it is a per-cpu workqueue. This is important because queue_delayed_work() queue the work using: queue_delayed_work_on(WORK_CPU_UNBOUND, ...); Note that WORK_CPU_UNBOUND = NR_CPUS. This would end up calling __queue_delayed_work() that does: if (housekeeping_enabled(HK_TYPE_TIMER)) { // [....] } else { if (likely(cpu == WORK_CPU_UNBOUND)) add_timer_global(timer); else add_timer_on(timer, cpu); } So when cpu == WORK_CPU_UNBOUND the timer is global and is not using a specific CPU. Later, when __queue_work() is called: if (req_cpu == WORK_CPU_UNBOUND) { if (wq->flags & WQ_UNBOUND) cpu = wq_select_unbound_cpu(raw_smp_processor_id()); else cpu = raw_smp_processor_id(); } Because the wq is not unbound, it takes the CPU where the timer fired and enqueue the work on that CPU. The consequence of all of this is that the work can run anywhere, depending on where the timer fired. Recently, a new unbound workqueue specific for long running work has been added: c116737e972e ("workqueue: Add system_dfl_long_wq for long unbound works") So change system_long_wq with system_dfl_long_wq so that the work may benefit from scheduler task placement. Signed-off-by: Marco Crivellari [wsa: remove FIXME as well] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-slave-testunit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/i2c-slave-testunit.c b/drivers/i2c/i2c-slave-testunit.c index 6de4307050dd..871c58461ebc 100644 --- a/drivers/i2c/i2c-slave-testunit.c +++ b/drivers/i2c/i2c-slave-testunit.c @@ -15,7 +15,7 @@ #include #include #include -#include /* FIXME: is system_long_wq the best choice? */ +#include #define TU_VERSION_MAX_LENGTH 128 @@ -124,7 +124,7 @@ static int i2c_slave_testunit_slave_cb(struct i2c_client *client, case I2C_SLAVE_STOP: if (tu->reg_idx == TU_NUM_REGS) { set_bit(TU_FLAG_IN_PROCESS, &tu->flags); - queue_delayed_work(system_long_wq, &tu->worker, + queue_delayed_work(system_dfl_long_wq, &tu->worker, msecs_to_jiffies(10 * tu->regs[TU_REG_DELAY])); } -- cgit v1.2.3 From 9a937ca22741eebe2bf10b18657b8b9aed9c009b Mon Sep 17 00:00:00 2001 From: Ronald Claveau Date: Fri, 24 Apr 2026 16:17:33 +0200 Subject: dt-bindings: i2c: amlogic: Add compatible for T7 SOC Add the T7 SOC compatible which fallback to AXG compatible. Acked-by: Rob Herring (Arm) Signed-off-by: Ronald Claveau Signed-off-by: Wolfram Sang --- .../devicetree/bindings/i2c/amlogic,meson6-i2c.yaml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/i2c/amlogic,meson6-i2c.yaml b/Documentation/devicetree/bindings/i2c/amlogic,meson6-i2c.yaml index c4cc8af18280..7b59b60b62e5 100644 --- a/Documentation/devicetree/bindings/i2c/amlogic,meson6-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/amlogic,meson6-i2c.yaml @@ -16,10 +16,15 @@ allOf: properties: compatible: - enum: - - amlogic,meson6-i2c # Meson6, Meson8 and compatible SoCs - - amlogic,meson-gxbb-i2c # GXBB and compatible SoCs - - amlogic,meson-axg-i2c # AXG and compatible SoCs + oneOf: + - items: + - enum: + - amlogic,t7-i2c + - const: amlogic,meson-axg-i2c + - enum: + - amlogic,meson6-i2c # Meson6, Meson8 and compatible SoCs + - amlogic,meson-gxbb-i2c # GXBB and compatible SoCs + - amlogic,meson-axg-i2c # AXG and compatible SoCs reg: maxItems: 1 -- cgit v1.2.3 From 02b7cd683892d8d8464815d69a3a76579909a726 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 2 May 2026 17:31:54 +0200 Subject: i2c: stm32f7: reinit_completion() per transfer not per msg Currently, the driver may repeatedly call reinit_completion() during transfer which contains multiple messages, while another thread is waiting for the completion. This happens during transfer with more than 1 message, invoked via stm32f7_i2c_xfer_core() -> stm32f7_i2c_xfer_msg(). After invoking the stm32f7_i2c_xfer_msg() to start transfer, stm32f7_i2c_xfer_core() calls wait_for_completion_timeout() to wait for completion of the transfer of all messages. When the first message transfer completes, the hard IRQ handler triggers, and detects transfer completion, which leads to stm32f7_i2c_isr_event_thread() IRQ thread being started. The stm32f7_i2c_isr_event_thread() calls stm32f7_i2c_xfer_msg() in case there are more messages. Without this change, the second and later stm32f7_i2c_xfer_msg() would call reinit_completion() on the completion which is still being waited for in stm32f7_i2c_xfer_core(). Fix this by moving the reinit_completion() into stm32f7_i2c_xfer_core(), together with wait_for_completion_timeout(). Since stm32f7_i2c_xfer_core() now waits for completion of the entire transfer, increase the default timeout. This fixes sporadic transfer timeouts on STM32MP25xx during kernel boot. Fixes: aeb068c57214 ("i2c: i2c-stm32f7: add driver") Signed-off-by: Marek Vasut [wsa: reworded commit subject] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32f7.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 70cb5822bf17..53d9df70ebe4 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -895,8 +895,6 @@ static void stm32f7_i2c_xfer_msg(struct stm32f7_i2c_dev *i2c_dev, f7_msg->result = 0; f7_msg->stop = (i2c_dev->msg_id >= i2c_dev->msg_num - 1); - reinit_completion(&i2c_dev->complete); - cr1 = readl_relaxed(base + STM32F7_I2C_CR1); cr2 = readl_relaxed(base + STM32F7_I2C_CR2); @@ -1728,6 +1726,8 @@ static int stm32f7_i2c_xfer_core(struct i2c_adapter *i2c_adap, if (ret) goto pm_free; + reinit_completion(&i2c_dev->complete); + stm32f7_i2c_xfer_msg(i2c_dev, msgs); if (!i2c_dev->atomic) @@ -2253,7 +2253,7 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) snprintf(adap->name, sizeof(adap->name), "STM32F7 I2C(%pa)", &res->start); adap->owner = THIS_MODULE; - adap->timeout = 2 * HZ; + adap->timeout = 8 * HZ; adap->retries = 3; adap->algo = &stm32f7_i2c_algo; adap->dev.parent = &pdev->dev; -- cgit v1.2.3 From 10161b4a791d5c4b7ea16512c1ddb133c3f8f953 Mon Sep 17 00:00:00 2001 From: Weinan Liu Date: Thu, 30 Apr 2026 23:28:51 +0000 Subject: iommu/amd: Fix precedence order in set_dte_passthrough() Bitwise OR | operator has a higher precedence than the ternary ?: operatior. It will be incorrectly evaluated as: new->data[1] |= (FIELD_PREP(...) | dev_data->ats_enabled) ? DTE_FLAG_IOTLB : 0; Wrap the conditional operation in parentheses to enforce the correct evaluation order. Fixes: 93eee2a49c1b ("iommu/amd: Refactor logic to program the host page table in DTE") Signed-off-by: Weinan Liu Reviewed-by: Jason Gunthorpe Reviewed-by: Vasant Hegde Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 157505d96fdd..f78e23f03938 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2149,7 +2149,8 @@ static void set_dte_passthrough(struct iommu_dev_data *dev_data, new->data[0] |= DTE_FLAG_TV | DTE_FLAG_IR | DTE_FLAG_IW; new->data[1] |= FIELD_PREP(DTE_DOMID_MASK, domain->id) | - (dev_data->ats_enabled) ? DTE_FLAG_IOTLB : 0; + (dev_data->ats_enabled ? DTE_FLAG_IOTLB : 0); + } static void set_dte_entry(struct amd_iommu *iommu, -- cgit v1.2.3 From c5f25f5800f56f1754d9eeb3ced7c1e08c29119a Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Fri, 20 Mar 2026 13:23:24 +0100 Subject: dt-bindings: i2c: apple,i2c: Add t8122 compatible The i2c block on the Apple silicon t8122 (M3) SoC is compatible with the existing driver. Add "apple,t8122-i2c" as SoC specific compatible under "apple,t8103-i2c" used by the deriver. Signed-off-by: Janne Grunau Acked-by: Andi Shyti Acked-by: Rob Herring (Arm) Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/apple,i2c.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/i2c/apple,i2c.yaml b/Documentation/devicetree/bindings/i2c/apple,i2c.yaml index 500a965bdb7a..9e59200ad37b 100644 --- a/Documentation/devicetree/bindings/i2c/apple,i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/apple,i2c.yaml @@ -22,7 +22,9 @@ properties: compatible: oneOf: - items: - - const: apple,t6020-i2c + - enum: + - apple,t6020-i2c + - apple,t8122-i2c - const: apple,t8103-i2c - items: - enum: -- cgit v1.2.3 From 8de779dc40d35d39fa07387b6f921eb11df0f511 Mon Sep 17 00:00:00 2001 From: Rajat Gupta Date: Sun, 3 May 2026 20:51:10 -0700 Subject: fbdev: udlfb: add vm_ops to dlfb_ops_mmap to prevent use-after-free dlfb_ops_mmap() uses remap_pfn_range() to map vmalloc framebuffer pages to userspace but sets no vm_ops on the VMA. This means the kernel cannot track active mmaps. When dlfb_realloc_framebuffer() replaces the backing buffer via FBIOPUT_VSCREENINFO, existing mmap PTEs are not invalidated. On USB disconnect, dlfb_ops_destroy() calls vfree() on the old pages while userspace PTEs still reference them, resulting in a use-after-free: the process retains read/write access to freed kernel pages. Add vm_operations_struct with open/close callbacks that maintain an atomic mmap_count on struct dlfb_data. In dlfb_realloc_framebuffer(), check mmap_count and return -EBUSY if the buffer is currently mapped, preventing buffer replacement while userspace holds stale PTEs. Tested with PoC using dummy_hcd + raw_gadget USB device emulation. Signed-off-by: Rajat Gupta Acked-by: Greg Kroah-Hartman Cc: stable@vger.kernel.org Signed-off-by: Helge Deller --- drivers/video/fbdev/udlfb.c | 31 ++++++++++++++++++++++++++++++- include/video/udlfb.h | 1 + 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c index c341d76bc564..fdbb8671a810 100644 --- a/drivers/video/fbdev/udlfb.c +++ b/drivers/video/fbdev/udlfb.c @@ -321,12 +321,32 @@ static int dlfb_set_video_mode(struct dlfb_data *dlfb, return retval; } +static void dlfb_vm_open(struct vm_area_struct *vma) +{ + struct dlfb_data *dlfb = vma->vm_private_data; + + atomic_inc(&dlfb->mmap_count); +} + +static void dlfb_vm_close(struct vm_area_struct *vma) +{ + struct dlfb_data *dlfb = vma->vm_private_data; + + atomic_dec(&dlfb->mmap_count); +} + +static const struct vm_operations_struct dlfb_vm_ops = { + .open = dlfb_vm_open, + .close = dlfb_vm_close, +}; + static int dlfb_ops_mmap(struct fb_info *info, struct vm_area_struct *vma) { unsigned long start = vma->vm_start; unsigned long size = vma->vm_end - vma->vm_start; unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; unsigned long page, pos; + struct dlfb_data *dlfb = info->par; if (info->fbdefio) return fb_deferred_io_mmap(info, vma); @@ -358,6 +378,9 @@ static int dlfb_ops_mmap(struct fb_info *info, struct vm_area_struct *vma) size = 0; } + vma->vm_ops = &dlfb_vm_ops; + vma->vm_private_data = dlfb; + atomic_inc(&dlfb->mmap_count); return 0; } @@ -1176,7 +1199,6 @@ static void dlfb_deferred_vfree(struct dlfb_data *dlfb, void *mem) /* * Assumes &info->lock held by caller - * Assumes no active clients have framebuffer open */ static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info, u32 new_len) { @@ -1188,6 +1210,13 @@ static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info new_len = PAGE_ALIGN(new_len); if (new_len > old_len) { + if (atomic_read(&dlfb->mmap_count) > 0) { + dev_warn(info->dev, + "refusing realloc: %d active mmaps\n", + atomic_read(&dlfb->mmap_count)); + return -EBUSY; + } + /* * Alloc system memory for virtual framebuffer */ diff --git a/include/video/udlfb.h b/include/video/udlfb.h index 58fb5732831a..ab34790d57ec 100644 --- a/include/video/udlfb.h +++ b/include/video/udlfb.h @@ -56,6 +56,7 @@ struct dlfb_data { spinlock_t damage_lock; struct work_struct damage_work; struct fb_ops ops; + atomic_t mmap_count; /* blit-only rendering path metrics, exposed through sysfs */ atomic_t bytes_rendered; /* raw pixel-bytes driver asked to render */ atomic_t bytes_identical; /* saved effort with backbuffer comparison */ -- cgit v1.2.3 From 9998e388be9930c106eb5904c23ecf2162407527 Mon Sep 17 00:00:00 2001 From: Niels Franke Date: Sat, 18 Apr 2026 07:37:19 +0200 Subject: i2c: acpi: Add ELAN0678 to i2c_acpi_force_100khz_device_ids The ELAN0678 touchpad (04F3:3195) found in the Lenovo ThinkPad X13 exhibits excessive smoothing when the I2C bus runs at 400KHz, making the touchpad feel sluggish when plugged into AC power. This is the same issue previously fixed for ELAN06FA. The device's ACPI table (Lenovo TP-R22) specifies 0x00061A80 (400KHz) for the I2cSerialBusV2 descriptor. Forcing the bus to 100KHz eliminates the sluggish behavior. Signed-off-by: Niels Franke Acked-by: Mika Westerberg [wsa: kept the sorting] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 2cbd31f77667..28c0e4884a7f 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -371,6 +371,7 @@ static const struct acpi_device_id i2c_acpi_force_100khz_device_ids[] = { * a 400KHz frequency. The root cause of the issue is not known. */ { "DLL0945", 0 }, + { "ELAN0678", 0 }, { "ELAN06FA", 0 }, {} }; -- cgit v1.2.3 From 32c91e8ee039777d0b95b914633fc6a42607959c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Apr 2026 12:49:10 +0200 Subject: staging: vme_user: fix root device leak on init failure Make sure to deregister and free the root device in case module initialisation fails. Fixes: 658bcdae9c67 ("vme: Adding Fake VME driver") Cc: stable@vger.kernel.org # 4.9 Cc: Martyn Welch Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260424104910.2619349-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vme_user/vme_fake.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/vme_user/vme_fake.c b/drivers/staging/vme_user/vme_fake.c index be4ad47ed526..8abaa3165fbb 100644 --- a/drivers/staging/vme_user/vme_fake.c +++ b/drivers/staging/vme_user/vme_fake.c @@ -1230,6 +1230,8 @@ err_master: err_driver: kfree(fake_bridge); err_struct: + root_device_unregister(vme_root); + return retval; } -- cgit v1.2.3 From 617eb7c0961a8dfcfc811844a6396e406b2923ea Mon Sep 17 00:00:00 2001 From: Mingyu Wang <25181214217@stu.xidian.edu.cn> Date: Mon, 27 Apr 2026 10:57:45 +0800 Subject: i2c: dev: prevent integer overflow in I2C_TIMEOUT ioctl While fuzzing with Syzkaller, a persistent `schedule_timeout: wrong timeout value` warning was observed, accompanied by SMBus controller state machine corruption. The I2C_TIMEOUT ioctl accepts a user-provided timeout in multiples of 10 ms. The user argument is checked against INT_MAX, but it is subsequently multiplied by 10 before being passed to msecs_to_jiffies(). A malicious user can pass a large value (e.g., 429496729) that passes the `arg > INT_MAX` check but overflows when multiplied by 10. This results in a truncated 32-bit unsigned value that bypasses the internal `(int)m < 0` check in `msecs_to_jiffies()`. The truncated value is then assigned to `client->adapter->timeout` (a signed 32-bit int), which is reinterpreted as a negative number. When passed to wait_for_completion_timeout(), this negative value undergoes sign extension to a 64-bit unsigned long, triggering the `schedule_timeout` warning and causing premature returns. This leaves the SMBus state machine in an unrecoverable state, constituting a local Denial of Service (DoS). Fix this by bounding the user argument to `INT_MAX / 10`. Signed-off-by: Mingyu Wang <25181214217@stu.xidian.edu.cn> [wsa: move the comment as well] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-dev.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 7bbe0263411e..ccaac5e29f90 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -487,12 +487,13 @@ static long i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) client->adapter->retries = arg; break; case I2C_TIMEOUT: - if (arg > INT_MAX) + /* + * For historical reasons, user-space sets the timeout value in + * units of 10 ms. + */ + if (arg > INT_MAX / 10) return -EINVAL; - /* For historical reasons, user-space sets the timeout - * value in units of 10 ms. - */ client->adapter->timeout = msecs_to_jiffies(arg * 10); break; default: -- cgit v1.2.3 From bc851db06045a40c18233dd76ef0562d7f8bb6db Mon Sep 17 00:00:00 2001 From: Shyam Sunder Reddy Padira Date: Tue, 14 Apr 2026 12:43:06 +0530 Subject: staging: rtl8723bs: os_dep: avoid NULL pointer dereference in rtw_cbuf_alloc The return value of kzalloc_flex() is used without ensuring that the allocation succeeded, and the pointer is dereferenced unconditionally. Guard the access to the allocated structure to avoid a potential NULL pointer dereference if the allocation fails. Fixes: 980cd426a257 ("staging: rtl8723bs: replace rtw_zmalloc() with kzalloc()") Cc: stable Signed-off-by: Shyam Sunder Reddy Padira Reviewed-by: Dan Carpenter Link: https://patch.msgid.link/20260414071308.4781-2-shyamsunderreddypadira@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/osdep_service.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/os_dep/osdep_service.c b/drivers/staging/rtl8723bs/os_dep/osdep_service.c index 7959daeabc6f..4cfdf7c62344 100644 --- a/drivers/staging/rtl8723bs/os_dep/osdep_service.c +++ b/drivers/staging/rtl8723bs/os_dep/osdep_service.c @@ -194,7 +194,8 @@ struct rtw_cbuf *rtw_cbuf_alloc(u32 size) struct rtw_cbuf *cbuf; cbuf = kzalloc_flex(*cbuf, bufs, size); - cbuf->size = size; + if (cbuf) + cbuf->size = size; return cbuf; } -- cgit v1.2.3 From 37b0dc5e279f35036fb638d1e187197b6c05a76d Mon Sep 17 00:00:00 2001 From: Hongling Zeng Date: Sun, 3 May 2026 12:17:44 +0800 Subject: parisc: Fix IRQ leak in LASI driver When request_irq() succeeds but gsc_common_setup() fails later, the IRQ is never released. Fix this by adding proper error handling with goto labels to ensure resources are released in LIFO order. Detected by Smatch: drivers/parisc/lasi.c:216 lasi_init_chip() warn: 'lasi->gsc_irq.irq' from request_irq() not released on lines: 207. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202604180957.4QdAIxP6-lkp@intel.com/ Signed-off-by: Hongling Zeng Cc: stable@vger.kernel.org Signed-off-by: Helge Deller --- drivers/parisc/lasi.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/parisc/lasi.c b/drivers/parisc/lasi.c index ef6125d83878..a5b80cd5cc37 100644 --- a/drivers/parisc/lasi.c +++ b/drivers/parisc/lasi.c @@ -193,8 +193,7 @@ static int __init lasi_init_chip(struct parisc_device *dev) ret = request_irq(lasi->gsc_irq.irq, gsc_asic_intr, 0, "lasi", lasi); if (ret < 0) { - kfree(lasi); - return ret; + goto err_free; } /* enable IRQ's for devices below LASI */ @@ -203,8 +202,7 @@ static int __init lasi_init_chip(struct parisc_device *dev) /* Done init'ing, register this driver */ ret = gsc_common_setup(dev, lasi); if (ret) { - kfree(lasi); - return ret; + goto err_irq; } gsc_fixup_irqs(dev, lasi, lasi_choose_irq); @@ -214,6 +212,12 @@ static int __init lasi_init_chip(struct parisc_device *dev) SYS_OFF_PRIO_DEFAULT, lasi_power_off, lasi); return ret; + +err_irq: + free_irq(lasi->gsc_irq.irq, lasi); +err_free: + kfree(lasi); + return ret; } static struct parisc_device_id lasi_tbl[] __initdata = { -- cgit v1.2.3 From b47bc7c022ddab7c79a84dd5f3f0d07fe09ec786 Mon Sep 17 00:00:00 2001 From: "Nikola Z. Ivanov" Date: Wed, 15 Apr 2026 23:50:21 +0300 Subject: i2c: Compare the return value of gpiod_get_direction against GPIO_LINE_DIRECTION_OUT The GPIO_LINE_DIRECTION_* definitions have just recently been exposed to gpio consumers.h by breaking them out in a separate defs.h file. Use this to validate the gpio direction instead of the hard-coded literal. Signed-off-by: Nikola Z. Ivanov Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 9c46147e3506..a2132d70fb36 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -445,8 +445,7 @@ static int i2c_init_recovery(struct i2c_adapter *adap) bri->set_scl = set_scl_gpio_value; if (bri->sda_gpiod) { bri->get_sda = get_sda_gpio_value; - /* FIXME: add proper flag instead of '0' once available */ - if (gpiod_get_direction(bri->sda_gpiod) == 0) + if (gpiod_get_direction(bri->sda_gpiod) == GPIO_LINE_DIRECTION_OUT) bri->set_sda = set_sda_gpio_value; } } else if (bri->recover_bus == i2c_generic_scl_recovery) { -- cgit v1.2.3 From 088f65e206087bf903743bd18417261d7a4c9644 Mon Sep 17 00:00:00 2001 From: Ivan Hu Date: Thu, 30 Apr 2026 15:41:07 +0800 Subject: x86/efi: Fix graceful fault handling after FPU softirq changes Since commit d02198550423 ("x86/fpu: Improve crypto performance by making kernel-mode FPU reliably usable in softirqs"), kernel_fpu_begin() calls fpregs_lock() which uses local_bh_disable() instead of the previous preempt_disable(). This sets SOFTIRQ_OFFSET in preempt_count during the entire EFI runtime service call, causing in_interrupt() to return true in normal task context. The graceful page fault handler efi_crash_gracefully_on_page_fault() uses in_interrupt() to bail out for faults in real interrupt context. With SOFTIRQ_OFFSET now set, the handler always bails out, leaving EFI firmware page faults unhandled. This escalates to die() which also sees in_interrupt() as true and calls panic("Fatal exception in interrupt"), resulting in a hard system freeze. On systems with buggy firmware that triggers page faults during EFI runtime calls (e.g., accessing unmapped memory in GetTime()), this causes an unrecoverable hang instead of the expected graceful EFI_ABORTED recovery. Fix by replacing in_interrupt() with !in_task(). This preserves the original intent of bailing for interrupts or NMI faults, while no longer falsely triggering from the FPU code path's local_bh_disable(). Fixes: d02198550423 ("x86/fpu: Improve crypto performance by making kernel-mode FPU reliably usable in softirqs") Cc: Signed-off-by: Ivan Hu [ardb: Sashiko spotted that using 'in_hardirq() || in_nmi()' leaves a window where a softirq may be taken before fpregs_lock() is called, but after efi_rts_work.efi_rts_id has been assigned, and any page faults occurring in that window will then be misidentified as having been caused by the firmware. Instead, use !in_task(), which incorporates in_serving_softirq(). ] Signed-off-by: Ard Biesheuvel --- arch/x86/platform/efi/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index df24ffc6105d..c8f5e094ed9d 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -770,7 +770,7 @@ void efi_crash_gracefully_on_page_fault(unsigned long phys_addr) * If we get an interrupt/NMI while processing an EFI runtime service * then this is a regular OOPS, not an EFI failure. */ - if (in_interrupt()) + if (!in_task()) return; /* -- cgit v1.2.3 From 6036b5067a8199ba7a2dc7b377d4b9dd276d5f9e Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Wed, 15 Apr 2026 01:23:39 +0800 Subject: i2c: stub: Reject I2C block transfers with invalid length The I2C_SMBUS_I2C_BLOCK_DATA case in stub_xfer() uses data->block[0] as the transfer length. The existing check only clamps it to avoid overrunning the chip->words[256] register array, but does not validate it against I2C_SMBUS_BLOCK_MAX (32), which is the limit of the union i2c_smbus_data.block buffer (34 bytes total). The driver is a development/test tool (CONFIG_I2C_STUB=m, not built by default) that must be loaded with a chip_addr= parameter. A local user with access to /dev/i2c-* can issue an I2C_SMBUS ioctl with I2C_SMBUS_I2C_BLOCK_DATA and data->block[0] > 32, causing stub_xfer() to read or write past the end of the union i2c_smbus_data.block buffer: BUG: KASAN: stack-out-of-bounds in stub_xfer (drivers/i2c/i2c-stub.c:223) Read of size 1 at addr ffff88800abcfd92 by task exploit/81 Call Trace: stub_xfer (drivers/i2c/i2c-stub.c:223) __i2c_smbus_xfer (drivers/i2c/i2c-core-smbus.c:593) i2c_smbus_xfer (drivers/i2c/i2c-core-smbus.c:536) i2cdev_ioctl_smbus (drivers/i2c/i2c-dev.c:391) i2cdev_ioctl (drivers/i2c/i2c-dev.c:478) __x64_sys_ioctl (fs/ioctl.c:583) do_syscall_64 (arch/x86/entry/syscall_64.c:94) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) The bug exists because i2c-stub implements .smbus_xfer directly, bypassing the I2C_SMBUS_BLOCK_MAX validation in i2c_smbus_xfer_emulated(). The I2C_SMBUS_BLOCK_DATA case in the same function correctly validates against I2C_SMBUS_BLOCK_MAX, but the I2C_SMBUS_I2C_BLOCK_DATA case does not. Fix by rejecting transfers with data->block[0] == 0 or data->block[0] > I2C_SMBUS_BLOCK_MAX with -EINVAL, consistent with both the I2C_SMBUS_BLOCK_DATA case in the same function and the I2C_SMBUS_I2C_BLOCK_DATA validation in i2c_smbus_xfer_emulated(). Fixes: 4710317891e4 ("i2c-stub: Implement I2C block support") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-stub.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/i2c/i2c-stub.c b/drivers/i2c/i2c-stub.c index fbb0db41b10e..04314e3ed24c 100644 --- a/drivers/i2c/i2c-stub.c +++ b/drivers/i2c/i2c-stub.c @@ -214,6 +214,11 @@ static s32 stub_xfer(struct i2c_adapter *adap, u16 addr, unsigned short flags, * We ignore banks here, because banked chips don't use I2C * block transfers */ + if (data->block[0] == 0 || + data->block[0] > I2C_SMBUS_BLOCK_MAX) { + ret = -EINVAL; + break; + } if (data->block[0] > 256 - command) /* Avoid overrun */ data->block[0] = 256 - command; len = data->block[0]; -- cgit v1.2.3 From 359b626d362127451dbe8a687ac5c240f896ae2e Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Fri, 1 May 2026 14:45:14 -0300 Subject: ALSA: pcmtest: Return -EFAULT on pattern read copy failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pattern_write() reports -EFAULT when copy_from_user() fails, but pattern_read() converts copy_to_user() failures into a zero-length read. That makes a userspace buffer fault look like EOF instead of reporting the actual error. Return -EFAULT from pattern_read() when copying the pattern data to userspace fails, and update the file offset only after a successful copy. Fixes: 315a3d57c64c ("ALSA: Implement the new Virtual PCM Test Driver") Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260501-alsa-pcmtest-pattern-read-efault-v1-1-53e1e8c11dda@gmail.com Signed-off-by: Takashi Iwai --- sound/drivers/pcmtest.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/drivers/pcmtest.c b/sound/drivers/pcmtest.c index 5bfec4c7bf71..7f93557b51ec 100644 --- a/sound/drivers/pcmtest.c +++ b/sound/drivers/pcmtest.c @@ -679,9 +679,9 @@ static ssize_t pattern_read(struct file *file, char __user *u_buff, size_t len, return 0; if (copy_to_user(u_buff, patt_buf->buf + *off, to_read)) - to_read = 0; - else - *off += to_read; + return -EFAULT; + + *off += to_read; return to_read; } -- cgit v1.2.3 From e366ce8b22ec68189ffea2bb8009f7b20d549b0f Mon Sep 17 00:00:00 2001 From: "Uwe Kleine-König (The Capable Hub)" Date: Thu, 30 Apr 2026 17:45:24 +0200 Subject: ASoC: codecs: ab8500: Remove suspicious code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit anc_configure() passed values from drvdata->anc_fir_values[], drvdata->anc_iir_values[] and drvdata->sid_fir_values[] as register offset to snd_soc_component_read(). The content of these arrays are user controllable via the component controls "ANC FIR Coefficients", "ANC IIR Coefficients" and "Sidetone FIR Coefficients" which I assume are supposed to hold register values, not register offsets. Without a datasheet for that component and given that before commit a201aef1a88b ("ASoC: codecs: ab8500: Fix casting of private data") the arrays overlapped with driver control structures and thus didn't work properly since 2012, drop that functionality and let someone repair it who has an actual need for it. With the core functionally removed several code parts become essentially unused and are removed, too. Reported-by: Sashiko (gemini/gemini-3.1-pro-preview) Link: https://sashiko.dev/#/patchset/20260428192255.2294705-2-u.kleine-koenig%40baylibre.com Fixes: 679d7abdc754 ("ASoC: codecs: Add AB8500 codec-driver") Signed-off-by: Uwe Kleine-König (The Capable Hub) Link: https://patch.msgid.link/20260430154524.338912-2-u.kleine-koenig@baylibre.com Signed-off-by: Mark Brown --- sound/soc/codecs/ab8500-codec.c | 304 +--------------------------------------- 1 file changed, 3 insertions(+), 301 deletions(-) diff --git a/sound/soc/codecs/ab8500-codec.c b/sound/soc/codecs/ab8500-codec.c index 8ab2e60f80b4..6e8ef9cd1b31 100644 --- a/sound/soc/codecs/ab8500-codec.c +++ b/sound/soc/codecs/ab8500-codec.c @@ -60,19 +60,6 @@ low before proceeding with the configuration sequence */ #define AB8500_ANC_SM_DELAY 2000 -#define AB8500_FILTER_CONTROL(xname, xcount, xmin, xmax) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname), \ - .info = filter_control_info, \ - .get = filter_control_get, .put = filter_control_put, \ - .private_value = (unsigned long)&(struct filter_control) \ - {.count = xcount, .min = xmin, .max = xmax} } - -struct filter_control { - long min, max; - unsigned int count; - long value[128]; -}; - /* Sidetone states */ static const char * const enum_sid_state[] = { "Unconfigured", @@ -85,45 +72,13 @@ enum sid_state { SID_FIR_CONFIGURED = 2, }; -static const char * const enum_anc_state[] = { - "Unconfigured", - "Apply FIR and IIR", - "FIR and IIR are configured", - "Apply FIR", - "FIR is configured", - "Apply IIR", - "IIR is configured" -}; -enum anc_state { - ANC_UNCONFIGURED = 0, - ANC_APPLY_FIR_IIR = 1, - ANC_FIR_IIR_CONFIGURED = 2, - ANC_APPLY_FIR = 3, - ANC_FIR_CONFIGURED = 4, - ANC_APPLY_IIR = 5, - ANC_IIR_CONFIGURED = 6 -}; - -/* Analog microphones */ -enum amic_idx { - AMIC_IDX_1A, - AMIC_IDX_1B, - AMIC_IDX_2 -}; - /* Private data for AB8500 device-driver */ struct ab8500_codec_drvdata { struct regmap *regmap; struct mutex ctrl_lock; /* Sidetone */ - long *sid_fir_values; enum sid_state sid_status; - - /* ANC */ - long *anc_fir_values; - long *anc_iir_values; - enum anc_state anc_status; }; static inline const char *amic_micbias_str(enum amic_micbias micbias) @@ -1024,89 +979,6 @@ static const struct snd_soc_dapm_route ab8500_dapm_routes_mic2_vamicx[] = { {"MIC2 V-AMICx Enable", NULL, "V-AMIC2"}, }; -/* ANC FIR-coefficients configuration sequence */ -static void anc_fir(struct snd_soc_component *component, - unsigned int bnk, unsigned int par, unsigned int val) -{ - if (par == 0 && bnk == 0) - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ANCFIRUPDATE), - BIT(AB8500_ANCCONF1_ANCFIRUPDATE)); - - snd_soc_component_write(component, AB8500_ANCCONF5, val >> 8 & 0xff); - snd_soc_component_write(component, AB8500_ANCCONF6, val & 0xff); - - if (par == AB8500_ANC_FIR_COEFFS - 1 && bnk == 1) - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ANCFIRUPDATE), 0); -} - -/* ANC IIR-coefficients configuration sequence */ -static void anc_iir(struct snd_soc_component *component, unsigned int bnk, - unsigned int par, unsigned int val) -{ - if (par == 0) { - if (bnk == 0) { - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ANCIIRINIT), - BIT(AB8500_ANCCONF1_ANCIIRINIT)); - usleep_range(AB8500_ANC_SM_DELAY, AB8500_ANC_SM_DELAY*2); - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ANCIIRINIT), 0); - usleep_range(AB8500_ANC_SM_DELAY, AB8500_ANC_SM_DELAY*2); - } else { - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ANCIIRUPDATE), - BIT(AB8500_ANCCONF1_ANCIIRUPDATE)); - } - } else if (par > 3) { - snd_soc_component_write(component, AB8500_ANCCONF7, 0); - snd_soc_component_write(component, AB8500_ANCCONF8, val >> 16 & 0xff); - } - - snd_soc_component_write(component, AB8500_ANCCONF7, val >> 8 & 0xff); - snd_soc_component_write(component, AB8500_ANCCONF8, val & 0xff); - - if (par == AB8500_ANC_IIR_COEFFS - 1 && bnk == 1) - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ANCIIRUPDATE), 0); -} - -/* ANC IIR-/FIR-coefficients configuration sequence */ -static void anc_configure(struct snd_soc_component *component, - bool apply_fir, bool apply_iir) -{ - struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(component->dev); - unsigned int bnk, par, val; - - dev_dbg(component->dev, "%s: Enter.\n", __func__); - - if (apply_fir) - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ENANC), 0); - - snd_soc_component_update_bits(component, AB8500_ANCCONF1, - BIT(AB8500_ANCCONF1_ENANC), BIT(AB8500_ANCCONF1_ENANC)); - - if (apply_fir) - for (bnk = 0; bnk < AB8500_NR_OF_ANC_COEFF_BANKS; bnk++) - for (par = 0; par < AB8500_ANC_FIR_COEFFS; par++) { - val = snd_soc_component_read(component, - drvdata->anc_fir_values[par]); - anc_fir(component, bnk, par, val); - } - - if (apply_iir) - for (bnk = 0; bnk < AB8500_NR_OF_ANC_COEFF_BANKS; bnk++) - for (par = 0; par < AB8500_ANC_IIR_COEFFS; par++) { - val = snd_soc_component_read(component, - drvdata->anc_iir_values[par]); - anc_iir(component, bnk, par, val); - } - - dev_dbg(component->dev, "%s: Exit.\n", __func__); -} - /* * Control-events */ @@ -1130,7 +1002,7 @@ static int sid_status_control_put(struct snd_kcontrol *kcontrol, { struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(component->dev); - unsigned int param, sidconf, val; + unsigned int param, sidconf; int status = 1; dev_dbg(component->dev, "%s: Enter\n", __func__); @@ -1159,9 +1031,8 @@ static int sid_status_control_put(struct snd_kcontrol *kcontrol, snd_soc_component_write(component, AB8500_SIDFIRADR, 0); for (param = 0; param < AB8500_SID_FIR_COEFFS; param++) { - val = snd_soc_component_read(component, drvdata->sid_fir_values[param]); - snd_soc_component_write(component, AB8500_SIDFIRCOEF1, val >> 8 & 0xff); - snd_soc_component_write(component, AB8500_SIDFIRCOEF2, val & 0xff); + snd_soc_component_write(component, AB8500_SIDFIRCOEF1, 0); + snd_soc_component_write(component, AB8500_SIDFIRCOEF2, 0); } snd_soc_component_update_bits(component, AB8500_SIDFIRADR, @@ -1180,136 +1051,6 @@ out: return status; } -static int anc_status_control_get(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(component->dev); - - mutex_lock(&drvdata->ctrl_lock); - ucontrol->value.enumerated.item[0] = drvdata->anc_status; - mutex_unlock(&drvdata->ctrl_lock); - - return 0; -} - -static int anc_status_control_put(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct snd_soc_dapm_context *dapm = snd_soc_component_to_dapm(component); - struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(component->dev); - struct device *dev = component->dev; - bool apply_fir, apply_iir; - unsigned int req; - int status; - - dev_dbg(dev, "%s: Enter.\n", __func__); - - mutex_lock(&drvdata->ctrl_lock); - - req = ucontrol->value.enumerated.item[0]; - if (req >= ARRAY_SIZE(enum_anc_state)) { - status = -EINVAL; - goto cleanup; - } - if (req != ANC_APPLY_FIR_IIR && req != ANC_APPLY_FIR && - req != ANC_APPLY_IIR) { - dev_err(dev, "%s: ERROR: Unsupported status to set '%s'!\n", - __func__, enum_anc_state[req]); - status = -EINVAL; - goto cleanup; - } - apply_fir = req == ANC_APPLY_FIR || req == ANC_APPLY_FIR_IIR; - apply_iir = req == ANC_APPLY_IIR || req == ANC_APPLY_FIR_IIR; - - status = snd_soc_dapm_force_enable_pin(dapm, "ANC Configure Input"); - if (status < 0) { - dev_err(dev, - "%s: ERROR: Failed to enable power (status = %d)!\n", - __func__, status); - goto cleanup; - } - snd_soc_dapm_sync(dapm); - - anc_configure(component, apply_fir, apply_iir); - - if (apply_fir) { - if (drvdata->anc_status == ANC_IIR_CONFIGURED) - drvdata->anc_status = ANC_FIR_IIR_CONFIGURED; - else if (drvdata->anc_status != ANC_FIR_IIR_CONFIGURED) - drvdata->anc_status = ANC_FIR_CONFIGURED; - } - if (apply_iir) { - if (drvdata->anc_status == ANC_FIR_CONFIGURED) - drvdata->anc_status = ANC_FIR_IIR_CONFIGURED; - else if (drvdata->anc_status != ANC_FIR_IIR_CONFIGURED) - drvdata->anc_status = ANC_IIR_CONFIGURED; - } - - status = snd_soc_dapm_disable_pin(dapm, "ANC Configure Input"); - snd_soc_dapm_sync(dapm); - -cleanup: - mutex_unlock(&drvdata->ctrl_lock); - - if (status < 0) - dev_err(dev, "%s: Unable to configure ANC! (status = %d)\n", - __func__, status); - - dev_dbg(dev, "%s: Exit.\n", __func__); - - return (status < 0) ? status : 1; -} - -static int filter_control_info(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_info *uinfo) -{ - struct filter_control *fc = - (struct filter_control *)kcontrol->private_value; - - uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; - uinfo->count = fc->count; - uinfo->value.integer.min = fc->min; - uinfo->value.integer.max = fc->max; - - return 0; -} - -static int filter_control_get(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct ab8500_codec_drvdata *drvdata = snd_soc_component_get_drvdata(component); - struct filter_control *fc = - (struct filter_control *)kcontrol->private_value; - unsigned int i; - - mutex_lock(&drvdata->ctrl_lock); - for (i = 0; i < fc->count; i++) - ucontrol->value.integer.value[i] = fc->value[i]; - mutex_unlock(&drvdata->ctrl_lock); - - return 0; -} - -static int filter_control_put(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct ab8500_codec_drvdata *drvdata = snd_soc_component_get_drvdata(component); - struct filter_control *fc = - (struct filter_control *)kcontrol->private_value; - unsigned int i; - - mutex_lock(&drvdata->ctrl_lock); - for (i = 0; i < fc->count; i++) - fc->value[i] = ucontrol->value.integer.value[i]; - mutex_unlock(&drvdata->ctrl_lock); - - return 0; -} - /* * Controls - Non-DAPM ASoC */ @@ -1597,7 +1338,6 @@ static SOC_ENUM_SINGLE_DECL(soc_enum_bfifomast, static SOC_ENUM_SINGLE_EXT_DECL(soc_enum_sidstate, enum_sid_state); /* ANC */ -static SOC_ENUM_SINGLE_EXT_DECL(soc_enum_ancstate, enum_anc_state); static struct snd_kcontrol_new ab8500_ctrls[] = { /* Charge pump */ @@ -1873,8 +1613,6 @@ static struct snd_kcontrol_new ab8500_ctrls[] = { AB8500_FIFOCONF6_BFIFOSAMPLE_MAX, 0), /* ANC */ - SOC_ENUM_EXT("ANC Status", soc_enum_ancstate, - anc_status_control_get, anc_status_control_put), SOC_SINGLE_XR_SX("ANC Warp Delay Shift", AB8500_ANCCONF2, 1, AB8500_ANCCONF2_SHIFT, AB8500_ANCCONF2_MIN, AB8500_ANCCONF2_MAX, 0), @@ -1895,21 +1633,6 @@ static struct snd_kcontrol_new ab8500_ctrls[] = { AB8500_SIDFIRADR, AB8500_SIDFIRADR_FIRSIDSET, 0), }; -static struct snd_kcontrol_new ab8500_filter_controls[] = { - AB8500_FILTER_CONTROL("ANC FIR Coefficients", AB8500_ANC_FIR_COEFFS, - AB8500_ANC_FIR_COEFF_MIN, AB8500_ANC_FIR_COEFF_MAX), - AB8500_FILTER_CONTROL("ANC IIR Coefficients", AB8500_ANC_IIR_COEFFS, - AB8500_ANC_IIR_COEFF_MIN, AB8500_ANC_IIR_COEFF_MAX), - AB8500_FILTER_CONTROL("Sidetone FIR Coefficients", - AB8500_SID_FIR_COEFFS, AB8500_SID_FIR_COEFF_MIN, - AB8500_SID_FIR_COEFF_MAX) -}; -enum ab8500_filter { - AB8500_FILTER_ANC_FIR = 0, - AB8500_FILTER_ANC_IIR = 1, - AB8500_FILTER_SID_FIR = 2, -}; - /* * Extended interface for codec-driver */ @@ -2454,7 +2177,6 @@ static int ab8500_codec_probe(struct snd_soc_component *component) struct device_node *np = dev->of_node; struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(dev); struct ab8500_codec_platform_data codec_pdata; - struct filter_control *fc; int status; dev_dbg(dev, "%s: Enter.\n", __func__); @@ -2486,25 +2208,6 @@ static int ab8500_codec_probe(struct snd_soc_component *component) snd_soc_component_write(component, AB8500_SHORTCIRCONF, BIT(AB8500_SHORTCIRCONF_HSZCDDIS)); - /* Add filter controls */ - status = snd_soc_add_component_controls(component, ab8500_filter_controls, - ARRAY_SIZE(ab8500_filter_controls)); - if (status < 0) { - dev_err(dev, - "%s: failed to add ab8500 filter controls (%d).\n", - __func__, status); - return status; - } - fc = (struct filter_control *) - ab8500_filter_controls[AB8500_FILTER_ANC_FIR].private_value; - drvdata->anc_fir_values = (long *)fc->value; - fc = (struct filter_control *) - ab8500_filter_controls[AB8500_FILTER_ANC_IIR].private_value; - drvdata->anc_iir_values = (long *)fc->value; - fc = (struct filter_control *) - ab8500_filter_controls[AB8500_FILTER_SID_FIR].private_value; - drvdata->sid_fir_values = (long *)fc->value; - snd_soc_dapm_disable_pin(dapm, "ANC Configure Input"); mutex_init(&drvdata->ctrl_lock); @@ -2538,7 +2241,6 @@ static int ab8500_codec_driver_probe(struct platform_device *pdev) if (!drvdata) return -ENOMEM; drvdata->sid_status = SID_UNCONFIGURED; - drvdata->anc_status = ANC_UNCONFIGURED; dev_set_drvdata(&pdev->dev, drvdata); drvdata->regmap = devm_regmap_init(&pdev->dev, NULL, &pdev->dev, -- cgit v1.2.3 From 8acd2d7e0889ac62bc102bd7b648cd7bee04f902 Mon Sep 17 00:00:00 2001 From: Myeonghun Pak Date: Fri, 24 Apr 2026 20:25:18 +0900 Subject: drm/qxl: Fix missing KMS poll cleanup drm_kms_helper_poll_init() initializes the output polling work and enables polling for the DRM device. qxl enables polling before calling drm_dev_register(), but the drm_dev_register() failure path tears down the modeset and device state without disabling the polling helper. The remove path also unregisters and shuts down the DRM device without first disabling the polling helper. Add matching drm_kms_helper_poll_fini() calls in both paths so the delayed polling work is cancelled before qxl tears down the associated modeset/device state. Signed-off-by: Myeonghun Pak Reviewed-by: Thomas Zimmermann Fixes: 5ff91e442652 ("qxl: use drm helper hotplug support") Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/20260424112543.57819-1-mhun512@gmail.com --- drivers/gpu/drm/qxl/qxl_drv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index 2bbb1168a3ff..1e6a2392d7c6 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -118,12 +118,13 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Complete initialization. */ ret = drm_dev_register(&qdev->ddev, ent->driver_data); if (ret) - goto modeset_cleanup; + goto poll_fini; drm_client_setup(&qdev->ddev, NULL); return 0; -modeset_cleanup: +poll_fini: + drm_kms_helper_poll_fini(&qdev->ddev); qxl_modeset_fini(qdev); unload: qxl_device_fini(qdev); @@ -154,6 +155,7 @@ qxl_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); + drm_kms_helper_poll_fini(dev); drm_dev_unregister(dev); drm_atomic_helper_shutdown(dev); if (pci_is_vga(pdev) && pdev->revision < 5) -- cgit v1.2.3 From c28c22c8cfbd43f2ad71a157324d9fbebc0d0f2e Mon Sep 17 00:00:00 2001 From: Francesco Lavra Date: Tue, 10 Feb 2026 18:35:45 +0100 Subject: drm/fb-helper: Fix clipping when damage area spans a single scanline When the damage area resulting from a dirty memory range spans a single scanline, the width of the rectangle is calculated dynamically because it may not coincide with the framebuffer width. If the dirty range ends exactly at the end of the scanline, the `bit_end` variable is incorrectly assigned a 0 value, which results in a bogus clip rectangle where the x2 coordinate is 0. This prevents the dirty scanline from being flushed to the hardware. Change the calculation of the `bit_end` value to fix the x2 coordinate value in the above edge case. Fixes: ded74cafeea9 ("drm/fb-helper: Clip damage area horizontally") Signed-off-by: Francesco Lavra Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/20260210173545.733937-1-flavra@baylibre.com --- drivers/gpu/drm/drm_fb_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index a80a335f4148..1541fc8a9ac2 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -490,7 +490,7 @@ static void drm_fb_helper_memory_range_to_clip(struct fb_info *info, off_t off, * the number of horizontal pixels that need an update. */ off_t bit_off = (off % line_length) * 8; - off_t bit_end = (end % line_length) * 8; + off_t bit_end = bit_off + len * 8; x1 = bit_off / info->var.bits_per_pixel; x2 = DIV_ROUND_UP(bit_end, info->var.bits_per_pixel); -- cgit v1.2.3 From fb7415f2ab0e3c818254cbf5fb0afda71bef4333 Mon Sep 17 00:00:00 2001 From: Bruce Johnston Date: Tue, 28 Apr 2026 14:39:31 -0400 Subject: dm vdo: use GFP_NOIO for blkdev_issue_zeroout on format path GFP_NOWAIT is inappropriate when blkdev_issue_zeroout may sleep and bio_alloc can fail under pressure; use GFP_NOIO for clear_partition and vdo_clear_layout zeroout calls. Signed-off-by: Bruce Johnston Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka Fixes: fc1d43826702 ("dm vdo: save the formatted metadata to disk") --- drivers/md/dm-vdo/vdo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-vdo/vdo.c b/drivers/md/dm-vdo/vdo.c index 7bec2418c121..d0d4e0262be2 100644 --- a/drivers/md/dm-vdo/vdo.c +++ b/drivers/md/dm-vdo/vdo.c @@ -965,7 +965,7 @@ static int __must_check clear_partition(struct vdo *vdo, enum partition_id id) return blkdev_issue_zeroout(vdo_get_backing_device(vdo), partition->offset * VDO_SECTORS_PER_BLOCK, partition->count * VDO_SECTORS_PER_BLOCK, - GFP_NOWAIT, 0); + GFP_NOIO, 0); } int vdo_clear_layout(struct vdo *vdo) @@ -976,7 +976,7 @@ int vdo_clear_layout(struct vdo *vdo) result = blkdev_issue_zeroout(vdo_get_backing_device(vdo), VDO_SECTORS_PER_BLOCK, VDO_SECTORS_PER_BLOCK, - GFP_NOWAIT, 0); + GFP_NOIO, 0); if (result != VDO_SUCCESS) return result; -- cgit v1.2.3 From ec0611868f2fcf29e4c2bebdc6702d3e1f272fec Mon Sep 17 00:00:00 2001 From: Troy Mitchell Date: Wed, 29 Apr 2026 17:00:50 +0800 Subject: ASoC: spacemit: fix RX DMA params not set when TX is running When TX is already running (SSCR_SSE is set), the hw_params callback returns early before setting up DMA parameters for the RX stream. This prevents the capture path from configuring its DMA data properly. Move the SSCR_SSE check after DMA parameter setup and format constraints, so both TX and RX streams get their DMA configuration regardless of whether the hardware is already enabled. The early return now only skips the register writes that would disrupt an active stream. Fixes: fce217449075 ("ASoC: spacemit: add i2s support for K1 SoC") Signed-off-by: Troy Mitchell Link: https://patch.msgid.link/20260429-k1-i2s-fix-v2-1-8d67835aaddc@linux.spacemit.com Signed-off-by: Mark Brown --- sound/soc/spacemit/k1_i2s.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sound/soc/spacemit/k1_i2s.c b/sound/soc/spacemit/k1_i2s.c index 43481f387c44..5420ca2aefbd 100644 --- a/sound/soc/spacemit/k1_i2s.c +++ b/sound/soc/spacemit/k1_i2s.c @@ -148,10 +148,6 @@ static int spacemit_i2s_hw_params(struct snd_pcm_substream *substream, u32 val; int ret; - val = readl(i2s->base + SSCR); - if (val & SSCR_SSE) - return 0; - dma_data = &i2s->playback_dma_data; if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) @@ -199,6 +195,9 @@ static int spacemit_i2s_hw_params(struct snd_pcm_substream *substream, } val = readl(i2s->base + SSCR); + if (val & SSCR_SSE) + return 0; + val &= ~SSCR_DW_32BYTE; val |= data_width; writel(val, i2s->base + SSCR); -- cgit v1.2.3 From c64e77490b7e5d9dec738850f18878edb07e0f13 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 29 Apr 2026 11:53:15 +0100 Subject: ASoC: cs35l56: Fix hibernate write in runtime resume error path The error path of cs35l56_runtime_resume_common() should only write the hibernation sequence if can_hibernate is true. Something has already gone badly wrong if we ever reach the error path. But triggering hibernate on hardware that does not support it is likely to make the situation unrecoverable without a full reboot because there might not be any hardware signal to exit hibernate. Fixes: a47cf4dac7dc ("ASoC: cs35l56: Change hibernate sequence to use allow auto hibernate") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260429105315.2438298-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-shared.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 033e56d5e9db..ea724101cdd1 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -851,9 +851,11 @@ out_sync: err: regcache_cache_only(cs35l56_base->regmap, true); - regmap_multi_reg_write_bypassed(cs35l56_base->regmap, - cs35l56_hibernate_seq, - ARRAY_SIZE(cs35l56_hibernate_seq)); + if (cs35l56_base->can_hibernate) { + regmap_multi_reg_write_bypassed(cs35l56_base->regmap, + cs35l56_hibernate_seq, + ARRAY_SIZE(cs35l56_hibernate_seq)); + } return ret; } -- cgit v1.2.3 From 0e60d96616640ffcf51b81a87c71e30d92385a93 Mon Sep 17 00:00:00 2001 From: Bob Song Date: Thu, 30 Apr 2026 09:49:20 +0800 Subject: ASoC: amd: yc: Add DMI quirk for MSI Bravo 15 C7VE The laptop requires a quirk ID to enable its internal microphone. Add it to the DMI quirk table. Reported-by: gannovera Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218402 Signed-off-by: Bob Song Link: https://patch.msgid.link/20260430014920.141276-1-songxiebing@kylinos.cn Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index c5cf45881416..dde1a144e4d4 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -479,6 +479,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 B7ED"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 C7VE"), + } + }, { .driver_data = &acp6x_card, .matches = { -- cgit v1.2.3 From 0f9bfb84b3f0fe1406b2555fc11b45283ea21644 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 30 Apr 2026 11:11:34 +0100 Subject: ASoC: cs35l56: Fix out-of-bounds in dev_err() in cs35l56_read_onchip_spkid() Remove the incorrect use of onchip_spkid_gpios[i] in the dev_err() after regmap_read() of CS35L56_GPIO_STATUS1 returns an error. This dev_err() was incorrectly copy-pasted from one inside the for-loop, where i was valid. The read of CS35L56_GPIO_STATUS1 isn't for a specific GPIO register, so the use of onchip_spkid_gpios[i] in the error message is both irrelevant and out-of-bounds here. Fixes: 4d1e3e2c404d ("ASoC: cs35l56: Support for reading speaker ID from on-chip GPIOs") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260430101134.2655938-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-shared.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index ea724101cdd1..795e2764d67e 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -1730,8 +1730,7 @@ int cs35l56_read_onchip_spkid(struct cs35l56_base *cs35l56_base) ret = regmap_read(regmap, CS35L56_GPIO_STATUS1, &val); if (ret) { - dev_err(cs35l56_base->dev, "GPIO%d status read failed: %d\n", - cs35l56_base->onchip_spkid_gpios[i] + 1, ret); + dev_err(cs35l56_base->dev, "GPIO status read failed: %d\n", ret); return ret; } -- cgit v1.2.3 From d63c219b7ff39f897da10c160a2edef76320f16c Mon Sep 17 00:00:00 2001 From: Tommaso Soncin Date: Wed, 29 Apr 2026 18:08:57 +0200 Subject: ASoC: amd: yc: Add HP OMEN Gaming Laptop 16-ap0xxx product line in quirk table Add a DMI quirk for the HP OMEN Gaming Laptop 16-ap0xxx line fixing the issue where the internal microphone was not detected. Cc: stable@vger.kernel.org Signed-off-by: Tommaso Soncin Link: https://patch.msgid.link/20260429160858.538986-1-soncintommaso@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index dde1a144e4d4..7a637d6b5576 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -59,6 +59,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Laptop 15-fc0xxx"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "OMEN Gaming Laptop 16-ap0xxx"), + } + }, { .driver_data = &acp6x_card, .matches = { @@ -675,6 +682,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "8EE4"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8E35"), + } + }, { .driver_data = &acp6x_card, .matches = { -- cgit v1.2.3 From 56d5a9eaf60af5c824a33a83e1468aa143627a62 Mon Sep 17 00:00:00 2001 From: Derek Fang Date: Thu, 30 Apr 2026 20:10:43 +0800 Subject: ASoC: sdw_utils: avoid the SDCA companion function not supported failure Treat the companion amp as generic AMP until full support for companion amp is added. Signed-off-by: Derek Fang Reviewed-by: Charles Keepax Signed-off-by: Bard Liao Link: https://patch.msgid.link/20260430121043.552241-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index 1637cc3f3d59..849ae876d7a4 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -1608,6 +1608,7 @@ int asoc_sdw_get_dai_type(u32 type) switch (type) { case SDCA_FUNCTION_TYPE_SMART_AMP: case SDCA_FUNCTION_TYPE_SIMPLE_AMP: + case SDCA_FUNCTION_TYPE_COMPANION_AMP: return SOC_SDW_DAI_TYPE_AMP; case SDCA_FUNCTION_TYPE_SMART_MIC: case SDCA_FUNCTION_TYPE_SIMPLE_MIC: -- cgit v1.2.3 From e8446a4a574d19f0fb39c06af15dbc5165079474 Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Tue, 28 Apr 2026 00:07:08 -0300 Subject: ASoC: fsl_xcvr: Fix event generation for cached controls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ALSA controls should return 1 from a put callback when the control value changes. fsl_xcvr_capds_put() and fsl_xcvr_tx_cs_put() both update cached control data but always return 0, so ALSA suppresses change notifications for the Capabilities Data Structure and playback IEC958 channel status controls. Compare the old and new cached values before copying the new data, and return whether the control value changed. Fixes: 28564486866f ("ASoC: fsl_xcvr: Add XCVR ASoC CPU DAI driver") Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260428-asoc-fsl-xcvr-event-generation-v1-1-f21cf0812c4f@gmail.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_xcvr.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c index ee16cf681488..6677d3bf36ec 100644 --- a/sound/soc/fsl/fsl_xcvr.c +++ b/sound/soc/fsl/fsl_xcvr.c @@ -228,10 +228,14 @@ static int fsl_xcvr_capds_put(struct snd_kcontrol *kcontrol, { struct snd_soc_dai *dai = snd_kcontrol_chip(kcontrol); struct fsl_xcvr *xcvr = snd_soc_dai_get_drvdata(dai); + int changed; - memcpy(xcvr->cap_ds, ucontrol->value.bytes.data, FSL_XCVR_CAPDS_SIZE); + changed = memcmp(xcvr->cap_ds, ucontrol->value.bytes.data, + sizeof(xcvr->cap_ds)) != 0; + memcpy(xcvr->cap_ds, ucontrol->value.bytes.data, + sizeof(xcvr->cap_ds)); - return 0; + return changed; } static struct snd_kcontrol_new fsl_xcvr_earc_capds_kctl = { @@ -1040,10 +1044,15 @@ static int fsl_xcvr_tx_cs_put(struct snd_kcontrol *kcontrol, { struct snd_soc_dai *dai = snd_kcontrol_chip(kcontrol); struct fsl_xcvr *xcvr = snd_soc_dai_get_drvdata(dai); + int changed; - memcpy(xcvr->tx_iec958.status, ucontrol->value.iec958.status, 24); + changed = memcmp(xcvr->tx_iec958.status, + ucontrol->value.iec958.status, + sizeof(xcvr->tx_iec958.status)) != 0; + memcpy(xcvr->tx_iec958.status, ucontrol->value.iec958.status, + sizeof(xcvr->tx_iec958.status)); - return 0; + return changed; } static struct snd_kcontrol_new fsl_xcvr_rx_ctls[] = { -- cgit v1.2.3 From 24e0fd8b852062d5e8a740f7945eaa26818adce8 Mon Sep 17 00:00:00 2001 From: John Madieu Date: Fri, 1 May 2026 13:59:49 +0000 Subject: spi: imx: Fix precedence bug in spi_imx_dma_max_wml_find() The watermark search in spi_imx_dma_max_wml_find() reads: if (!dma_data->dma_len % (i * bytes_per_word)) break; The unary ! binds tighter than %, so this parses as: if ((!dma_data->dma_len) % (i * bytes_per_word)) break; !dma_data->dma_len is 0 or 1, and `0 % x == 0` for any x; `1 % x` is 0 unless x == 1. The condition is therefore false in every case except dma_len != 0 with i * bytes_per_word == 1, i.e. i == 1 and bytes_per_word == 1. The loop almost always falls through to its end, leaving i == 0, which the post-loop fallback rewrites to 1: if (i == 0) i = 1; So spi_imx->wml ends up at 1 for essentially every DMA transfer, defeating the entire purpose of the function. The DMA engine then requests service after every single FIFO word instead of using multi-word bursts, hurting throughput on every DMA-capable variant. Add the missing parentheses so the modulo is computed first, then negated: if (!(dma_data->dma_len % (i * bytes_per_word))) break; Fixes: faa8e404ad8e ("spi: imx: support dynamic burst length for ECSPI DMA mode") Signed-off-by: John Madieu Reviewed-by: Frank Li Link: https://patch.msgid.link/20260501135951.2416527-2-john.madieu@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index e5c907c45b87..7ae8078c10ef 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1836,7 +1836,7 @@ static void spi_imx_dma_max_wml_find(struct spi_imx_data *spi_imx, unsigned int i; for (i = spi_imx->devtype_data->fifo_size / 2; i > 0; i--) { - if (!dma_data->dma_len % (i * bytes_per_word)) + if (!(dma_data->dma_len % (i * bytes_per_word))) break; } /* Use 1 as wml in case no available burst length got */ -- cgit v1.2.3 From f5b5548255040ec3bef05bcb1e9c9c3614dfa7db Mon Sep 17 00:00:00 2001 From: John Madieu Date: Fri, 1 May 2026 13:59:50 +0000 Subject: spi: imx: Fix UAF on package-1 prepare failure in spi_imx_dma_data_prepare() When transfer->len exceeds MX51_ECSPI_CTRL_MAX_BURST and is not a multiple of it, spi_imx_dma_data_prepare() splits the transfer into two DMA packages. If preparing the second package fails: ret = spi_imx_dma_tx_data_handle(spi_imx, &spi_imx->dma_data[1], transfer->tx_buf + spi_imx->dma_data[0].data_len, false); if (ret) { kfree(spi_imx->dma_data[0].dma_tx_buf); kfree(spi_imx->dma_data[0].dma_rx_buf); kfree(spi_imx->dma_data); } } return 0; the function frees the package-0 buffers and the dma_data array, then falls through to `return 0`, telling the caller the prepare succeeded. The caller then dereferences the freed dma_data array, producing a use-after-free. Return the error from the failure path so the caller takes its existing failure branch. Fixes: faa8e404ad8e ("spi: imx: support dynamic burst length for ECSPI DMA mode") Signed-off-by: John Madieu Reviewed-by: Frank Li Link: https://patch.msgid.link/20260501135951.2416527-3-john.madieu@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 7ae8078c10ef..4e3dbd01d619 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1709,6 +1709,7 @@ static int spi_imx_dma_data_prepare(struct spi_imx_data *spi_imx, kfree(spi_imx->dma_data[0].dma_tx_buf); kfree(spi_imx->dma_data[0].dma_rx_buf); kfree(spi_imx->dma_data); + return ret; } } -- cgit v1.2.3 From 894e04b7116297a6529e0c4ed90e3eb160939805 Mon Sep 17 00:00:00 2001 From: John Madieu Date: Fri, 1 May 2026 13:59:51 +0000 Subject: spi: imx: Propagate prepare_transfer() error from spi_imx_setupxfer() spi_imx_setupxfer() calls the per-variant prepare_transfer() callback and returns 0 unconditionally: spi_imx->devtype_data->prepare_transfer(spi_imx, spi, t); return 0; mx51_ecspi_prepare_transfer() can return -EINVAL when the requested word_delay does not fit in MX51_ECSPI_PERIOD_MASK. The error is detected after a partial set of register writes (CTRL: BL, clkdiv, SMC), so the controller is left in a partially-configured state and the transfer is then submitted as if setup succeeded. Propagate the return value. The other variants' prepare_transfer callbacks all return 0, so this is a no-op for them. Fixes: a3bb4e663df3 ("spi: imx: support word delay") Signed-off-by: John Madieu Reviewed-by: Frank Li Link: https://patch.msgid.link/20260501135951.2416527-4-john.madieu@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 4e3dbd01d619..480d1e8b281f 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1382,9 +1382,7 @@ static int spi_imx_setupxfer(struct spi_device *spi, spi_imx->target_burst = t->len; } - spi_imx->devtype_data->prepare_transfer(spi_imx, spi, t); - - return 0; + return spi_imx->devtype_data->prepare_transfer(spi_imx, spi, t); } static void spi_imx_sdma_exit(struct spi_imx_data *spi_imx) -- cgit v1.2.3 From 7672749e1496215e8683ce57cf323119033954cf Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 30 Apr 2026 11:10:18 +0100 Subject: spi: microchip-core-qspi: control built-in cs manually The coreQSPI IP supports only a single chip select, which is automagically operated by the hardware - set low when the transmit buffer first gets written to and set high when the number of bytes written to the TOTALBYTES field of the FRAMES register have been sent on the bus. Additional devices must use GPIOs for their chip selects. It was reported to me that if there are two devices attached to this QSPI controller that the in-built chip select is set low while linux tries to access the device attached to the GPIO. This went undetected as the boards that connected multiple devices to the SPI controller all exclusively used GPIOs for chip selects, not relying on the built-in chip select at all. It turns out that this was because the built-in chip select, when controlled automagically, is set low when active and high when inactive, thereby ruling out its use for active-high devices or devices that need to transmit with the chip select disabled. Modify the driver so that it controls chip select directly, retaining the behaviour for mem_ops of setting the chip select active for the entire duration of the transfer in the exec_op callback. For regular transfers, implement the set_cs callback for the core to use. As part of this, the existing setup callback, mchp_coreqspi_setup_op(), is removed. Modifying the CLKIDLE field is not safe to do during operation when there are multiple devices, so this code is removed entirely. Setting the MASTER and ENABLE fields is something that can be done once at probe, it doesn't need to be re-run for each device. Instead the new setup callback sets the built-in chip select to its inactive state for active-low devices, as the reset value of the chip select in software controlled mode is low. Fixes: 8f9cf02c88528 ("spi: microchip-core-qspi: Add regular transfers") Fixes: 8596124c4c1bc ("spi: microchip-core-qspi: Add support for microchip fpga qspi controllers") CC: stable@vger.kernel.org Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260430-hamstring-busload-f941d0347b5e@spud Signed-off-by: Mark Brown --- drivers/spi/spi-microchip-core-qspi.c | 79 ++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 15 deletions(-) diff --git a/drivers/spi/spi-microchip-core-qspi.c b/drivers/spi/spi-microchip-core-qspi.c index eab059fb0bc2..ffa0f33a0ae0 100644 --- a/drivers/spi/spi-microchip-core-qspi.c +++ b/drivers/spi/spi-microchip-core-qspi.c @@ -74,6 +74,13 @@ #define STATUS_FLAGSX4 BIT(8) #define STATUS_MASK GENMASK(8, 0) +/* + * QSPI Direct Access register defines + */ +#define DIRECT_ACCESS_EN_SSEL BIT(0) +#define DIRECT_ACCESS_OP_SSEL BIT(1) +#define DIRECT_ACCESS_OP_SSEL_SHIFT 1 + #define BYTESUPPER_MASK GENMASK(31, 16) #define BYTESLOWER_MASK GENMASK(15, 0) @@ -158,6 +165,38 @@ static int mchp_coreqspi_set_mode(struct mchp_coreqspi *qspi, const struct spi_m return 0; } +static void mchp_coreqspi_set_cs(struct spi_device *spi, bool enable) +{ + struct mchp_coreqspi *qspi = spi_controller_get_devdata(spi->controller); + u32 val; + + val = readl(qspi->regs + REG_DIRECT_ACCESS); + + val &= ~DIRECT_ACCESS_OP_SSEL; + val |= !enable << DIRECT_ACCESS_OP_SSEL_SHIFT; + + writel(val, qspi->regs + REG_DIRECT_ACCESS); +} + +static int mchp_coreqspi_setup(struct spi_device *spi) +{ + struct mchp_coreqspi *qspi = spi_controller_get_devdata(spi->controller); + u32 val; + + /* + * Active low devices need to be specifically set to their inactive + * states during probe. + */ + if (spi->mode & SPI_CS_HIGH) + return 0; + + val = readl(qspi->regs + REG_DIRECT_ACCESS); + val |= DIRECT_ACCESS_OP_SSEL; + writel(val, qspi->regs + REG_DIRECT_ACCESS); + + return 0; +} + static inline void mchp_coreqspi_read_op(struct mchp_coreqspi *qspi) { u32 control, data; @@ -380,19 +419,6 @@ static int mchp_coreqspi_setup_clock(struct mchp_coreqspi *qspi, struct spi_devi return 0; } -static int mchp_coreqspi_setup_op(struct spi_device *spi_dev) -{ - struct spi_controller *ctlr = spi_dev->controller; - struct mchp_coreqspi *qspi = spi_controller_get_devdata(ctlr); - u32 control = readl_relaxed(qspi->regs + REG_CONTROL); - - control |= (CONTROL_MASTER | CONTROL_ENABLE); - control &= ~CONTROL_CLKIDLE; - writel_relaxed(control, qspi->regs + REG_CONTROL); - - return 0; -} - static inline void mchp_coreqspi_config_op(struct mchp_coreqspi *qspi, const struct spi_mem_op *op) { u32 idle_cycles = 0; @@ -483,6 +509,7 @@ static int mchp_coreqspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *o reinit_completion(&qspi->data_completion); mchp_coreqspi_config_op(qspi, op); + mchp_coreqspi_set_cs(mem->spi, true); if (op->cmd.opcode) { qspi->txbuf = &opcode; qspi->rxbuf = NULL; @@ -523,6 +550,7 @@ static int mchp_coreqspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *o err = -ETIMEDOUT; error: + mchp_coreqspi_set_cs(mem->spi, false); mutex_unlock(&qspi->op_lock); mchp_coreqspi_disable_ints(qspi); @@ -686,6 +714,7 @@ static int mchp_coreqspi_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; int ret; + u32 num_cs, val; ctlr = devm_spi_alloc_host(&pdev->dev, sizeof(*qspi)); if (!ctlr) @@ -718,10 +747,18 @@ static int mchp_coreqspi_probe(struct platform_device *pdev) return ret; } + /* + * The IP core only has a single CS, any more have to be provided via + * gpios + */ + if (of_property_read_u32(pdev->dev.of_node, "num-cs", &num_cs)) + num_cs = 1; + + ctlr->num_chipselect = num_cs; + ctlr->bits_per_word_mask = SPI_BPW_MASK(8); ctlr->mem_ops = &mchp_coreqspi_mem_ops; ctlr->mem_caps = &mchp_coreqspi_mem_caps; - ctlr->setup = mchp_coreqspi_setup_op; ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_RX_DUAL | SPI_RX_QUAD | SPI_TX_DUAL | SPI_TX_QUAD; ctlr->dev.of_node = np; @@ -729,9 +766,21 @@ static int mchp_coreqspi_probe(struct platform_device *pdev) ctlr->prepare_message = mchp_coreqspi_prepare_message; ctlr->unprepare_message = mchp_coreqspi_unprepare_message; ctlr->transfer_one = mchp_coreqspi_transfer_one; - ctlr->num_chipselect = 2; + ctlr->setup = mchp_coreqspi_setup; + ctlr->set_cs = mchp_coreqspi_set_cs; ctlr->use_gpio_descriptors = true; + val = readl_relaxed(qspi->regs + REG_CONTROL); + val |= (CONTROL_MASTER | CONTROL_ENABLE); + writel_relaxed(val, qspi->regs + REG_CONTROL); + + /* + * Put cs into software controlled mode + */ + val = readl_relaxed(qspi->regs + REG_DIRECT_ACCESS); + val |= DIRECT_ACCESS_EN_SSEL; + writel(val, qspi->regs + REG_DIRECT_ACCESS); + ret = spi_register_controller(ctlr); if (ret) return dev_err_probe(&pdev->dev, ret, -- cgit v1.2.3 From eb56deaabf127e8985fc91fa6c97bf8a3b062844 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 30 Apr 2026 11:10:19 +0100 Subject: spi: microchip-core-qspi: don't attempt to transmit during emulated read-only dual/quad operations The core will deal with reads by creating clock cycles itself, there's no need to generate clock cycles by transmitting garbage data at the driver level. Further, transmitting garbage data just bricks the transfer since QSPI doesn't have a dedicated master-out line like MOSI in regular SPI. I'm not entirely sure if the transfer is bricked because of the garbage data being transmitted on the bus or because the core loses track of whether it is supposed to be sending or receiving data. Fixes: 8f9cf02c88528 ("spi: microchip-core-qspi: Add regular transfers") CC: stable@vger.kernel.org Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260430-freezing-saloon-95b1f3d9dad0@spud Signed-off-by: Mark Brown --- drivers/spi/spi-microchip-core-qspi.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-microchip-core-qspi.c b/drivers/spi/spi-microchip-core-qspi.c index ffa0f33a0ae0..70215a407b5a 100644 --- a/drivers/spi/spi-microchip-core-qspi.c +++ b/drivers/spi/spi-microchip-core-qspi.c @@ -690,18 +690,28 @@ static int mchp_coreqspi_transfer_one(struct spi_controller *ctlr, struct spi_de struct spi_transfer *t) { struct mchp_coreqspi *qspi = spi_controller_get_devdata(ctlr); + bool dual_quad = false; qspi->tx_len = t->len; + if (t->tx_nbits == SPI_NBITS_QUAD || t->rx_nbits == SPI_NBITS_QUAD || + t->tx_nbits == SPI_NBITS_DUAL || + t->rx_nbits == SPI_NBITS_DUAL) + dual_quad = true; + if (t->tx_buf) qspi->txbuf = (u8 *)t->tx_buf; if (!t->rx_buf) { mchp_coreqspi_write_op(qspi); - } else { + } else if (!dual_quad) { qspi->rxbuf = (u8 *)t->rx_buf; qspi->rx_len = t->len; mchp_coreqspi_write_read_op(qspi); + } else { + qspi->rxbuf = (u8 *)t->rx_buf; + qspi->rx_len = t->len; + mchp_coreqspi_read_op(qspi); } return 0; -- cgit v1.2.3 From 0b2eb1f8473eddeff5317e521498329581432f89 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 30 Apr 2026 11:10:20 +0100 Subject: spi: microchip-core-qspi: remove some inline markings Remove inline markings from a number of functions that are called as part of mem ops callbacks. None of them are either particularly trivial or sensitive to overhead of a function call. Just let the compiler decide what to do with them. Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260430-serpent-stimulate-59fb860ef429@spud Signed-off-by: Mark Brown --- drivers/spi/spi-microchip-core-qspi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-microchip-core-qspi.c b/drivers/spi/spi-microchip-core-qspi.c index 70215a407b5a..4dee0fea1df8 100644 --- a/drivers/spi/spi-microchip-core-qspi.c +++ b/drivers/spi/spi-microchip-core-qspi.c @@ -197,7 +197,7 @@ static int mchp_coreqspi_setup(struct spi_device *spi) return 0; } -static inline void mchp_coreqspi_read_op(struct mchp_coreqspi *qspi) +static void mchp_coreqspi_read_op(struct mchp_coreqspi *qspi) { u32 control, data; @@ -233,7 +233,7 @@ static inline void mchp_coreqspi_read_op(struct mchp_coreqspi *qspi) } } -static inline void mchp_coreqspi_write_op(struct mchp_coreqspi *qspi) +static void mchp_coreqspi_write_op(struct mchp_coreqspi *qspi) { u32 control, data; @@ -261,7 +261,7 @@ static inline void mchp_coreqspi_write_op(struct mchp_coreqspi *qspi) } } -static inline void mchp_coreqspi_write_read_op(struct mchp_coreqspi *qspi) +static void mchp_coreqspi_write_read_op(struct mchp_coreqspi *qspi) { u32 control, data; @@ -419,7 +419,7 @@ static int mchp_coreqspi_setup_clock(struct mchp_coreqspi *qspi, struct spi_devi return 0; } -static inline void mchp_coreqspi_config_op(struct mchp_coreqspi *qspi, const struct spi_mem_op *op) +static void mchp_coreqspi_config_op(struct mchp_coreqspi *qspi, const struct spi_mem_op *op) { u32 idle_cycles = 0; int total_bytes, cmd_bytes, frames, ctrl; -- cgit v1.2.3 From d581fc99d3b958cb6e363104e9aab57f36aee6f3 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 23 Apr 2026 13:56:47 +0100 Subject: mm/memfd_luo: reject memfds whose page count exceeds UINT_MAX memfd_luo_preserve_folios() declares max_folios as unsigned int and computes it from the inode size, then passes it to memfd_pin_folios() which itself caps max_folios at unsigned int. For files whose base-page count exceeds UINT_MAX (larger than 16 TiB with 4 KiB pages), the assignment truncates silently: only a prefix of the file gets pinned and preserved, while memfd_luo_preserve() still records the full inode size in ser->size. On retrieve the inode is restored to the full size but only the preserved prefix repopulates the page cache, so the tail comes back as holes and user data is silently lost across the live update. Reject such files at preserve time with -EFBIG rather than chunk the pin loop, which would also require enlarging the preserved folios array well beyond what is practical. Fixes: b3749f174d68 ("mm: memfd_luo: allow preserving memfd") Signed-off-by: David Carlier Reviewed-by: Pasha Tatashin Reviewed-by: Pratyush Yadav Link: https://patch.msgid.link/20260423125648.152113-1-devnexen@gmail.com Signed-off-by: Pasha Tatashin --- mm/memfd_luo.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c index 35d1247281e0..94ae113f68f6 100644 --- a/mm/memfd_luo.c +++ b/mm/memfd_luo.c @@ -259,7 +259,7 @@ static int memfd_luo_preserve(struct liveupdate_file_op_args *args) struct inode *inode = file_inode(args->file); struct memfd_luo_folio_ser *folios_ser; struct memfd_luo_ser *ser; - u64 nr_folios; + u64 nr_folios, inode_size; int err = 0, seals; inode_lock(inode); @@ -285,7 +285,18 @@ static int memfd_luo_preserve(struct liveupdate_file_op_args *args) } ser->pos = args->file->f_pos; - ser->size = i_size_read(inode); + inode_size = i_size_read(inode); + + /* + * memfd_pin_folios() caps at UINT_MAX folios; refuse larger + * files to avoid silently preserving only a prefix. + */ + if (DIV_ROUND_UP_ULL(inode_size, PAGE_SIZE) > UINT_MAX) { + err = -EFBIG; + goto err_free_ser; + } + + ser->size = inode_size; ser->seals = seals; err = memfd_luo_preserve_folios(args->file, &ser->folios, -- cgit v1.2.3 From 7b0b68b2b95606e65594958686833e53423f58f2 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 23 Apr 2026 13:56:48 +0100 Subject: mm/memfd_luo: document preservation of file seals Commit 8a552d68a86e ("mm: memfd_luo: preserve file seals") started preserving file seals across live update and restoring them via memfd_add_seals() on retrieve, but the DOC header was not updated and still listed seals under "Non-Preserved Properties" as being unsealed on restore. Move the Seals entry to the "Preserved Properties" section and describe the actual behavior, including the MEMFD_LUO_ALL_SEALS restriction that both preserve and retrieve enforce. Fixes: 8a552d68a86e ("mm: memfd_luo: preserve file seals") Signed-off-by: David Carlier Reviewed-by: Pratyush Yadav Reviewed-by: Pasha Tatashin Link: https://patch.msgid.link/20260423125648.152113-2-devnexen@gmail.com Signed-off-by: Pasha Tatashin --- mm/memfd_luo.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c index 94ae113f68f6..59de210bee5f 100644 --- a/mm/memfd_luo.c +++ b/mm/memfd_luo.c @@ -50,6 +50,11 @@ * memfds are always opened with ``O_RDWR`` and ``O_LARGEFILE``. This property * is maintained. * + * Seals + * File seals set on the memfd are preserved and re-applied on restore. + * Only seals known to this LUO version (see ``MEMFD_LUO_ALL_SEALS``) may + * be present; preservation fails with ``-EOPNOTSUPP`` otherwise. + * * Non-Preserved Properties * ======================== * @@ -61,10 +66,6 @@ * A memfd can be created with the ``MFD_CLOEXEC`` flag that sets the * ``FD_CLOEXEC`` on the file. This flag is not preserved and must be set * again after restore via ``fcntl()``. - * - * Seals - * File seals are not preserved. The file is unsealed on restore and if - * needed, must be sealed again via ``fcntl()``. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -- cgit v1.2.3 From 05c5078de822148e7cb84968a8783ddfcb6c9ef1 Mon Sep 17 00:00:00 2001 From: Nicolas Escande Date: Wed, 22 Apr 2026 18:32:58 +0200 Subject: wifi: ath12k: fix leak in some ath12k_wmi_xxx() functions Some wmi functions were using plain 'return ath12k_wmi_cmd_send(...)' without explicitly handling the error code. This leads to leaking the skb in case of error. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00218-QCAHKSWPL_SILICONZ-1 Fixes: 66a9448b1b89 ("wifi: ath12k: implement hardware data filter") Fixes: 593174170919 ("wifi: ath12k: implement WoW enable and wakeup commands") Fixes: 4a3c212eee0e ("wifi: ath12k: add basic WoW functionalities") Fixes: 16f474d6d49d ("wifi: ath12k: add WoW net-detect functionality") Fixes: 1666108c74c4 ("wifi: ath12k: support ARP and NS offload") Fixes: aab4ae566fa1 ("wifi: ath12k: support GTK rekey offload") Fixes: 7af01e569529 ("wifi: ath12k: handle keepalive during WoWLAN suspend and resume") Signed-off-by: Nicolas Escande Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260422163258.3013872-1-nico.escande@gmail.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 103 +++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 65a05a9520ff..75c87edd2a8a 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -10251,7 +10251,7 @@ int ath12k_wmi_hw_data_filter_cmd(struct ath12k *ar, struct wmi_hw_data_filter_a { struct wmi_hw_data_filter_cmd *cmd; struct sk_buff *skb; - int len; + int ret, len; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10275,7 +10275,13 @@ int ath12k_wmi_hw_data_filter_cmd(struct ath12k *ar, struct wmi_hw_data_filter_a "wmi hw data filter enable %d filter_bitmap 0x%x\n", arg->enable, arg->hw_filter_bitmap); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_HW_DATA_FILTER_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_HW_DATA_FILTER_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_HW_DATA_FILTER_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_wow_host_wakeup_ind(struct ath12k *ar) @@ -10283,6 +10289,7 @@ int ath12k_wmi_wow_host_wakeup_ind(struct ath12k *ar) struct wmi_wow_host_wakeup_cmd *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10295,14 +10302,20 @@ int ath12k_wmi_wow_host_wakeup_ind(struct ath12k *ar) ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow host wakeup ind\n"); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_wow_enable(struct ath12k *ar) { struct wmi_wow_enable_cmd *cmd; struct sk_buff *skb; - int len; + int ret, len; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10317,7 +10330,13 @@ int ath12k_wmi_wow_enable(struct ath12k *ar) cmd->pause_iface_config = cpu_to_le32(WOW_IFACE_PAUSE_ENABLED); ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow enable\n"); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_ENABLE_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_wow_add_wakeup_event(struct ath12k *ar, u32 vdev_id, @@ -10327,6 +10346,7 @@ int ath12k_wmi_wow_add_wakeup_event(struct ath12k *ar, u32 vdev_id, struct wmi_wow_add_del_event_cmd *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10343,7 +10363,13 @@ int ath12k_wmi_wow_add_wakeup_event(struct ath12k *ar, u32 vdev_id, ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow add wakeup event %s enable %d vdev_id %d\n", wow_wakeup_event(event), enable, vdev_id); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_wow_add_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id, @@ -10356,6 +10382,7 @@ int ath12k_wmi_wow_add_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id, struct sk_buff *skb; void *ptr; size_t len; + int ret; len = sizeof(*cmd) + sizeof(*tlv) + /* array struct */ @@ -10435,7 +10462,13 @@ int ath12k_wmi_wow_add_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id, ath12k_dbg_dump(ar->ab, ATH12K_DBG_WMI, NULL, "wow bitmask: ", bitmap->bitmaskbuf, pattern_len); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ADD_WAKE_PATTERN_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ADD_WAKE_PATTERN_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_ADD_WAKE_PATTERN_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_wow_del_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id) @@ -10443,6 +10476,7 @@ int ath12k_wmi_wow_del_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id) struct wmi_wow_del_pattern_cmd *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10459,7 +10493,13 @@ int ath12k_wmi_wow_del_pattern(struct ath12k *ar, u32 vdev_id, u32 pattern_id) ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi tlv wow del pattern vdev_id %d pattern_id %d\n", vdev_id, pattern_id); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_DEL_WAKE_PATTERN_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_DEL_WAKE_PATTERN_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_WOW_DEL_WAKE_PATTERN_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } static struct sk_buff * @@ -10595,6 +10635,7 @@ int ath12k_wmi_wow_config_pno(struct ath12k *ar, u32 vdev_id, struct wmi_pno_scan_req_arg *pno_scan) { struct sk_buff *skb; + int ret; if (pno_scan->enable) skb = ath12k_wmi_op_gen_config_pno_start(ar, vdev_id, pno_scan); @@ -10604,7 +10645,13 @@ int ath12k_wmi_wow_config_pno(struct ath12k *ar, u32 vdev_id, if (IS_ERR_OR_NULL(skb)) return -ENOMEM; - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } static void ath12k_wmi_fill_ns_offload(struct ath12k *ar, @@ -10717,6 +10764,7 @@ int ath12k_wmi_arp_ns_offload(struct ath12k *ar, void *buf_ptr; size_t len; u8 ns_cnt, ns_ext_tuples = 0; + int ret; ns_cnt = offload->ipv6_count; @@ -10752,7 +10800,13 @@ int ath12k_wmi_arp_ns_offload(struct ath12k *ar, if (ns_ext_tuples) ath12k_wmi_fill_ns_offload(ar, offload, &buf_ptr, enable, 1); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_SET_ARP_NS_OFFLOAD_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_SET_ARP_NS_OFFLOAD_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_SET_ARP_NS_OFFLOAD_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_gtk_rekey_offload(struct ath12k *ar, @@ -10762,7 +10816,7 @@ int ath12k_wmi_gtk_rekey_offload(struct ath12k *ar, struct wmi_gtk_rekey_offload_cmd *cmd; struct sk_buff *skb; __le64 replay_ctr; - int len; + int ret, len; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10789,7 +10843,13 @@ int ath12k_wmi_gtk_rekey_offload(struct ath12k *ar, ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "offload gtk rekey vdev: %d %d\n", arvif->vdev_id, enable); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_GTK_OFFLOAD_CMDID offload\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_gtk_rekey_getinfo(struct ath12k *ar, @@ -10797,7 +10857,7 @@ int ath12k_wmi_gtk_rekey_getinfo(struct ath12k *ar, { struct wmi_gtk_rekey_offload_cmd *cmd; struct sk_buff *skb; - int len; + int ret, len; len = sizeof(*cmd); skb = ath12k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -10811,7 +10871,13 @@ int ath12k_wmi_gtk_rekey_getinfo(struct ath12k *ar, ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "get gtk rekey vdev_id: %d\n", arvif->vdev_id); - return ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + ret = ath12k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_GTK_OFFLOAD_CMDID getinfo\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_sta_keepalive(struct ath12k *ar, @@ -10822,6 +10888,7 @@ int ath12k_wmi_sta_keepalive(struct ath12k *ar, struct wmi_sta_keepalive_cmd *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd) + sizeof(*arp); skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, len); @@ -10849,7 +10916,13 @@ int ath12k_wmi_sta_keepalive(struct ath12k *ar, "wmi sta keepalive vdev %d enabled %d method %d interval %d\n", arg->vdev_id, arg->enabled, arg->method, arg->interval); - return ath12k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID); + ret = ath12k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to send WMI_STA_KEEPALIVE_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath12k_wmi_mlo_setup(struct ath12k *ar, struct wmi_mlo_setup_arg *mlo_params) -- cgit v1.2.3 From 81594a12d5cecb3ab35b603a00037c7c3ee87ab2 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Mon, 27 Apr 2026 16:00:11 +0530 Subject: wifi: ath12k: initialize RSSI dBm conversion event state Currently, the RSSI dBm conversion event handler leaves struct ath12k_wmi_rssi_dbm_conv_info_arg uninitialized on the stack before calling the TLV parser. If one of the optional sub-TLVs is absent, the corresponding *_present flag retains stack garbage and later gets read in ath12k_wmi_update_rssi_offsets(). With UBSAN enabled this triggers an invalid-load report for _Bool: UBSAN: invalid-load in drivers/net/wireless/ath/ath12k/wmi.c:9682:15 load of value 9 is not a valid value for type '_Bool' Call Trace: ath12k_wmi_rssi_dbm_conversion_params_info_event.cold+0x72/0x85 [ath12k] ath12k_wmi_op_rx+0x1871/0x2ab0 [ath12k] ath12k_htc_rx_completion_handler+0x44b/0x810 [ath12k] ath12k_ce_recv_process_cb+0x554/0x9f0 [ath12k] ath12k_ce_per_engine_service+0xbe/0xf0 [ath12k] ath12k_pci_ce_workqueue+0x69/0x120 [ath12k] Initialize the parsed event state to zero before passing it to the TLV parser so missing sub-TLVs correctly leave the presence flags false. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Fixes: 0314ee81a91d ("wifi: ath12k: handle WMI event for real noise floor calculation") Signed-off-by: Rameshkumar Sundaram Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20260427103011.2983269-1-rameshkumar.sundaram@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 75c87edd2a8a..b5e904a55aea 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -9778,7 +9778,7 @@ static void ath12k_wmi_rssi_dbm_conversion_params_info_event(struct ath12k_base *ab, struct sk_buff *skb) { - struct ath12k_wmi_rssi_dbm_conv_info_arg rssi_info; + struct ath12k_wmi_rssi_dbm_conv_info_arg rssi_info = {}; struct ath12k *ar; s32 noise_floor; u32 pdev_id; -- cgit v1.2.3 From 0e1308803d2c3fd365a6d21e6be355ec1e28eaaf Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Mon, 27 Apr 2026 13:51:41 +0800 Subject: wifi: ath12k: fix peer_id usage in normal RX path ath12k_dp_rx_deliver_msdu() currently uses hal_rx_desc_data::peer_id parsed from mpdu_start descriptor to do peer lookup. However In an A-MSDU aggregation scenario, hardware only populates mpdu_start descriptor for the first sub-msdu, but not the following ones. In that case peer_id could be invalid, leading to peer lookup failure: ath12k_wifi7_pci 0000:06:00.0: rx skb 00000000c391c041 len 1532 peer (null) 0 ucast sn 0 eht320 rate_idx 12 vht_nss 2 freq 6105 band 3 flag 0x40d1a fcs-err 0 mic-err 0 amsdu-more 0 As a result pubsta is NULL and parts of ieee80211_rx_status structure are left uninitialized, which may cause unexpected behavior. Fix it by switching the normal RX path to use ath12k_skb_rxcb::peer_id which is parsed from REO ring's rx_mpdu_desc and is always valid. hal_rx_desc_data::peer_id is still used in ath12k_wifi7_dp_rx_frag_h_mpdu(), which is safe since A-MSDU aggregation does not occur for fragmented frames. Similarly, ath12k_skb_rxcb::peer_id may be overwritten by hal_rx_desc_data::peer_id in ath12k_wifi7_dp_rx_h_mpdu(), which only handles non-aggregated multicast/broadcast traffic. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00302-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.115823.3 Fixes: 11157e0910fd ("wifi: ath12k: Use ath12k_dp_peer in per packet Tx & Rx paths") Signed-off-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20260427-ath12k-fix-peer-id-source-v1-1-b5f701fb8e88@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 25557dea5826..b108ccd0f637 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -1340,7 +1340,7 @@ void ath12k_dp_rx_deliver_msdu(struct ath12k_pdev_dp *dp_pdev, struct napi_struc bool is_mcbc = rxcb->is_mcbc; bool is_eapol = rxcb->is_eapol; - peer = ath12k_dp_peer_find_by_peerid(dp_pdev, rx_info->peer_id); + peer = ath12k_dp_peer_find_by_peerid(dp_pdev, rxcb->peer_id); pubsta = peer ? peer->sta : NULL; -- cgit v1.2.3 From d748603f12baff112caa3ab7d39f50100f010dbd Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 9 Dec 2025 11:04:59 +0100 Subject: wifi: ath5k: do not access array OOB Vincent reports: > The ath5k driver seems to do an array-index-out-of-bounds access as > shown by the UBSAN kernel message: > UBSAN: array-index-out-of-bounds in drivers/net/wireless/ath/ath5k/base.c:1741:20 > index 4 is out of range for type 'ieee80211_tx_rate [4]' > ... > Call Trace: > > dump_stack_lvl+0x5d/0x80 > ubsan_epilogue+0x5/0x2b > __ubsan_handle_out_of_bounds.cold+0x46/0x4b > ath5k_tasklet_tx+0x4e0/0x560 [ath5k] > tasklet_action_common+0xb5/0x1c0 It is real. 'ts->ts_final_idx' can be 3 on 5212, so: info->status.rates[ts->ts_final_idx + 1].idx = -1; with the array defined as: struct ieee80211_tx_rate rates[IEEE80211_TX_MAX_RATES]; while the size is: #define IEEE80211_TX_MAX_RATES 4 is indeed bogus. Set this 'idx = -1' sentinel only if the array index is less than the array size. As mac80211 will not look at rates beyond the size (IEEE80211_TX_MAX_RATES). Note: The effect of the OOB write is negligible. It just overwrites the next member of info->status, i.e. ack_signal. Signed-off-by: Jiri Slaby (SUSE) Reported-by: Vincent Danjean Link: https://lore.kernel.org/all/aQYUkIaT87ccDCin@eldamar.lan Closes: https://bugs.debian.org/1119093 Fixes: 6d7b97b23e11 ("ath5k: fix tx status reporting issues") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251209100459.2253198-1-jirislaby@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath5k/base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 05c9c07591fc..6ca31d4ea437 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -1738,7 +1738,8 @@ ath5k_tx_frame_completed(struct ath5k_hw *ah, struct sk_buff *skb, } info->status.rates[ts->ts_final_idx].count = ts->ts_final_retry; - info->status.rates[ts->ts_final_idx + 1].idx = -1; + if (ts->ts_final_idx + 1 < IEEE80211_TX_MAX_RATES) + info->status.rates[ts->ts_final_idx + 1].idx = -1; if (unlikely(ts->ts_status)) { ah->stats.ack_fail++; -- cgit v1.2.3 From 2a46a9356ba7b1bdd741c8b41e5374edcd960557 Mon Sep 17 00:00:00 2001 From: "Kory Maincent (TI)" Date: Tue, 28 Apr 2026 11:04:56 +0200 Subject: drm/bridge: tda998x: Use __be32 for audio port OF property pointer of_get_property() returns a pointer to big-endian (__be32) data, but port_data in tda998x_get_audio_ports() was declared as const u32 *, causing a sparse endianness type mismatch warning. Fix the declaration to use const __be32 *. Fixes: 7e567624dc5a4 ("drm/i2c: tda998x: Register ASoC hdmi-codec and add audio DT binding") Cc: stable@vger.kernel.org Signed-off-by: Kory Maincent (TI) Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260428090457.121894-1-kory.maincent@bootlin.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/bridge/tda998x_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tda998x_drv.c b/drivers/gpu/drm/bridge/tda998x_drv.c index d9b388165de1..779b976f601c 100644 --- a/drivers/gpu/drm/bridge/tda998x_drv.c +++ b/drivers/gpu/drm/bridge/tda998x_drv.c @@ -1762,7 +1762,7 @@ static const struct drm_bridge_funcs tda998x_bridge_funcs = { static int tda998x_get_audio_ports(struct tda998x_priv *priv, struct device_node *np) { - const u32 *port_data; + const __be32 *port_data; u32 size; int i; -- cgit v1.2.3 From b5d0ad616ca8dd8c7b6b24dc13012e342278a085 Mon Sep 17 00:00:00 2001 From: "Kory Maincent (TI)" Date: Fri, 17 Apr 2026 17:54:45 +0200 Subject: drm/bridge: tda998x: Return NULL instead of 0 in tda998x_edid_read() tda998x_edid_read() returns a const struct drm_edid pointer, but when tda998x_edid_delay_wait() fails (process killed while waiting for the HPD timeout), the integer literal 0 is returned instead of NULL, triggering a sparse warning: "Using plain integer as NULL pointer" Replace 0 with NULL to fix the sparse warning. Fixes: c76a8be4feec ("drm/bridge: tda998x: Add support for DRM_BRIDGE_ATTACH_NO_CONNECTOR") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202604172257.Imo6GOH9-lkp@intel.com/ Signed-off-by: Kory Maincent (TI) Reviewed-by: Luca Ceresoli Link: https://patch.msgid.link/20260417155446.1068893-1-kory.maincent@bootlin.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/bridge/tda998x_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tda998x_drv.c b/drivers/gpu/drm/bridge/tda998x_drv.c index 779b976f601c..6c427bc75896 100644 --- a/drivers/gpu/drm/bridge/tda998x_drv.c +++ b/drivers/gpu/drm/bridge/tda998x_drv.c @@ -1293,7 +1293,7 @@ static const struct drm_edid *tda998x_edid_read(struct tda998x_priv *priv, * can't handle signals gracefully. */ if (tda998x_edid_delay_wait(priv)) - return 0; + return NULL; if (priv->rev == TDA19988) reg_clear(priv, REG_TX4, TX4_PD_RAM); -- cgit v1.2.3 From f80785888f7c0980a49545b87a80e3817c9ed7c6 Mon Sep 17 00:00:00 2001 From: Anton Swart Date: Sun, 3 May 2026 23:15:17 +0200 Subject: ALSA: usb-audio: Add quirk flags for AlphaTheta EUPHONIA The AlphaTheta EUPHONIA (VID 0x2b73, PID 0x0047) is a USB Audio Class 2 DJ mixer that requires implicit feedback for full-duplex operation. The capture endpoint (0x83 IN, interface 2) acts as the implicit feedback source for the playback endpoint (0x03 OUT, interface 1), and the device firmware does not send isochronous data on the capture endpoint unless the host is simultaneously sending data on the playback endpoint, i.e. playback must be started first. Without QUIRK_FLAG_PLAYBACK_FIRST the kernel waits for capture URBs before submitting playback URBs, creating a deadlock: the device waits for playback data and the host waits for capture data. Without QUIRK_FLAG_GENERIC_IMPLICIT_FB the kernel does not detect the implicit feedback relationship between the two interfaces. The same flag combination is already used for the Behringer UMC202HD, UMC204HD and UMC404HD (0x1397:0x0507/0x0508/0x0509), which exhibit the identical implicit-feedback topology. Tested on Raspberry Pi 5 with kernel 6.12.75; continuous full-duplex streaming at 96 kHz / 24-bit, zero XRUNs. Signed-off-by: Anton Swart Link: https://patch.msgid.link/20260503211517.14332-1-anton.swart.jhb@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index f77ff05dff0b..b17bdae09bc3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2460,6 +2460,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b73, 0x0047, /* AlphaTheta EUPHONIA */ + QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x2d95, 0x8011, /* VIVO USB-C HEADSET */ QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x2d95, 0x8021, /* VIVO USB-C-XE710 HEADSET */ -- cgit v1.2.3 From 0749daa8eb5ab90334aaad3b0671efd7150d43b1 Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Sun, 3 May 2026 21:55:52 -0300 Subject: ALSA: firewire-tascam: Do not drop unread control events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tscm_hwdep_read_queue() copies as many queued control events as fit in the userspace buffer. When the buffer is smaller than the current contiguous queue segment, length is rounded down to the number of bytes that can be copied. However, after copying that shortened length, the code advances pull_pos to the original tail_pos, marking the whole contiguous segment as consumed. Any events between the copied portion and tail_pos are lost. Limit tail_pos to the position after the entries actually copied before updating pull_pos. When the whole segment fits, this is equivalent to the old tail_pos update; when the buffer is smaller, the remaining events stay queued for the next read. Fixes: a8c0d13267a4 ("ALSA: firewire-tascam: notify events of change of state for userspace applications") Cc: stable@vger.kernel.org Suggested-by: Takashi Sakamoto Signed-off-by: Cássio Gabriel Reviewed-by: Takashi Sakamoto Co-developed-by: Takashi Sakamoto Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260503-alsa-firewire-tascam-read-queue-v2-1-126c6efd7642@gmail.com --- sound/firewire/tascam/tascam-hwdep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/firewire/tascam/tascam-hwdep.c b/sound/firewire/tascam/tascam-hwdep.c index 867b4ea1096e..6270263e7bf4 100644 --- a/sound/firewire/tascam/tascam-hwdep.c +++ b/sound/firewire/tascam/tascam-hwdep.c @@ -73,6 +73,7 @@ static long tscm_hwdep_read_queue(struct snd_tscm *tscm, char __user *buf, length = rounddown(remained, sizeof(*entries)); if (length == 0) break; + tail_pos = head_pos + length / sizeof(*entries); spin_unlock_irq(&tscm->lock); if (copy_to_user(pos, &entries[head_pos], length)) -- cgit v1.2.3 From db4cd7d6c571c5ec6b52a6e80ec59ad99c4aa1d6 Mon Sep 17 00:00:00 2001 From: Rong Zhang Date: Mon, 4 May 2026 19:38:05 +0800 Subject: ALSA: usb-audio: Add quirk flags for JBL Pebbles JBL Pebbles is a pair of desktop speakers with UAC interface. Its Playback and Capture mixers use linear volume with val = 0/999/1 and 0/3996/4. Meanwhile, the reported sample rates are truncated to multiples of 0x100 (i.e., 44100 => 44032), resulting in noisy kmsg, as a warning message is printed each time a stream is opened. Add a quirk table entry matching VID/PID=0x05fc/0x0231 and applying linear volume and sample rate quirk flags, so that it can work properly. Also note that the volume control knob on device is an incremental encoder. It does nothing but sends KEY_VOLUMEUP and KEY_VOLUMEDOWN per rotation, controlling the UAC Playback volume mixer indirectly. Hence, the linear volume quirk flags also enable the volume control knob to function properly. Quirky device sample: usb 5-1.1: new full-speed USB device number 12 using xhci_hcd usb 5-1.1: New USB device found, idVendor=05fc, idProduct=0231, bcdDevice= 1.00 usb 5-1.1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 5-1.1: Product: JBL Pebbles usb 5-1.1: Manufacturer: Harman International Industries usb 5-1.1: SerialNumber: 1.0.0 usb-storage 5-1.1:1.0: USB Mass Storage device detected scsi host0: usb-storage 5-1.1:1.0 usb 5-1.1: Found last interface = 1 usb 5-1.1: 2:1: add audio endpoint 0x5 usb 5-1.1: Creating new data endpoint #5 usb 5-1.1: 2:1 Set sample rate 44100, clock 0 usb 5-1.1: current rate 44032 is different from the runtime rate 44100 usb 5-1.1: 3:1: add audio endpoint 0x84 usb 5-1.1: Creating new data endpoint #84 usb 5-1.1: 3:1 Set sample rate 44100, clock 0 usb 5-1.1: current rate 44032 is different from the runtime rate 44100 usb 5-1.1: [2] FU [PCM Playback Switch] ch = 1, val = 0/1/1 usb 5-1.1: Warning! Unlikely big volume step count (=999), linear volume or wrong cval->res? usb 5-1.1: [2] FU [PCM Playback Volume] ch = 2, val = 0/999/1 usb 5-1.1: [5] FU [Mic Capture Switch] ch = 1, val = 0/1/1 usb 5-1.1: Warning! Unlikely big volume step count (=999), linear volume or wrong cval->res? usb 5-1.1: [5] FU [Mic Capture Volume] ch = 2, val = 0/3996/4 input: Harman International Industries JBL Pebbles as /devices/pci0000:00/0000:00:08.3/0000:67:00.3/usb5/5-1/5-1.1/5-1.1:1.4/0003:05FC:0231.0018/input/input55 hid-generic 0003:05FC:0231.0018: input,hidraw2: USB HID v2.01 Device [Harman International Industries JBL Pebbles] on usb-0000:67:00.3-1.1/input4 Signed-off-by: Rong Zhang Link: https://patch.msgid.link/20260504-uac-jbl-pebbles-v1-1-c888d592a286@rong.moe Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index b17bdae09bc3..17983d9774f8 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2277,6 +2277,9 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x05e1, 0x0480, /* Hauppauge Woodbury */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x05fc, 0x0231, /* JBL Pebbles */ + QUIRK_FLAG_MIXER_PLAYBACK_LINEAR_VOL | QUIRK_FLAG_MIXER_CAPTURE_LINEAR_VOL | + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x0624, 0x3d3f, /* AB13X USB Audio */ QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), DEVICE_FLG(0x0644, 0x8043, /* TEAC UD-501/UD-501V2/UD-503/NT-503 */ -- cgit v1.2.3 From dc1e0172be54e742bccb28d5f14c0c395e28c098 Mon Sep 17 00:00:00 2001 From: Fernando Antunez Antonio Date: Mon, 4 May 2026 07:33:26 -0600 Subject: ALSA: hda/realtek: Fix mute and mic-mute LEDs for HP Envy X360 15-fh0xxx This enables the mute and mic-mute LEDs on the HP Envy X360 15-fh0xxx 2-in-1 laptops. The quirk 'ALC245_FIXUP_HP_ENVY_X360_15_FH0XXX' has been created and is now enabled for this device. This is my first patch, and I'm still getting to grips with the code, so there's probably a better way to implement this fix. I apologize for any inconvenience caused by the constant release of new versions of this patch. Signed-off-by: Fernando Antunez Antonio Link: https://patch.msgid.link/20260504-hpenvy-muteled-fix-v3-1-5567fd9b3d25@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index e061673d3c2e..a7525ed83e2b 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -4137,6 +4137,7 @@ enum { ALC245_FIXUP_CS35L41_I2C_2_MUTE_LED, ALC236_FIXUP_HP_DMIC, ALC256_FIXUP_HONOR_MRB_XXX_M1020_AUDIO, + ALC245_FIXUP_HP_ENVY_X360_15_FH0XXX, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -6693,6 +6694,12 @@ static const struct hda_fixup alc269_fixups[] = { { 0x1b, 0x90170110 }, { } } + }, + [ALC245_FIXUP_HP_ENVY_X360_15_FH0XXX] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l41_fixup_i2c_two, + .chained = true, + .chain_id = ALC245_FIXUP_HP_X360_MUTE_LEDS } }; @@ -7115,7 +7122,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8be6, "HP Envy 16", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8be7, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8be8, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x103c, 0x8be9, "HP Envy 15", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8be9, "HP Envy x360 2-in-1 Laptop 15-fh0xxx", ALC245_FIXUP_HP_ENVY_X360_15_FH0XXX), SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c15, "HP Spectre x360 2-in-1 Laptop 14-eu0xxx", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), SND_PCI_QUIRK(0x103c, 0x8c16, "HP Spectre x360 2-in-1 Laptop 16-aa0xxx", ALC245_FIXUP_HP_SPECTRE_X360_16_AA0XXX), -- cgit v1.2.3 From f3c57c9c2a49a21d784b7c04a2c883bffc070659 Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Mon, 4 May 2026 11:08:45 -0300 Subject: ALSA: usb-audio: midi2: Restart output URBs on resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit USB MIDI 2.0 suspend saves the endpoint running state, clears it and kills all endpoint URBs. Resume restores the running state, but only restarts input endpoints. For a running output endpoint, this leaves the endpoint marked running with an empty URB queue. Output transfer progress depends on either the rawmidi trigger path starting the queue or an output completion refilling it. After suspend there is no completion left, and output data that remains queued in the raw UMP or legacy rawmidi buffer can stay stalled until userspace happens to trigger the stream again. Restore the saved state with atomic accessors, keep input endpoints restarted as before, and restart output endpoints that were running before suspend. Clear the saved suspend state after restoring it. Fixes: ff49d1df79ae ("ALSA: usb-audio: USB MIDI 2.0 UMP support") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260504-usb-midi2-output-resume-v1-1-c089cc8ad3c6@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/midi2.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/usb/midi2.c b/sound/usb/midi2.c index 3546ba926cb3..2785600d2312 100644 --- a/sound/usb/midi2.c +++ b/sound/usb/midi2.c @@ -227,7 +227,7 @@ static void kill_midi_urbs(struct snd_usb_midi2_endpoint *ep, bool suspending) if (!ep) return; if (suspending) - ep->suspended = ep->running; + atomic_set(&ep->suspended, atomic_read(&ep->running)); atomic_set(&ep->running, 0); for (i = 0; i < ep->num_urbs; i++) { if (!ep->urbs[i].urb) @@ -1188,10 +1188,11 @@ void snd_usb_midi_v2_suspend_all(struct snd_usb_audio *chip) static void resume_midi2_endpoint(struct snd_usb_midi2_endpoint *ep) { - ep->running = ep->suspended; - if (ep->direction == STR_IN) + atomic_set(&ep->running, atomic_read(&ep->suspended)); + atomic_set(&ep->suspended, 0); + + if (ep->direction == STR_IN || atomic_read(&ep->running)) submit_io_urbs(ep); - /* FIXME: does it all? */ } void snd_usb_midi_v2_resume_all(struct snd_usb_audio *chip) -- cgit v1.2.3 From 320e55722ca466a7d40dd69e1aea982cb6189006 Mon Sep 17 00:00:00 2001 From: Nicola Lunghi Date: Mon, 4 May 2026 16:45:20 +0200 Subject: ALSA: usb-audio: add clock quirk for Motu 1248 The Motu 1248 (and probably other older Motu AVB interfaces) take more than 2 seconds to switch clock. During the clock switching process the device return that the clock is not valid. This is similar to what already implemented for the Microbook II interface. Add the Motu 1248 usb id to the existing Motu quirk. Signed-off-by: Nicola Lunghi Link: https://patch.msgid.link/20260504144520.699522-2-nick83ola@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/clock.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 842ba5b801ea..2e0c18e35281 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -208,11 +208,18 @@ static bool uac_clock_source_is_valid_quirk(struct snd_usb_audio *chip, } /* - * MOTU MicroBook IIc - * Sample rate changes takes more than 2 seconds for this device. Clock - * validity request returns false during that period. + * Quirk for older MOTU AVB / hybrid interfaces + * + * These devices take more than 2 seconds to switch sample rate or + * clock source. During this period the clock validity request + * returns false, causing ALSA to fail prematurely. + * + * Affected models (all use vendor 0x07fd): + * - MicroBook IIc → 0x0004 + * - 1248, 624, 8A, UltraLite AVB, 8M, 16A, ... → 0x0005 */ - if (chip->usb_id == USB_ID(0x07fd, 0x0004)) { + if (chip->usb_id == USB_ID(0x07fd, 0x0004) || /* MicroBook IIc */ + chip->usb_id == USB_ID(0x07fd, 0x0005)) { /* 1248 / 624 / 8A / UltraLite AVB / ... */ count = 0; while ((!ret) && (count < 50)) { -- cgit v1.2.3 From 17e4c68ff35090d8cb743e3c82c09f92fda1ebda Mon Sep 17 00:00:00 2001 From: David Gow Date: Sat, 25 Apr 2026 11:41:53 +0800 Subject: kunit: config: Enable KUNIT_DEBUGFS by default The KUNIT_DEBUGFS option is currently enabled based on the value of KUNIT_ALL_TESTS, but it really doesn't have anything to do with the set of enabled tests, so just enable it by default anyway. In particular, this shouldn't be only visible if KUNIT_ALL_TESTS is set, which is quite confusing. Link: https://lore.kernel.org/r/20260425034155.53913-1-david@davidgow.net Fixes: beaed42c427d ("kunit: default KUNIT_* fragments to KUNIT_ALL_TESTS") Signed-off-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig index 498cc51e493d..f80ca3aeedb0 100644 --- a/lib/kunit/Kconfig +++ b/lib/kunit/Kconfig @@ -16,8 +16,8 @@ menuconfig KUNIT if KUNIT config KUNIT_DEBUGFS - bool "KUnit - Enable /sys/kernel/debug/kunit debugfs representation" if !KUNIT_ALL_TESTS - default KUNIT_ALL_TESTS + bool "KUnit - Enable /sys/kernel/debug/kunit debugfs representation" + default y help Enable debugfs representation for kunit. Currently this consists of /sys/kernel/debug/kunit//results files for each -- cgit v1.2.3 From 8f80b5b227ef9ea422080487715c841856339aed Mon Sep 17 00:00:00 2001 From: David Gow Date: Sat, 25 Apr 2026 11:41:54 +0800 Subject: kunit: config: KUNIT_DEBUGFS should depend on DEBUG_FS CONFIG_KUNIT_DEBUGFS is totally useless without debugfs, so it should depend on CONFIG_DEBUG_FS. Link: https://lore.kernel.org/r/20260425034155.53913-2-david@davidgow.net Fixes: e2219db280e3 ("kunit: add debugfs /sys/kernel/debug/kunit//results display") Signed-off-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig index f80ca3aeedb0..94ff8e4089bf 100644 --- a/lib/kunit/Kconfig +++ b/lib/kunit/Kconfig @@ -17,6 +17,7 @@ if KUNIT config KUNIT_DEBUGFS bool "KUnit - Enable /sys/kernel/debug/kunit debugfs representation" + depends on DEBUG_FS default y help Enable debugfs representation for kunit. Currently this consists -- cgit v1.2.3 From 93618edf753838a727dbff63c7c291dee22d656b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 1 May 2026 08:31:22 -1000 Subject: cgroup: Defer css percpu_ref kill on rmdir until cgroup is depopulated A chain of commits going back to v7.0 reworked rmdir to satisfy the controller invariant that a subsystem's ->css_offline() must not run while tasks are still doing kernel-side work in the cgroup. [1] d245698d727a ("cgroup: Defer task cgroup unlink until after the task is done switching out") [2] a72f73c4dd9b ("cgroup: Don't expose dead tasks in cgroup") [3] 1b164b876c36 ("cgroup: Wait for dying tasks to leave on rmdir") [4] 4c56a8ac6869 ("cgroup: Fix cgroup_drain_dying() testing the wrong condition") [5] 13e786b64bd3 ("cgroup: Increment nr_dying_subsys_* from rmdir context") [1] moved task cset unlink from do_exit() to finish_task_switch() so a task's cset link drops only after the task has fully stopped scheduling. That made tasks past exit_signals() linger on cset->tasks until their final context switch, which led to a series of problems as what userspace expected to see after rmdir diverged from what the kernel needs to wait for. [2]-[5] tried to bridge that divergence: [2] filtered the exiting tasks from cgroup.procs; [3] had rmdir(2) sleep in TASK_UNINTERRUPTIBLE for them; [4] fixed the wait's condition; [5] made nr_dying_subsys_* visible synchronously. The cgroup_drain_dying() wait in [3] turned out to be a dead end. When the rmdir caller is also the reaper of a zombie that pins a pidns teardown (e.g. host PID 1 systemd reaping orphan pids that were re-parented to it during the same teardown), rmdir blocks in TASK_UNINTERRUPTIBLE waiting for those pids to free, the pids can't free because PID 1 is the reaper and it's stuck in rmdir, and the system A-A deadlocks. No internal lock ordering breaks this; the wait itself is the bug. The css killing side that drove the original reorder, however, can be made cleanly asynchronous: ->css_offline() is already async, run from css_killed_work_fn() driven by percpu_ref_kill_and_confirm(). The fix is to make that chain start only after all tasks have left the cgroup. rmdir's user-visible side then returns as soon as cgroup.procs and friends are empty, while ->css_offline() still runs only after the cgroup is fully drained. Verified by the original reproducer (pidns teardown + zombie reaper, runs under vng) which hangs vanilla and succeeds here, and by per-commit deterministic repros for [2], [3], [4], [5] with a boot parameter that widens the post-exit_signals() window so each state is reliably reachable. Some stress tests on top of that. cgroup_apply_control_disable() has the same shape of pre-existing race: when a controller is disabled via subtree_control, kill_css() ran synchronously while tasks past exit_signals() could still be linked to the cgroup's csets, and ->css_offline() could fire before they drained. This patch preserves the existing synchronous behavior at that call site (kill_css_sync() + kill_css_finish() back-to-back) and a follow-up patch will defer kill_css_finish() there using a per-css trigger. This seems like the right approach and I don't see problems with it. The changes are somewhat invasive but not excessively so, so backporting to -stable should be okay. If something does turn out to be wrong, the fallback is to revert the entire chain ([1]-[5]) and rework in the development branch instead. v2: Pin cgrp across the deferred destroy work with explicit cgroup_get()/cgroup_put() around queue_work() and the work_fn. v1 wasn't actually broken (ordered cgroup_offline_wq + queue_work order in cgroup_task_dead() saved it) but the explicit ref removes the dependency on those non-obvious invariants. Also note the pre-existing cgroup_apply_control_disable() race in the description; a follow-up will defer kill_css_finish() there. Fixes: 1b164b876c36 ("cgroup: Wait for dying tasks to leave on rmdir") Cc: stable@vger.kernel.org # v7.0+ Reported-and-tested-by: Martin Pitt Link: https://lore.kernel.org/all/afHNg2VX2jy9bW7y@piware.de/ Link: https://lore.kernel.org/all/35e0670adb4abeab13da2c321582af9f@kernel.org/ Signed-off-by: Tejun Heo Acked-by: Sebastian Andrzej Siewior --- include/linux/cgroup-defs.h | 4 +- kernel/cgroup/cgroup.c | 250 +++++++++++++++++++++----------------------- 2 files changed, 119 insertions(+), 135 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index f42563739d2e..50a784da7a81 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -611,8 +611,8 @@ struct cgroup { /* used to wait for offlining of csses */ wait_queue_head_t offline_waitq; - /* used by cgroup_rmdir() to wait for dying tasks to leave */ - wait_queue_head_t dying_populated_waitq; + /* defers killing csses after removal until cgroup is depopulated */ + struct work_struct finish_destroy_work; /* used to schedule release agent */ struct work_struct release_agent_work; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index c928dea9dea6..bd10a7e2f9c5 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -264,10 +264,12 @@ static void cgroup_finalize_control(struct cgroup *cgrp, int ret); static void css_task_iter_skip(struct css_task_iter *it, struct task_struct *task); static int cgroup_destroy_locked(struct cgroup *cgrp); +static void cgroup_finish_destroy(struct cgroup *cgrp); +static void kill_css_sync(struct cgroup_subsys_state *css); +static void kill_css_finish(struct cgroup_subsys_state *css); static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, struct cgroup_subsys *ss); static void css_release(struct percpu_ref *ref); -static void kill_css(struct cgroup_subsys_state *css); static int cgroup_addrm_files(struct cgroup_subsys_state *css, struct cgroup *cgrp, struct cftype cfts[], bool is_add); @@ -797,6 +799,16 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated) if (was_populated == cgroup_is_populated(cgrp)) break; + /* + * Subtree just emptied below an offlined cgrp. Fire deferred + * destroy. The transition is one-shot. + */ + if (was_populated && !css_is_online(&cgrp->self)) { + cgroup_get(cgrp); + WARN_ON_ONCE(!queue_work(cgroup_offline_wq, + &cgrp->finish_destroy_work)); + } + cgroup1_check_for_release(cgrp); TRACE_CGROUP_PATH(notify_populated, cgrp, cgroup_is_populated(cgrp)); @@ -2039,6 +2051,16 @@ static int cgroup_reconfigure(struct fs_context *fc) return 0; } +static void cgroup_finish_destroy_work_fn(struct work_struct *work) +{ + struct cgroup *cgrp = container_of(work, struct cgroup, finish_destroy_work); + + cgroup_lock(); + cgroup_finish_destroy(cgrp); + cgroup_unlock(); + cgroup_put(cgrp); +} + static void init_cgroup_housekeeping(struct cgroup *cgrp) { struct cgroup_subsys *ss; @@ -2065,7 +2087,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) #endif init_waitqueue_head(&cgrp->offline_waitq); - init_waitqueue_head(&cgrp->dying_populated_waitq); + INIT_WORK(&cgrp->finish_destroy_work, cgroup_finish_destroy_work_fn); INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); } @@ -3375,7 +3397,8 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp) if (css->parent && !(cgroup_ss_mask(dsct) & (1 << ss->id))) { - kill_css(css); + kill_css_sync(css); + kill_css_finish(css); } else if (!css_visible(css)) { css_clear_dir(css); if (ss->css_reset) @@ -5514,7 +5537,7 @@ static struct cftype cgroup_psi_files[] = { * css destruction is four-stage process. * * 1. Destruction starts. Killing of the percpu_ref is initiated. - * Implemented in kill_css(). + * Implemented in kill_css_finish(). * * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs * and thus css_tryget_online() is guaranteed to fail, the css can be @@ -5993,7 +6016,7 @@ out_unlock: /* * This is called when the refcnt of a css is confirmed to be killed. * css_tryget_online() is now guaranteed to fail. Tell the subsystem to - * initiate destruction and put the css ref from kill_css(). + * initiate destruction and put the css ref from kill_css_finish(). */ static void css_killed_work_fn(struct work_struct *work) { @@ -6025,15 +6048,12 @@ static void css_killed_ref_fn(struct percpu_ref *ref) } /** - * kill_css - destroy a css - * @css: css to destroy + * kill_css_sync - synchronous half of css teardown + * @css: css being killed * - * This function initiates destruction of @css by removing cgroup interface - * files and putting its base reference. ->css_offline() will be invoked - * asynchronously once css_tryget_online() is guaranteed to fail and when - * the reference count reaches zero, @css will be released. + * See cgroup_destroy_locked(). */ -static void kill_css(struct cgroup_subsys_state *css) +static void kill_css_sync(struct cgroup_subsys_state *css) { struct cgroup_subsys *ss = css->ss; @@ -6056,24 +6076,6 @@ static void kill_css(struct cgroup_subsys_state *css) */ css_clear_dir(css); - /* - * Killing would put the base ref, but we need to keep it alive - * until after ->css_offline(). - */ - css_get(css); - - /* - * cgroup core guarantees that, by the time ->css_offline() is - * invoked, no new css reference will be given out via - * css_tryget_online(). We can't simply call percpu_ref_kill() and - * proceed to offlining css's because percpu_ref_kill() doesn't - * guarantee that the ref is seen as killed on all CPUs on return. - * - * Use percpu_ref_kill_and_confirm() to get notifications as each - * css is confirmed to be seen as killed on all CPUs. - */ - percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn); - css->cgroup->nr_dying_subsys[ss->id]++; /* * Parent css and cgroup cannot be freed until after the freeing @@ -6086,44 +6088,88 @@ static void kill_css(struct cgroup_subsys_state *css) } /** - * cgroup_destroy_locked - the first stage of cgroup destruction + * kill_css_finish - deferred half of css teardown + * @css: css being killed + * + * See cgroup_destroy_locked(). + */ +static void kill_css_finish(struct cgroup_subsys_state *css) +{ + lockdep_assert_held(&cgroup_mutex); + + /* + * Skip on re-entry: cgroup_apply_control_disable() may have killed @css + * earlier. cgroup_destroy_locked() can still walk it because + * offline_css() (which NULLs cgrp->subsys[ssid]) runs async. + */ + if (percpu_ref_is_dying(&css->refcnt)) + return; + + /* + * Killing would put the base ref, but we need to keep it alive until + * after ->css_offline(). + */ + css_get(css); + + /* + * cgroup core guarantees that, by the time ->css_offline() is invoked, + * no new css reference will be given out via css_tryget_online(). We + * can't simply call percpu_ref_kill() and proceed to offlining css's + * because percpu_ref_kill() doesn't guarantee that the ref is seen as + * killed on all CPUs on return. + * + * Use percpu_ref_kill_and_confirm() to get notifications as each css is + * confirmed to be seen as killed on all CPUs. + */ + percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn); +} + +/** + * cgroup_destroy_locked - destroy @cgrp (called on rmdir) * @cgrp: cgroup to be destroyed * - * css's make use of percpu refcnts whose killing latency shouldn't be - * exposed to userland and are RCU protected. Also, cgroup core needs to - * guarantee that css_tryget_online() won't succeed by the time - * ->css_offline() is invoked. To satisfy all the requirements, - * destruction is implemented in the following two steps. - * - * s1. Verify @cgrp can be destroyed and mark it dying. Remove all - * userland visible parts and start killing the percpu refcnts of - * css's. Set up so that the next stage will be kicked off once all - * the percpu refcnts are confirmed to be killed. - * - * s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the - * rest of destruction. Once all cgroup references are gone, the - * cgroup is RCU-freed. - * - * This function implements s1. After this step, @cgrp is gone as far as - * the userland is concerned and a new cgroup with the same name may be - * created. As cgroup doesn't care about the names internally, this - * doesn't cause any problem. + * Tear down @cgrp on behalf of rmdir. Constraints: + * + * - Userspace: rmdir must succeed when cgroup.procs and friends are empty. + * + * - Kernel: subsystem ->css_offline() must not run while any task in @cgrp's + * subtree is still doing kernel work. A task hidden from cgroup.procs (past + * exit_signals() with signal->live cleared) can still schedule, allocate, and + * consume resources until its final context switch. Dying descendants in the + * subtree can host such tasks too. + * + * - Kernel: css_tryget_online() must fail by the time ->css_offline() runs. + * + * The destruction runs in three parts: + * + * - This function: synchronous user-visible state teardown plus kill_css_sync() + * on each subsystem css. + * + * - cgroup_finish_destroy(): kicks the percpu_ref kill via kill_css_finish() on + * each subsystem css. Fires once @cgrp's subtree is fully drained, either + * inline here or from cgroup_update_populated(). + * + * - The percpu_ref kill chain: css_killed_ref_fn -> css_killed_work_fn -> + * ->css_offline() -> release/free. + * + * Return 0 on success, -EBUSY if a userspace-visible task or an online child + * remains. */ static int cgroup_destroy_locked(struct cgroup *cgrp) - __releases(&cgroup_mutex) __acquires(&cgroup_mutex) { struct cgroup *tcgrp, *parent = cgroup_parent(cgrp); struct cgroup_subsys_state *css; struct cgrp_cset_link *link; + struct css_task_iter it; + struct task_struct *task; int ssid, ret; lockdep_assert_held(&cgroup_mutex); - /* - * Only migration can raise populated from zero and we're already - * holding cgroup_mutex. - */ - if (cgroup_is_populated(cgrp)) + css_task_iter_start(&cgrp->self, 0, &it); + task = css_task_iter_next(&it); + css_task_iter_end(&it); + if (task) return -EBUSY; /* @@ -6147,9 +6193,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) link->cset->dead = true; spin_unlock_irq(&css_set_lock); - /* initiate massacre of all css's */ for_each_css(css, ssid, cgrp) - kill_css(css); + kill_css_sync(css); /* clear and remove @cgrp dir, @cgrp has an extra ref on its kn */ css_clear_dir(&cgrp->self); @@ -6180,79 +6225,27 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) /* put the base reference */ percpu_ref_kill(&cgrp->self.refcnt); + if (!cgroup_is_populated(cgrp)) + cgroup_finish_destroy(cgrp); + return 0; }; /** - * cgroup_drain_dying - wait for dying tasks to leave before rmdir - * @cgrp: the cgroup being removed - * - * cgroup.procs and cgroup.threads use css_task_iter which filters out - * PF_EXITING tasks so that userspace doesn't see tasks that have already been - * reaped via waitpid(). However, cgroup_has_tasks() - which tests whether the - * cgroup has non-empty css_sets - is only updated when dying tasks pass through - * cgroup_task_dead() in finish_task_switch(). This creates a window where - * cgroup.procs reads empty but cgroup_has_tasks() is still true, making rmdir - * fail with -EBUSY from cgroup_destroy_locked() even though userspace sees no - * tasks. - * - * This function aligns cgroup_has_tasks() with what userspace can observe. If - * cgroup_has_tasks() but the task iterator sees nothing (all remaining tasks are - * PF_EXITING), we wait for cgroup_task_dead() to finish processing them. As the - * window between PF_EXITING and cgroup_task_dead() is short, the wait is brief. + * cgroup_finish_destroy - deferred half of @cgrp destruction + * @cgrp: cgroup whose subtree just became empty * - * This function only concerns itself with this cgroup's own dying tasks. - * Whether the cgroup has children is cgroup_destroy_locked()'s problem. - * - * Each cgroup_task_dead() kicks the waitqueue via cset->cgrp_links, and we - * retry the full check from scratch. - * - * Must be called with cgroup_mutex held. + * See cgroup_destroy_locked() for the rationale. */ -static int cgroup_drain_dying(struct cgroup *cgrp) - __releases(&cgroup_mutex) __acquires(&cgroup_mutex) +static void cgroup_finish_destroy(struct cgroup *cgrp) { - struct css_task_iter it; - struct task_struct *task; - DEFINE_WAIT(wait); + struct cgroup_subsys_state *css; + int ssid; lockdep_assert_held(&cgroup_mutex); -retry: - if (!cgroup_has_tasks(cgrp)) - return 0; - - /* Same iterator as cgroup.threads - if any task is visible, it's busy */ - css_task_iter_start(&cgrp->self, 0, &it); - task = css_task_iter_next(&it); - css_task_iter_end(&it); - - if (task) - return -EBUSY; - /* - * All remaining tasks are PF_EXITING and will pass through - * cgroup_task_dead() shortly. Wait for a kick and retry. - * - * cgroup_has_tasks() can't transition from false to true while we're - * holding cgroup_mutex, but the true to false transition happens - * under css_set_lock (via cgroup_task_dead()). We must retest and - * prepare_to_wait() under css_set_lock. Otherwise, the transition - * can happen between our first test and prepare_to_wait(), and we - * sleep with no one to wake us. - */ - spin_lock_irq(&css_set_lock); - if (!cgroup_has_tasks(cgrp)) { - spin_unlock_irq(&css_set_lock); - return 0; - } - prepare_to_wait(&cgrp->dying_populated_waitq, &wait, - TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&css_set_lock); - mutex_unlock(&cgroup_mutex); - schedule(); - finish_wait(&cgrp->dying_populated_waitq, &wait); - mutex_lock(&cgroup_mutex); - goto retry; + for_each_css(css, ssid, cgrp) + kill_css_finish(css); } int cgroup_rmdir(struct kernfs_node *kn) @@ -6264,12 +6257,9 @@ int cgroup_rmdir(struct kernfs_node *kn) if (!cgrp) return 0; - ret = cgroup_drain_dying(cgrp); - if (!ret) { - ret = cgroup_destroy_locked(cgrp); - if (!ret) - TRACE_CGROUP_PATH(rmdir, cgrp); - } + ret = cgroup_destroy_locked(cgrp); + if (!ret) + TRACE_CGROUP_PATH(rmdir, cgrp); cgroup_kn_unlock(kn); return ret; @@ -7029,7 +7019,6 @@ void cgroup_task_exit(struct task_struct *tsk) static void do_cgroup_task_dead(struct task_struct *tsk) { - struct cgrp_cset_link *link; struct css_set *cset; unsigned long flags; @@ -7043,11 +7032,6 @@ static void do_cgroup_task_dead(struct task_struct *tsk) if (thread_group_leader(tsk) && atomic_read(&tsk->signal->live)) list_add_tail(&tsk->cg_list, &cset->dying_tasks); - /* kick cgroup_drain_dying() waiters, see cgroup_rmdir() */ - list_for_each_entry(link, &cset->cgrp_links, cgrp_link) - if (waitqueue_active(&link->cgrp->dying_populated_waitq)) - wake_up(&link->cgrp->dying_populated_waitq); - if (dl_task(tsk)) dec_dl_tasks_cs(tsk); -- cgit v1.2.3 From 360f61bc29085d65a24dbaaf707c802553a239fd Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Thu, 30 Apr 2026 06:09:58 +0200 Subject: MAINTAINERS: Update mail for Peter Rosin I'm resigning from my position at Axentia. Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang --- .mailmap | 1 + MAINTAINERS | 24 +++++++++++------------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/.mailmap b/.mailmap index b78aa092b4bb..eec4a740f7ca 100644 --- a/.mailmap +++ b/.mailmap @@ -682,6 +682,7 @@ Peter A Jonsson Peter Hilber Peter Oruba Peter Oruba +Peter Rosin Pierre-Louis Bossart Pratyush Anand Pratyush Yadav diff --git a/MAINTAINERS b/MAINTAINERS index 882214b0e7db..1b0c453bca46 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4299,18 +4299,16 @@ F: Documentation/devicetree/bindings/leds/backlight/awinic,aw99706.yaml F: drivers/video/backlight/aw99706.c AXENTIA ARM DEVICES -M: Peter Rosin L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Maintained +S: Orphan F: arch/arm/boot/dts/microchip/at91-linea.dtsi F: arch/arm/boot/dts/microchip/at91-natte.dtsi F: arch/arm/boot/dts/microchip/at91-nattis-2-natte-2.dts F: arch/arm/boot/dts/microchip/at91-tse850-3.dts AXENTIA ASOC DRIVERS -M: Peter Rosin L: linux-sound@vger.kernel.org -S: Maintained +S: Orphan F: Documentation/devicetree/bindings/sound/axentia,* F: sound/soc/atmel/tse850-pcm5142.c @@ -12046,7 +12044,7 @@ F: Documentation/i2c/busses/i2c-nvidia-gpu.rst F: drivers/i2c/busses/i2c-nvidia-gpu.c I2C MUXES -M: Peter Rosin +M: Peter Rosin L: linux-i2c@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/i2c/i2c-arb* @@ -12447,7 +12445,7 @@ F: drivers/iio/industrialio-backend.c F: include/linux/iio/backend.h IIO DIGITAL POTENTIOMETER DAC -M: Peter Rosin +M: Peter Rosin L: linux-iio@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-bus-iio-dac-dpot-dac @@ -12455,7 +12453,7 @@ F: Documentation/devicetree/bindings/iio/dac/dpot-dac.yaml F: drivers/iio/dac/dpot-dac.c IIO ENVELOPE DETECTOR -M: Peter Rosin +M: Peter Rosin L: linux-iio@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-bus-iio-adc-envelope-detector @@ -12471,7 +12469,7 @@ F: include/linux/iio/iio-gts-helper.h F: drivers/iio/test/iio-test-gts.c IIO MULTIPLEXER -M: Peter Rosin +M: Peter Rosin L: linux-iio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.yaml @@ -12502,7 +12500,7 @@ F: include/linux/iio/ F: tools/iio/ IIO UNIT CONVERTER -M: Peter Rosin +M: Peter Rosin L: linux-iio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/iio/afe/current-sense-amplifier.yaml @@ -15718,7 +15716,7 @@ F: Documentation/devicetree/bindings/media/i2c/maxim,max96717.yaml F: drivers/media/i2c/max96717.c MAX9860 MONO AUDIO VOICE CODEC DRIVER -M: Peter Rosin +M: Peter Rosin L: linux-sound@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/sound/max9860.txt @@ -15933,7 +15931,7 @@ F: Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml F: drivers/net/can/spi/mcp251xfd/ MCP4018 AND MCP4531 MICROCHIP DIGITAL POTENTIOMETER DRIVERS -M: Peter Rosin +M: Peter Rosin L: linux-iio@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531 @@ -18238,7 +18236,7 @@ F: include/linux/mmc/ F: include/uapi/linux/mmc/ MULTIPLEXER SUBSYSTEM -M: Peter Rosin +M: Peter Rosin S: Odd Fixes F: Documentation/ABI/testing/sysfs-class-mux* F: Documentation/devicetree/bindings/mux/ @@ -19347,7 +19345,7 @@ F: include/dt-bindings/display/tda998x.h K: "nxp,tda998x" NXP TFA9879 DRIVER -M: Peter Rosin +M: Peter Rosin L: linux-sound@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/sound/trivial-codec.yaml -- cgit v1.2.3 From 60f21a2649308bbd84919ba6656d5ccd660953cf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 27 Apr 2026 14:16:34 -1000 Subject: cgroup, sched_ext: Include exiting tasks in cgroup iter a72f73c4dd9b ("cgroup: Don't expose dead tasks in cgroup") made css_task_iter_advance() skip exiting tasks so cgroup.procs stays consistent with waitpid() visibility. Unfortunately, this broke scx_task_iter. scx_task_iter walks either scx_tasks (global) or a cgroup subtree via css_task_iter() and the two modes are expected to cover the same set of tasks. After the above change the cgroup-scoped mode silently skips tasks past exit_signals() that are still on scx_tasks. scx_sub_enable_workfn()'s abort path is one of the symptoms: an exiting SCX_TASK_SUB_INIT task can race past the cgroup iter leaking __scx_init_task() state. Other iterations share the same gap. Add CSS_TASK_ITER_WITH_DEAD to opt out of the skip and use it from scx_task_iter(). Fixes: b0e4c2f8a0f0 ("sched_ext: Implement cgroup subtree iteration for scx_task_iter") Reported-by: Cheng-Yang Chou Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 1 + kernel/cgroup/cgroup.c | 8 +++++--- kernel/sched/ext.c | 6 ++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e52160e85af4..f6d037a30fd8 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -53,6 +53,7 @@ struct kernel_clone_args; enum css_task_iter_flags { CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ + CSS_TASK_ITER_WITH_DEAD = (1U << 2), /* include exiting tasks */ CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */ }; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 1f084ee71443..e51ce4cd3739 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5059,10 +5059,12 @@ repeat: task = list_entry(it->task_pos, struct task_struct, cg_list); /* - * Hide tasks that are exiting but not yet removed. Keep zombie - * leaders with live threads visible. + * Hide tasks that are exiting but not yet removed by default. Keep + * zombie leaders with live threads visible. Usages that need to walk + * every existing task can opt out via CSS_TASK_ITER_WITH_DEAD. */ - if ((task->flags & PF_EXITING) && !atomic_read(&task->signal->live)) + if (!(it->flags & CSS_TASK_ITER_WITH_DEAD) && + (task->flags & PF_EXITING) && !atomic_read(&task->signal->live)) goto repeat; if (it->flags & CSS_TASK_ITER_PROCS) { diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 9483be03a4ca..dc5d4787296b 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -766,7 +766,8 @@ static void scx_task_iter_start(struct scx_task_iter *iter, struct cgroup *cgrp) lockdep_assert_held(&cgroup_mutex); iter->cgrp = cgrp; iter->css_pos = css_next_descendant_pre(NULL, &iter->cgrp->self); - css_task_iter_start(iter->css_pos, 0, &iter->css_iter); + css_task_iter_start(iter->css_pos, CSS_TASK_ITER_WITH_DEAD, + &iter->css_iter); return; } #endif @@ -866,7 +867,8 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter) iter->css_pos = css_next_descendant_pre(iter->css_pos, &iter->cgrp->self); if (iter->css_pos) - css_task_iter_start(iter->css_pos, 0, &iter->css_iter); + css_task_iter_start(iter->css_pos, CSS_TASK_ITER_WITH_DEAD, + &iter->css_iter); } return NULL; } -- cgit v1.2.3 From ff9eda4ea906b1f02fc260ddc42d2d9bd736a49c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 27 Apr 2026 14:16:35 -1000 Subject: sched_ext: Skip past-sched_ext_dead() tasks in scx_task_iter_next_locked() scx_task_iter's cgroup-scoped mode can return tasks whose sched_ext_dead() has already completed: cgroup_task_dead() removes from cset->tasks after sched_ext_dead() in finish_task_switch() and is irq-work deferred on PREEMPT_RT. The global mode is fine - sched_ext_dead() removes from scx_tasks via list_del_init() first. Callers (sub-sched enable prep/abort/apply, scx_sub_disable(), scx_fail_parent()) assume returned tasks are still on @sch and trip WARN_ON_ONCE() or operate on torn-down state otherwise. Set %SCX_TASK_OFF_TASKS in sched_ext_dead() under @p's rq lock and have scx_task_iter_next_locked() skip flagged tasks under the same lock. Setter and reader serialize on the per-task rq lock - no race. Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 1 + kernel/sched/ext.c | 33 +++++++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 1a3af2ea2a79..adb9a4de068a 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -101,6 +101,7 @@ enum scx_ent_flags { SCX_TASK_DEQD_FOR_SLEEP = 1 << 3, /* last dequeue was for SLEEP */ SCX_TASK_SUB_INIT = 1 << 4, /* task being initialized for a sub sched */ SCX_TASK_IMMED = 1 << 5, /* task is on local DSQ with %SCX_ENQ_IMMED */ + SCX_TASK_OFF_TASKS = 1 << 6, /* removed from scx_tasks by sched_ext_dead() */ /* * Bits 8 and 9 are used to carry task state: diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index dc5d4787296b..3f0d8aeaed81 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -928,16 +928,27 @@ static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter) * * Test for idle_sched_class as only init_tasks are on it. */ - if (p->sched_class != &idle_sched_class) - break; - } - if (!p) - return NULL; + if (p->sched_class == &idle_sched_class) + continue; - iter->rq = task_rq_lock(p, &iter->rf); - iter->locked_task = p; + iter->rq = task_rq_lock(p, &iter->rf); + iter->locked_task = p; - return p; + /* + * cgroup_task_dead() removes the dead tasks from cset->tasks + * after sched_ext_dead() and cgroup iteration may see tasks + * which already finished sched_ext_dead(). %SCX_TASK_OFF_TASKS + * is set by sched_ext_dead() under @p's rq lock. Test it to + * avoid visiting tasks which are already dead from SCX POV. + */ + if (p->scx.flags & SCX_TASK_OFF_TASKS) { + __scx_task_iter_rq_unlock(iter); + continue; + } + + return p; + } + return NULL; } /** @@ -3850,6 +3861,11 @@ void sched_ext_dead(struct task_struct *p) /* * @p is off scx_tasks and wholly ours. scx_root_enable()'s READY -> * ENABLED transitions can't race us. Disable ops for @p. + * + * %SCX_TASK_OFF_TASKS synchronizes against cgroup task iteration - see + * scx_task_iter_next_locked(). NONE tasks need no marking: cgroup + * iteration is only used from sub-sched paths, which require root + * enabled. Root enable transitions every live task to at least READY. */ if (scx_get_task_state(p) != SCX_TASK_NONE) { struct rq_flags rf; @@ -3857,6 +3873,7 @@ void sched_ext_dead(struct task_struct *p) rq = task_rq_lock(p, &rf); scx_disable_and_exit_task(scx_task_sched(p), p); + p->scx.flags |= SCX_TASK_OFF_TASKS; task_rq_unlock(rq, p, &rf); } } -- cgit v1.2.3 From 84ae1840260fece9b6b70d3872b79384bbe5a90b Mon Sep 17 00:00:00 2001 From: Osama Abdelkader Date: Thu, 23 Apr 2026 22:06:19 +0200 Subject: drm/sti: remove bridge when sti_hda component_add fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use devm_drm_bridge_add() so the bridge is released if probe fails after registration, and drop the manual drm_bridge_remove() in remove(). Check the return value of devm_drm_bridge_add(). Signed-off-by: Osama Abdelkader Fixes: d28726efc637 ("drm/sti: hda: add bridge before attaching") Cc: stable@vger.kernel.org Reviewed-by: Luca Ceresoli Acked-by: Raphaël Gallais-Pou Link: https://patch.msgid.link/20260423200622.325076-1-osama.abdelkader@gmail.com Signed-off-by: Raphael Gallais-Pou --- drivers/gpu/drm/sti/sti_hda.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c index b7397827889c..360a88ca8f0c 100644 --- a/drivers/gpu/drm/sti/sti_hda.c +++ b/drivers/gpu/drm/sti/sti_hda.c @@ -741,6 +741,7 @@ static int sti_hda_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct sti_hda *hda; struct resource *res; + int ret; DRM_INFO("%s\n", __func__); @@ -779,7 +780,9 @@ static int sti_hda_probe(struct platform_device *pdev) return PTR_ERR(hda->clk_hddac); } - drm_bridge_add(&hda->bridge); + ret = devm_drm_bridge_add(dev, &hda->bridge); + if (ret) + return ret; platform_set_drvdata(pdev, hda); @@ -788,10 +791,7 @@ static int sti_hda_probe(struct platform_device *pdev) static void sti_hda_remove(struct platform_device *pdev) { - struct sti_hda *hda = platform_get_drvdata(pdev); - component_del(&pdev->dev, &sti_hda_ops); - drm_bridge_remove(&hda->bridge); } static const struct of_device_id hda_of_match[] = { -- cgit v1.2.3 From b34c82777a2c0648ee053595f4b290fd5249b093 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 30 Apr 2026 10:27:47 +0100 Subject: sched_ext: idle: Recheck prev_cpu after narrowing allowed mask scx_select_cpu_dfl() narrows @allowed to @cpus_allowed & @p->cpus_ptr when the BPF caller supplies a @cpus_allowed that differs from @p->cpus_ptr and @p doesn't have full affinity. However, @is_prev_allowed was computed against the original (wider) @cpus_allowed, so the prev_cpu fast paths could pick a @prev_cpu that is in @cpus_allowed but not in @p->cpus_ptr, violating the intended invariant that the returned CPU is always usable by @p. The kernel masks this via the SCX_EV_SELECT_CPU_FALLBACK fallback, but the behavior contradicts the documented contract. Move the @is_prev_allowed evaluation past the narrowing block so it tests against the final @allowed mask. Fixes: ee9a4e92799d ("sched_ext: idle: Properly handle invalid prev_cpu during idle selection") Cc: stable@vger.kernel.org # v6.16+ Assisted-by: Claude Signed-off-by: David Carlier Reviewed-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext_idle.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index 7468560a6d80..6e1980763270 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -465,12 +465,6 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, preempt_disable(); - /* - * Check whether @prev_cpu is still within the allowed set. If not, - * we can still try selecting a nearby CPU. - */ - is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed); - /* * Determine the subset of CPUs usable by @p within @cpus_allowed. */ @@ -487,6 +481,12 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, } } + /* + * Check whether @prev_cpu is still within the allowed set. If not, + * we can still try selecting a nearby CPU. + */ + is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed); + /* * This is necessary to protect llc_cpus. */ -- cgit v1.2.3 From d8769544bde51b0ac980d10f8fe9f9fed6c95995 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Thu, 30 Apr 2026 13:11:42 -0700 Subject: docs: cgroup-v1: Update charge-commit section Commit 1d8f136a421f ("memcg/hugetlb: remove memcg hugetlb try-commit-cancel protocol") removed mem_cgroup_commit_charge() and mem_cgroup_cancel_charge(), but the docs still refer to those functions. There is no longer any charge cancellation. Update the docs to match the code. Signed-off-by: T.J. Mercier Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v1/memcg_test.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v1/memcg_test.rst b/Documentation/admin-guide/cgroup-v1/memcg_test.rst index 9f8e27355cba..7c7cd457cf69 100644 --- a/Documentation/admin-guide/cgroup-v1/memcg_test.rst +++ b/Documentation/admin-guide/cgroup-v1/memcg_test.rst @@ -47,21 +47,19 @@ Please note that implementation details can be changed. Called when swp_entry's refcnt goes down to 0. A charge against swap disappears. -3. charge-commit-cancel +3. charge-commit ======================= Memcg pages are charged in two steps: - mem_cgroup_try_charge() - - mem_cgroup_commit_charge() or mem_cgroup_cancel_charge() + - commit_charge() At try_charge(), there are no flags to say "this page is charged". at this point, usage += PAGE_SIZE. At commit(), the page is associated with the memcg. - At cancel(), simply usage -= PAGE_SIZE. - Under below explanation, we assume CONFIG_SWAP=y. 4. Anonymous -- cgit v1.2.3 From a200cdbf95932631ec338d08a6e9e31b34c4e8a6 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Mon, 27 Apr 2026 12:00:11 +0800 Subject: ovpn: reset MAC header before passing skb up After decapsulating a packet, the skb->mac_header still points to the outer transport header. Fix this by calling skb_reset_mac_header() in ovpn_netdev_write() to ensure the MAC header points to the beginning of the inner IP/network packet, as expected by the rest of the stack. Reported-by: Minqiang Chen Fixes: 8534731dbf2d ("ovpn: implement packet processing") Signed-off-by: Qingfang Deng Signed-off-by: Antonio Quartulli --- drivers/net/ovpn/io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c index db43a1f8a07a..d92bb87be2b2 100644 --- a/drivers/net/ovpn/io.c +++ b/drivers/net/ovpn/io.c @@ -85,6 +85,7 @@ static void ovpn_netdev_write(struct ovpn_peer *peer, struct sk_buff *skb) skb_scrub_packet(skb, true); /* network header reset in ovpn_decrypt_post() */ + skb_reset_mac_header(skb); skb_reset_transport_header(skb); skb_reset_inner_headers(skb); -- cgit v1.2.3 From c539cb30f93f119566f2ae9d016cce11f188d780 Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Wed, 25 Mar 2026 17:49:18 +0100 Subject: ovpn: ensure packet delivery happens with BH disabled ovpn injects decrypted packets into the netdev RX path through ovpn_netdev_write() which invokes gro_cells_receive() and dev_dstats_rx_add(). ovpn_netdev_write() is normally called in softirq context, however, in case of TCP connections it may also be invoked process context. When this happens gro_cells_receive() will throw a warning: [ 230.183747][ T12] WARNING: net/core/gro_cells.c:30 at gro_cells_receive+0x708/0xaa0, CPU#1: kworker/u16:0/12 and lockdep will also report a potential inconsistent lock state: WARNING: inconsistent lock state 7.0.0-rc4+ #246 Tainted: G W -------------------------------- inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. because attempts to acquire gro_cells->bh_lock by both contexts may lead to a deadlock. At the same time, dev_dstats_rx_add() does not expect to race with a softirq (which may happen when invoked in process context), because the latter may access its per-cpu state and corrupt it. Fix all this by invoking local_bh_disable/enable() around gro_cells_receive() and dev_dstats_rx_add() to ensure that bottom halves are always disabled before calling both of them. Fixes: 11851cbd60ea ("ovpn: implement TCP transport") Signed-off-by: Ralf Lici Signed-off-by: Antonio Quartulli --- drivers/net/ovpn/io.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c index d92bb87be2b2..22c555dd962e 100644 --- a/drivers/net/ovpn/io.c +++ b/drivers/net/ovpn/io.c @@ -91,12 +91,18 @@ static void ovpn_netdev_write(struct ovpn_peer *peer, struct sk_buff *skb) /* cause packet to be "received" by the interface */ pkt_len = skb->len; + /* we may get here in process context in case of TCP connections, + * therefore we have to disable BHs to ensure gro_cells_receive() + * and dev_dstats_rx_add() do not get corrupted or enter deadlock + */ + local_bh_disable(); ret = gro_cells_receive(&peer->ovpn->gro_cells, skb); if (likely(ret == NET_RX_SUCCESS)) { /* update RX stats with the size of decrypted packet */ ovpn_peer_stats_increment_rx(&peer->vpn_stats, pkt_len); dev_dstats_rx_add(peer->ovpn->dev, pkt_len); } + local_bh_enable(); } void ovpn_decrypt_post(void *data, int ret) -- cgit v1.2.3 From 201ba706318d460a2ea660e3652610be62532a70 Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Wed, 29 Apr 2026 10:00:16 +0200 Subject: selftests: ovpn: reduce ping count in test.sh The second stage of test.sh ("run baseline data traffic") performs a basic connectivity check with ping -qfc 500 -w 3. On slower CI instances this is too strict for TCP: the RTT is high enough that 500 echo requests do not reliably complete within 3 seconds, so the stage flakes and the test fails even though the ovpn setup is healthy. Reduce the packet count to 100 for both the plain and 3000-byte pings in that stage. This still verifies peer setup, key exchange, routing, and data-path traffic, without making the basic connectivity check depend on timing out under load. Fixes: 959bc330a439 ("testing/selftests: add test tool and scripts for ovpn module") Signed-off-by: Ralf Lici Signed-off-by: Antonio Quartulli --- tools/testing/selftests/net/ovpn/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh index b50dbe45a4d0..c06e3135fbef 100755 --- a/tools/testing/selftests/net/ovpn/test.sh +++ b/tools/testing/selftests/net/ovpn/test.sh @@ -98,10 +98,10 @@ ovpn_run_basic_traffic() { sleep 0.3 ovpn_cmd_ok "send baseline traffic to peer ${p}" \ ip netns exec ovpn_peer0 \ - ping -qfc 500 -w 3 5.5.5.$((p + 1)) + ping -qfc 100 -w 3 5.5.5.$((p + 1)) ovpn_cmd_ok "send large-payload traffic to peer ${p}" \ ip netns exec ovpn_peer0 \ - ping -qfc 500 -s 3000 -w 3 5.5.5.$((p + 1)) + ping -qfc 100 -s 3000 -w 3 5.5.5.$((p + 1)) wait "${tcpdump_pid1}" || return 1 wait "${tcpdump_pid2}" || return 1 -- cgit v1.2.3 From afbd961305eb483515650ccfcb7743608e7add78 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 30 Apr 2026 10:44:13 +0300 Subject: ipvs: fixes for the new ip_vs_status info Sashiko reports some problems for the recently added /proc/net/ip_vs_status: * ip_vs_status_show() as a table reader may run long after the conn_tab and svc_table table are released. While ip_vs_conn_flush() properly changes the conn_tab_changes counter when conn_tab is removed, ip_vs_del_service() and ip_vs_flush() were missing such change for the svc_table_changes counter. As result, readers like ip_vs_dst_event() and ip_vs_status_show() may continue to use a freed table after a cond_resched_rcu() call. * While counting the buckets in ip_vs_status_show() make sure we traverse only the needed number of entries in the chain. This also prevents possible overflow of the 'count' variable. * Add check for 'loops' to prevent infinite loops while restarting the traversal on table change. * While IP_VS_CONN_TAB_MAX_BITS is 20 on 32-bit platforms and there is no risk to overflow when multiplying the number of conn_tab buckets to 100, prefer the div_u64() helper to make the following dividing safer. * Use 0440 permissions for ip_vs_status to restrict the info only to root due to the exported information for hash distribution. Link: https://sashiko.dev/#/patchset/20260410112352.23599-1-fw%40strlen.de Fixes: 9a9ccef907a7 ("ipvs: add ip_vs_status info") Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 51 +++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 6632daa87ded..27e50afe9a54 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2032,6 +2032,9 @@ static int ip_vs_del_service(struct ip_vs_service *svc) cancel_delayed_work_sync(&ipvs->svc_resize_work); if (t) { rcu_assign_pointer(ipvs->svc_table, NULL); + /* Inform readers that table is removed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->svc_table_changes); while (1) { p = rcu_dereference_protected(t->new_tbl, 1); call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); @@ -2078,6 +2081,9 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) t = rcu_dereference_protected(ipvs->svc_table, 1); if (t) { rcu_assign_pointer(ipvs->svc_table, NULL); + /* Inform readers that table is removed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->svc_table_changes); while (1) { p = rcu_dereference_protected(t->new_tbl, 1); call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); @@ -3004,7 +3010,8 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) int old_gen, new_gen; u32 counts[8]; u32 bucket; - int count; + u32 count; + int loops; u32 sum1; u32 sum; int i; @@ -3020,6 +3027,7 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) if (!atomic_read(&ipvs->conn_count)) goto after_conns; old_gen = atomic_read(&ipvs->conn_tab_changes); + loops = 0; repeat_conn: smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */ @@ -3032,8 +3040,11 @@ repeat_conn: resched_score++; ip_vs_rht_walk_bucket_rcu(t, bucket, head) { count = 0; - hlist_bl_for_each_entry_rcu(hn, e, head, node) + hlist_bl_for_each_entry_rcu(hn, e, head, node) { count++; + if (count >= ARRAY_SIZE(counts) - 1) + break; + } } resched_score += count; if (resched_score >= 100) { @@ -3042,37 +3053,41 @@ repeat_conn: new_gen = atomic_read(&ipvs->conn_tab_changes); /* New table installed ? */ if (old_gen != new_gen) { + /* Too many changes? */ + if (++loops >= 5) + goto after_conns; old_gen = new_gen; goto repeat_conn; } } - counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++; + counts[count]++; } } for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++) sum += counts[i]; sum1 = sum - counts[0]; - seq_printf(seq, "Conn buckets empty:\t%u (%lu%%)\n", - counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U)); + seq_printf(seq, "Conn buckets empty:\t%u (%llu%%)\n", + counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U))); for (i = 1; i < ARRAY_SIZE(counts); i++) { if (!counts[i]) continue; - seq_printf(seq, "Conn buckets len-%d:\t%u (%lu%%)\n", + seq_printf(seq, "Conn buckets len-%d:\t%u (%llu%%)\n", i, counts[i], - (unsigned long)counts[i] * 100 / max(sum1, 1U)); + div_u64((u64)counts[i] * 100U, max(sum1, 1U))); } after_conns: t = rcu_dereference(ipvs->svc_table); count = ip_vs_get_num_services(ipvs); - seq_printf(seq, "Services:\t%d\n", count); + seq_printf(seq, "Services:\t%u\n", count); seq_printf(seq, "Service buckets:\t%d (%d bits, lfactor %d)\n", t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0); if (!count) goto after_svc; old_gen = atomic_read(&ipvs->svc_table_changes); + loops = 0; repeat_svc: smp_rmb(); /* ipvs->svc_table and svc_table_changes */ @@ -3086,8 +3101,11 @@ repeat_svc: ip_vs_rht_walk_bucket_rcu(t, bucket, head) { count = 0; hlist_bl_for_each_entry_rcu(svc, e, head, - s_list) + s_list) { count++; + if (count >= ARRAY_SIZE(counts) - 1) + break; + } } resched_score += count; if (resched_score >= 100) { @@ -3096,24 +3114,27 @@ repeat_svc: new_gen = atomic_read(&ipvs->svc_table_changes); /* New table installed ? */ if (old_gen != new_gen) { + /* Too many changes? */ + if (++loops >= 5) + goto after_svc; old_gen = new_gen; goto repeat_svc; } } - counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++; + counts[count]++; } } for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++) sum += counts[i]; sum1 = sum - counts[0]; - seq_printf(seq, "Service buckets empty:\t%u (%lu%%)\n", - counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U)); + seq_printf(seq, "Service buckets empty:\t%u (%llu%%)\n", + counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U))); for (i = 1; i < ARRAY_SIZE(counts); i++) { if (!counts[i]) continue; - seq_printf(seq, "Service buckets len-%d:\t%u (%lu%%)\n", + seq_printf(seq, "Service buckets len-%d:\t%u (%llu%%)\n", i, counts[i], - (unsigned long)counts[i] * 100 / max(sum1, 1U)); + div_u64((u64)counts[i] * 100U, max(sum1, 1U))); } after_svc: @@ -5039,7 +5060,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) ipvs->net->proc_net, ip_vs_stats_percpu_show, NULL)) goto err_percpu; - if (!proc_create_net_single("ip_vs_status", 0, ipvs->net->proc_net, + if (!proc_create_net_single("ip_vs_status", 0440, ipvs->net->proc_net, ip_vs_status_show, NULL)) goto err_status; #endif -- cgit v1.2.3 From f2da9a96abb4b7a64626e931cedd85f05d5498ca Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 30 Apr 2026 10:44:14 +0300 Subject: ipvs: fix races around the conn_lfactor and svc_lfactor sysctl vars Sashiko warns that the new sysctls vars can be changed after the hash tables are destroyed and their respective resizing works canceled, leading to mod_delayed_work() being called for canceled works. Solve this in different ways. conn_tab can be present even without services and is destroyed only on netns exit, so use disable_delayed_work_sync() to disable the work instead of adding more synchronization mechanisms. As for the svc_table, it is destroyed when the services are deleted, so we must be sure that netns exit is not called yet (the check for 'enable') and the work is not canceled by checking all under same mutex lock. Also, use WRITE_ONCE when updating the sysctl vars as we already read them with READ_ONCE. Link: https://sashiko.dev/#/patchset/20260410112352.23599-1-fw%40strlen.de Fixes: 8d7de5477e47 ("ipvs: add conn_lfactor and svc_lfactor sysctl vars") Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 2 +- net/netfilter/ipvs/ip_vs_ctl.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 2082bfb2d93c..84a4921a7865 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1835,7 +1835,7 @@ static void ip_vs_conn_flush(struct netns_ipvs *ipvs) if (!rcu_dereference_protected(ipvs->conn_tab, 1)) return; - cancel_delayed_work_sync(&ipvs->conn_resize_work); + disable_delayed_work_sync(&ipvs->conn_resize_work); if (!atomic_read(&ipvs->conn_count)) goto unreg; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 27e50afe9a54..caec516856e9 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2469,7 +2469,7 @@ static int ipvs_proc_conn_lfactor(const struct ctl_table *table, int write, if (val < -8 || val > 8) { ret = -EINVAL; } else { - *valp = val; + WRITE_ONCE(*valp, val); if (rcu_access_pointer(ipvs->conn_tab)) mod_delayed_work(system_unbound_wq, &ipvs->conn_resize_work, 0); @@ -2496,10 +2496,16 @@ static int ipvs_proc_svc_lfactor(const struct ctl_table *table, int write, if (val < -8 || val > 8) { ret = -EINVAL; } else { - *valp = val; - if (rcu_access_pointer(ipvs->svc_table)) + mutex_lock(&ipvs->service_mutex); + WRITE_ONCE(*valp, val); + /* Make sure the services are present */ + if (rcu_access_pointer(ipvs->svc_table) && + READ_ONCE(ipvs->enable) && + !test_bit(IP_VS_WORK_SVC_NORESIZE, + &ipvs->work_flags)) mod_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, 0); + mutex_unlock(&ipvs->service_mutex); } } return ret; -- cgit v1.2.3 From d493d9de1c21313cf62be0f6e1a4d48385fa7beb Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 30 Apr 2026 10:44:15 +0300 Subject: ipvs: fix the spin_lock usage for RT build syzbot reports for sleeping function called from invalid context [1]. The recently added code for resizable hash tables uses hlist_bl bit locks in combination with spin_lock for the connection fields (cp->lock). Fix the following problems: * avoid using spin_lock(&cp->lock) under locked bit lock because it sleeps on PREEMPT_RT * as the recent changes call ip_vs_conn_hash() only for newly allocated connection, the spin_lock can be removed there because the connection is still not linked to table and does not need cp->lock protection. * the lock can be removed also from ip_vs_conn_unlink() where we are the last connection user. * the last place that is fixed is ip_vs_conn_fill_cport() where now the cp->lock is locked before the other locks to ensure other packets do not modify the cp->flags in non-atomic way. Here we make sure cport and flags are changed only once if two or more packets race to fill the cport. Also, we fill cport early, so that if we race with resizing there will be valid cport key for the hashing. Add a warning if too many hash table changes occur for our RCU read-side critical section which is error condition but minor because the connection still can expire gracefully. Still, restore the cport to 0 to allow retransmitted packet to properly fill the cport. Problems reported by Sashiko. [1]: BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 16, name: ktimers/0 preempt_count: 2, expected: 0 RCU nest depth: 3, expected: 3 8 locks held by ktimers/0/16: #0: ffffffff8de5f260 (local_bh){.+.+}-{1:3}, at: __local_bh_disable_ip+0x3c/0x420 kernel/softirq.c:163 #1: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: __local_bh_disable_ip+0x3c/0x420 kernel/softirq.c:163 #2: ffff8880b8826360 (&base->expiry_lock){+...}-{3:3}, at: spin_lock include/linux/spinlock_rt.h:45 [inline] #2: ffff8880b8826360 (&base->expiry_lock){+...}-{3:3}, at: timer_base_lock_expiry kernel/time/timer.c:1502 [inline] #2: ffff8880b8826360 (&base->expiry_lock){+...}-{3:3}, at: __run_timer_base+0x120/0x9f0 kernel/time/timer.c:2384 #3: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire include/linux/rcupdate.h:300 [inline] #3: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline] #3: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: __rt_spin_lock kernel/locking/spinlock_rt.c:50 [inline] #3: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: rt_spin_lock+0x1e0/0x400 kernel/locking/spinlock_rt.c:57 #4: ffffc90000157a80 ((&cp->timer)){+...}-{0:0}, at: call_timer_fn+0xd4/0x5e0 kernel/time/timer.c:1745 #5: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire include/linux/rcupdate.h:300 [inline] #5: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline] #5: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: ip_vs_conn_unlink net/netfilter/ipvs/ip_vs_conn.c:315 [inline] #5: ffffffff8dfc80c0 (rcu_read_lock){....}-{1:3}, at: ip_vs_conn_expire+0x257/0x2390 net/netfilter/ipvs/ip_vs_conn.c:1260 #6: ffffffff8de5f260 (local_bh){.+.+}-{1:3}, at: __local_bh_disable_ip+0x3c/0x420 kernel/softirq.c:163 #7: ffff888068d4c3f0 (&cp->lock#2){+...}-{3:3}, at: spin_lock include/linux/spinlock_rt.h:45 [inline] #7: ffff888068d4c3f0 (&cp->lock#2){+...}-{3:3}, at: ip_vs_conn_unlink net/netfilter/ipvs/ip_vs_conn.c:324 [inline] #7: ffff888068d4c3f0 (&cp->lock#2){+...}-{3:3}, at: ip_vs_conn_expire+0xd4a/0x2390 net/netfilter/ipvs/ip_vs_conn.c:1260 Preemption disabled at: [] bit_spin_lock include/linux/bit_spinlock.h:38 [inline] [] hlist_bl_lock+0x18/0x110 include/linux/list_bl.h:149 CPU: 0 UID: 0 PID: 16 Comm: ktimers/0 Tainted: G W L syzkaller #0 PREEMPT_{RT,(full)} Tainted: [W]=WARN, [L]=SOFTLOCKUP Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/18/2026 Call Trace: dump_stack_lvl+0xe8/0x150 lib/dump_stack.c:120 __might_resched+0x329/0x480 kernel/sched/core.c:9162 __rt_spin_lock kernel/locking/spinlock_rt.c:48 [inline] rt_spin_lock+0xc2/0x400 kernel/locking/spinlock_rt.c:57 spin_lock include/linux/spinlock_rt.h:45 [inline] ip_vs_conn_unlink net/netfilter/ipvs/ip_vs_conn.c:324 [inline] ip_vs_conn_expire+0xd4a/0x2390 net/netfilter/ipvs/ip_vs_conn.c:1260 call_timer_fn+0x192/0x5e0 kernel/time/timer.c:1748 expire_timers kernel/time/timer.c:1799 [inline] __run_timers kernel/time/timer.c:2374 [inline] __run_timer_base+0x6a3/0x9f0 kernel/time/timer.c:2386 run_timer_base kernel/time/timer.c:2395 [inline] run_timer_softirq+0xb7/0x170 kernel/time/timer.c:2405 handle_softirqs+0x1de/0x6d0 kernel/softirq.c:622 __do_softirq kernel/softirq.c:656 [inline] run_ktimerd+0x69/0x100 kernel/softirq.c:1151 smpboot_thread_fn+0x541/0xa50 kernel/smpboot.c:160 kthread+0x388/0x470 kernel/kthread.c:436 ret_from_fork+0x514/0xb70 arch/x86/kernel/process.c:158 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Reported-by: syzbot+504e778ddaecd36fdd17@syzkaller.appspotmail.com Link: https://sashiko.dev/#/patchset/20260415200216.79699-1-ja%40ssi.bg Link: https://sashiko.dev/#/patchset/20260420165539.85174-4-ja%40ssi.bg Link: https://sashiko.dev/#/patchset/20260422135823.50489-4-ja%40ssi.bg Fixes: 2fa7cc9c7025 ("ipvs: switch to per-net connection table") Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 74 +++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 33 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 84a4921a7865..9ea6b4fa78bf 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -267,27 +267,20 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) hash_key2 = hash_key; use2 = false; } + conn_tab_lock(t, cp, hash_key, hash_key2, use2, true /* new_hash */, &head, &head2); - spin_lock(&cp->lock); - - if (!(cp->flags & IP_VS_CONN_F_HASHED)) { - cp->flags |= IP_VS_CONN_F_HASHED; - WRITE_ONCE(cp->hn0.hash_key, hash_key); - WRITE_ONCE(cp->hn1.hash_key, hash_key2); - refcount_inc(&cp->refcnt); - hlist_bl_add_head_rcu(&cp->hn0.node, head); - if (use2) - hlist_bl_add_head_rcu(&cp->hn1.node, head2); - ret = 1; - } else { - pr_err("%s(): request for already hashed, called from %pS\n", - __func__, __builtin_return_address(0)); - ret = 0; - } - spin_unlock(&cp->lock); + cp->flags |= IP_VS_CONN_F_HASHED; + WRITE_ONCE(cp->hn0.hash_key, hash_key); + WRITE_ONCE(cp->hn1.hash_key, hash_key2); + refcount_inc(&cp->refcnt); + hlist_bl_add_head_rcu(&cp->hn0.node, head); + if (use2) + hlist_bl_add_head_rcu(&cp->hn1.node, head2); + conn_tab_unlock(head, head2); + ret = 1; /* Schedule resizing if load increases */ if (atomic_read(&ipvs->conn_count) > t->u_thresh && @@ -321,7 +314,6 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) conn_tab_lock(t, cp, hash_key, hash_key2, use2, false /* new_hash */, &head, &head2); - spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { /* Decrease refcnt and unlink conn only if we are last user */ @@ -334,7 +326,6 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) } } - spin_unlock(&cp->lock); conn_tab_unlock(head, head2); rcu_read_unlock(); @@ -637,6 +628,7 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) struct ip_vs_conn_hnode *hn; u32 hash_key, hash_key_new; struct ip_vs_conn_param p; + bool by_me = false; int ntbl; int dir; @@ -664,8 +656,16 @@ retry: t = rcu_dereference(t->new_tbl); ntbl++; /* We are lost? */ - if (ntbl >= 2) + if (ntbl >= 2) { + spin_lock_bh(&cp->lock); + if (cp->flags & IP_VS_CONN_F_NO_CPORT && by_me) + cp->cport = 0; + /* hn1 will be rehashed on next packet */ + spin_unlock_bh(&cp->lock); + IP_VS_ERR_RL("%s(): Too many ht changes for dir %d\n", + __func__, dir); return; + } } /* Rehashing during resize? Use the recent table for adds */ @@ -683,10 +683,13 @@ retry: if (head > head2 && t == t2) swap(head, head2); + /* Protect the cp->flags modification */ + spin_lock_bh(&cp->lock); + /* Lock seqcount only for the old bucket, even if we are on new table * because it affects the del operation, not the adding. */ - spin_lock_bh(&t->lock[hash_key & t->lock_mask].l); + spin_lock(&t->lock[hash_key & t->lock_mask].l); preempt_disable_nested(); write_seqcount_begin(&t->seqc[hash_key & t->seqc_mask]); @@ -704,14 +707,23 @@ retry: hlist_bl_unlock(head); write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]); preempt_enable_nested(); - spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l); + spin_unlock(&t->lock[hash_key & t->lock_mask].l); + spin_unlock_bh(&cp->lock); hash_key = hash_key_new; goto retry; } - spin_lock(&cp->lock); - if ((cp->flags & IP_VS_CONN_F_NO_CPORT) && - (cp->flags & IP_VS_CONN_F_HASHED)) { + /* Fill cport once, even if multiple packets try to do it */ + if (cp->flags & IP_VS_CONN_F_NO_CPORT && (!cp->cport || by_me)) { + /* If we race with resizing make sure cport is set for dir 1 */ + if (!cp->cport) { + cp->cport = cport; + by_me = true; + } + if (!dir) { + atomic_dec(&ipvs->no_cport_conns[af_id]); + cp->flags &= ~IP_VS_CONN_F_NO_CPORT; + } /* We do not recalc hash_key_r under lock, we assume the * parameters in cp do not change, i.e. cport is * the only possible change. @@ -726,21 +738,17 @@ retry: hlist_bl_del_rcu(&hn->node); hlist_bl_add_head_rcu(&hn->node, head_new); } - if (!dir) { - atomic_dec(&ipvs->no_cport_conns[af_id]); - cp->flags &= ~IP_VS_CONN_F_NO_CPORT; - cp->cport = cport; - } } - spin_unlock(&cp->lock); if (head != head2) hlist_bl_unlock(head2); hlist_bl_unlock(head); write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]); preempt_enable_nested(); - spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l); - if (dir--) + spin_unlock(&t->lock[hash_key & t->lock_mask].l); + + spin_unlock_bh(&cp->lock); + if (dir-- && by_me) goto next_dir; } -- cgit v1.2.3 From fbe1e01e818ee6db86ff947599bf0bea96de7e71 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 30 Apr 2026 10:44:16 +0300 Subject: ipvs: do not leak dest after get from dest trash Sashiko warns about leaked dest if ip_vs_start_estimator() fails in ip_vs_add_dest(). Add ip_vs_trash_put_dest() to put back the dest into dest trash. Link: https://sashiko.dev/#/patchset/20260428175725.72050-1-ja%40ssi.bg Fixes: 705dd3444081 ("ipvs: use kthreads for stats estimation") Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index caec516856e9..d81077c2457a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1102,6 +1102,24 @@ out: return dest; } +/* Put destination in trash */ +static void ip_vs_trash_put_dest(struct netns_ipvs *ipvs, + struct ip_vs_dest *dest, unsigned long istart, + bool cleanup) +{ + spin_lock_bh(&ipvs->dest_trash_lock); + IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), + refcount_read(&dest->refcnt)); + if (list_empty(&ipvs->dest_trash) && !cleanup) + mod_timer(&ipvs->dest_trash_timer, + jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); + /* dest lives in trash with reference */ + list_add(&dest->t_list, &ipvs->dest_trash); + dest->idle_start = istart; + spin_unlock_bh(&ipvs->dest_trash_lock); +} + static void ip_vs_dest_rcu_free(struct rcu_head *head) { struct ip_vs_dest *dest; @@ -1461,9 +1479,12 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) ntohs(dest->vport)); ret = ip_vs_start_estimator(svc->ipvs, &dest->stats); + /* On error put back dest into the trash */ if (ret < 0) - return ret; - __ip_vs_update_dest(svc, dest, udest, 1); + ip_vs_trash_put_dest(svc->ipvs, dest, dest->idle_start, + false); + else + __ip_vs_update_dest(svc, dest, udest, 1); } else { /* * Allocate and initialize the dest structure @@ -1533,17 +1554,7 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, */ ip_vs_rs_unhash(dest); - spin_lock_bh(&ipvs->dest_trash_lock); - IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", - IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), - refcount_read(&dest->refcnt)); - if (list_empty(&ipvs->dest_trash) && !cleanup) - mod_timer(&ipvs->dest_trash_timer, - jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); - /* dest lives in trash with reference */ - list_add(&dest->t_list, &ipvs->dest_trash); - dest->idle_start = 0; - spin_unlock_bh(&ipvs->dest_trash_lock); + ip_vs_trash_put_dest(ipvs, dest, 0, cleanup); /* Queue up delayed work to expire all no destination connections. * No-op when CONFIG_SYSCTL is disabled. -- cgit v1.2.3 From 2fd109238925d53c44ea409df0558844af7877b8 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 30 Apr 2026 10:44:17 +0300 Subject: ipvs: fix races around est_mutex and est_cpulist Sashiko reports for races and possible crash around the usage of est_cpulist_valid and sysctl_est_cpulist. The problem is that we do not lock est_mutex in some places which can lead to wrong write ordering and as result problems when calling cpumask_weight() and cpumask_empty(). Fix them by moving the est_max_threads read/write under locked est_mutex. Do the same for one ip_vs_est_reload_start() call to protect the cpumask_empty() usage of sysctl_est_cpulist. To remove the chance of deadlock while stopping the estimation kthreads, keep the data structure for kthread 0 even after last estimator is removed and do not hold mutexes while stopping this task. Now we will use a new flag 'needed' to know when kthread 0 should run. The kthreads above 0 do not use mutexes, so stop them under est_mutex because their kthread data still can be destroyed if they do not serve estimators. Now all kthreads will be started by the est_reload_work to properly serialize the stop/start for kthread 0. Reduce the use of service_mutex in ip_vs_est_calc_phase() because under est_mutex we can safely walk est_kt_arr to stop the kthreads above slot 0. As ip_vs_stop_estimator() for tot_stats should be called under service_mutex, do it early in the netns exit path in ip_vs_flush() to avoid locking the mutex again later. It still should be called in ip_vs_control_net_cleanup_sysctl() when we are called during netns init error. Use -2 for ktid as indicator if estimator was already stopped. Finally, fix use-after-free for kd->est_row in ip_vs_est_calc_phase(). est->ktrow should simply switch to a delay value while estimator is linked to est_temp_list. Link: https://sashiko.dev/#/patchset/20260331165015.2777765-1-longman%40redhat.com Link: https://sashiko.dev/#/patchset/20260420171308.87192-1-ja%40ssi.bg Link: https://sashiko.dev/#/patchset/20260422125123.40658-1-ja%40ssi.bg Link: https://sashiko.dev/#/patchset/20260424175858.54752-1-ja%40ssi.bg Link: https://sashiko.dev/#/patchset/20260425103918.7447-1-ja%40ssi.bg Fixes: f0be83d54217 ("ipvs: add est_cpulist and est_nice sysctl vars") Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 11 +++++- net/netfilter/ipvs/ip_vs_ctl.c | 51 +++++++++++++++++++++----- net/netfilter/ipvs/ip_vs_est.c | 83 ++++++++++++++++++++++++------------------ 3 files changed, 100 insertions(+), 45 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 72d325c81313..d28ad8a0541f 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -491,6 +491,7 @@ struct ip_vs_est_kt_data { DECLARE_BITMAP(avail, IPVS_EST_NTICKS); /* tick has space for ests */ unsigned long est_timer; /* estimation timer (jiffies) */ struct ip_vs_stats *calc_stats; /* Used for calculation */ + int needed; /* task is needed */ int tick_len[IPVS_EST_NTICKS]; /* est count */ int id; /* ktid per netns */ int chain_max; /* max ests per tick chain */ @@ -1884,11 +1885,19 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats); void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats); void ip_vs_zero_estimator(struct ip_vs_stats *stats); void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats); -void ip_vs_est_reload_start(struct netns_ipvs *ipvs); +void ip_vs_est_reload_start(struct netns_ipvs *ipvs, bool restart); int ip_vs_est_kthread_start(struct netns_ipvs *ipvs, struct ip_vs_est_kt_data *kd); void ip_vs_est_kthread_stop(struct ip_vs_est_kt_data *kd); +static inline void ip_vs_stop_estimator_tot_stats(struct netns_ipvs *ipvs) +{ +#ifdef CONFIG_SYSCTL + ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s); + ipvs->tot_stats->s.est.ktid = -2; +#endif +} + static inline void ip_vs_est_stopped_recalc(struct netns_ipvs *ipvs) { #ifdef CONFIG_SYSCTL diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d81077c2457a..5c9f8e0e238f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -261,12 +261,28 @@ static void est_reload_work_handler(struct work_struct *work) if (!kd) continue; /* New config ? Stop kthread tasks */ - if (genid != genid_done) - ip_vs_est_kthread_stop(kd); + if (genid != genid_done) { + if (!id) { + /* Only we can stop kt 0 but not under mutex */ + mutex_unlock(&ipvs->est_mutex); + ip_vs_est_kthread_stop(kd); + mutex_lock(&ipvs->est_mutex); + if (!READ_ONCE(ipvs->enable)) + goto unlock; + /* kd for kt 0 is never destroyed */ + } else { + ip_vs_est_kthread_stop(kd); + } + } if (!kd->task && !ip_vs_est_stopped(ipvs)) { + bool start; + /* Do not start kthreads above 0 in calc phase */ - if ((!id || !ipvs->est_calc_phase) && - ip_vs_est_kthread_start(ipvs, kd) < 0) + if (id) + start = !ipvs->est_calc_phase; + else + start = kd->needed; + if (start && ip_vs_est_kthread_start(ipvs, kd) < 0) repeat = true; } } @@ -1823,11 +1839,16 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, *svc_p = svc; if (!READ_ONCE(ipvs->enable)) { + mutex_lock(&ipvs->est_mutex); + /* Now there is a service - full throttle */ WRITE_ONCE(ipvs->enable, 1); + ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); + /* Start estimation for first time */ - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); + mutex_unlock(&ipvs->est_mutex); } return 0; @@ -2103,6 +2124,11 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) t = p; } } + /* Stop the tot_stats estimator early under service_mutex + * to avoid locking it again later. + */ + if (cleanup) + ip_vs_stop_estimator_tot_stats(ipvs); return 0; } @@ -2348,7 +2374,7 @@ static int ipvs_proc_est_cpumask_set(const struct ctl_table *table, /* est_max_threads may depend on cpulist size */ ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); ipvs->est_calc_phase = 1; - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); unlock: mutex_unlock(&ipvs->est_mutex); @@ -2428,7 +2454,7 @@ static int ipvs_proc_est_nice(const struct ctl_table *table, int write, mutex_lock(&ipvs->est_mutex); if (*valp != val) { *valp = val; - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); } mutex_unlock(&ipvs->est_mutex); } @@ -2455,7 +2481,7 @@ static int ipvs_proc_run_estimation(const struct ctl_table *table, int write, mutex_lock(&ipvs->est_mutex); if (*valp != val) { *valp = val; - ip_vs_est_reload_start(ipvs); + ip_vs_est_reload_start(ipvs, true); } mutex_unlock(&ipvs->est_mutex); } @@ -5005,7 +5031,14 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); - ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s); + if (ipvs->tot_stats->s.est.ktid != -2) { + /* Not stopped yet? This happens only on netns init error and + * we even do not need to lock the service_mutex for this case. + */ + mutex_lock(&ipvs->service_mutex); + ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s); + mutex_unlock(&ipvs->service_mutex); + } if (ipvs->est_cpulist_valid) free_cpumask_var(ipvs->sysctl_est_cpulist); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 433ba3cab58c..ab09f5182951 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -68,6 +68,11 @@ and the limit of estimators per kthread - est_add_ktid: ktid where to add new ests, can point to empty slot where we should add kt data + - data protected by service_mutex: est_temp_list, est_add_ktid, + est_kt_count(R/W), est_kt_arr(R/W), est_genid_done, kd->needed(R/W) + - data protected by est_mutex: est_genid, est_max_threads, sysctl_est_cpulist, + est_cpulist_valid, sysctl_est_nice, est_stopped, sysctl_run_estimation, + est_kt_count(R), est_kt_arr(R), kd->needed(R), kd->task (id > 0) */ static struct lock_class_key __ipvs_est_key; @@ -227,14 +232,17 @@ static int ip_vs_estimation_kthread(void *data) } /* Schedule stop/start for kthread tasks */ -void ip_vs_est_reload_start(struct netns_ipvs *ipvs) +void ip_vs_est_reload_start(struct netns_ipvs *ipvs, bool restart) { + lockdep_assert_held(&ipvs->est_mutex); + /* Ignore reloads before first service is added */ if (!READ_ONCE(ipvs->enable)) return; ip_vs_est_stopped_recalc(ipvs); - /* Bump the kthread configuration genid */ - atomic_inc(&ipvs->est_genid); + /* Bump the kthread configuration genid if stopping is requested */ + if (restart) + atomic_inc(&ipvs->est_genid); queue_delayed_work(system_long_wq, &ipvs->est_reload_work, 0); } @@ -304,12 +312,17 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs) void *arr = NULL; int i; - if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads && - READ_ONCE(ipvs->enable) && ipvs->est_max_threads) - return -EINVAL; - mutex_lock(&ipvs->est_mutex); + /* Allow kt 0 data to be created before the services are added + * and limit the kthreads when services are present. + */ + if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads && + READ_ONCE(ipvs->enable) && ipvs->est_max_threads) { + ret = -EINVAL; + goto out; + } + for (i = 0; i < id; i++) { if (!ipvs->est_kt_arr[i]) break; @@ -333,6 +346,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs) kd->est_timer = jiffies; kd->id = id; ip_vs_est_set_params(ipvs, kd); + kd->needed = 1; /* Pre-allocate stats used in calc phase */ if (!id && !kd->calc_stats) { @@ -341,12 +355,8 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs) goto out; } - /* Start kthread tasks only when services are present */ - if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) { - ret = ip_vs_est_kthread_start(ipvs, kd); - if (ret < 0) - goto out; - } + /* Request kthread to be started */ + ip_vs_est_reload_start(ipvs, false); if (arr) ipvs->est_kt_count++; @@ -482,12 +492,11 @@ out: /* Start estimation for stats */ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) { + struct ip_vs_est_kt_data *kd = ipvs->est_kt_count > 0 ? + ipvs->est_kt_arr[0] : NULL; struct ip_vs_estimator *est = &stats->est; int ret; - if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable)) - ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); - est->ktid = -1; est->ktrow = IPVS_EST_NTICKS - 1; /* Initial delay */ @@ -496,8 +505,15 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) * will not allocate much memory, just for kt 0. */ ret = 0; - if (!ipvs->est_kt_count || !ipvs->est_kt_arr[0]) + if (!kd) { ret = ip_vs_est_add_kthread(ipvs); + } else if (!kd->needed) { + mutex_lock(&ipvs->est_mutex); + /* We have job for the kt 0 task */ + kd->needed = 1; + ip_vs_est_reload_start(ipvs, true); + mutex_unlock(&ipvs->est_mutex); + } if (ret >= 0) hlist_add_head(&est->list, &ipvs->est_temp_list); else @@ -578,16 +594,14 @@ void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) } end_kt0: - /* kt 0 is freed after all other kthreads and chains are empty */ + /* kt 0 task is stopped after all other kt slots and chains are empty */ if (ipvs->est_kt_count == 1 && hlist_empty(&ipvs->est_temp_list)) { kd = ipvs->est_kt_arr[0]; - if (!kd || !kd->est_count) { + if (kd && !kd->est_count) { mutex_lock(&ipvs->est_mutex); - if (kd) { - ip_vs_est_kthread_destroy(kd); - ipvs->est_kt_arr[0] = NULL; - } - ipvs->est_kt_count--; + /* Keep the kt0 data but request kthread_stop */ + kd->needed = 0; + ip_vs_est_reload_start(ipvs, true); mutex_unlock(&ipvs->est_mutex); ipvs->est_add_ktid = 0; } @@ -647,9 +661,9 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max) u64 val; INIT_HLIST_HEAD(&chain); - mutex_lock(&ipvs->service_mutex); + mutex_lock(&ipvs->est_mutex); kd = ipvs->est_kt_arr[0]; - mutex_unlock(&ipvs->service_mutex); + mutex_unlock(&ipvs->est_mutex); s = kd ? kd->calc_stats : NULL; if (!s) goto out; @@ -748,16 +762,16 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs) if (!ip_vs_est_calc_limits(ipvs, &chain_max)) return; - mutex_lock(&ipvs->service_mutex); - /* Stop all other tasks, so that we can immediately move the * estimators to est_temp_list without RCU grace period */ mutex_lock(&ipvs->est_mutex); for (id = 1; id < ipvs->est_kt_count; id++) { /* netns clean up started, abort */ - if (!READ_ONCE(ipvs->enable)) - goto unlock2; + if (kthread_should_stop() || !READ_ONCE(ipvs->enable)) { + mutex_unlock(&ipvs->est_mutex); + return; + } kd = ipvs->est_kt_arr[id]; if (!kd) continue; @@ -765,9 +779,11 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs) } mutex_unlock(&ipvs->est_mutex); + mutex_lock(&ipvs->service_mutex); + /* Move all estimators to est_temp_list but carefully, * all estimators and kthread data can be released while - * we reschedule. Even for kthread 0. + * we reschedule. */ step = 0; @@ -849,9 +865,7 @@ walk_chain: ip_vs_stop_estimator(ipvs, stats); /* Tasks are stopped, move without RCU grace period */ est->ktid = -1; - est->ktrow = row - kd->est_row; - if (est->ktrow < 0) - est->ktrow += IPVS_EST_NTICKS; + est->ktrow = delay; hlist_add_head(&est->list, &ipvs->est_temp_list); /* kd freed ? */ if (last) @@ -889,7 +903,6 @@ end_dequeue: if (genid == atomic_read(&ipvs->est_genid)) ipvs->est_calc_phase = 0; -unlock2: mutex_unlock(&ipvs->est_mutex); unlock: -- cgit v1.2.3 From 4ee52b7021a7cb9356f8b9aff5631c68512a9e1b Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 30 Apr 2026 10:44:18 +0300 Subject: ipvs: fix shift-out-of-bounds in ip_vs_rht_desired_size Calling roundup_pow_of_two() with 0 has undefined result: UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13 shift exponent 64 is too large for 64-bit type 'unsigned long' CPU: 1 UID: 0 PID: 77 Comm: kworker/u8:4 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/18/2026 Workqueue: events_unbound conn_resize_work_handler Call Trace: dump_stack_lvl+0xe8/0x150 lib/dump_stack.c:120 ubsan_epilogue+0xa/0x30 lib/ubsan.c:233 __ubsan_handle_shift_out_of_bounds+0x385/0x410 lib/ubsan.c:494 __roundup_pow_of_two include/linux/log2.h:57 [inline] ip_vs_rht_desired_size+0x2cf/0x410 net/netfilter/ipvs/ip_vs_core.c:240 ip_vs_conn_desired_size net/netfilter/ipvs/ip_vs_conn.c:765 [inline] conn_resize_work_handler+0x1b6/0x14c0 net/netfilter/ipvs/ip_vs_conn.c:822 process_one_work kernel/workqueue.c:3302 [inline] process_scheduled_works+0xb5d/0x1860 kernel/workqueue.c:3385 worker_thread+0xa53/0xfc0 kernel/workqueue.c:3466 kthread+0x388/0x470 kernel/kthread.c:436 ret_from_fork+0x514/0xb70 arch/x86/kernel/process.c:158 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Reported-by: syzbot+217f1db9c791e27fe54a@syzkaller.appspotmail.com Fixes: b655388111cf ("ipvs: add resizable hash tables") Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index f5b7a2047291..d40b404c1bf6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -237,7 +237,7 @@ int ip_vs_rht_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, int n, { if (!t) return 1 << min_bits; - n = roundup_pow_of_two(n); + n = n > 0 ? roundup_pow_of_two(n) : 1; if (lfactor < 0) { int factor = min(-lfactor, max_bits); -- cgit v1.2.3 From aa6065206987278291c09d0c6aebed687114c925 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 30 Apr 2026 10:44:19 +0300 Subject: ipvs: Guard access of HK_TYPE_KTHREAD cpumask with RCU The ip_vs_ctl.c file and the associated ip_vs.h file are the only places in the kernel where HK_TYPE_KTHREAD cpumask is being retrieved and used. Now that HK_TYPE_KTHREAD/HK_TYPE_DOMAIN cpumask can be changed at run time. We need to use RCU to guard access to this cpumask to avoid a potential UAF problem as the returned cpumask may be freed before it is being used. We can replace HK_TYPE_KTHREAD by HK_TYPE_DOMAIN as they are aliases of each other, but keeping the HK_TYPE_KTHREAD name can highlight the fact that it is the kthread initiated by ipvs that is being controlled. Fixes: 03ff73510169 ("cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset") Signed-off-by: Waiman Long Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 20 ++++++++++++++++---- net/netfilter/ipvs/ip_vs_ctl.c | 13 ++++++++----- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d28ad8a0541f..02762ce73a0c 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1412,7 +1412,7 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) return ipvs->sysctl_run_estimation; } -static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) +static inline const struct cpumask *__sysctl_est_cpulist(struct netns_ipvs *ipvs) { if (ipvs->est_cpulist_valid) return ipvs->sysctl_est_cpulist; @@ -1530,7 +1530,7 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) return 1; } -static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) +static inline const struct cpumask *__sysctl_est_cpulist(struct netns_ipvs *ipvs) { return housekeeping_cpumask(HK_TYPE_KTHREAD); } @@ -1565,6 +1565,18 @@ static inline int sysctl_svc_lfactor(struct netns_ipvs *ipvs) return READ_ONCE(ipvs->sysctl_svc_lfactor); } +static inline bool sysctl_est_cpulist_empty(struct netns_ipvs *ipvs) +{ + guard(rcu)(); + return cpumask_empty(__sysctl_est_cpulist(ipvs)); +} + +static inline unsigned int sysctl_est_cpulist_weight(struct netns_ipvs *ipvs) +{ + guard(rcu)(); + return cpumask_weight(__sysctl_est_cpulist(ipvs)); +} + /* IPVS core functions * (from ip_vs_core.c) */ @@ -1904,7 +1916,7 @@ static inline void ip_vs_est_stopped_recalc(struct netns_ipvs *ipvs) /* Stop tasks while cpulist is empty or if disabled with flag */ ipvs->est_stopped = !sysctl_run_estimation(ipvs) || (ipvs->est_cpulist_valid && - cpumask_empty(sysctl_est_cpulist(ipvs))); + sysctl_est_cpulist_empty(ipvs)); #endif } @@ -1920,7 +1932,7 @@ static inline bool ip_vs_est_stopped(struct netns_ipvs *ipvs) static inline int ip_vs_est_max_threads(struct netns_ipvs *ipvs) { unsigned int limit = IPVS_EST_CPU_KTHREADS * - cpumask_weight(sysctl_est_cpulist(ipvs)); + sysctl_est_cpulist_weight(ipvs); return max(1U, limit); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5c9f8e0e238f..c7c7f6a7a9f6 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2394,11 +2394,14 @@ static int ipvs_proc_est_cpumask_get(const struct ctl_table *table, mutex_lock(&ipvs->est_mutex); - if (ipvs->est_cpulist_valid) - mask = *valp; - else - mask = (struct cpumask *)housekeeping_cpumask(HK_TYPE_KTHREAD); - ret = scnprintf(buffer, size, "%*pbl\n", cpumask_pr_args(mask)); + /* HK_TYPE_KTHREAD cpumask needs RCU protection */ + scoped_guard(rcu) { + if (ipvs->est_cpulist_valid) + mask = *valp; + else + mask = (struct cpumask *)housekeeping_cpumask(HK_TYPE_KTHREAD); + ret = scnprintf(buffer, size, "%*pbl\n", cpumask_pr_args(mask)); + } mutex_unlock(&ipvs->est_mutex); -- cgit v1.2.3 From 8f78b749f3da0f43990490b4c1193b5ede3eec0a Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 30 Apr 2026 10:44:20 +0300 Subject: sched/isolation: Make HK_TYPE_KTHREAD an alias of HK_TYPE_DOMAIN Since commit 041ee6f3727a ("kthread: Rely on HK_TYPE_DOMAIN for preferred affinity management"), kthreads default to use the HK_TYPE_DOMAIN cpumask. IOW, it is no longer affected by the setting of the nohz_full boot kernel parameter. That means HK_TYPE_KTHREAD should now be an alias of HK_TYPE_DOMAIN instead of HK_TYPE_KERNEL_NOISE to correctly reflect the current kthread behavior. Make the change as HK_TYPE_KTHREAD is still being used in some networking code. Fixes: 041ee6f3727a ("kthread: Rely on HK_TYPE_DOMAIN for preferred affinity management") Signed-off-by: Waiman Long Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- include/linux/sched/isolation.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index dc3975ff1b2e..cf0fd03dd7a2 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -20,6 +20,11 @@ enum hk_type { HK_TYPE_KERNEL_NOISE, HK_TYPE_MAX, + /* + * HK_TYPE_KTHREAD is now an alias of HK_TYPE_DOMAIN + */ + HK_TYPE_KTHREAD = HK_TYPE_DOMAIN, + /* * The following housekeeping types are only set by the nohz_full * boot commandline option. So they can share the same value. @@ -29,7 +34,6 @@ enum hk_type { HK_TYPE_RCU = HK_TYPE_KERNEL_NOISE, HK_TYPE_MISC = HK_TYPE_KERNEL_NOISE, HK_TYPE_WQ = HK_TYPE_KERNEL_NOISE, - HK_TYPE_KTHREAD = HK_TYPE_KERNEL_NOISE }; #ifdef CONFIG_CPU_ISOLATION -- cgit v1.2.3 From d82ba05263c69fa2437fe93e4e561cc40f4c03af Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 1 May 2026 07:39:41 +0000 Subject: af_unix: Set gc_in_progress to true in unix_gc(). Igor Ushakov reported that unix_gc() could run with gc_in_progress being false if the work is scheduled while running: Thread 1 Thread 2 Thread 3 -------- -------- -------- unix_schedule_gc() unix_schedule_gc() `- if (!gc_in_progress) `- if (!gc_in_progress) |- gc_in_progress = true | `- queue_work() | unix_gc() <----------------/ | | |- gc_in_progress = true ... `- queue_work() | | `- gc_in_progress = false | | unix_gc() <---------------------------------------------' | ... /* gc_in_progress == false */ | `- gc_in_progress = false unix_peek_fpl() relies on gc_in_progress not to confuse GC by MSG_PEEK. Let's set gc_in_progress to true in unix_gc(). Fixes: 8b90a9f819dc ("af_unix: Run GC on only one CPU.") Reported-by: Igor Ushakov Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260501073945.1884564-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/unix/garbage.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index a7967a345827..0783555e2526 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -607,6 +607,8 @@ static void unix_gc(struct work_struct *work) struct sk_buff_head hitlist; struct sk_buff *skb; + WRITE_ONCE(gc_in_progress, true); + spin_lock(&unix_gc_lock); if (unix_graph_state == UNIX_GRAPH_NOT_CYCLIC) { @@ -649,10 +651,8 @@ void unix_schedule_gc(struct user_struct *user) READ_ONCE(user->unix_inflight) < UNIX_INFLIGHT_SANE_USER) return; - if (!READ_ONCE(gc_in_progress)) { - WRITE_ONCE(gc_in_progress, true); + if (!READ_ONCE(gc_in_progress)) queue_work(system_dfl_wq, &unix_gc_work); - } if (user && READ_ONCE(unix_graph_cyclic_sccs)) flush_work(&unix_gc_work); -- cgit v1.2.3 From 76b93a8107574006b25495664304ea9237494d70 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 1 May 2026 02:58:41 -0700 Subject: netpoll: pass buffer size to egress_dev() to avoid MAC truncation egress_dev() formats np->dev_mac via snprintf() but receives buf as a bare char *, so it cannot derive the buffer size from the pointer. The size argument was hardcoded to MAC_ADDR_STR_LEN (3 * ETH_ALEN - 1 = 17), which is silly wrong in two ways: 1) misleading kernel log output on the MAC-selected target path (np->dev_name[0] == '\0'); for example "aa:bb:cc:dd:ee:ff doesn't exist, aborting" was logged as "aa:bb:cc:dd:ee:f doesn't exist, aborting". 2) the second argument of snprintf is the size of the buffer, not the size of what you want to write. Add a bufsz parameter to egress_dev() and pass sizeof(buf) from each caller, matching the standard snprintf() idiom and removing the hardcoded size from the helper. Every caller already declares "char buf[MAC_ADDR_STR_LEN + 1]" so the formatted MAC continues to fit. Tested by booting with netconsole=6665@/aa:bb:cc:dd:ee:ff,6666@10.0.0.1/00:11:22:33:44:55 on a kernel without a matching device. Pre-fix dmesg shows "aa:bb:cc:dd:ee:f doesn't exist, aborting"; post-fix shows the full "aa:bb:cc:dd:ee:ff doesn't exist, aborting". Fixes: f8a10bed32f5 ("netconsole: allow selection of egress interface via MAC address") Cc: stable@vger.kernel.org Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20260501-netpoll_snprintf_fix-v1-1-84b0566e6597@debian.org Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 4381e0fc25bf..84faace50ac2 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -608,14 +608,16 @@ EXPORT_SYMBOL_GPL(__netpoll_setup); /* * Returns a pointer to a string representation of the identifier used * to select the egress interface for the given netpoll instance. buf - * must be a buffer of length at least MAC_ADDR_STR_LEN + 1. + * is used to format np->dev_mac when np->dev_name is empty; bufsz must + * be at least MAC_ADDR_STR_LEN + 1 to fit the formatted MAC address + * and its NUL terminator. */ -static char *egress_dev(struct netpoll *np, char *buf) +static char *egress_dev(struct netpoll *np, char *buf, size_t bufsz) { if (np->dev_name[0]) return np->dev_name; - snprintf(buf, MAC_ADDR_STR_LEN, "%pM", np->dev_mac); + snprintf(buf, bufsz, "%pM", np->dev_mac); return buf; } @@ -645,7 +647,7 @@ static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev) if (!IS_ENABLED(CONFIG_IPV6)) { np_err(np, "IPv6 is not supported %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EINVAL; } @@ -667,7 +669,7 @@ static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev) } if (err) { np_err(np, "no IPv6 address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return err; } @@ -687,14 +689,14 @@ static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev) in_dev = __in_dev_get_rtnl(ndev); if (!in_dev) { np_err(np, "no IP address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EDESTADDRREQ; } ifa = rtnl_dereference(in_dev->ifa_list); if (!ifa) { np_err(np, "no IP address for %s, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); return -EDESTADDRREQ; } @@ -736,7 +738,8 @@ int netpoll_setup(struct netpoll *np) ndev = dev_getbyhwaddr(net, ARPHRD_ETHER, np->dev_mac); if (!ndev) { - np_err(np, "%s doesn't exist, aborting\n", egress_dev(np, buf)); + np_err(np, "%s doesn't exist, aborting\n", + egress_dev(np, buf, sizeof(buf))); err = -ENODEV; goto unlock; } @@ -744,14 +747,14 @@ int netpoll_setup(struct netpoll *np) if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); err = -EBUSY; goto put; } if (!netif_running(ndev)) { np_info(np, "device %s not up yet, forcing it\n", - egress_dev(np, buf)); + egress_dev(np, buf, sizeof(buf))); err = dev_open(ndev, NULL); if (err) { -- cgit v1.2.3 From 915f9860fe1c9f7eb6c48c299b2db64fd57ef32f Mon Sep 17 00:00:00 2001 From: James Calligeros Date: Sun, 3 May 2026 22:23:23 +1000 Subject: ASoC: tas2764: Deal with bogus initial temperature register value The TAS2764 datasheet specifies that the chip initialises the temperature register such that the temperature reading is 2.6 *C, ostensibly to prevent tripping the chip's protection circuitry. The chip is not capable of representing 2.6 *C however, and the register is actually initialised to 0. The ADC does not start sampling until the chip is powered up, and the last sampled temperature persists in the register during software shutdown. Therefore, any reading returning 0 is almost certain to be from before the ADC has actually started sampling, meaning that it is invalid. Return -ENODATA early if the temperature has not yet been sampled by the chip, and indicate a fault condition using HWMON_T_FAULT. Fixes: 186dfc85f9a8 ("ASoC: tas2764: expose die temp to hwmon") Signed-off-by: James Calligeros Signed-off-by: Mark Brown --- sound/soc/codecs/tas2764.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 6aab6d2b7419..55211266927d 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -684,18 +684,33 @@ static int tas2764_read_die_temp(struct tas2764_priv *tas2764, long *result) * As per datasheet, subtract 93 from raw value to get degrees * Celsius. hwmon wants millidegrees. * - * NOTE: The chip will initialise the TAS2764_TEMP register to - * 2.6 *C to avoid triggering temperature protection. Since the - * ADC is powered down during software shutdown, this value will - * persist until the chip is fully powered up (e.g. the PCM it's - * attached to is opened). The ADC will power down again when - * the chip is put back into software shutdown, with the last - * value sampled persisting in the ADC's register. + * NOTE: The TAS2764 datasheet mentions initialising TAS2764_TEMP + * such that the temperature is 2.6 *C, however the register + * is actually initialised to 0. The ADC is also powered down during + * software shutdown. The last sampled temperature will persist + * in the register while the amp is in this power state. */ + if (reg == 0) + return -ENODATA; + *result = (reg - 93) * 1000; return 0; } +static int tas2764_hwmon_is_fault(struct tas2764_priv *tas2764, long *result) +{ + int ret; + long temp; + + ret = tas2764_read_die_temp(tas2764, &temp); + if (ret == -ENODATA) { + *result = true; + return 0; + } + + return ret; +} + static umode_t tas2764_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, int channel) @@ -705,6 +720,7 @@ static umode_t tas2764_hwmon_is_visible(const void *data, switch (attr) { case hwmon_temp_input: + case hwmon_temp_fault: return 0444; default: break; @@ -724,6 +740,9 @@ static int tas2764_hwmon_read(struct device *dev, case hwmon_temp_input: ret = tas2764_read_die_temp(tas2764, val); break; + case hwmon_temp_fault: + ret = tas2764_hwmon_is_fault(tas2764, val); + break; default: ret = -EOPNOTSUPP; break; @@ -733,7 +752,7 @@ static int tas2764_hwmon_read(struct device *dev, } static const struct hwmon_channel_info *const tas2764_hwmon_info[] = { - HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_FAULT), NULL }; -- cgit v1.2.3 From d0771f4995d3285756bf496cf6e346df99481f83 Mon Sep 17 00:00:00 2001 From: James Calligeros Date: Sun, 3 May 2026 22:23:24 +1000 Subject: ASoC: tas2770: Deal with bogus initial temperature value TAS2770 initialises the temperature readout registers to 0. This value persists until the chip is fully powered up and the ADC starts sampling. The ADC then persists the last sampled temperature during software shutdown. The ADC should therefore never return 0 in normal operating conditions, so return -ENODATA and mark it as a fault condition using HWMON_T_FAULT. Fixes: ff73e2780169 ("ASoC: tas2770: expose die temp to hwmon") Signed-off-by: James Calligeros Signed-off-by: Mark Brown --- sound/soc/codecs/tas2770.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index 50501bcbe916..dbda9f327535 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -633,10 +633,27 @@ static int tas2770_read_die_temp(struct tas2770_priv *tas2770, long *result) * value read back from its registers will be the last value sampled * before entering software shutdown. */ + if (reading == 0) + return -ENODATA; + *result = (reading - (93 * 16)) * 1000 / 16; return 0; } +static int tas2770_hwmon_is_fault(struct tas2770_priv *tas2770, long *result) +{ + int ret; + long temp; + + ret = tas2770_read_die_temp(tas2770, &temp); + if (ret == -ENODATA) { + *result = true; + return 0; + } + + return ret; +} + static umode_t tas2770_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, int channel) @@ -646,6 +663,7 @@ static umode_t tas2770_hwmon_is_visible(const void *data, switch (attr) { case hwmon_temp_input: + case hwmon_temp_fault: return 0444; default: break; @@ -665,6 +683,9 @@ static int tas2770_hwmon_read(struct device *dev, case hwmon_temp_input: ret = tas2770_read_die_temp(tas2770, val); break; + case hwmon_temp_fault: + ret = tas2770_hwmon_is_fault(tas2770, val); + break; default: ret = -EOPNOTSUPP; break; @@ -674,7 +695,7 @@ static int tas2770_hwmon_read(struct device *dev, } static const struct hwmon_channel_info *const tas2770_hwmon_info[] = { - HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_FAULT), NULL }; -- cgit v1.2.3 From 36bdc0e815b4e8a05b9028d8ef8a25e1ead35cc1 Mon Sep 17 00:00:00 2001 From: Markus Baier Date: Fri, 1 May 2026 18:39:41 +0200 Subject: net: usb: asix: ax88772: re-add usbnet_link_change() in phylink callbacks Commit e0bffe3e6894 ("net: asix: ax88772: migrate to phylink") replaced the asix_adjust_link() PHY callback with phylink's mac_link_up() and mac_link_down() handlers, but did not carry over the usbnet_link_change() notification that commit 805206e66fab ("net: asix: fix "can't send until first packet is send" issue") had added. As a result, the original symptom returns: when the link comes up, usbnet is never notified, so the RX URB submission stays dormant until some other event (e.g. a transmitted packet triggering the status endpoint interrupt) wakes it up. This is reproducible with the Apple A1277 USB Ethernet Adapter (05ac:1402, AX88772A based) on a Banana Pro using a static IPv4 configuration. After bringing the interface up, no incoming packets are received until the first outgoing frame triggers usbnet's RX path. Restore the link change notification, gated on a carrier transition so the call remains idempotent if the status endpoint also reports the change later. Fixes: e0bffe3e6894 ("net: asix: ax88772: migrate to phylink") Signed-off-by: Markus Baier Tested-by: Oleksij Rempel Link: https://patch.msgid.link/20260501163941.107668-1-Markus.Baier@soslab.tu-darmstadt.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_devices.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index df0bcfedddbc..293ef80c4e30 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -756,6 +756,7 @@ static void ax88772_mac_link_down(struct phylink_config *config, struct usbnet *dev = netdev_priv(to_net_dev(config->dev)); asix_write_medium_mode(dev, 0, 0); + usbnet_link_change(dev, false, false); } static void ax88772_mac_link_up(struct phylink_config *config, @@ -786,6 +787,7 @@ static void ax88772_mac_link_up(struct phylink_config *config, m |= AX_MEDIUM_RFC; asix_write_medium_mode(dev, m, 0); + usbnet_link_change(dev, true, false); } static const struct phylink_mac_ops ax88772_phylink_mac_ops = { -- cgit v1.2.3 From 059b7dbd20a6f0c539a45ddff1573cb8946685b5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Apr 2026 12:26:52 +0000 Subject: vsock/virtio: fix potential unbounded skb queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit virtio_transport_inc_rx_pkt() checks vvs->rx_bytes + len > vvs->buf_alloc. virtio_transport_recv_enqueue() skips coalescing for packets with VIRTIO_VSOCK_SEQ_EOM. If fed with packets with len == 0 and VIRTIO_VSOCK_SEQ_EOM, a very large number of packets can be queued because vvs->rx_bytes stays at 0. Fix this by estimating the skb metadata size: (Number of skbs in the queue) * SKB_TRUESIZE(0) Fixes: 077706165717 ("virtio/vsock: don't use skbuff state to account credit") Signed-off-by: Eric Dumazet Cc: Arseniy Krasnov Cc: Stefan Hajnoczi Cc: Stefano Garzarella Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Xuan Zhuo Cc: "Eugenio Pérez" Cc: virtualization@lists.linux.dev Link: https://patch.msgid.link/20260430122653.554058-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport_common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 416d533f493d..9b8014516f4f 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -447,7 +447,9 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, u32 len) { - if (vvs->buf_used + len > vvs->buf_alloc) + u64 skb_overhead = (skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0); + + if (skb_overhead + vvs->buf_used + len > vvs->buf_alloc) return false; vvs->rx_bytes += len; -- cgit v1.2.3 From c4a99a921949cddc590b22bb14eeb23dffcc3ba6 Mon Sep 17 00:00:00 2001 From: Shardul Bankar Date: Fri, 1 May 2026 21:35:34 +0200 Subject: mptcp: use MPJoinSynAckHMacFailure for SynAck HMAC failure In subflow_finish_connect(), HMAC validation of the server's HMAC in SYN/ACK + MP_JOIN increments MPTCP_MIB_JOINACKMAC ("HMAC was wrong on ACK + MP_JOIN") on failure. The function processes the SYN/ACK, not the ACK; the matching MPTCP_MIB_JOINSYNACKMAC counter ("HMAC was wrong on SYN/ACK + MP_JOIN") exists but is not incremented anywhere in the tree. The mirror site on the server, subflow_syn_recv_sock(), already uses JOINACKMAC correctly for ACK HMAC failure. Use JOINSYNACKMAC at the SYN/ACK validation site so each counter reflects the packet whose HMAC actually failed. Suggested-by: Matthieu Baerts (NGI0) Fixes: fc518953bc9c ("mptcp: add and use MIB counter infrastructure") Cc: stable@vger.kernel.org Signed-off-by: Shardul Bankar Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-1-b70118df778e@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e2cb9d23e4a0..bda6862264ca 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -581,7 +581,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup); if (!subflow_thmac_valid(subflow)) { - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKMAC); subflow->reset_reason = MPTCP_RST_EMPTCP; goto do_reset; } -- cgit v1.2.3 From a6da02d4c00fdda2417e42ad2b762a9209e6cc49 Mon Sep 17 00:00:00 2001 From: Shardul Bankar Date: Fri, 1 May 2026 21:35:35 +0200 Subject: mptcp: use MPTCP_RST_EMPTCP for ACK HMAC validation failure When HMAC validation fails on a received ACK + MP_JOIN in subflow_syn_recv_sock(), the subflow is reset with reason MPTCP_RST_EPROHIBIT ("Administratively prohibited"). This is incorrect: HMAC validation failure is an MPTCP protocol-level error, not an administrative policy denial. The mirror site on the client, in subflow_finish_connect(), already uses MPTCP_RST_EMPTCP ("MPTCP-specific error") for the same kind of HMAC failure on the SYN/ACK + MP_JOIN. Use the same reason on the server side for symmetry and accuracy. Suggested-by: Matthieu Baerts (NGI0) Fixes: 443041deb5ef ("mptcp: fix NULL pointer in can_accept_new_subflow") Cc: stable@vger.kernel.org Signed-off-by: Shardul Bankar Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-2-b70118df778e@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index bda6862264ca..d562e149606f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -908,7 +908,7 @@ create_child: if (!subflow_hmac_valid(subflow_req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); - subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP); goto dispose_child; } -- cgit v1.2.3 From 6254a16d6f0c672e3809ca5d7c9a28a55d71f764 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 1 May 2026 21:35:36 +0200 Subject: mptcp: fix rx timestamp corruption on fastopen The skb cb offset containing the timestamp presence flag is cleared before loading such information. Cache such value before MPTCP CB initialization. Fixes: 36b122baf6a8 ("mptcp: add subflow_v(4,6)_send_synack()") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-3-b70118df778e@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/fastopen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index 82ec15bcfd7f..082c46c0f50e 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -12,6 +12,7 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf struct sock *sk, *ssk; struct sk_buff *skb; struct tcp_sock *tp; + bool has_rxtstamp; /* on early fallback the subflow context is deleted by * subflow_syn_recv_sock() @@ -40,12 +41,13 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf */ tp->copied_seq += skb->len; subflow->ssn_offset += skb->len; + has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; /* Only the sequence delta is relevant */ MPTCP_SKB_CB(skb)->map_seq = -skb->len; MPTCP_SKB_CB(skb)->end_seq = 0; MPTCP_SKB_CB(skb)->offset = 0; - MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp; MPTCP_SKB_CB(skb)->cant_coalesce = 1; mptcp_data_lock(sk); -- cgit v1.2.3 From 70ece9d7021c54cf40c72b31b066e9088f5f75f5 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 1 May 2026 21:35:37 +0200 Subject: mptcp: sockopt: increase seq in mptcp_setsockopt_all_sf mptcp_setsockopt_all_sf() was missing a call to sockopt_seq_inc(). This is required not to cause missing synchronization for newer subflows created later on. This helper is called each time a socket option is set on subflows, and future ones will need to inherit this option after their creation. Fixes: 51c5fd09e1b4 ("mptcp: add TCP_MAXSEG sockopt support") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260501-net-mptcp-misc-fixes-7-1-rc3-v1-4-b70118df778e@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/sockopt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 0efe40be2fde..1cf608e7357b 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -812,6 +812,10 @@ static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level, if (ret) break; } + + if (!ret) + sockopt_seq_inc(msk); + return ret; } -- cgit v1.2.3 From ac0841d7d202073415c808bda7848502163b87dd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 2 May 2026 12:41:02 +0000 Subject: net: prevent possible UAF in rtnl_prop_list_size() I was mistaken by synchronize_rcu() [1] call in netdev_name_node_alt_destroy(), giving a false sense of RCU safety at delete times. We have to use list_del_rcu() to not confuse potential readers in rtnl_prop_list_size(). [1] This synchronize_rcu() call was later removed in commit 723de3ebef03 ("net: free altname using an RCU callback"). Fixes: 9f30831390ed ("net: add rcu safety to rtnl_prop_list_size()") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260502124102.499204-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 06c195906231..8bfa8313ef62 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -371,7 +371,7 @@ static void netdev_name_node_alt_free(struct rcu_head *head) static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node) { netdev_name_node_del(name_node); - list_del(&name_node->list); + list_del_rcu(&name_node->list); call_rcu(&name_node->rcu, netdev_name_node_alt_free); } -- cgit v1.2.3 From 30cb24f97d44f6b81c14b85c5323de62eef1fb7f Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 2 May 2026 15:19:45 +0100 Subject: psp: strip variable-length PSP header in psp_dev_rcv() psp_dev_rcv() unconditionally removes a fixed PSP_ENCAP_HLEN, even when psph->hdrlen indicates that the PSP header carries optional fields. A frame whose PSP header advertises a non-zero VC or any extension would therefore be silently mis-decapsulated: option bytes would spill into the inner packet head and downstream parsing would fail on a corrupted skb. Compute the full PSP header length from psph->hdrlen, pull the optional bytes into the linear region, and strip the whole header when decapsulating. Optional fields (VC, ...) are still ignored, just discarded with the rest of the header instead of leaking. crypt_offset and the VIRT flag are intentionally not validated here - callers know their device's PSP implementation and can decide. Both in-tree callers gate on hardware-validated PSP, so this is a correctness fix rather than a reachable corruption path under current configurations. Fixes: 0eddb8023cee ("psp: provide decapsulation and receive helper for drivers") Reviewed-by: Willem de Bruijn Reviewed-by: Daniel Zahka Cc: stable@vger.kernel.org Signed-off-by: David Carlier Link: https://patch.msgid.link/20260502141945.14484-1-devnexen@gmail.com Signed-off-by: Jakub Kicinski --- net/psp/psp_main.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c index 9508b6c38003..e45549f08eef 100644 --- a/net/psp/psp_main.c +++ b/net/psp/psp_main.c @@ -263,15 +263,16 @@ EXPORT_SYMBOL(psp_dev_encapsulate); /* Receive handler for PSP packets. * - * Presently it accepts only already-authenticated packets and does not - * support optional fields, such as virtualization cookies. The caller should - * ensure that skb->data is pointing to the mac header, and that skb->mac_len - * is set. This function does not currently adjust skb->csum (CHECKSUM_COMPLETE - * is not supported). + * Accepts only already-authenticated packets. The full PSP header is + * stripped according to psph->hdrlen; any optional fields it advertises + * (virtualization cookies, etc.) are ignored and discarded along with the + * rest of the header. The caller should ensure that skb->data is pointing + * to the mac header, and that skb->mac_len is set. This function does not + * currently adjust skb->csum (CHECKSUM_COMPLETE is not supported). */ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) { - int l2_hlen = 0, l3_hlen, encap; + int l2_hlen = 0, l3_hlen, encap, psp_hlen; struct psp_skb_ext *pse; struct psphdr *psph; struct ethhdr *eth; @@ -312,18 +313,36 @@ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) if (unlikely(uh->dest != htons(PSP_DEFAULT_UDP_PORT))) return -EINVAL; - pse = skb_ext_add(skb, SKB_EXT_PSP); - if (!pse) + psph = (struct psphdr *)(skb->data + l2_hlen + l3_hlen + + sizeof(struct udphdr)); + + /* Strip the full PSP header per psph->hdrlen; VC/options are pulled + * into the linear region only so they can be discarded with the + * rest of the header. + */ + psp_hlen = (psph->hdrlen + 1) * 8; + + if (unlikely(psp_hlen < sizeof(struct psphdr))) + return -EINVAL; + + if (psp_hlen > sizeof(struct psphdr) && + !pskb_may_pull(skb, l2_hlen + l3_hlen + + sizeof(struct udphdr) + psp_hlen)) return -EINVAL; psph = (struct psphdr *)(skb->data + l2_hlen + l3_hlen + sizeof(struct udphdr)); + + pse = skb_ext_add(skb, SKB_EXT_PSP); + if (!pse) + return -EINVAL; + pse->spi = psph->spi; pse->dev_id = dev_id; pse->generation = generation; pse->version = FIELD_GET(PSPHDR_VERFL_VERSION, psph->verfl); - encap = PSP_ENCAP_HLEN; + encap = sizeof(struct udphdr) + psp_hlen; encap += strip_icv ? PSP_TRL_SIZE : 0; if (proto == htons(ETH_P_IP)) { @@ -340,8 +359,9 @@ int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv) ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) - encap); } - memmove(skb->data + PSP_ENCAP_HLEN, skb->data, l2_hlen + l3_hlen); - skb_pull(skb, PSP_ENCAP_HLEN); + memmove(skb->data + sizeof(struct udphdr) + psp_hlen, + skb->data, l2_hlen + l3_hlen); + skb_pull(skb, sizeof(struct udphdr) + psp_hlen); if (strip_icv) pskb_trim(skb, skb->len - PSP_TRL_SIZE); -- cgit v1.2.3 From a6039776c7994dd0b9a4acce23a3f897d1688cbf Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 2 May 2026 18:07:47 +0000 Subject: ipmr: Add __rcu to netns_ipv4.mrt. kernel test robot reported this Sparse warning: $ make C=1 net/ipv4/ipmr.o net/ipv4/ipmr.c:312:24: error: incompatible types in comparison expression (different address spaces): net/ipv4/ipmr.c:312:24: struct mr_table [noderef] __rcu * net/ipv4/ipmr.c:312:24: struct mr_table * Let's add __rcu annotation to netns_ipv4.mrt. Fixes: b3b6babf4751 ("ipmr: Free mr_table after RCU grace period.") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202605030032.glNApko7-lkp@intel.com/ Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260502180755.359554-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 80ccd4dda8e0..6e27c56514df 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -275,7 +275,7 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MROUTE #ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES - struct mr_table *mrt; + struct mr_table __rcu *mrt; #else struct list_head mr_tables; struct fib_rules_ops *mr_rules_ops; -- cgit v1.2.3 From 07d99587396024932e02474c3a5bede71d108454 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sat, 2 May 2026 11:55:02 +0100 Subject: net: dsa: mt7530: fix .get_stats64 sleeping in atomic context The .get_stats64 callback runs in atomic context, but on MDIO-connected switches every register read acquires the MDIO bus mutex, which can sleep: [ 12.645973] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:609 [ 12.654442] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 759, name: grep [ 12.663377] preempt_count: 0, expected: 0 [ 12.667410] RCU nest depth: 1, expected: 0 [ 12.671511] INFO: lockdep is turned off. [ 12.675441] CPU: 0 UID: 0 PID: 759 Comm: grep Tainted: G S W 7.0.0+ #0 PREEMPT [ 12.675453] Tainted: [S]=CPU_OUT_OF_SPEC, [W]=WARN [ 12.675456] Hardware name: Bananapi BPI-R64 (DT) [ 12.675459] Call trace: [ 12.675462] show_stack+0x14/0x1c (C) [ 12.675477] dump_stack_lvl+0x68/0x8c [ 12.675487] dump_stack+0x14/0x1c [ 12.675495] __might_resched+0x14c/0x220 [ 12.675504] __might_sleep+0x44/0x80 [ 12.675511] __mutex_lock+0x50/0xb10 [ 12.675523] mutex_lock_nested+0x20/0x30 [ 12.675532] mt7530_get_stats64+0x40/0x2ac [ 12.675542] dsa_user_get_stats64+0x2c/0x40 [ 12.675553] dev_get_stats+0x44/0x1e0 [ 12.675564] dev_seq_printf_stats+0x24/0xe0 [ 12.675575] dev_seq_show+0x14/0x3c [ 12.675583] seq_read_iter+0x37c/0x480 [ 12.675595] seq_read+0xd0/0xec [ 12.675605] proc_reg_read+0x94/0xe4 [ 12.675615] vfs_read+0x98/0x29c [ 12.675625] ksys_read+0x54/0xdc [ 12.675633] __arm64_sys_read+0x18/0x20 [ 12.675642] invoke_syscall.constprop.0+0x54/0xec [ 12.675653] do_el0_svc+0x3c/0xb4 [ 12.675662] el0_svc+0x38/0x200 [ 12.675670] el0t_64_sync_handler+0x98/0xdc [ 12.675679] el0t_64_sync+0x158/0x15c For MDIO-connected switches, poll MIB counters asynchronously using a delayed workqueue every second and let .get_stats64 return the cached values under a spinlock. A mod_delayed_work() call on each read triggers an immediate refresh so counters stay responsive when queried more frequently. MMIO-connected switches (MT7988, EN7581, AN7583) are not affected because their regmap does not sleep, so they continue to read MIB counters directly in .get_stats64. Fixes: 88c810f35ed5 ("net: dsa: mt7530: implement .get_stats64") Signed-off-by: Daniel Golle Acked-by: Chester A. Unal Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/6940b913da2c29156f0feff74b678d3c526ee84c.1777719253.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++-- drivers/net/dsa/mt7530.h | 8 ++++++ 2 files changed, 80 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b9423389c2ef..44d670904ad8 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -25,6 +25,9 @@ #include "mt7530.h" +#define MT7530_STATS_POLL_INTERVAL (1 * HZ) +#define MT7530_STATS_RATE_LIMIT (HZ / 10) + static struct mt753x_pcs *pcs_to_mt753x_pcs(struct phylink_pcs *pcs) { return container_of(pcs, struct mt753x_pcs, pcs); @@ -906,10 +909,9 @@ static void mt7530_get_rmon_stats(struct dsa_switch *ds, int port, *ranges = mt7530_rmon_ranges; } -static void mt7530_get_stats64(struct dsa_switch *ds, int port, - struct rtnl_link_stats64 *storage) +static void mt7530_read_port_stats64(struct mt7530_priv *priv, int port, + struct rtnl_link_stats64 *storage) { - struct mt7530_priv *priv = ds->priv; uint64_t data; /* MIB counter doesn't provide a FramesTransmittedOK but instead @@ -951,6 +953,54 @@ static void mt7530_get_stats64(struct dsa_switch *ds, int port, &storage->rx_crc_errors); } +static void mt7530_stats_refresh(struct mt7530_priv *priv) +{ + struct rtnl_link_stats64 stats = {}; + struct dsa_port *dp; + int port; + + dsa_switch_for_each_user_port(dp, priv->ds) { + port = dp->index; + + mt7530_read_port_stats64(priv, port, &stats); + + spin_lock_bh(&priv->stats_lock); + priv->ports[port].stats = stats; + priv->stats_last = jiffies; + spin_unlock_bh(&priv->stats_lock); + } +} + +static void mt7530_stats_poll(struct work_struct *work) +{ + struct mt7530_priv *priv = container_of(work, struct mt7530_priv, + stats_work.work); + + mt7530_stats_refresh(priv); + schedule_delayed_work(&priv->stats_work, + MT7530_STATS_POLL_INTERVAL); +} + +static void mt7530_get_stats64(struct dsa_switch *ds, int port, + struct rtnl_link_stats64 *storage) +{ + struct mt7530_priv *priv = ds->priv; + bool refresh; + + if (priv->bus) { + spin_lock_bh(&priv->stats_lock); + *storage = priv->ports[port].stats; + refresh = time_after(jiffies, priv->stats_last + + MT7530_STATS_RATE_LIMIT); + spin_unlock_bh(&priv->stats_lock); + if (refresh) + mod_delayed_work(system_percpu_wq, + &priv->stats_work, 0); + } else { + mt7530_read_port_stats64(priv, port, storage); + } +} + static void mt7530_get_eth_ctrl_stats(struct dsa_switch *ds, int port, struct ethtool_eth_ctrl_stats *ctrl_stats) { @@ -3137,9 +3187,24 @@ mt753x_setup(struct dsa_switch *ds) if (ret && priv->irq_domain) mt7530_free_mdio_irq(priv); + if (!ret && priv->bus) { + mt7530_stats_refresh(priv); + schedule_delayed_work(&priv->stats_work, + MT7530_STATS_POLL_INTERVAL); + } + return ret; } +static void +mt753x_teardown(struct dsa_switch *ds) +{ + struct mt7530_priv *priv = ds->priv; + + if (priv->bus) + cancel_delayed_work_sync(&priv->stats_work); +} + static int mt753x_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e) { @@ -3257,6 +3322,7 @@ static int mt7988_setup(struct dsa_switch *ds) static const struct dsa_switch_ops mt7530_switch_ops = { .get_tag_protocol = mtk_get_tag_protocol, .setup = mt753x_setup, + .teardown = mt753x_teardown, .preferred_default_local_cpu_port = mt753x_preferred_default_local_cpu_port, .get_strings = mt7530_get_strings, .get_ethtool_stats = mt7530_get_ethtool_stats, @@ -3395,6 +3461,9 @@ mt7530_probe_common(struct mt7530_priv *priv) priv->ds->ops = &mt7530_switch_ops; priv->ds->phylink_mac_ops = &mt753x_phylink_mac_ops; mutex_init(&priv->reg_mutex); + spin_lock_init(&priv->stats_lock); + INIT_DELAYED_WORK(&priv->stats_work, mt7530_stats_poll); + dev_set_drvdata(dev, priv); return 0; diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 3e0090bed298..dd33b0df3419 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -796,6 +796,7 @@ struct mt7530_fdb { * @pvid: The VLAN specified is to be considered a PVID at ingress. Any * untagged frames will be assigned to the related VLAN. * @sgmii_pcs: Pointer to PCS instance for SerDes ports + * @stats: Cached port statistics for MDIO-connected switches */ struct mt7530_port { bool enable; @@ -803,6 +804,7 @@ struct mt7530_port { u32 pm; u16 pvid; struct phylink_pcs *sgmii_pcs; + struct rtnl_link_stats64 stats; }; /* Port 5 mode definitions of the MT7530 switch */ @@ -875,6 +877,9 @@ struct mt753x_info { * @create_sgmii: Pointer to function creating SGMII PCS instance(s) * @active_cpu_ports: Holding the active CPU ports * @mdiodev: The pointer to the MDIO device structure + * @stats_lock: Protects cached per-port stats from concurrent access + * @stats_work: Delayed work for polling MIB counters on MDIO switches + * @stats_last: Jiffies timestamp of last MIB counter poll */ struct mt7530_priv { struct device *dev; @@ -900,6 +905,9 @@ struct mt7530_priv { int (*create_sgmii)(struct mt7530_priv *priv); u8 active_cpu_ports; struct mdio_device *mdiodev; + spinlock_t stats_lock; /* protects cached stats counters */ + struct delayed_work stats_work; + unsigned long stats_last; }; struct mt7530_hw_vlan_entry { -- cgit v1.2.3 From f4c50a4034e62ab75f1d5cdd191dd5f9c77fdff4 Mon Sep 17 00:00:00 2001 From: Kuan-Ting Chen Date: Mon, 4 May 2026 23:27:12 +0800 Subject: xfrm: esp: avoid in-place decrypt on shared skb frags MSG_SPLICE_PAGES can attach pages from a pipe directly to an skb. TCP marks such skbs with SKBFL_SHARED_FRAG after skb_splice_from_iter(), so later paths that may modify packet data can first make a private copy. The IPv4/IPv6 datagram append paths did not set this flag when splicing pages into UDP skbs. That leaves an ESP-in-UDP packet made from shared pipe pages looking like an ordinary uncloned nonlinear skb. ESP input then takes the no-COW fast path for uncloned skbs without a frag_list and decrypts in place over data that is not owned privately by the skb. Mark IPv4/IPv6 datagram splice frags with SKBFL_SHARED_FRAG, matching TCP. Also make ESP input fall back to skb_cow_data() when the flag is present, so ESP does not decrypt externally backed frags in place. Private nonlinear skb frags still use the existing fast path. This intentionally does not change ESP output. In esp_output_head(), the path that appends the ESP trailer to existing skb tailroom without calling skb_cow_data() is not reachable for nonlinear skbs: skb_tailroom() returns zero when skb->data_len is nonzero, while ESP tailen is positive. Thus ESP output will either use the separate destination-frag path or fall back to skb_cow_data(). Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible") Fixes: 03e2a30f6a27 ("esp6: Avoid skb_cow_data whenever possible") Fixes: 7da0dde68486 ("ip, udp: Support MSG_SPLICE_PAGES") Fixes: 6d8192bd69bb ("ip6, udp6: Support MSG_SPLICE_PAGES") Reported-by: Hyunwoo Kim Reported-by: Kuan-Ting Chen Tested-by: Hyunwoo Kim Cc: stable@vger.kernel.org Signed-off-by: Kuan-Ting Chen Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 3 ++- net/ipv4/ip_output.c | 2 ++ net/ipv6/esp6.c | 3 ++- net/ipv6/ip6_output.c | 2 ++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 6dfc0bcdef65..6a5febbdbee4 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -873,7 +873,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) nfrags = 1; goto skip_cow; - } else if (!skb_has_frag_list(skb)) { + } else if (!skb_has_frag_list(skb) && + !skb_has_shared_frag(skb)) { nfrags = skb_shinfo(skb)->nr_frags; nfrags++; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e4790cc7b5c2..5bcd73cbdb41 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1233,6 +1233,8 @@ alloc_new_skb: if (err < 0) goto error; copy = err; + if (!(flags & MSG_NO_SHARED_FRAGS)) + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; wmem_alloc_delta += copy; } else if (!zc) { int i = skb_shinfo(skb)->nr_frags; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9f75313734f8..9c06c5a1419d 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -915,7 +915,8 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) nfrags = 1; goto skip_cow; - } else if (!skb_has_frag_list(skb)) { + } else if (!skb_has_frag_list(skb) && + !skb_has_shared_frag(skb)) { nfrags = skb_shinfo(skb)->nr_frags; nfrags++; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7e92909ab5be..1f2a33fbed6e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1794,6 +1794,8 @@ alloc_new_skb: if (err < 0) goto error; copy = err; + if (!(flags & MSG_NO_SHARED_FRAGS)) + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; wmem_alloc_delta += copy; } else if (!zc) { int i = skb_shinfo(skb)->nr_frags; -- cgit v1.2.3 From aab3d205a086233c612fee86009265451793e0c2 Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Mon, 27 Apr 2026 19:57:15 +0300 Subject: drm/i915/display: enable ccs modifiers on dg2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since Xe driver aux ccs enablement dg2 ccs modifiers have been disabled on i915 driver. Here allow dg2 to use ccs again for framebuffers. Fixes: 6a99e91a6ca8 ("drm/i915/display: Detect AuxCCS support via display parent interface") Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Ville Syrjälä Signed-off-by: Mika Kahola Link: https://patch.msgid.link/20260427165715.864721-1-juhapekka.heikkila@gmail.com (cherry picked from commit aee13ba1448213975f36942ba5d1ce693eb5c002) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_driver.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 385a634c3ed0..d9be7a5a239c 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -750,9 +750,8 @@ static bool has_auxccs(struct drm_device *drm) { struct drm_i915_private *i915 = to_i915(drm); - return IS_GRAPHICS_VER(i915, 9, 12) || - IS_ALDERLAKE_P(i915) || - IS_METEORLAKE(i915); + return IS_GRAPHICS_VER(i915, 9, 12) && + !HAS_FLAT_CCS(i915); } static bool has_fenced_regions(struct drm_device *drm) -- cgit v1.2.3 From 2c340aab5485ebe9e33c01437dd4815ef33c8df5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 1 May 2026 09:16:38 +0200 Subject: x86/efi: Restore IRQ state in EFI page fault handler The kernel's softirq API does not permit re-enabling softirqs while IRQs are disabled. The reason for this is that local_bh_enable() will not only re-enable delivery of softirqs over the back of IRQs, it will also handle any pending softirqs immediately, regardless of whether IRQs are enabled at that point. For this reason, commit d02198550423 ("x86/fpu: Improve crypto performance by making kernel-mode FPU reliably usable in softirqs") disables softirqs only when IRQs are enabled, as it is not permitted otherwise, but also unnecessary, given that asynchronous softirq delivery never happens to begin with while IRQs are disabled. However, this does mean that entering a kernel mode FPU section with IRQs enabled and leaving it with IRQs disabled leads to problems, as identified by Sashiko [0]: the EFI page fault handler is called from page_fault_oops() with IRQs disabled, and thus ends the kernel mode FPU section with IRQs disabled as well, regardless of whether IRQs were enabled when it was started. This may result in schedule() being called with a non-zero preempt_count, causing a BUG(). So take care to re-enable IRQs when handling any EFI page faults if they were taken with IRQs enabled. [0] https://sashiko.dev/#/patchset/20260430074107.27051-1-ivan.hu%40canonical.com Cc: Eric Biggers Cc: Ivan Hu Cc: x86@kernel.org Cc: Fixes: d02198550423 ("x86/fpu: Improve crypto performance by making kernel-mode FPU reliably usable in softirqs") Reviewed-by: Eric Biggers Signed-off-by: Ard Biesheuvel --- arch/x86/include/asm/efi.h | 3 ++- arch/x86/mm/fault.c | 2 +- arch/x86/platform/efi/quirks.c | 11 ++++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index dc8fe1361c18..be58b7f5c806 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -137,7 +137,8 @@ extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); extern void efi_delete_dummy_variable(void); -extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr); +extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr, + const struct pt_regs *regs); extern void efi_unmap_boot_services(void); void arch_efi_call_virt_setup(void); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f0e77e084482..63de8e8684f2 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -686,7 +686,7 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code, * avoid hanging the system. */ if (IS_ENABLED(CONFIG_EFI)) - efi_crash_gracefully_on_page_fault(address); + efi_crash_gracefully_on_page_fault(address, regs); /* Only not-present faults should be handled by KFENCE. */ if (!(error_code & X86_PF_PROT) && diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index c8f5e094ed9d..90a065fcb1fa 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -761,7 +761,8 @@ int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, * @return: Returns, if the page fault is not handled. This function * will never return if the page fault is handled successfully. */ -void efi_crash_gracefully_on_page_fault(unsigned long phys_addr) +void efi_crash_gracefully_on_page_fault(unsigned long phys_addr, + const struct pt_regs *regs) { if (!IS_ENABLED(CONFIG_X86_64)) return; @@ -810,6 +811,14 @@ void efi_crash_gracefully_on_page_fault(unsigned long phys_addr) return; } + /* + * The API does not permit entering a kernel mode FPU section with + * interrupts enabled and leaving it with interrupts disabled. So + * re-enable interrupts now if they were enabled when the page fault + * occurred. + */ + local_irq_restore(regs->flags); + /* * Before calling EFI Runtime Service, the kernel has switched the * calling process to efi_mm. Hence, switch back to task_mm. -- cgit v1.2.3 From 212ec34e4e726e8cd4af7bea4740db24de8a9dab Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 4 May 2026 08:34:32 -0600 Subject: block: only read from sqe on initial invocation of blkdev_uring_cmd() This passthrough helper currently only supports discards. Part of that command is the start and length, which is read from the SQE. It does so on every invocation, where it really should just make it stable on the first invocation. This avoids needing to copy the SQE upfront, as we only really need those two 8b values stored in our per-req payload. Cc: stable@vger.kernel.org # 6.17+ Signed-off-by: Jens Axboe --- block/ioctl.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/block/ioctl.c b/block/ioctl.c index fc3be0549aa7..ab2c9ed79946 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -857,6 +857,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) #endif struct blk_iou_cmd { + u64 start; + u64 len; int res; bool nowait; }; @@ -946,23 +948,27 @@ int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); - const struct io_uring_sqe *sqe = cmd->sqe; u32 cmd_op = cmd->cmd_op; - uint64_t start, len; - if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || - sqe->rw_flags || sqe->file_index)) - return -EINVAL; + /* Read what we need from the SQE on the first issue */ + if (!(issue_flags & IORING_URING_CMD_REISSUE)) { + const struct io_uring_sqe *sqe = cmd->sqe; + + if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || + sqe->rw_flags || sqe->file_index)) + return -EINVAL; + + bic->start = READ_ONCE(sqe->addr); + bic->len = READ_ONCE(sqe->addr3); + } bic->res = 0; bic->nowait = issue_flags & IO_URING_F_NONBLOCK; - start = READ_ONCE(sqe->addr); - len = READ_ONCE(sqe->addr3); - switch (cmd_op) { case BLOCK_URING_CMD_DISCARD: - return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); + return blkdev_cmd_discard(cmd, bdev, bic->start, bic->len, + bic->nowait); } return -EINVAL; } -- cgit v1.2.3 From 09ae540e1d5c02210795911bf5459282d7af04e9 Mon Sep 17 00:00:00 2001 From: Mikhail Gavrilov Date: Thu, 23 Apr 2026 02:33:49 +0500 Subject: rhashtable: drop ht->mutex in rhashtable_free_and_destroy() rhashtable_free_and_destroy() is a single-shot teardown routine: cancel_work_sync() has already quiesced the deferred rehash worker, and the function's documented contract requires the caller to guarantee no other concurrent access to the rhashtable. Under those conditions ht->mutex is not protecting anything -- taking it is a leftover from the original teardown path. That leftover is actively harmful: it closes a circular lock-class dependency with fs_reclaim. The deferred rehash worker takes ht->mutex and then allocates GFP_KERNEL memory in bucket_table_alloc(), establishing &ht->mutex -> fs_reclaim After commit b32c4a213698 ("xattr: add rhashtable-based simple_xattr infrastructure") introduced simple_xattr_ht_free(), which calls rhashtable_free_and_destroy(), the simple_xattrs teardown became reachable from evict() under the dcache shrinker. The subsequent per-subsystem adaptations made the reverse edge concrete in three independent code paths: * commit 52b364fed6e1 ("shmem: adapt to rhashtable-based simple_xattrs with lazy allocation") * commit 5bd97f5c5f24 ("kernfs: adapt to rhashtable-based simple_xattrs with lazy allocation") * commit 50704c391fbf ("pidfs: adapt to rhashtable-based simple_xattrs") Any of the three closes the cycle fs_reclaim -> &ht->mutex which lockdep reports as follows. This particular splat was observed organically on a workstation kernel built from vfs-7.1-rc1.xattr at ~35h uptime under normal mixed workload, with CONFIG_PROVE_LOCKING=y. The path happens to go through kernfs: WARNING: possible circular locking dependency detected 7.0.0-faeab166167f-with-fixes-v1+ #191 Tainted: G U kswapd0/243 is trying to acquire lock: ffff8882e475c0f8 (&ht->mutex){+.+.}-{4:4}, at: rhashtable_free_and_destroy+0x36/0x740 but task is already holding lock: ffffffffa8ad1d00 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x995/0x1600 the existing dependency chain (in reverse order) is: -> #1 (fs_reclaim){+.+.}-{0:0}: __lock_acquire+0x506/0xbf0 lock_acquire.part.0+0xc7/0x280 fs_reclaim_acquire+0xd9/0x130 __kvmalloc_node_noprof+0xcd/0xb40 bucket_table_alloc.isra.0+0x5a/0x440 rhashtable_rehash_alloc+0x4e/0xd0 rht_deferred_worker+0x14b/0x440 process_one_work+0x8fd/0x16a0 worker_thread+0x601/0xff0 kthread+0x36b/0x470 ret_from_fork+0x5bf/0x910 ret_from_fork_asm+0x1a/0x30 -> #0 (&ht->mutex){+.+.}-{4:4}: check_prev_add+0xdb/0xce0 validate_chain+0x554/0x780 __lock_acquire+0x506/0xbf0 lock_acquire.part.0+0xc7/0x280 __mutex_lock+0x1b2/0x2550 rhashtable_free_and_destroy+0x36/0x740 kernfs_put.part.0+0x119/0x570 evict+0x3b6/0x9c0 __dentry_kill+0x181/0x540 shrink_dentry_list+0x135/0x440 prune_dcache_sb+0xdb/0x150 super_cache_scan+0x2ff/0x520 do_shrink_slab+0x35a/0xee0 shrink_slab_memcg+0x457/0x950 shrink_slab+0x43b/0x550 shrink_one+0x31a/0x6f0 shrink_many+0x31e/0xc80 shrink_node+0xeb3/0x14a0 balance_pgdat+0x8ed/0x1600 kswapd+0x2f3/0x530 kthread+0x36b/0x470 ret_from_fork+0x5bf/0x910 ret_from_fork_asm+0x1a/0x30 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); lock(&ht->mutex); lock(fs_reclaim); lock(&ht->mutex); Note that lockdep tracks lock classes, not instances: the two &ht->mutex sites are on different rhashtable objects (the deferred worker was triggered by some unrelated rhashtable growth), but because rhashtable_init() uses a single static lockdep key for all rhashtables, this is a real class-level cycle. Once reported, lockdep disables itself for the remainder of the boot, masking any subsequent locking bugs. Drop the mutex. After cancel_work_sync() the rehash worker is quiesced and, per this function's contract, no other concurrent access is possible; the tables are therefore owned exclusively by this function and can be walked without any lock held. Switch the table walks from rht_dereference() (which requires ht->mutex to be held under CONFIG_PROVE_RCU) to rcu_dereference_raw(), which has no lockdep annotation. rht_ptr_exclusive() already uses rcu_dereference_protected(p, 1) and needs no change. This is the only place in lib/rhashtable.c where &ht->mutex is acquired from a path reachable under fs_reclaim; the deferred worker is the only other site and it is the forward edge. Removing the acquisition here therefore eliminates the class cycle for all three subsystems that use simple_xattrs, not just the one in the splat above. No locking-semantics change is introduced for correct users; incorrect users would already be racing with rehash worker completion regardless of the mutex. Synthetic reproduction of the splat within a few-minute window was unsuccessful across several attempts (tmpfs and kernfs zombies via cgroupfs with open-fd-through-rmdir, with and without swap, up to ~60k reclaim-path executions of simple_xattr_ht_free() in a single run), consistent with the rare coincidence-of-edges profile of the bug: the forward edge is already registered in /proc/lockdep on any idle system via rht_deferred_worker, but the reverse edge requires evict() to complete kernfs_put()'s final release inside the fs_reclaim critical section, which in my attempts was ordered against rather than interleaved with the worker. Fixes: b32c4a213698 ("xattr: add rhashtable-based simple_xattr infrastructure") Signed-off-by: Mikhail Gavrilov Signed-off-by: Herbert Xu --- lib/rhashtable.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 7a67ef5b67b6..426d4e381f13 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -1166,6 +1166,11 @@ static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj, * This function will eventually sleep to wait for an async resize * to complete. The caller is responsible that no further write operations * occurs in parallel. + * + * After cancel_work_sync() has returned, the deferred rehash worker is + * quiesced and, per the contract above, no other concurrent access to the + * rhashtable is possible. The tables are therefore owned exclusively by + * this function and can be walked without ht->mutex held. */ void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), @@ -1177,8 +1182,15 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, irq_work_sync(&ht->run_irq_work); cancel_work_sync(&ht->run_work); - mutex_lock(&ht->mutex); - tbl = rht_dereference(ht->tbl, ht); + /* + * Do NOT take ht->mutex here. The rehash worker establishes + * ht->mutex -> fs_reclaim via GFP_KERNEL bucket allocation under + * the mutex; callers on the reclaim path (e.g. simple_xattr_ht_free() + * from evict() under the dcache shrinker for shmem/kernfs/pidfs + * inodes) would otherwise close a circular dependency + * fs_reclaim -> ht->mutex. + */ + tbl = rcu_dereference_raw(ht->tbl); restart: if (free_fn) { for (i = 0; i < tbl->size; i++) { @@ -1187,22 +1199,21 @@ restart: cond_resched(); for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)), next = !rht_is_a_nulls(pos) ? - rht_dereference(pos->next, ht) : NULL; + rcu_dereference_raw(pos->next) : NULL; !rht_is_a_nulls(pos); pos = next, next = !rht_is_a_nulls(pos) ? - rht_dereference(pos->next, ht) : NULL) + rcu_dereference_raw(pos->next) : NULL) rhashtable_free_one(ht, pos, free_fn, arg); } } - next_tbl = rht_dereference(tbl->future_tbl, ht); + next_tbl = rcu_dereference_raw(tbl->future_tbl); bucket_table_free(tbl); if (next_tbl) { tbl = next_tbl; goto restart; } - mutex_unlock(&ht->mutex); } EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy); -- cgit v1.2.3 From dad0d91cc2c3e6b6fb285ccfe7ddf71525797198 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 28 Apr 2026 18:14:18 +0200 Subject: mm/slab: Add kvfree_atomic() helper kvmalloc() now supports non-sleeping GFP flags, including the vmalloc fallback path. This means it may return vmalloc memory even for GFP_ATOMIC and GFP_NOWAIT allocations. Freeing such memory with kvfree() may then end up calling vfree(), which is not safe for non-sleeping contexts. Introduce kvfree_atomic() helper for such cases. It mirrors kvfree(), but uses vfree_atomic() for vmalloced memory. Signed-off-by: Uladzislau Rezki (Sony) Acked-by: Vlastimil Babka (SUSE) Acked-by: Harry Yoo (Oracle) Signed-off-by: Herbert Xu --- include/linux/slab.h | 3 +++ mm/slub.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/include/linux/slab.h b/include/linux/slab.h index 15a60b501b95..2b5ab488e96b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -1234,6 +1234,9 @@ void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long alig extern void kvfree(const void *addr); DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T)) +extern void kvfree_atomic(const void *addr); +DEFINE_FREE(kvfree_atomic, void *, if (!IS_ERR_OR_NULL(_T)) kvfree_atomic(_T)) + extern void kvfree_sensitive(const void *addr, size_t len); unsigned int kmem_cache_size(struct kmem_cache *s); diff --git a/mm/slub.c b/mm/slub.c index 0baa906f39ab..8f9004536729 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6882,6 +6882,22 @@ void kvfree(const void *addr) } EXPORT_SYMBOL(kvfree); +/** + * kvfree_atomic() - Free memory. + * @addr: Pointer to allocated memory. + * + * Same as kvfree(), but uses vfree_atomic() for vmalloc + * backed memory. Must not be called from NMI context. + */ +void kvfree_atomic(const void *addr) +{ + if (is_vmalloc_addr(addr)) + vfree_atomic(addr); + else + kfree(addr); +} +EXPORT_SYMBOL(kvfree_atomic); + /** * kvfree_sensitive - Free a data object containing sensitive information. * @addr: address of the data object to be freed. -- cgit v1.2.3 From d1fa83ecac31093a550534a79a33bc7f4ba8fc10 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 28 Apr 2026 18:14:19 +0200 Subject: rhashtable: Add bucket_table_free_atomic() helper rhashtable_insert_rehash() allocates a new bucket table with GFP_ATOMIC, as it is called from an RCU read-side critical section. If rhashtable_rehash_attach() then fails, the new table is freed via kvfree(). This is unsafe, since kvfree() may fall back to vfree() for vmalloc-backed allocations, which can sleep and trigger: BUG: sleeping function called from invalid context Add bucket_table_free_atomic(), which uses kvfree_atomic() so the table can be freed safely from non-sleeping context. Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Herbert Xu --- lib/rhashtable.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 426d4e381f13..04b3a808fca9 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -114,6 +114,14 @@ static void bucket_table_free(const struct bucket_table *tbl) kvfree(tbl); } +static void bucket_table_free_atomic(const struct bucket_table *tbl) +{ + if (tbl->nest) + nested_bucket_table_free(tbl); + + kvfree_atomic(tbl); +} + static void bucket_table_free_rcu(struct rcu_head *head) { bucket_table_free(container_of(head, struct bucket_table, rcu)); @@ -496,7 +504,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht, err = rhashtable_rehash_attach(ht, tbl, new_tbl); if (err) { - bucket_table_free(new_tbl); + bucket_table_free_atomic(new_tbl); if (err == -EEXIST) err = 0; } else -- cgit v1.2.3 From 5f8719945244dd65b5fa06195f4600db62581610 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 5 May 2026 10:06:53 +0200 Subject: x86/xen: Fix a potential problem in xen_e820_resolve_conflicts() When fixing a conflict in xen_e820_resolve_conflicts(), the loop over the E820 map entries needs to be restarted, as the E820 map will have been modified by the fix. Otherwise entries might be skipped by accident. Fixes: be35d91c8880 ("xen: tolerate ACPI NVS memory overlapping with Xen allocated memory") Signed-off-by: Juergen Gross Signed-off-by: Ingo Molnar Cc: xen-devel@lists.xenproject.org Link: https://patch.msgid.link/20260505080653.197775-1-jgross@suse.com --- arch/x86/xen/setup.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index ac8021c3a997..bb95a05259b8 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -695,17 +695,22 @@ static void __init xen_e820_resolve_conflicts(phys_addr_t start, return; end = start + size; - entry = xen_e820_table.entries; + mapcnt = 0; - for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) { + while (mapcnt < xen_e820_table.nr_entries) { + entry = xen_e820_table.entries + mapcnt; if (entry->addr >= end) return; if (entry->addr + entry->size > start && - entry->type == E820_TYPE_NVS) + entry->type == E820_TYPE_NVS) { xen_e820_swap_entry_with_ram(entry); + /* E820 map has been changed, restart loop! */ + mapcnt = 0; + continue; + } - entry++; + mapcnt++; } } -- cgit v1.2.3 From 52ac35b8a151446481496404af3a8e5e889b3c5a Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Tue, 28 Apr 2026 17:44:58 +0530 Subject: pinctrl: qcom: Fix wakeirq map by removing disconnected irqs for sm8150 PDC interrupts 122-125 were meant for ibi_i3c wakeup but sm8150 do not support i3c. GPIOs 39,51,88 and 144 are also connected to different PDC pin and already reflected in the wake irq map. Remove the unsupported wakeup interrupts from the map. Fixes: 90337380c809 ("pinctrl: qcom: sm8150: Specify PDC map") Reviewed-by: Konrad Dybcio Signed-off-by: Maulik Shah Signed-off-by: Navya Malempati Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-sm8150.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8150.c b/drivers/pinctrl/qcom/pinctrl-sm8150.c index 0767261f5149..12713671243c 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8150.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8150.c @@ -1493,18 +1493,18 @@ static const struct msm_gpio_wakeirq_map sm8150_pdc_map[] = { { 3, 31 }, { 5, 32 }, { 8, 33 }, { 9, 34 }, { 10, 100 }, { 12, 104 }, { 24, 37 }, { 26, 38 }, { 27, 41 }, { 28, 42 }, { 30, 39 }, { 36, 43 }, { 37, 44 }, { 38, 30 }, { 39, 118 }, - { 39, 125 }, { 41, 47 }, { 42, 48 }, { 46, 50 }, { 47, 49 }, - { 48, 51 }, { 49, 53 }, { 50, 52 }, { 51, 116 }, { 51, 123 }, + { 41, 47 }, { 42, 48 }, { 46, 50 }, { 47, 49 }, + { 48, 51 }, { 49, 53 }, { 50, 52 }, { 51, 116 }, { 53, 54 }, { 54, 55 }, { 55, 56 }, { 56, 57 }, { 58, 58 }, { 60, 60 }, { 61, 61 }, { 68, 62 }, { 70, 63 }, { 76, 71 }, { 77, 66 }, { 81, 64 }, { 83, 65 }, { 86, 67 }, { 87, 84 }, - { 88, 117 }, { 88, 124 }, { 90, 69 }, { 91, 70 }, { 93, 75 }, + { 88, 117 }, { 90, 69 }, { 91, 70 }, { 93, 75 }, { 95, 72 }, { 96, 73 }, { 97, 74 }, { 101, 40 }, { 103, 77 }, { 104, 78 }, { 108, 79 }, { 112, 80 }, { 113, 81 }, { 114, 82 }, { 117, 85 }, { 118, 101 }, { 119, 87 }, { 120, 88 }, { 121, 89 }, { 122, 90 }, { 123, 91 }, { 124, 92 }, { 125, 93 }, { 129, 94 }, { 132, 105 }, { 133, 83 }, { 134, 36 }, { 136, 97 }, { 142, 103 }, - { 144, 115 }, { 144, 122 }, { 147, 102 }, { 150, 107 }, + { 144, 115 }, { 147, 102 }, { 150, 107 }, { 152, 108 }, { 153, 109 } }; -- cgit v1.2.3 From f4268b466190dae95a7585f69b4f1f8ad097632c Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 29 Apr 2026 13:40:41 +0000 Subject: nfc: llcp: Fix use-after-free in llcp_sock_release() llcp_sock_release() unconditionally unlinks the socket from the local sockets list. However, if the socket is still in connecting state, it is on the connecting list. Fix this by checking the socket state and unlinking from the correct list. Fixes: b4011239a08e ("NFC: llcp: Fix non blocking sockets connections") Signed-off-by: Lee Jones Link: https://patch.msgid.link/20260429134115.3558604-1-lee@kernel.org Signed-off-by: David Heidelberg --- net/nfc/llcp_sock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index f1be1e84f665..feab29fc62f4 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -633,6 +633,8 @@ static int llcp_sock_release(struct socket *sock) if (sock->type == SOCK_RAW) nfc_llcp_sock_unlink(&local->raw_sockets, sk); + else if (sk->sk_state == LLCP_CONNECTING) + nfc_llcp_sock_unlink(&local->connecting_sockets, sk); else nfc_llcp_sock_unlink(&local->sockets, sk); -- cgit v1.2.3 From b493ea2765cc17cb8aa7e7544a4b6dcb05b6ed77 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 29 Apr 2026 13:40:42 +0000 Subject: nfc: llcp: Fix use-after-free race in nfc_llcp_recv_cc() A race condition exists in the NFC LLCP connection state machine where the connection acceptance packet (CC) can be processed concurrently with socket release. This can lead to a use-after-free of the socket object. When nfc_llcp_recv_cc() moves the socket from the connecting_sockets list to the sockets list, it does so without holding the socket lock. If llcp_sock_release() is executing concurrently, it might have already unlinked the socket and dropped its references, which can result in nfc_llcp_recv_cc() linking a freed socket into the live list. Fix this by holding lock_sock() during the state transition and list movement in nfc_llcp_recv_cc(). After acquiring the lock, check if the socket is still hashed to ensure it hasn't already been unlinked and marked for destruction by the release path. This aligns the locking pattern with recv_hdlc() and recv_disc(). Fixes: a69f32af86e3 ("NFC: Socket linked list") Signed-off-by: Lee Jones Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260429134115.3558604-2-lee@kernel.org Signed-off-by: David Heidelberg --- net/nfc/llcp_core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index db5bc6a878dd..dc65c719f35f 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -1218,6 +1218,15 @@ static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, sk = &llcp_sock->sk; + lock_sock(sk); + + /* Check if socket was destroyed whilst waiting for the lock */ + if (!sk_hashed(sk)) { + release_sock(sk); + nfc_llcp_sock_put(llcp_sock); + return; + } + /* Unlink from connecting and link to the client array */ nfc_llcp_sock_unlink(&local->connecting_sockets, sk); nfc_llcp_sock_link(&local->sockets, sk); @@ -1229,6 +1238,8 @@ static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, sk->sk_state = LLCP_CONNECTED; sk->sk_state_change(sk); + release_sock(sk); + nfc_llcp_sock_put(llcp_sock); } -- cgit v1.2.3 From 3780c41460a9ad6d5d4c09a416765c6cc285033b Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Thu, 2 Apr 2026 16:32:35 -0300 Subject: drm/etnaviv: Fix armed job not being pushed to the DRM scheduler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When xa_alloc_cyclic() failed in etnaviv_sched_push_job(), the error path skipped drm_sched_entity_push_job(). This is a violation of the DRM scheduler contract, as once a job has been armed with drm_sched_job_arm(), it must be pushed with drm_sched_entity_push_job(). From the DRM scheduler documentation, """ drm_sched_job_arm() is a point of no return since it initializes the fences and their sequence number etc. Once that function has been called, you *must* submit it with drm_sched_entity_push_job() and cannot simply abort it by calling drm_sched_job_cleanup(). """ Fix this by splitting the fence ID allocation into two phases: first, alloc an xarray slot before arming the job (which can fail), then fill in the actual fence with xa_store() after arming. This way, allocation failures are handled before the job is armed, and once armed, the job is always pushed to the scheduler. This also fixes a double call to drm_sched_job_cleanup(), as both etnaviv_sched_push_job() and its caller would call it on failure. Fixes: 764be12345c3 ("drm/etnaviv: convert user fence tracking to XArray") Signed-off-by: Maíra Canal Link: https://patch.msgid.link/20260402193424.2023318-1-mcanal@igalia.com Signed-off-by: Christian Gmeiner --- drivers/gpu/drm/etnaviv/etnaviv_sched.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index df4232d7e135..3cc50d697c89 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -116,16 +116,18 @@ int etnaviv_sched_push_job(struct etnaviv_gem_submit *submit) */ mutex_lock(&gpu->sched_lock); + ret = xa_alloc_cyclic(&gpu->user_fences, &submit->out_fence_id, + NULL, xa_limit_32b, &gpu->next_user_fence, + GFP_KERNEL); + if (ret < 0) + goto out_unlock; + drm_sched_job_arm(&submit->sched_job); submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished); - ret = xa_alloc_cyclic(&gpu->user_fences, &submit->out_fence_id, - submit->out_fence, xa_limit_32b, - &gpu->next_user_fence, GFP_KERNEL); - if (ret < 0) { - drm_sched_job_cleanup(&submit->sched_job); - goto out_unlock; - } + + xa_store(&gpu->user_fences, submit->out_fence_id, + submit->out_fence, GFP_KERNEL); /* the scheduler holds on to the job now */ kref_get(&submit->refcount); -- cgit v1.2.3 From 4a142520d166f91627f27a7017525a228137c808 Mon Sep 17 00:00:00 2001 From: Jakov Novak Date: Mon, 4 May 2026 18:23:57 +0200 Subject: wifi: libertas: notify firmware load wait on disconnect Currently, when the firmware is not fully loaded and if_usb_disconnect is called, if_usb_prog_firmware gets stuck waiting for cardp->surprise_removed or cardp->fwdnldover while lbs_remove_card also waits for the firmware loading to be completed, which never happens. This caused the reported syzbot bug. To address this, the wake_up function call can be added in the if_usb_disconnect function which notifies the if_usb_prog_firmware thread and resolves the firmware loading. Fixes: 954ee164f4f4 ("[PATCH] libertas: reorganize and simplify init sequence") Reported-and-tested-by: syzbot+c99d17aa44dbdba16ad2@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c99d17aa44dbdba16ad2 Signed-off-by: Jakov Novak Link: https://patch.msgid.link/20260504162356.17250-2-jakovnovak30@gmail.com [fix subject] Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/libertas/if_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index a00d53350fa9..5cc0c5cac257 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -310,6 +310,7 @@ static void if_usb_disconnect(struct usb_interface *intf) struct lbs_private *priv = cardp->priv; cardp->surprise_removed = 1; + wake_up(&cardp->fw_wq); if (priv) { lbs_stop_card(priv); -- cgit v1.2.3 From e9e334f8063a991b4f648b8dbb8dac44cf810540 Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Wed, 29 Apr 2026 20:57:52 -0700 Subject: net: mana: check xdp_rxq registration before unreg in mana_destroy_rxq() When mana_create_rxq() fails at mana_create_wq_obj() or any step before xdp_rxq_info_reg() is called, the error path jumps to `out:` which calls mana_destroy_rxq(). mana_destroy_rxq() unconditionally calls xdp_rxq_info_unreg() on xilinx xdp_rxq that was never registered, triggering a WARN_ON in net/core/xdp.c: mana 7870:00:00.0: HWC: Failed hw_channel req: 0xc000009a mana 7870:00:00.0 eth7: Failed to create RXQ: err = -71 Driver BUG WARNING: CPU: 442 PID: 491615 at ../net/core/xdp.c:150 xdp_rxq_info_unreg+0x44/0x70 Modules linked in: tcp_bbr xsk_diag udp_diag raw_diag unix_diag af_packet_diag netlink_diag nf_tables nfnetlink tcp_diag inet_diag binfmt_misc rpcsec_gss_krb5 nfsv3 nfs_acl auth_rpcgss nfsv4 dns_resolver nfs lockd ext4 grace crc16 iscsi_tcp mbcache fscache libiscsi_tcp jbd2 netfs rpcrdma af_packet sunrpc rdma_ucm ib_iser rdma_cm iw_cm iscsi_ibft ib_cm iscsi_boot_sysfs libiscsi rfkill scsi_transport_iscsi mana_ib ib_uverbs ib_core mana hyperv_drm(X) drm_shmem_helper intel_rapl_msr drm_kms_helper intel_rapl_common syscopyarea nls_iso8859_1 sysfillrect intel_uncore_frequency_common nls_cp437 vfat fat nfit sysimgblt libnvdimm hv_netvsc(X) hv_utils(X) fb_sys_fops hv_balloon(X) joydev fuse drm dm_mod configfs ip_tables x_tables xfs libcrc32c sd_mod nvme nvme_core nvme_common t10_pi crc64_rocksoft_generic crc64_rocksoft crc64 hid_generic serio_raw pci_hyperv(X) hv_storvsc(X) scsi_transport_fc hyperv_keyboard(X) hid_hyperv(X) pci_hyperv_intf(X) crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel crypto_simd cryptd hv_vmbus(X) softdog sg scsi_mod efivarfs Supported: Yes, External CPU: 442 PID: 491615 Comm: ethtool Kdump: loaded Tainted: G X 5.14.21-150500.55.136-default #1 SLE15-SP5 a627be1b53abbfd64ad16b2685e4308c52847f42 Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 07/25/2025 RIP: 0010:xdp_rxq_info_unreg+0x44/0x70 Code: e8 91 fe ff ff c7 43 0c 02 00 00 00 48 c7 03 00 00 00 00 5b c3 cc cc cc cc e9 58 3a 1c 00 48 c7 c7 f6 5f 19 97 e8 5c a4 7e ff <0f> 0b 83 7b 0c 01 74 ca 48 c7 c7 d9 5f 19 97 e8 48 a4 7e ff 0f 0b RSP: 0018:ff3df6c8f7207818 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ff30d89f94808a80 RCX: 0000000000000027 RDX: 0000000000000000 RSI: 0000000000000002 RDI: ff30d94bdcca2908 RBP: 0000000000080000 R08: ffffffff98ed11a0 R09: ff3df6c8f72077a0 R10: dead000000000100 R11: 000000000000000a R12: 0000000000000000 R13: 0000000000002000 R14: 0000000000040000 R15: ff30d89f94800000 FS: 00007fe6d8432b80(0000) GS:ff30d94bdcc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fe6d81a89b1 CR3: 00000b3b6d578001 CR4: 0000000000371ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 Call Trace: mana_destroy_rxq+0x5b/0x2f0 [mana 267acf7006bcb696095bba4d810643d1db3b9e94] mana_create_rxq.isra.55+0x3db/0x720 [mana 267acf7006bcb696095bba4d810643d1db3b9e94] ? simple_lookup+0x36/0x50 ? current_time+0x42/0x80 ? __d_free_external+0x30/0x30 mana_alloc_queues+0x32a/0x470 [mana 267acf7006bcb696095bba4d810643d1db3b9e94] ? _raw_spin_unlock+0xa/0x30 ? d_instantiate.part.29+0x2e/0x40 ? _raw_spin_unlock+0xa/0x30 ? debugfs_create_dir+0xe4/0x140 mana_attach+0x5c/0xf0 [mana 267acf7006bcb696095bba4d810643d1db3b9e94] mana_set_ringparam+0xd5/0x1a0 [mana 267acf7006bcb696095bba4d810643d1db3b9e94] ethnl_set_rings+0x292/0x320 genl_family_rcv_msg_doit.isra.15+0x11b/0x150 genl_rcv_msg+0xe3/0x1e0 ? rings_prepare_data+0x80/0x80 ? genl_family_rcv_msg_doit.isra.15+0x150/0x150 netlink_rcv_skb+0x50/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1b6/0x280 netlink_sendmsg+0x365/0x4d0 sock_sendmsg+0x5f/0x70 __sys_sendto+0x112/0x140 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x5b/0x80 ? handle_mm_fault+0xd7/0x290 ? do_user_addr_fault+0x2d8/0x740 ? exc_page_fault+0x67/0x150 entry_SYSCALL_64_after_hwframe+0x6b/0xd5 RIP: 0033:0x7fe6d8122f06 Code: 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 11 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 72 f3 c3 41 57 41 56 4d 89 c7 41 55 41 54 41 RSP: 002b:00007fff2b66b068 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 000055771123d2a0 RCX: 00007fe6d8122f06 RDX: 0000000000000034 RSI: 000055771123d3b0 RDI: 0000000000000003 RBP: 00007fff2b66b100 R08: 00007fe6d8203360 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 000055771123d350 R13: 000055771123d340 R14: 0000000000000000 R15: 00007fff2b66b2b0 Guard the xdp_rxq_info_unreg() call with xdp_rxq_info_is_reg() so that mana_destroy_rxq() is safe to call regardless of how far initialization progressed. Fixes: ed5356b53f07 ("net: mana: Add XDP support") Reviewed-by: Haiyang Zhang Signed-off-by: Dipayaan Roy Link: https://patch.msgid.link/20260430035935.1859220-2-dipayanroy@linux.microsoft.com Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/mana_en.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index a654b3699c4c..dfb4ba9f7664 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -2520,7 +2520,9 @@ static void mana_destroy_rxq(struct mana_port_context *apc, napi_disable_locked(napi); netif_napi_del_locked(napi); } - xdp_rxq_info_unreg(&rxq->xdp_rxq); + + if (xdp_rxq_info_is_reg(&rxq->xdp_rxq)) + xdp_rxq_info_unreg(&rxq->xdp_rxq); mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); -- cgit v1.2.3 From 2a1c691182823a5c149d502ac153e249ee697b4a Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Wed, 29 Apr 2026 20:57:53 -0700 Subject: net: mana: Skip WQ object destruction for uninitialized RXQ In mana_destroy_rxq(), mana_destroy_wq_obj() is called unconditionally even when the WQ object was never created (rxobj is still INVALID_MANA_HANDLE). When mana_create_rxq() fails before mana_create_wq_obj() succeeds, the error path calls mana_destroy_rxq() which sends a bogus destroy command to the hardware: mana 7870:00:00.0: HWC: Failed hw_channel req: 0x1d mana 7870:00:00.0: Failed to send mana message: -71, 0x1d mana 7870:00:00.0 eth7: Failed to destroy WQ object: -71 Guard mana_destroy_wq_obj() with an INVALID_MANA_HANDLE check so that mana_destroy_rxq() is safe to call at any stage of RXQ initialization. Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") Reviewed-by: Haiyang Zhang Signed-off-by: Dipayaan Roy Link: https://patch.msgid.link/20260430035935.1859220-3-dipayanroy@linux.microsoft.com Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/mana_en.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index dfb4ba9f7664..f2a6ea162dc3 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -2524,7 +2524,8 @@ static void mana_destroy_rxq(struct mana_port_context *apc, if (xdp_rxq_info_is_reg(&rxq->xdp_rxq)) xdp_rxq_info_unreg(&rxq->xdp_rxq); - mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); + if (rxq->rxobj != INVALID_MANA_HANDLE) + mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); mana_deinit_cq(apc, &rxq->rx_cq); -- cgit v1.2.3 From 3985c9a56da49af8b2e45cb1fa55c03c89b1d471 Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Wed, 29 Apr 2026 20:57:54 -0700 Subject: net: mana: remove double CQ cleanup in mana_create_rxq error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In mana_create_rxq(), the error cleanup path calls mana_destroy_rxq() followed by mana_deinit_cq(). This is incorrect for two reasons: 1. mana_destroy_rxq() already calls mana_deinit_cq() internally, so the CQ's GDMA queue is destroyed twice. 2. mana_destroy_rxq() frees the rxq via kfree(rxq) before returning. The subsequent mana_deinit_cq(apc, cq) then operates on freed memory since cq points to &rxq->rx_cq, which is embedded in the already-freed rxq structure — a use-after-free. Remove the redundant mana_deinit_cq() call from the error path since mana_destroy_rxq() already handles CQ cleanup. mana_deinit_cq() is itself safe for an uninitialized CQ as it checks for a NULL gdma_cq before proceeding. Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") Reviewed-by: Haiyang Zhang Signed-off-by: Dipayaan Roy Reviewed-by: Aditya Garg Link: https://patch.msgid.link/20260430035935.1859220-4-dipayanroy@linux.microsoft.com Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/mana_en.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index f2a6ea162dc3..9afc786b297a 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -2799,9 +2799,6 @@ out: mana_destroy_rxq(apc, rxq, false); - if (cq) - mana_deinit_cq(apc, cq); - return NULL; } -- cgit v1.2.3 From c69df06e4e26e50611190ce04eab92c5cc261b61 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Mar 2026 12:28:21 +0100 Subject: perf/core: Fix deadlock in perf_mmap() failure path Ian noted that commit 77de62ad3de3 ("perf/core: Fix refcount bug and potential UAF in perf_mmap") would cause a deadlock due to event->mmap_mutex recursion. This happens because we're now calling perf_mmap_close() under mmap_mutex, while that function itself can also take mmap_mutex. Solve this by noting that perf_mmap_close() is far more complicated than we need at this particular point, since it deals with scenarios that cannot happen in this particular case. Replace the call to perf_mmap_close() with a very narrow undo for the case of first-exposure. If this is not the first mmap(), there is no race and it is fine to drop the lock and call perf_mmap_close() to handle to more complicated scenarios. Note: move the rb->mmap_user (namespace) handling into the rb init/free code such that it does not complicate the mmap handling. Fixes: 77de62ad3de3 ("perf/core: Fix refcount bug and potential UAF in perf_mmap") Reported-by: Ian Rogers Closes: https://patch.msgid.link/CAP-5%3DfVJyVMZw%3DDqP53Kxg58nUmJ_0bxoaeOKAbC03BVc11HaA%40mail.gmail.com Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260326112821.GK3738786@noisy.programming.kicks-ass.net --- kernel/events/core.c | 70 +++++++++++++++++++++++++++++++++++---------- kernel/events/internal.h | 1 + kernel/events/ring_buffer.c | 2 ++ 3 files changed, 58 insertions(+), 15 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 6d1f8bad7e1c..7935d5663944 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7006,6 +7006,7 @@ static void perf_mmap_open(struct vm_area_struct *vma) } static void perf_pmu_output_stop(struct perf_event *event); +static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb); /* * A buffer can be mmap()ed multiple times; either directly through the same @@ -7021,8 +7022,6 @@ static void perf_mmap_close(struct vm_area_struct *vma) mapped_f unmapped = get_mapped(event, event_unmapped); struct perf_buffer *rb = ring_buffer_get(event); struct user_struct *mmap_user = rb->mmap_user; - int mmap_locked = rb->mmap_locked; - unsigned long size = perf_data_size(rb); bool detach_rest = false; /* FIXIES vs perf_pmu_unregister() */ @@ -7117,11 +7116,7 @@ again: * Aside from that, this buffer is 'fully' detached and unmapped, * undo the VM accounting. */ - - atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked, - &mmap_user->locked_vm); - atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm); - free_uid(mmap_user); + perf_mmap_unaccount(vma, rb); out_put: ring_buffer_put(rb); /* could be last */ @@ -7261,6 +7256,15 @@ static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long atomic64_add(extra, &vma->vm_mm->pinned_vm); } +static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb) +{ + struct user_struct *user = rb->mmap_user; + + atomic_long_sub((perf_data_size(rb) >> PAGE_SHIFT) + 1 - rb->mmap_locked, + &user->locked_vm); + atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm); +} + static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event, unsigned long nr_pages) { @@ -7323,8 +7327,6 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event, if (!rb) return -ENOMEM; - refcount_set(&rb->mmap_count, 1); - rb->mmap_user = get_current_user(); rb->mmap_locked = extra; ring_buffer_attach(event, rb); @@ -7474,16 +7476,54 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) mapped(event, vma->vm_mm); /* - * Try to map it into the page table. On fail, invoke - * perf_mmap_close() to undo the above, as the callsite expects - * full cleanup in this case and therefore does not invoke - * vmops::close(). + * Try to map it into the page table. On fail undo the above, + * as the callsite expects full cleanup in this case and + * therefore does not invoke vmops::close(). */ ret = map_range(event->rb, vma); - if (ret) - perf_mmap_close(vma); + if (likely(!ret)) + return 0; + + /* Error path */ + + /* + * If this is the first mmap(), then event->mmap_count should + * be stable at 1. It is only modified by: + * perf_mmap_{open,close}() and perf_mmap(). + * + * The former are not possible because this mmap() hasn't been + * successful yet, and the latter is serialized by + * event->mmap_mutex which we still hold (note that mmap_lock + * is not strictly sufficient here, because the event fd can + * be passed to another process through trivial means like + * fork(), leading to concurrent mmap() from different mm). + * + * Make sure to remove event->rb before releasing + * event->mmap_mutex, such that any concurrent mmap() will not + * attempt use this failed buffer. + */ + if (refcount_read(&event->mmap_count) == 1) { + /* + * Minimal perf_mmap_close(); there can't be AUX or + * other events on account of this being the first. + */ + mapped = get_mapped(event, event_unmapped); + if (mapped) + mapped(event, vma->vm_mm); + perf_mmap_unaccount(vma, event->rb); + ring_buffer_attach(event, NULL); /* drops last rb->refcount */ + refcount_set(&event->mmap_count, 0); + return ret; + } + + /* + * Otherwise this is an already existing buffer, and there is + * no race vs first exposure, so fall-through and call + * perf_mmap_close(). + */ } + perf_mmap_close(vma); return ret; } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index d9cc57083091..c03c4f2eea57 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -67,6 +67,7 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head) struct perf_buffer *rb; rb = container_of(rcu_head, struct perf_buffer, rcu_head); + free_uid(rb->mmap_user); rb_free(rb); } diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 3e7de2661417..9fe92161715e 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -340,6 +340,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags) rb->paused = 1; mutex_init(&rb->aux_mutex); + rb->mmap_user = get_current_user(); + refcount_set(&rb->mmap_count, 1); } void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags) -- cgit v1.2.3 From 5ad732a56be46aabf158c16aa0c095291727aaef Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Thu, 30 Apr 2026 08:25:54 +0800 Subject: perf/x86/intel: Improve validation and configuration of ACR masks Currently there are several issues on the user space ACR mask validation and configuration. - The validation for user space ACR mask (attr.config2) is incomplete, e.g., the ACR mask could include the index which belongs to another ACR events group, but it's not validated. - An early return on an invalid ACR mask caused all subsequent ACR groups to be skipped. - The stale hardware ACR mask (hw.config1) is not cleared before setting new hardware ACR mask. The following changes address all of the above issues. - Figure out the event index group of an ACR group. Any bits in the user-space mask not present in the index group are now dropped. - Instead of an early return on invalid bits, drop only the invalid portions and continue iterating through all ACR events to ensure full configuration. - Explicitly clear the stale hardware ACR mask for each event prior to writing the new configuration. Besides, a non-leader event member of ACR group could be disabled in theory. This could cause bit-shifting errors in the acr_mask of remaining group members. But since ACR sampling requires all events to be active, this should not be a big concern in real use case. Add a "FIXME" comment to notice this risk. Fixes: ec980e4facef ("perf/x86/intel: Support auto counter reload") Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260430002558.712334-2-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/core.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index d9488ade0f8e..f8deb67b3c51 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3332,23 +3332,41 @@ static void intel_pmu_enable_event(struct perf_event *event) static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc) { struct perf_event *event, *leader; - int i, j, idx; + int i, j, k, bit, idx; + /* + * FIXME: ACR mask parsing relies on cpuc->event_list[] (active events only). + * Disabling an ACR event causes bit-shifting errors in the acr_mask of + * remaining group members. As ACR sampling requires all events to be active, + * this limitation is acceptable for now. Revisit if independent event toggling + * is required. + */ for (i = 0; i < cpuc->n_events; i++) { leader = cpuc->event_list[i]; if (!is_acr_event_group(leader)) continue; - /* The ACR events must be contiguous. */ + /* Find the last event of the ACR group. */ for (j = i; j < cpuc->n_events; j++) { event = cpuc->event_list[j]; if (event->group_leader != leader->group_leader) break; - for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) { - if (i + idx >= cpuc->n_events || - !is_acr_event_group(cpuc->event_list[i + idx])) - return; - __set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1); + } + + /* + * Translate the user-space ACR mask (attr.config2) into the physical + * counter bitmask (hw.config1) for each ACR event in the group. + * NOTE: ACR event contiguity is guaranteed by intel_pmu_hw_config(). + */ + for (k = i; k < j; k++) { + event = cpuc->event_list[k]; + event->hw.config1 = 0; + for_each_set_bit(bit, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) { + idx = i + bit; + /* Event index of ACR group must locate in [i, j). */ + if (idx >= j || !is_acr_event_group(cpuc->event_list[idx])) + continue; + __set_bit(cpuc->assign[idx], (unsigned long *)&event->hw.config1); } } i = j - 1; -- cgit v1.2.3 From 8ba0b706a485b1e607594cf4210786d517ad1611 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Thu, 30 Apr 2026 08:25:55 +0800 Subject: perf/x86/intel: Always reprogram ACR events to prevent stale masks Members of an ACR group are logically linked via a bitmask of their hardware counter indices. If some members of the group are assigned new hardware counters during rescheduling, even events that keep their original counter index must be updated with a new mask. Without this, an event will continue to use a stale acr_mask that references the old indices of its group peers. Ensure all ACR events are reprogrammed during the scheduling path to maintain consistency across the group. Fixes: ec980e4facef ("perf/x86/intel: Support auto counter reload") Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260430002558.712334-3-dapeng1.mi@linux.intel.com --- arch/x86/events/core.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 810ab21ffd99..4b9e105309c6 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1294,13 +1294,16 @@ int x86_perf_rdpmc_index(struct perf_event *event) return event->hw.event_base_rdpmc; } -static inline int match_prev_assignment(struct hw_perf_event *hwc, +static inline int match_prev_assignment(struct perf_event *event, struct cpu_hw_events *cpuc, int i) { + struct hw_perf_event *hwc = &event->hw; + return hwc->idx == cpuc->assign[i] && - hwc->last_cpu == smp_processor_id() && - hwc->last_tag == cpuc->tags[i]; + hwc->last_cpu == smp_processor_id() && + hwc->last_tag == cpuc->tags[i] && + !is_acr_event_group(event); } static void x86_pmu_start(struct perf_event *event, int flags); @@ -1346,7 +1349,7 @@ static void x86_pmu_enable(struct pmu *pmu) * - no other event has used the counter since */ if (hwc->idx == -1 || - match_prev_assignment(hwc, cpuc, i)) + match_prev_assignment(event, cpuc, i)) continue; /* @@ -1367,7 +1370,7 @@ static void x86_pmu_enable(struct pmu *pmu) event = cpuc->event_list[i]; hwc = &event->hw; - if (!match_prev_assignment(hwc, cpuc, i)) + if (!match_prev_assignment(event, cpuc, i)) x86_assign_hw_event(event, cpuc, i); else if (i < n_running) continue; -- cgit v1.2.3 From 1271aeccc307066315b2d3b0d5af2510e27018b5 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Thu, 30 Apr 2026 08:25:56 +0800 Subject: perf/x86/intel: Disable PMI for self-reloaded ACR events On platforms with Auto Counter Reload (ACR) support, such as NVL, a "NMI received for unknown reason 30" warning is observed when running multiple events in a group with ACR enabled: $ perf record -e '{instructions/period=20000,acr_mask=0x2/u,\ cycles/period=40000,acr_mask=0x3/u}' ./test The warning occurs because the Performance Monitoring Interrupt (PMI) is enabled for the self-reloaded event (the cycles event in this case). According to the Intel SDM, the overflow bit (IA32_PERF_GLOBAL_STATUS.PMCn_OVF) is never set for self-reloaded events. Since the bit is not set, the perf NMI handler cannot identify the source of the interrupt, leading to the "unknown reason" message. Furthermore, enabling PMI for self-reloaded events is unnecessary and can lead to extraneous records that pollute the user's requested data. Disable the interrupt bit for all events configured with ACR self-reload. Fixes: ec980e4facef ("perf/x86/intel: Support auto counter reload") Reported-by: Andi Kleen Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260430002558.712334-4-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/core.c | 17 +++++++++++++---- arch/x86/events/perf_event.h | 10 ++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f8deb67b3c51..ead6d95cec6a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3118,11 +3118,11 @@ static void intel_pmu_enable_fixed(struct perf_event *event) intel_set_masks(event, idx); /* - * Enable IRQ generation (0x8), if not PEBS, - * and enable ring-3 counting (0x2) and ring-0 counting (0x1) - * if requested: + * Enable IRQ generation (0x8), if not PEBS or self-reloaded + * ACR event, and enable ring-3 counting (0x2) and ring-0 + * counting (0x1) if requested: */ - if (!event->attr.precise_ip) + if (!event->attr.precise_ip && !is_acr_self_reload_event(event)) bits |= INTEL_FIXED_0_ENABLE_PMI; if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) bits |= INTEL_FIXED_0_USER; @@ -3306,6 +3306,15 @@ static void intel_pmu_enable_event(struct perf_event *event) intel_set_masks(event, idx); static_call_cond(intel_pmu_enable_acr_event)(event); static_call_cond(intel_pmu_enable_event_ext)(event); + /* + * For self-reloaded ACR event, don't enable PMI since + * HW won't set overflow bit in GLOBAL_STATUS. Otherwise, + * the PMI would be recognized as a suspicious NMI. + */ + if (is_acr_self_reload_event(event)) + hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; + else if (!event->attr.precise_ip) + hwc->config |= ARCH_PERFMON_EVENTSEL_INT; __x86_pmu_enable_event(hwc, enable_mask); break; case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index fad87d3c8b2c..524668dcf4cc 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -137,6 +137,16 @@ static inline bool is_acr_event_group(struct perf_event *event) return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR); } +static inline bool is_acr_self_reload_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx < 0) + return false; + + return test_bit(hwc->idx, (unsigned long *)&hwc->config1); +} + struct amd_nb { int nb_id; /* NorthBridge id */ int refcnt; /* reference count */ -- cgit v1.2.3 From aa4384bc8f4360167f3c3d5322121fe892289ea2 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Thu, 30 Apr 2026 08:25:57 +0800 Subject: perf/x86/intel: Enable auto counter reload for DMR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Panther cove µarch starts to support auto counter reload (ACR), but the static_call intel_pmu_enable_acr_event() is not updated for the Panther Cove µarch used by DMR. It leads to the auto counter reload is not really enabled on DMR. Update static_call intel_pmu_enable_acr_event() in intel_pmu_init_pnc(). Fixes: d345b6bb8860 ("perf/x86/intel: Add core PMU support for DMR") Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260430002558.712334-5-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index ead6d95cec6a..dd1e3aa75ee9 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -7531,6 +7531,7 @@ static __always_inline void intel_pmu_init_pnc(struct pmu *pmu) hybrid(pmu, event_constraints) = intel_pnc_event_constraints; hybrid(pmu, pebs_constraints) = intel_pnc_pebs_event_constraints; hybrid(pmu, extra_regs) = intel_pnc_extra_regs; + static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr); } static __always_inline void intel_pmu_init_skt(struct pmu *pmu) -- cgit v1.2.3 From 78538047717bdeabe8481ef611c9131e455e61df Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 5 May 2026 11:51:22 +0100 Subject: ASoC: wm_adsp_fw_find_test: Redirect wm_adsp_release_firmware_files() Redirect wm_adsp_release_firmware_files() to a replacement function that handles the dummy firmware created by the tests. Use the same cleanup function to cleanup in the test exit() function. Also call it on each loop in wm_adsp_fw_find_test_find_firmware_byindex() to free the created strings before reusing priv->found_fw on the next loop. wm_adsp_release_firmware_files() will pass the struct firmware* pointers to release_firmware(). But the pointers created by the tests are dummies and must not be passed to release_firmware(). The test never invokes wm_adsp_release_firmware_files() so it wasn't redirected. But the error handling in wm_adsp_request_firmware_files() calls wm_adsp_release_firmware_files(). The redirected function makes this safe. Using the same cleanup function to perform cleanup from the test exit() handler and wm_adsp_fw_find_test_find_firmware_byindex() avoids the risk of duplicate cleanup code that all needs updating if there is any change to the cleanup requirements. This problem was found by https://sashiko.dev. Fixes: bf2d44d07de7 ("ASoC: wm_adsp: Add kunit test for firmware file search") Closes: https://sashiko.dev/#/patchset/20260326100853.1582886-1-rf%40opensource.cirrus.com Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260505105123.3539778-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp_fw_find_test.c | 56 +++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/sound/soc/codecs/wm_adsp_fw_find_test.c b/sound/soc/codecs/wm_adsp_fw_find_test.c index d0c7fb30a95d..516ba08eb5ba 100644 --- a/sound/soc/codecs/wm_adsp_fw_find_test.c +++ b/sound/soc/codecs/wm_adsp_fw_find_test.c @@ -45,6 +45,34 @@ struct wm_adsp_fw_find_test_params { /* Dummy struct firmware to return from wm_adsp_request_firmware_files */ static const struct firmware wm_adsp_find_test_dummy_firmware; +static void wm_adsp_fw_find_test_release_firmware_files_stub(struct wm_adsp_fw_files *fw) +{ + /* + * fw->wmfw.firmware and fw->coeff.firmware allocated by this KUnit + * test are dummies not allocated by the real request_firmware() call + * so they must not be passed to release_firmware(). + * This function replaces wm_adsp_release_firmware_files(). + */ + + if (!fw) + return; + + kfree(fw->wmfw.filename); + kfree(fw->coeff.filename); + + fw->wmfw.firmware = NULL; + fw->coeff.firmware = NULL; + fw->wmfw.filename = NULL; + fw->coeff.filename = NULL; +} + +static void wm_adsp_free_found_fw(struct kunit *test) +{ + struct wm_adsp_fw_find_test *priv = test->priv; + + wm_adsp_fw_find_test_release_firmware_files_stub(&priv->found_fw); +} + /* Simple lookup of a filename in a list of names */ static int wm_adsp_fw_find_test_firmware_request_simple_stub(const struct firmware **firmware, const char *filename, @@ -97,9 +125,14 @@ static void wm_adsp_fw_find_test_pick_file(struct kunit *test) kunit_activate_static_stub(test, wm_adsp_firmware_request, wm_adsp_fw_find_test_firmware_request_simple_stub); + kunit_activate_static_stub(test, + wm_adsp_release_firmware_files, + wm_adsp_fw_find_test_release_firmware_files_stub); ret = wm_adsp_request_firmware_files(dsp, &priv->found_fw); kunit_deactivate_static_stub(test, wm_adsp_firmware_request); + kunit_deactivate_static_stub(test, wm_adsp_release_firmware_files); + KUNIT_EXPECT_EQ_MSG(test, ret, (params->expect_wmfw || params->expect_bin) ? 0 : -ENOENT, "%s\n", priv->searched_fw_files); @@ -173,10 +206,13 @@ static void wm_adsp_fw_find_test_search_order(struct kunit *test) kunit_activate_static_stub(test, wm_adsp_firmware_request, wm_adsp_fw_find_test_firmware_request_stub); + kunit_activate_static_stub(test, + wm_adsp_release_firmware_files, + wm_adsp_fw_find_test_release_firmware_files_stub); wm_adsp_request_firmware_files(dsp, &priv->found_fw); - kunit_deactivate_static_stub(test, wm_adsp_firmware_request); + kunit_deactivate_static_stub(test, wm_adsp_release_firmware_files); KUNIT_EXPECT_STREQ(test, priv->searched_fw_files, params->expected_searches); @@ -201,6 +237,7 @@ static void wm_adsp_fw_find_test_find_firmware_byindex(struct kunit *test) dsp->cs_dsp.name = "cs1234"; dsp->part = "dsp1"; + for (dsp->fw = 0;; dsp->fw++) { fw_name = wm_adsp_get_fwf_name_by_index(dsp->fw); if (!fw_name) @@ -209,14 +246,21 @@ static void wm_adsp_fw_find_test_find_firmware_byindex(struct kunit *test) kunit_activate_static_stub(test, wm_adsp_firmware_request, wm_adsp_fw_find_test_firmware_request_stub); + kunit_activate_static_stub(test, + wm_adsp_release_firmware_files, + wm_adsp_fw_find_test_release_firmware_files_stub); wm_adsp_request_firmware_files(dsp, &priv->found_fw); + kunit_deactivate_static_stub(test, wm_adsp_firmware_request); + kunit_deactivate_static_stub(test, wm_adsp_release_firmware_files); KUNIT_EXPECT_NOT_NULL_MSG(test, strstr(priv->searched_fw_files, fw_name), "fw#%d Did not find '%s' in '%s'\n", dsp->fw, fw_name, priv->searched_fw_files); + + wm_adsp_free_found_fw(test); } } @@ -255,15 +299,7 @@ static int wm_adsp_fw_find_test_case_init(struct kunit *test) static void wm_adsp_fw_find_test_case_exit(struct kunit *test) { - struct wm_adsp_fw_find_test *priv = test->priv; - - /* - * priv->found_wmfw_firmware and priv->found_bin_firmware are - * dummies not allocated by the real request_firmware() call they - * must not be passed to release_firmware(). - */ - kfree(priv->found_fw.wmfw.filename); - kfree(priv->found_fw.coeff.filename); + wm_adsp_free_found_fw(test); } static void wm_adsp_fw_find_test_param_desc(const struct wm_adsp_fw_find_test_params *param, -- cgit v1.2.3 From af64f790969973b325efda7264d6860167623cdd Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 5 May 2026 11:51:23 +0100 Subject: ASoC: wm_adsp_fw_find_test: Clear searched_fw_files in find-by-index test In wm_adsp_fw_find_test_find_firmware_byindex() the content of priv->searched_fw_files must be cleared before starting the next iteration. The files searched for are appended to priv->searched_fw_files, so if it is not cleared on each iteration it will still contain the searches from the previous iteration. Fixes: bf2d44d07de7 ("ASoC: wm_adsp: Add kunit test for firmware file search") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260505105123.3539778-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp_fw_find_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/wm_adsp_fw_find_test.c b/sound/soc/codecs/wm_adsp_fw_find_test.c index 516ba08eb5ba..ae686dc4fa94 100644 --- a/sound/soc/codecs/wm_adsp_fw_find_test.c +++ b/sound/soc/codecs/wm_adsp_fw_find_test.c @@ -261,6 +261,7 @@ static void wm_adsp_fw_find_test_find_firmware_byindex(struct kunit *test) dsp->fw, fw_name, priv->searched_fw_files); wm_adsp_free_found_fw(test); + memset(priv->searched_fw_files, 0, sizeof(priv->searched_fw_files)); } } -- cgit v1.2.3 From 50987d4e6c55929aa2d4d3976e74ccbae22d5017 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Fri, 27 Mar 2026 10:17:28 +0800 Subject: drm/panel: himax-hx83121a: Fix incorrect error check for devm_drm_panel_alloc() Check devm_drm_panel_alloc() return value for ERR_PTR instead of NULL. devm_drm_panel_alloc() returns an ERR_PTR on failure, never NULL. Using a NULL check skips the error path and may cause a NULL pointer dereference. Fixes: a7c61963b727 ("drm/panel: Add Himax HX83121A panel driver") Signed-off-by: Chen Ni Reviewed-by: Pengyu Luo Signed-off-by: Neil Armstrong Link: https://patch.msgid.link/20260327021728.647182-1-nichen@iscas.ac.cn --- drivers/gpu/drm/panel/panel-himax-hx83121a.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-himax-hx83121a.c b/drivers/gpu/drm/panel/panel-himax-hx83121a.c index ebe643ba4184..bed79aa06f46 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx83121a.c +++ b/drivers/gpu/drm/panel/panel-himax-hx83121a.c @@ -596,8 +596,8 @@ static int himax_probe(struct mipi_dsi_device *dsi) ctx = devm_drm_panel_alloc(dev, struct himax, panel, &himax_panel_funcs, DRM_MODE_CONNECTOR_DSI); - if (!ctx) - return -ENOMEM; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); ret = devm_regulator_bulk_get_const(&dsi->dev, ARRAY_SIZE(himax_supplies), -- cgit v1.2.3 From defab7b01e0848e004077d7d8dcc04d305ea1a27 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Apr 2026 09:10:19 +0200 Subject: drm/panel: hx83121a: select DRM_DISPLAY_DSC_HELPER Like a number of other panel drivers, this newly merged driver needs DRM_DISPLAY_DSC_HELPER to be enabled: arm-linux-gnueabi-ld: drivers/gpu/drm/panel/panel-himax-hx83121a.o: in function `himax_prepare': panel-himax-hx83121a.c:(.text+0x1024): undefined reference to `drm_dsc_pps_payload_pack' Fixes: a7c61963b727 ("drm/panel: Add Himax HX83121A panel driver") Signed-off-by: Arnd Bergmann Reviewed-by: Neil Armstrong Reviewed-by: David Heidelberg Signed-off-by: Neil Armstrong Link: https://patch.msgid.link/20260413071043.3829868-1-arnd@kernel.org --- drivers/gpu/drm/panel/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index d6863b28ddc5..d592f4f4b939 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -208,6 +208,7 @@ config DRM_PANEL_HIMAX_HX83121A depends on OF depends on DRM_MIPI_DSI depends on BACKLIGHT_CLASS_DEVICE + select DRM_DISPLAY_DSC_HELPER select DRM_KMS_HELPER help Say Y here if you want to enable support for Himax HX83121A-based -- cgit v1.2.3 From c67e8787f6743101c90c7a9c4bb7cf6f1f739f83 Mon Sep 17 00:00:00 2001 From: Christian Van Date: Sat, 25 Apr 2026 01:39:48 -0400 Subject: drm/panel: feiyang-fy07024di26a30d: return display-on error mipi_dsi_dcs_set_display_on() returns an error code, but feiyang_enable() currently ignores it and always reports success. Return the DCS command result so callers can observe enable failures. Signed-off-by: Christian Van Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patch.msgid.link/20260425053948.117714-1-cvan20191@gmail.com --- drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c index 4f8d6d8c07e4..dbdb7e3cb7b6 100644 --- a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c +++ b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c @@ -98,9 +98,7 @@ static int feiyang_enable(struct drm_panel *panel) /* T12 (video & logic signal rise + backlight rise) T12 >= 200ms */ msleep(200); - mipi_dsi_dcs_set_display_on(ctx->dsi); - - return 0; + return mipi_dsi_dcs_set_display_on(ctx->dsi); } static int feiyang_disable(struct drm_panel *panel) -- cgit v1.2.3 From 570cf799e87ae805eacfab3b4ba66676b5fccdb6 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sun, 3 May 2026 17:17:08 +0800 Subject: drm/panel: boe-tv101wum-nl6: restore MODE_LPM after sending disable cmds When preparing the panel, it seems that it always expects commands to be transferred in LP mode. However, the disable function removes the MIPI_DSI_MODE_LPM flag, and no other function re-adds it. As the unprepare function contains no DSI commands, re-adding the flag just after disabling the panel should be safe. Add the code re-adding the flag after the two commands for disabling the panel are sent. This fixes error messages shown in kernel log when unblanking on mt8183-kukui-kodama-sku32 device. Cc: stable@vger.kernel.org Fixes: a869b9db7adf ("drm/panel: support for boe tv101wum-nl6 wuxga dsi video mode panel") Signed-off-by: Icenowy Zheng Reviewed-by: Neil Armstrong Reviewed-by: Douglas Anderson Signed-off-by: Neil Armstrong Link: https://patch.msgid.link/20260503091708.1079962-1-zhengxingda@iscas.ac.cn --- drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index d5fe105bdbdd..658ce64c71eb 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -1324,6 +1324,8 @@ static int boe_panel_disable(struct drm_panel *panel) mipi_dsi_dcs_set_display_off_multi(&ctx); mipi_dsi_dcs_enter_sleep_mode_multi(&ctx); + boe->dsi->mode_flags |= MIPI_DSI_MODE_LPM; + mipi_dsi_msleep(&ctx, 150); return ctx.accum_err; -- cgit v1.2.3 From 2d4e80271f784aa0c7b17676e9762c7e8156be1c Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sun, 26 Apr 2026 00:57:51 +0800 Subject: drm/panel: himax-hx83102: restore MODE_LPM after sending disable cmds When preparing the panel, it seems that it always expects commands to be transferred in LP mode. However, the disable function removes the MIPI_DSI_MODE_LPM flag, and no other function re-adds it. As the unprepare function contains no DSI commands, re-adding the flag just after disabling the panel should be safe. Add the code re-adding the flag after the two commands for disabling the panel are sent. This fixes screen unblanking (after blanking once) on mt8188-geralt-ciri-sku1 device. Cc: stable@vger.kernel.org # 6.11+ Fixes: 0ef94554dc40 ("drm/panel: himax-hx83102: Break out as separate driver") Signed-off-by: Icenowy Zheng Reviewed-by: Neil Armstrong Reviewed-by: Douglas Anderson Signed-off-by: Neil Armstrong Link: https://patch.msgid.link/20260425165751.1716569-1-zhengxingda@iscas.ac.cn --- drivers/gpu/drm/panel/panel-himax-hx83102.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-himax-hx83102.c b/drivers/gpu/drm/panel/panel-himax-hx83102.c index 8b2a68ee851e..a5e5c9ea7a73 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx83102.c +++ b/drivers/gpu/drm/panel/panel-himax-hx83102.c @@ -937,6 +937,8 @@ static int hx83102_disable(struct drm_panel *panel) mipi_dsi_dcs_set_display_off_multi(&dsi_ctx); mipi_dsi_dcs_enter_sleep_mode_multi(&dsi_ctx); + dsi->mode_flags |= MIPI_DSI_MODE_LPM; + mipi_dsi_msleep(&dsi_ctx, 150); return dsi_ctx.accum_err; -- cgit v1.2.3 From 8cf5dd235eff6008cb04c3d8064d2acfa90616f1 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar T S M Date: Wed, 1 Apr 2026 04:18:56 -0700 Subject: EDAC/versalnet: Fix device name memory leak The device name allocated via kzalloc() in init_one_mc() is assigned to dev->init_name but never freed on the normal removal path. device_register() copies init_name and then sets dev->init_name to NULL, so the name pointer becomes unreachable from the device. Thus leaking memory. Use a stack-local char array instead of using kzalloc() for name. Fixes: d5fe2fec6c40 ("EDAC: Add a driver for the AMD Versal NET DDR controller") Signed-off-by: Prasanna Kumar T S M Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260401111856.2342975-1-ptsm@linux.microsoft.com --- drivers/edac/versalnet_edac.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c index ec1315582414..97ec05d68bbb 100644 --- a/drivers/edac/versalnet_edac.c +++ b/drivers/edac/versalnet_edac.c @@ -777,9 +777,9 @@ static int init_one_mc(struct mc_priv *priv, struct platform_device *pdev, int i u32 num_chans, rank, dwidth, config; struct edac_mc_layer layers[2]; struct mem_ctl_info *mci; + char name[MC_NAME_LEN]; struct device *dev; enum dev_type dt; - char *name; int rc; config = priv->adec[CONF + i * ADEC_NUM]; @@ -813,13 +813,9 @@ static int init_one_mc(struct mc_priv *priv, struct platform_device *pdev, int i layers[1].is_virt_csrow = false; rc = -ENOMEM; - name = kzalloc(MC_NAME_LEN, GFP_KERNEL); - if (!name) - return rc; - dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) - goto err_name_free; + return rc; mci = edac_mc_alloc(i, ARRAY_SIZE(layers), layers, sizeof(struct mc_priv)); if (!mci) { @@ -858,8 +854,6 @@ err_mc_free: edac_mc_free(mci); err_dev_free: kfree(dev); -err_name_free: - kfree(name); return rc; } -- cgit v1.2.3 From 83861c48ba122f85cc8384780764b3a791341678 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 30 Apr 2026 23:32:50 +0200 Subject: openvswitch: vport: fix race between tunnel creation and linking When a tunnel vport is created it first creates the tunnel device, e.g., with geneve_dev_create_fb(), then it calls ovs_netdev_link() to take a reference and link it to the device that represents openvswitch datapath. The creation of the device is happening under RTNL, but then RTNL is released and re-acquired to find the device by name. It is technically possible for the tunnel device to be re-named or deleted within that window while RTNL is not held, and some other device created in its place. This will cause a non-tunnel device to be referenced in the vport and tunnel-specific functions used on it, e.g. vxlan_get_options() that directly casts the private netdev data into a struct vxlan_dev causing an invalid memory access: BUG: KASAN: slab-use-after-free in vxlan_get_options+0x323/0x3a0 vxlan_get_options+0x323/0x3a0 ovs_vport_cmd_new+0x6e3/0xd30 Fix that by taking a reference to the just created device before releasing RTNL. This ensures that the device in the vport is always the one that was just created. The search by name is only needed for a standard vport-netdev that links pre-existing devices, so that functionality and device type checks are moved to netdev_create(). It is also awkward that ovs_netdev_link() takes ownership of the vport and destroys it on failure. It doesn't know the type of the port it is dealing with, so we need to pass down the indicator that it's a tunnel, so the link can be properly deleted on failure. It's possible to refactor the logic to make the ovs_netdev_link() do only the linking part and let the callers perform a proper destruction, but it will be much more code for each legacy tunnel port type, so it is not worth it for the bug fix. Fixes: 614732eaa12d ("openvswitch: Use regular VXLAN net_device device") Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Reported-by: Yang Yang Signed-off-by: Ilya Maximets Acked-by: Eelco Chaudron Link: https://patch.msgid.link/20260430213349.407991-1-i.maximets@ovn.org Signed-off-by: Paolo Abeni --- net/openvswitch/vport-geneve.c | 5 +++- net/openvswitch/vport-gre.c | 5 +++- net/openvswitch/vport-netdev.c | 58 +++++++++++++++++++++++++----------------- net/openvswitch/vport-netdev.h | 2 +- net/openvswitch/vport-vxlan.c | 5 +++- 5 files changed, 48 insertions(+), 27 deletions(-) diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index b10e1602c6b1..cb5ea4424ffc 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -97,6 +97,9 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) goto error; } + vport->dev = dev; + netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL); + rtnl_unlock(); return vport; error: @@ -111,7 +114,7 @@ static struct vport *geneve_create(const struct vport_parms *parms) if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + return ovs_netdev_link(vport, true); } static struct vport_ops ovs_geneve_vport_ops = { diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 4014c9b5eb79..6cb5a697b396 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -63,6 +63,9 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms) return ERR_PTR(err); } + vport->dev = dev; + netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL); + rtnl_unlock(); return vport; } @@ -75,7 +78,7 @@ static struct vport *gre_create(const struct vport_parms *parms) if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + return ovs_netdev_link(vport, true); } static struct vport_ops ovs_gre_vport_ops = { diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 12055af832dc..a92ca8b37f96 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -73,37 +73,21 @@ static struct net_device *get_dpdev(const struct datapath *dp) return local->dev; } -struct vport *ovs_netdev_link(struct vport *vport, const char *name) +struct vport *ovs_netdev_link(struct vport *vport, bool tunnel) { int err; - vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), name); - if (!vport->dev) { + if (WARN_ON_ONCE(!vport->dev)) { err = -ENODEV; goto error_free_vport; } - /* Ensure that the device exists and that the provided - * name is not one of its aliases. - */ - if (strcmp(name, ovs_vport_name(vport))) { - err = -ENODEV; - goto error_put; - } - netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL); - if (vport->dev->flags & IFF_LOOPBACK || - (vport->dev->type != ARPHRD_ETHER && - vport->dev->type != ARPHRD_NONE) || - ovs_is_internal_dev(vport->dev)) { - err = -EINVAL; - goto error_put; - } rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, get_dpdev(vport->dp), NULL, NULL, NULL); if (err) - goto error_unlock; + goto error_put_unlock; err = netdev_rx_handler_register(vport->dev, netdev_frame_hook, vport); @@ -119,10 +103,11 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) error_master_upper_dev_unlink: netdev_upper_dev_unlink(vport->dev, get_dpdev(vport->dp)); -error_unlock: - rtnl_unlock(); -error_put: +error_put_unlock: + if (tunnel && vport->dev->reg_state == NETREG_REGISTERED) + rtnl_delete_link(vport->dev, 0, NULL); netdev_put(vport->dev, &vport->dev_tracker); + rtnl_unlock(); error_free_vport: ovs_vport_free(vport); return ERR_PTR(err); @@ -132,12 +117,39 @@ EXPORT_SYMBOL_GPL(ovs_netdev_link); static struct vport *netdev_create(const struct vport_parms *parms) { struct vport *vport; + int err; vport = ovs_vport_alloc(0, &ovs_netdev_vport_ops, parms); if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name); + if (!vport->dev) { + err = -ENODEV; + goto error_free_vport; + } + netdev_tracker_alloc(vport->dev, &vport->dev_tracker, GFP_KERNEL); + + /* Ensure that the provided name is not an alias. */ + if (strcmp(parms->name, ovs_vport_name(vport))) { + err = -ENODEV; + goto error_put; + } + + if (vport->dev->flags & IFF_LOOPBACK || + (vport->dev->type != ARPHRD_ETHER && + vport->dev->type != ARPHRD_NONE) || + ovs_is_internal_dev(vport->dev)) { + err = -EINVAL; + goto error_put; + } + + return ovs_netdev_link(vport, false); +error_put: + netdev_put(vport->dev, &vport->dev_tracker); +error_free_vport: + ovs_vport_free(vport); + return ERR_PTR(err); } static void vport_netdev_free(struct rcu_head *rcu) diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index c5d83a43bfc4..6c0d7366f986 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -13,7 +13,7 @@ struct vport *ovs_netdev_get_vport(struct net_device *dev); -struct vport *ovs_netdev_link(struct vport *vport, const char *name); +struct vport *ovs_netdev_link(struct vport *vport, bool tunnel); void ovs_netdev_detach_dev(struct vport *); int __init ovs_netdev_init(void); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 0b881b043bcf..c1b37b50d29e 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -126,6 +126,9 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) goto error; } + vport->dev = dev; + netdev_hold(vport->dev, &vport->dev_tracker, GFP_KERNEL); + rtnl_unlock(); return vport; error: @@ -140,7 +143,7 @@ static struct vport *vxlan_create(const struct vport_parms *parms) if (IS_ERR(vport)) return vport; - return ovs_netdev_link(vport, parms->name); + return ovs_netdev_link(vport, true); } static struct vport_ops ovs_vxlan_netdev_vport_ops = { -- cgit v1.2.3 From aa69918bd418e700309fdd08509dba324fb24296 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 1 May 2026 01:38:37 +0200 Subject: openvswitch: vport: fix self-deadlock on release of tunnel ports vports are used concurrently and protected by RCU, so netdev_put() must happen after the RCU grace period. So, either in an RCU call or after the synchronize_net(). The rtnl_delete_link() must happen under RTNL and so can't be executed in RCU context. Calling synchronize_net() while holding RTNL is not a good idea for performance and system stability under load in general, so calling netdev_put() in RCU call is the right solution here. However, when the device is deleted, rtnl_unlock() will call netdev_run_todo() and block until all the references are gone. In the current code this means that we never reach the call_rcu() and the vport is never freed and the reference is never released, causing a self-deadlock on device removal. Fix that by moving the rcu_call() before the rtnl_unlock(), so the scheduled RCU callback will be executed when synchronize_net() is called from the rtnl_unlock()->netdev_run_todo() while the RTNL itself is already released. Fixes: 6931d21f87bc ("openvswitch: defer tunnel netdev_put to RCU release") Cc: stable@vger.kernel.org Acked-by: Eelco Chaudron Signed-off-by: Ilya Maximets Acked-by: Aaron Conole Link: https://patch.msgid.link/20260430233848.440994-2-i.maximets@ovn.org Signed-off-by: Paolo Abeni --- net/openvswitch/vport-netdev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index a92ca8b37f96..c42642075685 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -208,9 +208,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) */ if (vport->dev->reg_state == NETREG_REGISTERED) rtnl_delete_link(vport->dev, 0, NULL); - rtnl_unlock(); + /* We can't put the device reference yet, since it can still be in + * use, but rtnl_unlock()->netdev_run_todo() will block until all + * the references are released, so the RCU call must be before it. + */ call_rcu(&vport->rcu, vport_netdev_free); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy); -- cgit v1.2.3 From 05416ada37aa4efe93f25b0532f551d424fb7b3d Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 1 May 2026 01:38:38 +0200 Subject: selftests: openvswitch: add tests for tunnel vport refcounting There were a few issues found with the tunnel vport types around the vport destruction code. Add some basic tests, so at least we know that they can be properly added and removed without obvious issues. The test creates OVS datapath, adds a non-LWT tunnel port, makes sure they are created, and then removes the datapath and waits for all the ports to be gone. The dpctl script had a few bugs in the none-lwt tunnel creation code, so fixing them as well to make the testing possible: - The type of the --lwt option changed in order to properly disable it. - Removed byte order conversion for the port numbers, as the value supposed to be in the host order. - Added missing 'gre' choice for the tunnel type. Signed-off-by: Ilya Maximets Acked-by: Eelco Chaudron Acked-by: Aaron Conole Link: https://patch.msgid.link/20260430233848.440994-3-i.maximets@ovn.org Signed-off-by: Paolo Abeni --- .../selftests/net/openvswitch/openvswitch.sh | 37 ++++++++++++++++++++++ .../testing/selftests/net/openvswitch/ovs-dpctl.py | 19 +++++++---- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index b327d3061ed5..3cdd953f6813 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -26,6 +26,7 @@ tests=" netlink_checks ovsnl: validate netlink attrs and settings upcall_interfaces ovs: test the upcall interfaces tunnel_metadata ovs: test extraction of tunnel metadata + tunnel_refcount ovs: test tunnel vport reference cleanup drop_reason drop: test drop reasons are emitted psample psample: Sampling packets with psample" @@ -830,6 +831,42 @@ test_tunnel_metadata() { return 0 } +test_tunnel_refcount() { + sbxname="test_tunnel_refcount" + sbx_add "${sbxname}" || return 1 + + ovs_sbx "${sbxname}" ip netns add trefns || return 1 + on_exit "ovs_sbx ${sbxname} ip netns del trefns" + + for tun_type in gre vxlan geneve; do + info "testing ${tun_type} tunnel vport refcount" + + ovs_sbx "${sbxname}" ip netns exec trefns \ + python3 $ovs_base/ovs-dpctl.py \ + add-dp dp-${tun_type} || return 1 + + ovs_sbx "${sbxname}" ip netns exec trefns \ + python3 $ovs_base/ovs-dpctl.py \ + add-if --no-lwt -t ${tun_type} \ + dp-${tun_type} ovs-${tun_type}0 || return 1 + + ovs_wait ip -netns trefns link show \ + ovs-${tun_type}0 >/dev/null 2>&1 || return 1 + + info "deleting dp - may hang if reference counting is broken" + ovs_sbx "${sbxname}" ip netns exec trefns \ + python3 $ovs_base/ovs-dpctl.py \ + del-dp dp-${tun_type} & + + dev_removed() { + ! ip -netns trefns link show "$1" >/dev/null 2>&1 + } + ovs_wait dev_removed dp-${tun_type} || return 1 + ovs_wait dev_removed ovs-${tun_type}0 || return 1 + done + return 0 +} + run_test() { ( tname="$1" diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 848f61fdcee0..bbe35e2718d2 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -11,7 +11,6 @@ import logging import math import multiprocessing import re -import socket import struct import sys import time @@ -2069,7 +2068,7 @@ class OvsVport(GenericNetlinkSocket): elif vport_type == "internal": return OvsVport.OVS_VPORT_TYPE_INTERNAL elif vport_type == "gre": - return OvsVport.OVS_VPORT_TYPE_INTERNAL + return OvsVport.OVS_VPORT_TYPE_GRE elif vport_type == "vxlan": return OvsVport.OVS_VPORT_TYPE_VXLAN elif vport_type == "geneve": @@ -2121,6 +2120,7 @@ class OvsVport(GenericNetlinkSocket): ) TUNNEL_DEFAULTS = [("geneve", 6081), + ("gre", 0), ("vxlan", 4789)] for tnl in TUNNEL_DEFAULTS: @@ -2129,9 +2129,13 @@ class OvsVport(GenericNetlinkSocket): dport = tnl[1] if not lwt: + if tnl[0] == "gre": + # GRE tunnels have no options. + break + vportopt = OvsVport.ovs_vport_msg.vportopts() vportopt["attrs"].append( - ["OVS_TUNNEL_ATTR_DST_PORT", socket.htons(dport)] + ["OVS_TUNNEL_ATTR_DST_PORT", dport] ) msg["attrs"].append( ["OVS_VPORT_ATTR_OPTIONS", vportopt] @@ -2145,6 +2149,9 @@ class OvsVport(GenericNetlinkSocket): geneve_port=dport, geneve_collect_metadata=True, geneve_udp_zero_csum6_rx=1) + elif tnl[0] == "gre": + ipr.link("add", ifname=vport_ifname, kind="gretap", + gre_collect_metadata=True) elif tnl[0] == "vxlan": ipr.link("add", ifname=vport_ifname, kind=tnl[0], vxlan_learning=0, vxlan_collect_metadata=1, @@ -2563,7 +2570,7 @@ def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()): if vpo: dpo = vpo.get_attr("OVS_TUNNEL_ATTR_DST_PORT") if dpo: - opts += " tnl-dport:%s" % socket.ntohs(dpo) + opts += " tnl-dport:%s" % dpo print( " port %d: %s (%s%s)" % ( @@ -2632,7 +2639,7 @@ def main(argv): "--ptype", type=str, default="netdev", - choices=["netdev", "internal", "geneve", "vxlan"], + choices=["netdev", "internal", "gre", "geneve", "vxlan"], help="Interface type (default netdev)", ) addifcmd.add_argument( @@ -2645,7 +2652,7 @@ def main(argv): addifcmd.add_argument( "-l", "--lwt", - type=bool, + action=argparse.BooleanOptionalAction, default=True, help="Use LWT infrastructure instead of vport (default true)." ) -- cgit v1.2.3 From 44b550d88b267320459d518c0743a241ab2108fa Mon Sep 17 00:00:00 2001 From: Nan Li Date: Fri, 1 May 2026 09:08:44 +0800 Subject: net/rds: handle zerocopy send cleanup before the message is queued A zerocopy send can fail after user pages have been pinned but before the message is attached to the sending socket. The purge path currently infers zerocopy state from rm->m_rs, so an unqueued message can be cleaned up as if it owned normal payload pages. However, zerocopy ownership is really determined by the presence of op_mmp_znotifier, regardless of whether the message has reached the socket queue. Capture op_mmp_znotifier up front in rds_message_purge() and use it as the cleanup discriminator. If the message is already associated with a socket, keep the existing completion path. Otherwise, drop the pinned page accounting directly and release the notifier before putting the payload pages. This keeps early send failure cleanup consistent with the zerocopy lifetime rules without changing the normal queued completion path. Fixes: 0cebaccef3ac ("rds: zerocopy Tx support.") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Co-developed-by: Xiao Liu Signed-off-by: Xiao Liu Signed-off-by: Nan Li Signed-off-by: Ren Wei Reviewed-by: Allison Henderson Link: https://patch.msgid.link/d2ea98a6313d5467bac00f7c9fef8c7acddb9258.1777550074.git.tonanli66@gmail.com Signed-off-by: Paolo Abeni --- net/rds/message.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/rds/message.c b/net/rds/message.c index eaa6f22601a4..25fedcb3cd00 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -131,24 +131,34 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs, */ static void rds_message_purge(struct rds_message *rm) { + struct rds_znotifier *znotifier; unsigned long i, flags; - bool zcopy = false; + bool zcopy; if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) return; spin_lock_irqsave(&rm->m_rs_lock, flags); + znotifier = rm->data.op_mmp_znotifier; + rm->data.op_mmp_znotifier = NULL; + zcopy = !!znotifier; + if (rm->m_rs) { struct rds_sock *rs = rm->m_rs; - if (rm->data.op_mmp_znotifier) { - zcopy = true; - rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier); + if (znotifier) { + rds_rm_zerocopy_callback(rs, znotifier); rds_wake_sk_sleep(rs); - rm->data.op_mmp_znotifier = NULL; } sock_put(rds_rs_to_sk(rs)); rm->m_rs = NULL; + } else if (znotifier) { + /* + * Zerocopy can fail before the message is queued on the + * socket, so there is no rs to carry the notification. + */ + mm_unaccount_pinned_pages(&znotifier->z_mmp); + kfree(rds_info_from_znotifier(znotifier)); } spin_unlock_irqrestore(&rm->m_rs_lock, flags); -- cgit v1.2.3 From 95084f1883a760e0d4290698346759d58e2b944a Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Thu, 30 Apr 2026 19:47:12 -0700 Subject: net: mana: Fix crash from unvalidated SHM offset read from BAR0 during FLR During Function Level Reset recovery, the MANA driver reads hardware BAR0 registers that may temporarily contain garbage values. The SHM (Shared Memory) offset read from GDMA_REG_SHM_OFFSET is used to compute gc->shm_base, which is later dereferenced via readl() in mana_smc_poll_register(). If the hardware returns an unaligned or out-of-range value, the driver must not blindly use it, as this would propagate the hardware error into a kernel crash. The following crash was observed on an arm64 Hyper-V guest running kernel 6.17.0-3013-azure during VF reset recovery triggered by HWC timeout. [13291.785274] Unable to handle kernel paging request at virtual address ffff8000a200001b [13291.785311] Mem abort info: [13291.785332] ESR = 0x0000000096000021 [13291.785343] EC = 0x25: DABT (current EL), IL = 32 bits [13291.785355] SET = 0, FnV = 0 [13291.785363] EA = 0, S1PTW = 0 [13291.785372] FSC = 0x21: alignment fault [13291.785382] Data abort info: [13291.785391] ISV = 0, ISS = 0x00000021, ISS2 = 0x00000000 [13291.785404] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [13291.785412] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [13291.785421] swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000014df3a1000 [13291.785432] [ffff8000a200001b] pgd=1000000100438403, p4d=1000000100438403, pud=1000000100439403, pmd=0068000fc2000711 [13291.785703] Internal error: Oops: 0000000096000021 [#1] SMP [13291.830975] Modules linked in: tls qrtr mana_ib ib_uverbs ib_core xt_owner xt_tcpudp xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nft_compat nf_tables cfg80211 8021q garp mrp stp llc binfmt_misc joydev serio_raw nls_iso8859_1 hid_generic aes_ce_blk aes_ce_cipher polyval_ce ghash_ce sm4_ce_gcm sm4_ce_ccm sm4_ce sm4_ce_cipher hid_hyperv sm4 sm3_ce sha3_ce hv_netvsc hid vmgenid hyperv_keyboard hyperv_drm sch_fq_codel nvme_fabrics efi_pstore dm_multipath nfnetlink vsock_loopback vmw_vsock_virtio_transport_common hv_sock vmw_vsock_vmci_transport vmw_vmci vsock dmi_sysfs ip_tables x_tables autofs4 [13291.862630] CPU: 122 UID: 0 PID: 61796 Comm: kworker/122:2 Tainted: G W 6.17.0-3013-azure #13-Ubuntu VOLUNTARY [13291.869902] Tainted: [W]=WARN [13291.871901] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 01/08/2026 [13291.878086] Workqueue: events mana_serv_func [13291.880718] pstate: 62400005 (nZCv daif +PAN -UAO +TCO -DIT -SSBS BTYPE=--) [13291.884835] pc : mana_smc_poll_register+0x48/0xb0 [13291.887902] lr : mana_smc_setup_hwc+0x70/0x1c0 [13291.890493] sp : ffff8000ab79bbb0 [13291.892364] x29: ffff8000ab79bbb0 x28: ffff00410c8b5900 x27: ffff00410d630680 [13291.896252] x26: ffff004171f9fd80 x25: 000000016ed55000 x24: 000000017f37e000 [13291.899990] x23: 0000000000000000 x22: 000000016ed55000 x21: 0000000000000000 [13291.904497] x20: ffff8000a200001b x19: 0000000000004e20 x18: ffff8000a6183050 [13291.908308] x17: 0000000000000000 x16: 0000000000000000 x15: 000000000000000a [13291.912542] x14: 0000000000000004 x13: 0000000000000000 x12: 0000000000000000 [13291.916298] x11: 0000000000000000 x10: 0000000000000001 x9 : ffffc45006af1bd8 [13291.920945] x8 : ffff000151129000 x7 : 0000000000000000 x6 : 0000000000000000 [13291.925293] x5 : 000000015f214000 x4 : 000000017217a000 x3 : 000000016ed50000 [13291.930436] x2 : 000000016ed55000 x1 : 0000000000000000 x0 : ffff8000a1ffffff [13291.934342] Call trace: [13291.935736] mana_smc_poll_register+0x48/0xb0 (P) [13291.938611] mana_smc_setup_hwc+0x70/0x1c0 [13291.941113] mana_hwc_create_channel+0x1a0/0x3a0 [13291.944283] mana_gd_setup+0x16c/0x398 [13291.946584] mana_gd_resume+0x24/0x70 [13291.948917] mana_do_service+0x13c/0x1d0 [13291.951583] mana_serv_func+0x34/0x68 [13291.953732] process_one_work+0x168/0x3d0 [13291.956745] worker_thread+0x2ac/0x480 [13291.959104] kthread+0xf8/0x110 [13291.961026] ret_from_fork+0x10/0x20 [13291.963560] Code: d2807d00 9417c551 71000673 54000220 (b9400281) [13291.967299] ---[ end trace 0000000000000000 ]--- Disassembly of mana_smc_poll_register() around the crash site: Disassembly of section .text: 00000000000047c8 : 47c8: d503201f nop 47cc: d503201f nop 47d0: d503233f paciasp 47d4: f800865e str x30, [x18], #8 47d8: a9bd7bfd stp x29, x30, [sp, #-48]! 47dc: 910003fd mov x29, sp 47e0: a90153f3 stp x19, x20, [sp, #16] 47e4: 91007014 add x20, x0, #0x1c 47e8: 5289c413 mov w19, #0x4e20 47ec: f90013f5 str x21, [sp, #32] 47f0: 12001c35 and w21, w1, #0xff 47f4: 14000008 b 4814 47f8: 36f801e1 tbz w1, #31, 4834 47fc: 52800042 mov w2, #0x2 4800: d280fa01 mov x1, #0x7d0 4804: d2807d00 mov x0, #0x3e8 4808: 94000000 bl 0 480c: 71000673 subs w19, w19, #0x1 4810: 54000200 b.eq 4850 4814: b9400281 ldr w1, [x20] <-- **** CRASHED HERE ***** 4818: d50331bf dmb oshld 481c: 2a0103e2 mov w2, w1 ... From the crash signature x20 = ffff8000a200001b, this address ends in 0x1b which is not 4-byte aligned, so the 'ldr w1, [x20]' instruction (readl) triggers the arm64 alignment fault (FSC = 0x21). The root cause is in mana_gd_init_vf_regs(), which computes: gc->shm_base = gc->bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET); The offset is used without any validation. The same problem exists in mana_gd_init_pf_regs() for sriov_base_off and sriov_shm_off. Fix this by validating all offsets before use: - VF: check shm_off is within BAR0, properly aligned to 4 bytes (readl requirement), and leaves room for the full 256-bit (32-byte) SMC aperture. - PF: check sriov_base_off is within BAR0, aligned to 8 bytes (readq requirement), and leaves room to safely read the sriov_shm_off register at sriov_base_off + GDMA_PF_REG_SHM_OFF. Then check sriov_shm_off leaves room for the full SMC aperture. All arithmetic uses subtraction rather than addition to avoid integer overflow on garbage values. Define SMC_APERTURE_SIZE (32 bytes, derived from the 256-bit aperture width) Return -EPROTO on invalid values. The existing recovery path in mana_serv_reset() already handles -EPROTO by falling through to PCI device rescan, giving the hardware another chance to present valid register values after reset. Fixes: 9bf66036d686 ("net: mana: Handle hardware recovery events when probing the device") Signed-off-by: Dipayaan Roy Link: https://patch.msgid.link/afQUMClyjmBVfD+u@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/gdma_main.c | 40 ++++++++++++++++++++--- drivers/net/ethernet/microsoft/mana/shm_channel.c | 5 --- include/net/mana/shm_channel.h | 6 ++++ 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 098fbda0d128..d8e816882f02 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -43,8 +43,9 @@ static u64 mana_gd_r64(struct gdma_context *g, u64 offset) static int mana_gd_init_pf_regs(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); - void __iomem *sriov_base_va; + u64 remaining_barsize; u64 sriov_base_off; + u64 sriov_shm_off; gc->db_page_size = mana_gd_r32(gc, GDMA_PF_REG_DB_PAGE_SIZE) & 0xFFFF; @@ -73,10 +74,28 @@ static int mana_gd_init_pf_regs(struct pci_dev *pdev) gc->phys_db_page_base = gc->bar0_pa + gc->db_page_off; sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF); + if (sriov_base_off >= gc->bar0_size || + gc->bar0_size - sriov_base_off < + GDMA_PF_REG_SHM_OFF + sizeof(u64) || + !IS_ALIGNED(sriov_base_off, sizeof(u64))) { + dev_err(gc->dev, + "SRIOV base offset 0x%llx out of range or unaligned (BAR0 size 0x%llx)\n", + sriov_base_off, (u64)gc->bar0_size); + return -EPROTO; + } - sriov_base_va = gc->bar0_va + sriov_base_off; - gc->shm_base = sriov_base_va + - mana_gd_r64(gc, sriov_base_off + GDMA_PF_REG_SHM_OFF); + remaining_barsize = gc->bar0_size - sriov_base_off; + sriov_shm_off = mana_gd_r64(gc, sriov_base_off + GDMA_PF_REG_SHM_OFF); + if (sriov_shm_off >= remaining_barsize || + remaining_barsize - sriov_shm_off < SMC_APERTURE_SIZE || + !IS_ALIGNED(sriov_shm_off, sizeof(u32))) { + dev_err(gc->dev, + "SRIOV SHM offset 0x%llx out of range or unaligned (BAR0 size 0x%llx)\n", + sriov_shm_off, (u64)gc->bar0_size); + return -EPROTO; + } + + gc->shm_base = gc->bar0_va + sriov_base_off + sriov_shm_off; return 0; } @@ -84,6 +103,7 @@ static int mana_gd_init_pf_regs(struct pci_dev *pdev) static int mana_gd_init_vf_regs(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); + u64 shm_off; gc->db_page_size = mana_gd_r32(gc, GDMA_REG_DB_PAGE_SIZE) & 0xFFFF; @@ -111,7 +131,17 @@ static int mana_gd_init_vf_regs(struct pci_dev *pdev) gc->db_page_base = gc->bar0_va + gc->db_page_off; gc->phys_db_page_base = gc->bar0_pa + gc->db_page_off; - gc->shm_base = gc->bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET); + shm_off = mana_gd_r64(gc, GDMA_REG_SHM_OFFSET); + if (shm_off >= gc->bar0_size || + gc->bar0_size - shm_off < SMC_APERTURE_SIZE || + !IS_ALIGNED(shm_off, sizeof(u32))) { + dev_err(gc->dev, + "SHM offset 0x%llx out of range or unaligned (BAR0 size 0x%llx)\n", + shm_off, (u64)gc->bar0_size); + return -EPROTO; + } + + gc->shm_base = gc->bar0_va + shm_off; return 0; } diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c index 0f1679ebad96..d21b5db06e50 100644 --- a/drivers/net/ethernet/microsoft/mana/shm_channel.c +++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c @@ -61,11 +61,6 @@ union smc_proto_hdr { }; }; /* HW DATA */ -#define SMC_APERTURE_BITS 256 -#define SMC_BASIC_UNIT (sizeof(u32)) -#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8)) -#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1) - static int mana_smc_poll_register(void __iomem *base, bool reset) { void __iomem *ptr = base + SMC_LAST_DWORD * SMC_BASIC_UNIT; diff --git a/include/net/mana/shm_channel.h b/include/net/mana/shm_channel.h index 5199b41497ff..dbabcfb95daf 100644 --- a/include/net/mana/shm_channel.h +++ b/include/net/mana/shm_channel.h @@ -4,6 +4,12 @@ #ifndef _SHM_CHANNEL_H #define _SHM_CHANNEL_H +#define SMC_APERTURE_BITS 256 +#define SMC_BASIC_UNIT (sizeof(u32)) +#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8)) +#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1) +#define SMC_APERTURE_SIZE (SMC_APERTURE_BITS / 8) + struct shm_channel { struct device *dev; void __iomem *base; -- cgit v1.2.3 From b9eac6a9d93c952c4b7775a24d5c7a1bbf4c3c00 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Apr 2026 00:47:54 +0200 Subject: rseq: Revert to historical performance killing behaviour The recent RSEQ optimization work broke the TCMalloc abuse of the RSEQ ABI as it not longer unconditionally updates the CPU, node, mm_cid fields, which are documented as read only for user space. Due to the observed behavior of the kernel it was possible for TCMalloc to overwrite the cpu_id_start field for their own purposes and rely on the kernel to update it unconditionally after each context switch and before signal delivery. The RSEQ ABI only guarantees that these fields are updated when the data changes, i.e. the task is migrated or the MMCID of the task changes due to switching from or to per CPU ownership mode. The optimization work eliminated the unconditional updates and reduced them to the documented ABI guarantees, which results in a massive performance win for syscall, scheduling heavy work loads, which in turn breaks the TCMalloc expectations. There have been several options discussed to restore the TCMalloc functionality while preserving the optimization benefits. They all end up in a series of hard to maintain workarounds, which in the worst case introduce overhead for everyone, e.g. in the scheduler. The requirements of TCMalloc and the optimization work are diametral and the required work arounds are a maintainence burden. They end up as fragile constructs, which are blocking further optimization work and are pretty much guaranteed to cause more subtle issues down the road. The optimization work heavily depends on the generic entry code, which is not used by all architectures yet. So the rework preserved the original mechanism moslty unmodified to keep the support for architectures, which handle rseq in their own exit to user space loop. That code is currently optimized out by the compiler on architectures which use the generic entry code. This allows to revert back to the original behaviour by replacing the compile time constant conditions with a runtime condition where required, which disables the optimization and the dependend time slice extension feature until the run-time condition can be enabled in the RSEQ registration code on a per task basis again. The following changes are required to restore the original behavior, which makes TCMalloc work again: 1) Replace the compile time constant conditionals with runtime conditionals where appropriate to prevent the compiler from optimizing the legacy mode out 2) Enforce unconditional update of IDs on context switch for the non-optimized v1 mode 3) Enforce update of IDs in the pre signal delivery path for the non-optimized v1 mode 4) Enforce update of IDs in the membarrier(RSEQ) IPI for the non-optimized v1 mode 5) Make time slice and future extensions depend on optimized v2 mode This brings back the full performance problems, but preserves the v2 optimization code and for generic entry code using architectures also the TIF_RSEQ optimization which avoids a full evaluation of the exit to user mode loop in many cases. Fixes: 566d8015f7ee ("rseq: Avoid CPU/MM CID updates when no event pending") Reported-by: Mathias Stearn Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Closes: https://lore.kernel.org/CAHnCjA25b+nO2n5CeifknSKHssJpPrjnf+dtr7UgzRw4Zgu=oA@mail.gmail.com Link: https://patch.msgid.link/20260428224427.517051752%40kernel.org Cc: stable@vger.kernel.org --- include/linux/rseq.h | 35 ++++++++++++++++++++++++----------- include/linux/rseq_entry.h | 39 +++++++++++++++++++++++++++++---------- include/linux/rseq_types.h | 9 ++++++++- kernel/rseq.c | 40 +++++++++++++++++++++++++++++++++------- kernel/sched/membarrier.c | 11 ++++++++++- 5 files changed, 104 insertions(+), 30 deletions(-) diff --git a/include/linux/rseq.h b/include/linux/rseq.h index f446909551df..7ef79b25e714 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -9,6 +9,11 @@ void __rseq_handle_slowpath(struct pt_regs *regs); +static __always_inline bool rseq_v2(struct task_struct *t) +{ + return IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY) && likely(t->rseq.event.has_rseq > 1); +} + /* Invoked from resume_user_mode_work() */ static inline void rseq_handle_slowpath(struct pt_regs *regs) { @@ -16,8 +21,7 @@ static inline void rseq_handle_slowpath(struct pt_regs *regs) if (current->rseq.event.slowpath) __rseq_handle_slowpath(regs); } else { - /* '&' is intentional to spare one conditional branch */ - if (current->rseq.event.sched_switch & current->rseq.event.has_rseq) + if (current->rseq.event.sched_switch && current->rseq.event.has_rseq) __rseq_handle_slowpath(regs); } } @@ -30,9 +34,9 @@ void __rseq_signal_deliver(int sig, struct pt_regs *regs); */ static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { - if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) { - /* '&' is intentional to spare one conditional branch */ - if (current->rseq.event.has_rseq & current->rseq.event.user_irq) + if (rseq_v2(current)) { + /* has_rseq is implied in rseq_v2() */ + if (current->rseq.event.user_irq) __rseq_signal_deliver(ksig->sig, regs); } else { if (current->rseq.event.has_rseq) @@ -50,15 +54,22 @@ static __always_inline void rseq_sched_switch_event(struct task_struct *t) { struct rseq_event *ev = &t->rseq.event; - if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) { + /* + * Only apply the user_irq optimization for RSEQ ABI V2 registrations. + * Legacy users like TCMalloc rely on the original ABI V1 behaviour + * which updates IDs on every context swtich. + */ + if (rseq_v2(t)) { /* - * Avoid a boat load of conditionals by using simple logic - * to determine whether NOTIFY_RESUME needs to be raised. + * Avoid a boat load of conditionals by using simple logic to + * determine whether TIF_NOTIFY_RESUME or TIF_RSEQ needs to be + * raised. * - * It's required when the CPU or MM CID has changed or - * the entry was from user space. + * It's required when the CPU or MM CID has changed or the entry + * was via interrupt from user space. ev->has_rseq does not have + * to be evaluated here because rseq_v2() implies has_rseq. */ - bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq; + bool raise = ev->user_irq | ev->ids_changed; if (raise) { ev->sched_switch = true; @@ -66,6 +77,7 @@ static __always_inline void rseq_sched_switch_event(struct task_struct *t) } } else { if (ev->has_rseq) { + t->rseq.event.ids_changed = true; t->rseq.event.sched_switch = true; rseq_raise_notify_resume(t); } @@ -161,6 +173,7 @@ static inline unsigned int rseq_alloc_align(void) } #else /* CONFIG_RSEQ */ +static inline bool rseq_v2(struct task_struct *t) { return false; } static inline void rseq_handle_slowpath(struct pt_regs *regs) { } static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { } static inline void rseq_sched_switch_event(struct task_struct *t) { } diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h index f11ebd34f8b9..934db41ec782 100644 --- a/include/linux/rseq_entry.h +++ b/include/linux/rseq_entry.h @@ -111,6 +111,20 @@ static __always_inline void rseq_slice_clear_grant(struct task_struct *t) t->rseq.slice.state.granted = false; } +/* + * Open coded, so it can be invoked within a user access region. + * + * This clears the user space state of the time slice extensions field only when + * the task has registered the optimized RSEQ_ABI V2. Some legacy registrations, + * e.g. TCMalloc, have conflicting non-ABI fields in struct RSEQ, which would be + * overwritten by an unconditional write. + */ +#define rseq_slice_clear_user(rseq, efault) \ +do { \ + if (rseq_slice_extension_enabled()) \ + unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); \ +} while (0) + static __always_inline bool __rseq_grant_slice_extension(bool work_pending) { struct task_struct *curr = current; @@ -230,6 +244,7 @@ static __always_inline bool rseq_slice_extension_enabled(void) { return false; } static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; } static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { } static __always_inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) { return false; } +#define rseq_slice_clear_user(rseq, efault) do { } while (0) #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */ bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr); @@ -517,11 +532,9 @@ bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, if (csaddr) unsafe_get_user(*csaddr, &rseq->rseq_cs, efault); - /* Open coded, so it's in the same user access region */ - if (rseq_slice_extension_enabled()) { - /* Unconditionally clear it, no point in conditionals */ - unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); - } + /* RSEQ ABI V2 only operations */ + if (rseq_v2(t)) + rseq_slice_clear_user(rseq, efault); } rseq_slice_clear_grant(t); @@ -612,6 +625,14 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t * interrupts disabled */ guard(pagefault)(); + /* + * This optimization is only valid when the task registered for the + * optimized RSEQ_ABI_V2 variant. Some legacy users rely on the original + * RSEQ implementation behaviour which unconditionally updated the IDs. + * rseq_sched_switch_event() ensures that legacy registrations always + * have both sched_switch and ids_changed set, which is compatible with + * the historical TIF_NOTIFY_RESUME behaviour. + */ if (likely(!t->rseq.event.ids_changed)) { struct rseq __user *rseq = t->rseq.usrptr; /* @@ -623,11 +644,9 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t scoped_user_rw_access(rseq, efault) { unsafe_get_user(csaddr, &rseq->rseq_cs, efault); - /* Open coded, so it's in the same user access region */ - if (rseq_slice_extension_enabled()) { - /* Unconditionally clear it, no point in conditionals */ - unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); - } + /* RSEQ ABI V2 only operations */ + if (rseq_v2(t)) + rseq_slice_clear_user(rseq, efault); } rseq_slice_clear_grant(t); diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h index 0b42045988db..a469c1870849 100644 --- a/include/linux/rseq_types.h +++ b/include/linux/rseq_types.h @@ -9,6 +9,12 @@ #ifdef CONFIG_RSEQ struct rseq; +/* + * rseq_event::has_rseq contains the ABI version number so preserving it + * in AND operations requires a mask. + */ +#define RSEQ_HAS_RSEQ_VERSION_MASK 0xff + /** * struct rseq_event - Storage for rseq related event management * @all: Compound to initialize and clear the data efficiently @@ -17,7 +23,8 @@ struct rseq; * exit to user * @ids_changed: Indicator that IDs need to be updated * @user_irq: True on interrupt entry from user mode - * @has_rseq: True if the task has a rseq pointer installed + * @has_rseq: Greater than 0 if the task has a rseq pointer installed. + * Contains the RSEQ version number * @error: Compound error code for the slow path to analyze * @fatal: User space data corrupted or invalid * @slowpath: Indicator that slow path processing via TIF_NOTIFY_RESUME diff --git a/kernel/rseq.c b/kernel/rseq.c index 586f58f652c6..aa25753ea135 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -253,11 +253,14 @@ efault: static void rseq_slowpath_update_usr(struct pt_regs *regs) { /* - * Preserve rseq state and user_irq state. The generic entry code - * clears user_irq on the way out, the non-generic entry - * architectures are not having user_irq. + * Preserve has_rseq and user_irq state. The generic entry code clears + * user_irq on the way out, the non-generic entry architectures are not + * setting user_irq. */ - const struct rseq_event evt_mask = { .has_rseq = true, .user_irq = true, }; + const struct rseq_event evt_mask = { + .has_rseq = RSEQ_HAS_RSEQ_VERSION_MASK, + .user_irq = true, + }; struct task_struct *t = current; struct rseq_ids ids; u32 node_id; @@ -330,8 +333,9 @@ void __rseq_handle_slowpath(struct pt_regs *regs) void __rseq_signal_deliver(int sig, struct pt_regs *regs) { rseq_stat_inc(rseq_stats.signal); + /* - * Don't update IDs, they are handled on exit to user if + * Don't update IDs yet, they are handled on exit to user if * necessary. The important thing is to abort a critical section of * the interrupted context as after this point the instruction * pointer in @regs points to the signal handler. @@ -344,6 +348,13 @@ void __rseq_signal_deliver(int sig, struct pt_regs *regs) current->rseq.event.error = 0; force_sigsegv(sig); } + + /* + * In legacy mode, force the update of IDs before returning to user + * space to stay compatible. + */ + if (!rseq_v2(current)) + rseq_force_update(); } /* @@ -408,6 +419,7 @@ efault: SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig) { u32 rseqfl = 0; + u8 version = 1; if (flags & RSEQ_FLAG_UNREGISTER) { if (flags & ~RSEQ_FLAG_UNREGISTER) @@ -461,7 +473,11 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 if (!access_ok(rseq, rseq_len)) return -EFAULT; - if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION)) { + /* + * The version check effectivly disables time slice extensions until the + * RSEQ ABI V2 registration are implemented. + */ + if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION) && version > 1) { if (rseq_slice_extension_enabled()) { rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE; if (flags & RSEQ_FLAG_SLICE_EXT_DEFAULT_ON) @@ -484,7 +500,15 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 unsafe_put_user(RSEQ_CPU_ID_UNINITIALIZED, &rseq->cpu_id, efault); unsafe_put_user(0U, &rseq->node_id, efault); unsafe_put_user(0U, &rseq->mm_cid, efault); - unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); + + /* + * All fields past mm_cid are only valid for non-legacy v2 + * registrations. + */ + if (version > 1) { + if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION)) + unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); + } } /* @@ -712,6 +736,8 @@ int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3) return -ENOTSUPP; if (!current->rseq.usrptr) return -ENXIO; + if (!rseq_v2(current)) + return -ENOTSUPP; /* No change? */ if (enable == !!current->rseq.slice.state.enabled) diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 623445603725..226a6329f3e9 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -199,7 +199,16 @@ static void ipi_rseq(void *info) * is negligible. */ smp_mb(); - rseq_sched_switch_event(current); + /* + * Legacy mode requires that IDs are written and the critical section is + * evaluated. V2 optimized mode handles the critical section and IDs are + * only updated if they change as a consequence of preemption after + * return from this IPI. + */ + if (rseq_v2(current)) + rseq_sched_switch_event(current); + else + rseq_force_update(); } static void ipi_sync_rq_state(void *info) -- cgit v1.2.3 From 02b44d943b3adddc3a15c1da97045e205b7d14c1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Apr 2026 15:46:06 +0200 Subject: selftests/rseq: Skip tests if time slice extensions are not available Don't fail, skip the test if the extensions are not enabled at compile or runtime. Fixes: 830969e7821a ("selftests/rseq: Implement time slice extension test") Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Link: https://patch.msgid.link/20260428224427.597838491%40kernel.org Cc: stable@vger.kernel.org --- tools/testing/selftests/rseq/slice_test.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/rseq/slice_test.c b/tools/testing/selftests/rseq/slice_test.c index 357122dcb487..77e668ff74d7 100644 --- a/tools/testing/selftests/rseq/slice_test.c +++ b/tools/testing/selftests/rseq/slice_test.c @@ -124,6 +124,13 @@ FIXTURE_SETUP(slice_ext) { cpu_set_t affinity; + if (rseq_register_current_thread()) + SKIP(return, "RSEQ not supported\n"); + + if (prctl(PR_RSEQ_SLICE_EXTENSION, PR_RSEQ_SLICE_EXTENSION_SET, + PR_RSEQ_SLICE_EXT_ENABLE, 0, 0)) + SKIP(return, "Time slice extension not supported\n"); + ASSERT_EQ(sched_getaffinity(0, sizeof(affinity), &affinity), 0); /* Pin it on a single CPU. Avoid CPU 0 */ @@ -137,11 +144,6 @@ FIXTURE_SETUP(slice_ext) break; } - ASSERT_EQ(rseq_register_current_thread(), 0); - - ASSERT_EQ(prctl(PR_RSEQ_SLICE_EXTENSION, PR_RSEQ_SLICE_EXTENSION_SET, - PR_RSEQ_SLICE_EXT_ENABLE, 0, 0), 0); - self->noise_params.noise_nsecs = variant->noise_nsecs; self->noise_params.sleep_nsecs = variant->sleep_nsecs; self->noise_params.run = 1; -- cgit v1.2.3 From d97cb2ef0b221b068e90b6058aa97faa0626bdab Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 26 Apr 2026 18:13:54 +0200 Subject: selftests/rseq: Make registration flexible for legacy and optimized mode rseq_register_current_thread() either uses the glibc registered RSEQ region or registers it's own region with the legacy size of 32 bytes. That worked so far, but becomes a problem when the kernel implements a distinction between legacy and performance optimized behavior based on the registration size as that does not allow to test both modes with the self test suite. Add two arguments to the function. One to enforce that the registration is not using libc provided mode and one to tell the registration to use the legacy size and not the kernel advertised size. Rename it and make the original one a inline wrapper which preserves the existing behavior. Fixes: 566d8015f7ee ("rseq: Avoid CPU/MM CID updates when no event pending") Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Link: https://patch.msgid.link/20260428224427.677889423%40kernel.org Cc: stable@vger.kernel.org --- tools/testing/selftests/rseq/rseq-abi.h | 7 +++++- tools/testing/selftests/rseq/rseq.c | 39 +++++++++++++++------------------ tools/testing/selftests/rseq/rseq.h | 8 ++++++- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h index ecef315204b2..5f4ea2152c2f 100644 --- a/tools/testing/selftests/rseq/rseq-abi.h +++ b/tools/testing/selftests/rseq/rseq-abi.h @@ -191,10 +191,15 @@ struct rseq_abi { */ struct rseq_abi_slice_ctrl slice_ctrl; + /* + * Place holder to push the size above 32 bytes. + */ + __u8 __reserved; + /* * Flexible array member at end of structure, after last feature field. */ char end[]; -} __attribute__((aligned(4 * sizeof(__u64)))); +} __attribute__((aligned(256))); #endif /* _RSEQ_ABI_H */ diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index a736727b83c1..be0d0a97031e 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -56,6 +56,7 @@ ptrdiff_t rseq_offset; * unsuccessful. */ unsigned int rseq_size = -1U; +static unsigned int rseq_alloc_size; /* Flags used during rseq registration. */ unsigned int rseq_flags; @@ -115,29 +116,17 @@ bool rseq_available(void) } } -/* The rseq areas need to be at least 32 bytes. */ -static -unsigned int get_rseq_min_alloc_size(void) -{ - unsigned int alloc_size = rseq_size; - - if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) - alloc_size = ORIG_RSEQ_ALLOC_SIZE; - return alloc_size; -} - /* * Return the feature size supported by the kernel. * * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): * - * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) + * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). * * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. */ -static -unsigned int get_rseq_kernel_feature_size(void) +static unsigned int get_rseq_kernel_feature_size(void) { unsigned long auxv_rseq_feature_size, auxv_rseq_align; @@ -152,15 +141,24 @@ unsigned int get_rseq_kernel_feature_size(void) return ORIG_RSEQ_FEATURE_SIZE; } -int rseq_register_current_thread(void) +int __rseq_register_current_thread(bool nolibc, bool legacy) { + unsigned int size; int rc; if (!rseq_ownership) { /* Treat libc's ownership as a successful registration. */ - return 0; + return nolibc ? -EBUSY : 0; } - rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); + + /* The minimal allocation size is 32, which is the legacy allocation size */ + size = get_rseq_kernel_feature_size(); + if (legacy || size < ORIG_RSEQ_ALLOC_SIZE) + rseq_alloc_size = ORIG_RSEQ_ALLOC_SIZE; + else + rseq_alloc_size = size; + + rc = sys_rseq(&__rseq.abi, rseq_alloc_size, 0, RSEQ_SIG); if (rc) { /* * After at least one thread has registered successfully @@ -179,9 +177,8 @@ int rseq_register_current_thread(void) * The first thread to register sets the rseq_size to mimic the libc * behavior. */ - if (RSEQ_READ_ONCE(rseq_size) == 0) { - RSEQ_WRITE_ONCE(rseq_size, get_rseq_kernel_feature_size()); - } + if (RSEQ_READ_ONCE(rseq_size) == 0) + RSEQ_WRITE_ONCE(rseq_size, size); return 0; } @@ -194,7 +191,7 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. */ return 0; } - rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq.abi, rseq_alloc_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0; diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index f51a5fdb0444..c62ebb9290c0 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -8,6 +8,7 @@ #ifndef RSEQ_H #define RSEQ_H +#include #include #include #include @@ -142,7 +143,12 @@ static inline struct rseq_abi *rseq_get_abi(void) * succeed. A restartable sequence executed from a non-registered * thread will always fail. */ -int rseq_register_current_thread(void); +int __rseq_register_current_thread(bool nolibc, bool legacy); + +static inline int rseq_register_current_thread(void) +{ + return __rseq_register_current_thread(false, false); +} /* * Unregister rseq for current thread. -- cgit v1.2.3 From 9b4e3495d1bd2469bf94b74930c153c2d534ddb7 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 20 Apr 2026 11:55:57 -0400 Subject: drm/amdkfd: Make all TLB-flushes heavy-weight With only one sequence number we cannot track the need for legacy vs heavy-weight flushes reliably. Always use heavy-weight. Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Signed-off-by: Alex Deucher (cherry picked from commit c1a3ff1d327820cd9a52bc1056b98681fc088949) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f829d65a79b4..f95bf6d95534 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1360,7 +1360,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); if (WARN_ON_ONCE(!peer_pdd)) continue; - kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(peer_pdd); } kfree(devices_arr); @@ -1455,7 +1455,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, if (WARN_ON_ONCE(!peer_pdd)) continue; if (flush_tlb) - kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); + kfd_flush_tlb(peer_pdd); /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */ err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ab3b2e7be9bd..9185ebe4c079 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -572,7 +572,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, qpd->vmid, qpd->page_table_base); /* invalidate the VM context after pasid and vmid mapping is set up */ - kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); + kfd_flush_tlb(qpd_to_pdd(qpd)); if (dqm->dev->kfd2kgd->set_scratch_backing_va) dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, @@ -610,7 +610,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, if (flush_texture_cache_nocpsch(q->device, qpd)) dev_err(dev, "Failed to flush TC\n"); - kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); + kfd_flush_tlb(qpd_to_pdd(qpd)); /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); @@ -1284,7 +1284,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, dqm->dev->adev, qpd->vmid, qpd->page_table_base); - kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(pdd); } /* Take a safe reference to the mm_struct, which may otherwise diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 163d665a6074..7b5b12206919 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1554,13 +1554,13 @@ void kfd_signal_reset_event(struct kfd_node *dev); void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid); void kfd_signal_process_terminate_event(struct kfd_process *p); -static inline void kfd_flush_tlb(struct kfd_process_device *pdd, - enum TLB_FLUSH_TYPE type) +static inline void kfd_flush_tlb(struct kfd_process_device *pdd) { struct amdgpu_device *adev = pdd->dev->adev; struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); - amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask); + amdgpu_vm_flush_compute_tlb(adev, vm, TLB_FLUSH_HEAVYWEIGHT, + pdd->dev->xcc_mask); } static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 38085a0a0f58..35ec67d9739b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1424,7 +1424,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, if (r) break; } - kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT); + kfd_flush_tlb(pdd); } return r; @@ -1571,7 +1571,7 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, } } - kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(pdd); } return r; -- cgit v1.2.3 From 7bbfb2559bcec39d1a4e1182d931a2046112c352 Mon Sep 17 00:00:00 2001 From: "John B. Moore" Date: Tue, 28 Apr 2026 11:35:12 -0500 Subject: drm/amdgpu/gfx9: drop unnecessary 64-bit fence flag check in KIQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT) assertion from gfx_v9_0_ring_emit_fence_kiq(). The KIQ hardware supports 64-bit fence writes; the 32-bit writeback address constraint is an upper-layer convention, not a hardware limitation. The check serves no purpose and should not be present. Found by code inspection while investigating related BUG_ON assertions in the GFX and compute ring emission paths. Reviewed-by: Christian König Signed-off-by: John B. Moore Signed-off-by: Alex Deucher (cherry picked from commit 1b1101a46a426bb4328116bb5273c326a2780389) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 95be105671ec..86c7c2a429b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5660,9 +5660,6 @@ static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, { struct amdgpu_device *adev = ring->adev; - /* we only allocate 32bit for each seq wb address */ - BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - /* write fence seq to the "addr" */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | -- cgit v1.2.3 From 2a561b361b7681509710f3cfc3d95d54c87ac69f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 27 Apr 2026 11:38:58 -0400 Subject: drm/amdgpu/pm: add missing revision check for CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ci_populate_all_memory_levels() workaround only applies to revision 0 SKUs. Link: https://gitlab.freedesktop.org/drm/amd/-/work_items/1816 Fixes: 9f4b35411cfe ("drm/amd/powerplay: add CI asics support to smumgr (v3)") Reviewed-by: Timur Kristóf Reviewed-by: Kent Russell Signed-off-by: Alex Deucher (cherry picked from commit 1db15ba8f72f400bbad8ae0ce24fafc43429d4bd) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c index 731355bdb9bc..0a3a0722b5c9 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c @@ -1333,8 +1333,9 @@ static int ci_populate_all_memory_levels(struct pp_hwmgr *hwmgr) dev_id = adev->pdev->device; - if ((dpm_table->mclk_table.count >= 2) - && ((dev_id == 0x67B0) || (dev_id == 0x67B1))) { + if ((dpm_table->mclk_table.count >= 2) && + ((dev_id == 0x67B0) || (dev_id == 0x67B1)) && + (adev->pdev->revision == 0)) { smu_data->smc_state_table.MemoryLevel[1].MinVddci = smu_data->smc_state_table.MemoryLevel[0].MinVddci; smu_data->smc_state_table.MemoryLevel[1].MinMvdd = -- cgit v1.2.3 From 1987c79b4fe5789dfa14423e78b5c25f6acf3e9d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 28 Apr 2026 10:42:49 -0400 Subject: drm/amdgpu/pm: align Hawaii mclk workaround with radeon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align the hawaii mclk workaround with radeon and windows. Link: https://gitlab.freedesktop.org/drm/amd/-/work_items/1816 Fixes: 9f4b35411cfe ("drm/amd/powerplay: add CI asics support to smumgr (v3)") Reviewed-by: Timur Kristóf Reviewed-by: Kent Russell Signed-off-by: Alex Deucher (cherry picked from commit 9649528b637f668c5af9f2b83ca4ad8576ae2121) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c index 0a3a0722b5c9..3650e7beeb67 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c @@ -1336,10 +1336,10 @@ static int ci_populate_all_memory_levels(struct pp_hwmgr *hwmgr) if ((dpm_table->mclk_table.count >= 2) && ((dev_id == 0x67B0) || (dev_id == 0x67B1)) && (adev->pdev->revision == 0)) { - smu_data->smc_state_table.MemoryLevel[1].MinVddci = - smu_data->smc_state_table.MemoryLevel[0].MinVddci; - smu_data->smc_state_table.MemoryLevel[1].MinMvdd = - smu_data->smc_state_table.MemoryLevel[0].MinMvdd; + smu_data->smc_state_table.MemoryLevel[1].MinVddc = + smu_data->smc_state_table.MemoryLevel[0].MinVddc; + smu_data->smc_state_table.MemoryLevel[1].MinVddcPhases = + smu_data->smc_state_table.MemoryLevel[0].MinVddcPhases; } smu_data->smc_state_table.MemoryLevel[0].ActivityLevel = 0x1F; CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.MemoryLevel[0].ActivityLevel); -- cgit v1.2.3 From 17223816498f7b117d138d18eb0eba63604dc74e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 27 Apr 2026 11:40:25 -0400 Subject: drm/radeon: add missing revision check for CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory level workarounds only apply to revision 0 SKUs. Link: https://gitlab.freedesktop.org/drm/amd/-/work_items/1816 Fixes: 127e056e2a82 ("drm/radeon: fix mclk vddc configuration for cards for hawaii") Fixes: 21b8a369046f ("drm/radeon: fix dram timing for certain hawaii boards") Fixes: 90b2fee35cb9 ("drm/radeon: fix dpm mc init for certain hawaii boards") Reviewed-by: Timur Kristóf Reviewed-by: Kent Russell Signed-off-by: Alex Deucher (cherry picked from commit 4d8dcc14311515077062b5740f39f427075de5c9) Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/ci_dpm.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 22321eb95b7d..703848fac189 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -2461,7 +2461,8 @@ static void ci_register_patching_mc_arb(struct radeon_device *rdev, if (patch && ((rdev->pdev->device == 0x67B0) || - (rdev->pdev->device == 0x67B1))) { + (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { if ((memory_clock > 100000) && (memory_clock <= 125000)) { tmp2 = (((0x31 * engine_clock) / 125000) - 1) & 0xff; *dram_timimg2 &= ~0x00ff0000; @@ -3304,7 +3305,8 @@ static int ci_populate_all_memory_levels(struct radeon_device *rdev) pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1; if ((dpm_table->mclk_table.count >= 2) && - ((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1))) { + ((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { pi->smc_state_table.MemoryLevel[1].MinVddc = pi->smc_state_table.MemoryLevel[0].MinVddc; pi->smc_state_table.MemoryLevel[1].MinVddcPhases = @@ -4493,7 +4495,8 @@ static int ci_register_patching_mc_seq(struct radeon_device *rdev, if (patch && ((rdev->pdev->device == 0x67B0) || - (rdev->pdev->device == 0x67B1))) { + (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { for (i = 0; i < table->last; i++) { if (table->last >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) return -EINVAL; -- cgit v1.2.3 From 78d2e624fa073c14970aa097adcf3ea31c157a66 Mon Sep 17 00:00:00 2001 From: "John B. Moore" Date: Mon, 27 Apr 2026 16:06:28 -0500 Subject: drm/amdgpu/sdma4: replace BUG_ON with WARN_ON in fence emission MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sdma_v4_0_ring_emit_fence() contains two BUG_ON(addr & 0x3) assertions that verify fence writeback addresses are dword-aligned. These assertions can be reached from unprivileged userspace via crafted DRM_IOCTL_AMDGPU_CS submissions, causing a fatal kernel panic in a scheduler worker thread. Replace both BUG_ON() calls with WARN_ON() to log the condition without crashing the kernel. A misaligned fence address at this point indicates a driver bug, but crashing the kernel is never the correct response when the assertion is reachable from userspace. The CS IOCTL path is the correct place to filter invalid submissions; the ring emission callback is too late to do anything about it. Fixes: 2130f89ced2c ("drm/amdgpu: add SDMA v4.0 implementation (v2)") Reviewed-by: Christian König Signed-off-by: John B. Moore Signed-off-by: Alex Deucher (cherry picked from commit b90250bd933afd1ba94d86d6b13821997b22b18e) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 44f0f23e1148..e64f2f6df9a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -889,7 +889,7 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se /* write the fence */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -899,7 +899,7 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se addr += 4; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(seq)); -- cgit v1.2.3 From e6c2e6c2e1fa066968a16aca1cb66cd1bdde7741 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 27 Apr 2026 09:30:23 -0400 Subject: drm/amdgpu: zero-initialize GART table on allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GART TLB is flushed after unmapping but not after mapping. Since amdgpu_bo_create_kernel() does not zero-initialize the buffer, when a single PTE is written the TLB may speculatively load other uninitialized entries from the same cacheline. Those garbage entries can appear valid, and a subsequent write to another PTE in the same cacheline may cause the GPU to use a stale garbage PTE from the TLB. Fix this by calling memset_io() to zero-initialize the GART table with gart_pte_flags immediately after allocation. Using AMDGPU_GEM_CREATE_VRAM_CLEARED, SDMA-based clear will not work since SDMA needs GART to be initialized to work. Suggested-by: Felix Kuehling Signed-off-by: Philip Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit d9af8263b82b6eaa60c5718e0c6631c5037e4b24) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index bc772ca3dab7..b6f849d51c2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -262,12 +262,19 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) */ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) { + int r; + if (adev->gart.bo != NULL) return 0; - return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo, - NULL, (void *)&adev->gart.ptr); + r = amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo, + NULL, (void *)&adev->gart.ptr); + if (r) + return r; + + memset_io(adev->gart.ptr, adev->gart.gart_pte_flags, adev->gart.table_size); + return 0; } /** -- cgit v1.2.3 From 81665e35f143d93adef654f3be1360def9196e72 Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Fri, 24 Apr 2026 13:47:01 -0500 Subject: drm/amdkfd: Check if there are kfd porcesses using adev by kfd_processes_count During gpu hot-unplug need check if there are kfd porcesses still using the being removed gpu before clean resources of the device. Current driver checks if kfd_processes_table is empty. kfd processes are not terminated after removed from kfd_processes_table immediately. They are still alive and may access the device until kfd_process_wq work queue got ran. Check kfd->kfd_processes_count value that is updated after kfd process got uninitialized when its ref becomes zero. Fixes: 6cca686dfce7 ("drm/amdkfd: kfd driver supports hot unplug/replug amdgpu devices") Signed-off-by: Xiaogang Chen Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit d12d05c4bc4c15585130af43e897923ff292df7b) --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 8ff97bf7d95a..b7f8f7ff8198 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1737,37 +1737,6 @@ bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entr return false; } -/* check if there is kfd process still uses adev */ -static bool kgd2kfd_check_device_idle(struct amdgpu_device *adev) -{ - struct kfd_process *p; - struct hlist_node *p_temp; - unsigned int temp; - struct kfd_node *dev; - - mutex_lock(&kfd_processes_mutex); - - if (hash_empty(kfd_processes_table)) { - mutex_unlock(&kfd_processes_mutex); - return true; - } - - /* check if there is device still use adev */ - hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) { - for (int i = 0; i < p->n_pdds; i++) { - dev = p->pdds[i]->dev; - if (dev->adev == adev) { - mutex_unlock(&kfd_processes_mutex); - return false; - } - } - } - - mutex_unlock(&kfd_processes_mutex); - - return true; -} - /** kgd2kfd_teardown_processes - gracefully tear down existing * kfd processes that use adev * @@ -1800,7 +1769,7 @@ void kgd2kfd_teardown_processes(struct amdgpu_device *adev) mutex_unlock(&kfd_processes_mutex); /* wait all kfd processes use adev terminate */ - while (!kgd2kfd_check_device_idle(adev)) + while (!!atomic_read(&adev->kfd.dev->kfd_processes_count)) cond_resched(); } -- cgit v1.2.3 From 6da7b1242da4455b11c24ce667d1cab1a348c8ea Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 4 May 2026 18:21:17 +0530 Subject: drm/amdgpu/userq: fix access to stale wptr mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use drm_exec to take both locks i.e vm root bo and wptr_obj bo to access the mapping data properly. This fixes the security issue of unmap the wptr_obj while a queue creation is in progress and passing other bo at same address. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 1fc6c8ab45dbee096469c08c13f6099d57a52d6c) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 95 ++++++++++++------------------ 1 file changed, 37 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 2fc39a6938f6..5b4121ddc78c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -30,34 +30,6 @@ #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE -static int -mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo) -{ - int ret; - - ret = amdgpu_bo_reserve(bo, true); - if (ret) { - DRM_ERROR("Failed to reserve bo. ret %d\n", ret); - goto err_reserve_bo_failed; - } - - ret = amdgpu_ttm_alloc_gart(&bo->tbo); - if (ret) { - DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); - goto err_map_bo_gart_failed; - } - - amdgpu_bo_unreserve(bo); - bo = amdgpu_bo_ref(bo); - - return 0; - -err_map_bo_gart_failed: - amdgpu_bo_unreserve(bo); -err_reserve_bo_failed: - return ret; -} - static int mes_userq_create_wptr_mapping(struct amdgpu_device *adev, struct amdgpu_userq_mgr *uq_mgr, @@ -65,55 +37,62 @@ mes_userq_create_wptr_mapping(struct amdgpu_device *adev, uint64_t wptr) { struct amdgpu_bo_va_mapping *wptr_mapping; - struct amdgpu_vm *wptr_vm; struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj; + struct amdgpu_bo *obj; + struct amdgpu_vm *vm = queue->vm; + struct drm_exec exec; int ret; - wptr_vm = queue->vm; - ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); - if (ret) - return ret; - wptr &= AMDGPU_GMC_HOLE_MASK; - wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); - amdgpu_bo_unreserve(wptr_vm->root.bo); - if (!wptr_mapping) { - DRM_ERROR("Failed to lookup wptr bo\n"); - return -EINVAL; - } - wptr_obj->obj = wptr_mapping->bo_va->base.bo; - if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { - DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); - return -EINVAL; - } + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(vm, &exec, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto fail_lock; + + wptr_mapping = amdgpu_vm_bo_lookup_mapping(vm, wptr >> PAGE_SHIFT); + if (!wptr_mapping) { + ret = -EINVAL; + goto fail_lock; + } - ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj); - if (ret) { - DRM_ERROR("Failed to map wptr bo to GART\n"); - return ret; + obj = wptr_mapping->bo_va->base.bo; + ret = drm_exec_lock_obj(&exec, &obj->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto fail_lock; } - ret = amdgpu_bo_reserve(wptr_obj->obj, true); - if (ret) { - DRM_ERROR("Failed to reserve wptr bo\n"); - return ret; + wptr_obj->obj = amdgpu_bo_ref(wptr_mapping->bo_va->base.bo); + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { + ret = -EINVAL; + goto fail_map; } /* TODO use eviction fence instead of pinning. */ ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT); if (ret) { - drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin wptr bo\n"); - goto unresv_bo; + DRM_ERROR("Failed to pin wptr bo. ret %d\n", ret); + goto fail_map; + } + + ret = amdgpu_ttm_alloc_gart(&wptr_obj->obj->tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); + goto fail_map; } queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj); - amdgpu_bo_unreserve(wptr_obj->obj); + drm_exec_fini(&exec); return 0; -unresv_bo: - amdgpu_bo_unreserve(wptr_obj->obj); +fail_map: + amdgpu_bo_unref(&wptr_obj->obj); +fail_lock: + drm_exec_fini(&exec); return ret; } -- cgit v1.2.3 From 4e02e0afa95f691dc7cc17538cdd648089a843f0 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 13 Oct 2025 15:26:02 +0200 Subject: drm/amdgpu: nuke amdgpu_userq_fence_slab v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As preparation for independent fences remove the extra slab, kmalloc should do just fine. v2: use GFP_KERNEL instead of GFP_ATOMIC Signed-off-by: Christian König Reviewed-by: Prike Liang Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher (cherry picked from commit 0d831487b5be0ae59cac865a0aa87b0acc3dc717) --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 13 +++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 28 ++++--------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 3 --- 3 files changed, 7 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 46aae3fad4bf..60debd543e44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -3149,11 +3149,7 @@ static int __init amdgpu_init(void) r = amdgpu_sync_init(); if (r) - goto error_sync; - - r = amdgpu_userq_fence_slab_init(); - if (r) - goto error_fence; + return r; amdgpu_register_atpx_handler(); amdgpu_acpi_detect(); @@ -3161,7 +3157,7 @@ static int __init amdgpu_init(void) /* Ignore KFD init failures when CONFIG_HSA_AMD is not set. */ r = amdgpu_amdkfd_init(); if (r && r != -ENOENT) - goto error_fence; + goto error_fini_sync; if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) { add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); @@ -3172,10 +3168,8 @@ static int __init amdgpu_init(void) /* let modprobe override vga console setting */ return pci_register_driver(&amdgpu_kms_pci_driver); -error_fence: +error_fini_sync: amdgpu_sync_fini(); - -error_sync: return r; } @@ -3186,7 +3180,6 @@ static void __exit amdgpu_exit(void) amdgpu_unregister_atpx_handler(); amdgpu_acpi_release(); amdgpu_sync_fini(); - amdgpu_userq_fence_slab_fini(); mmu_notifier_synchronize(); amdgpu_xcp_drv_release(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index da39ac862f37..e2d5f04296e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -32,29 +32,9 @@ #include "amdgpu.h" #include "amdgpu_userq_fence.h" -static const struct dma_fence_ops amdgpu_userq_fence_ops; -static struct kmem_cache *amdgpu_userq_fence_slab; - #define AMDGPU_USERQ_MAX_HANDLES (1U << 16) -int amdgpu_userq_fence_slab_init(void) -{ - amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence", - sizeof(struct amdgpu_userq_fence), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!amdgpu_userq_fence_slab) - return -ENOMEM; - - return 0; -} - -void amdgpu_userq_fence_slab_fini(void) -{ - rcu_barrier(); - kmem_cache_destroy(amdgpu_userq_fence_slab); -} +static const struct dma_fence_ops amdgpu_userq_fence_ops; static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f) { @@ -231,7 +211,7 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) { - *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); + *userq_fence = kmalloc(sizeof(**userq_fence), GFP_KERNEL); return *userq_fence ? 0 : -ENOMEM; } @@ -342,7 +322,7 @@ static void amdgpu_userq_fence_free(struct rcu_head *rcu) amdgpu_userq_fence_driver_put(fence_drv); kvfree(userq_fence->fence_drv_array); - kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + kfree(userq_fence); } static void amdgpu_userq_fence_release(struct dma_fence *f) @@ -545,7 +525,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence); if (r) { mutex_unlock(&userq_mgr->userq_mutex); - kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + kfree(userq_fence); goto put_gobj_write; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index d56246ad8c26..d355a0eecc07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -58,9 +58,6 @@ struct amdgpu_userq_fence_driver { char timeline_name[TASK_COMM_LEN]; }; -int amdgpu_userq_fence_slab_init(void); -void amdgpu_userq_fence_slab_fini(void); - void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, -- cgit v1.2.3 From 26f6654a9a60eb4d241f42a0ec85412e8821480b Mon Sep 17 00:00:00 2001 From: Osama Abdelkader Date: Thu, 23 Apr 2026 22:06:20 +0200 Subject: drm/exynos: remove bridge when component_add fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use devm_drm_bridge_add() so the bridge is released if probe fails after registration, and drop the manual drm_bridge_remove() in remove(). Check the return value of devm_drm_bridge_add(). Signed-off-by: Osama Abdelkader Fixes: 576d72fbfb45 ("drm/exynos: mic: add a bridge at probe") Cc: stable@vger.kernel.org Reviewed-by: Raphaël Gallais-Pou Reviewed-by: Luca Ceresoli Link: https://patch.msgid.link/20260423200622.325076-2-osama.abdelkader@gmail.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/exynos/exynos_drm_mic.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c index 29a8366513fa..e68c954ec3e6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_mic.c +++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c @@ -423,7 +423,9 @@ static int exynos_mic_probe(struct platform_device *pdev) mic->bridge.of_node = dev->of_node; - drm_bridge_add(&mic->bridge); + ret = devm_drm_bridge_add(dev, &mic->bridge); + if (ret) + goto err; pm_runtime_enable(dev); @@ -443,12 +445,8 @@ err: static void exynos_mic_remove(struct platform_device *pdev) { - struct exynos_mic *mic = platform_get_drvdata(pdev); - component_del(&pdev->dev, &exynos_mic_component_ops); pm_runtime_disable(&pdev->dev); - - drm_bridge_remove(&mic->bridge); } static const struct of_device_id exynos_mic_of_match[] = { -- cgit v1.2.3 From 3974ea1938406f9bfa7c1f48d4e43533f447bb08 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 28 Apr 2026 19:33:30 +0100 Subject: firmware: arm_ffa: Bound PARTITION_INFO_GET_REGS copies The register-based PARTITION_INFO_GET path trusted the firmware-provided indices when copying partition descriptors into the caller buffer. Reject inconsistent counts or index progressions so the copy loop cannot write past the allocated array. Fixes: ba85c644ac8d ("firmware: arm_ffa: Add support for FFA_PARTITION_INFO_GET_REGS") Link: https://patch.msgid.link/20260428-ffa_fixes-v2-6-8595ae450034@kernel.org (fixed cur_idx when exactly one descriptor in the first fragment) Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index a122814eb6d7..33b417e78684 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -323,6 +323,12 @@ __ffa_partition_info_get(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, #define PART_INFO_ID_MASK GENMASK(15, 0) #define PART_INFO_EXEC_CXT_MASK GENMASK(31, 16) #define PART_INFO_PROPS_MASK GENMASK(63, 32) +#define FFA_PART_INFO_GET_REGS_FIRST_REG 3 +#define FFA_PART_INFO_GET_REGS_REGS_PER_DESC 3 +#define FFA_PART_INFO_GET_REGS_MAX_DESC \ + (((sizeof(ffa_value_t) / sizeof_field(ffa_value_t, a0)) - \ + FFA_PART_INFO_GET_REGS_FIRST_REG) / \ + FFA_PART_INFO_GET_REGS_REGS_PER_DESC) #define PART_INFO_ID(x) ((u16)(FIELD_GET(PART_INFO_ID_MASK, (x)))) #define PART_INFO_EXEC_CXT(x) ((u16)(FIELD_GET(PART_INFO_EXEC_CXT_MASK, (x)))) #define PART_INFO_PROPERTIES(x) ((u32)(FIELD_GET(PART_INFO_PROPS_MASK, (x)))) @@ -330,15 +336,13 @@ static int __ffa_partition_info_get_regs(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, struct ffa_partition_info *buffer, int num_parts) { - u16 buf_sz, start_idx, cur_idx, count = 0, prev_idx = 0, tag = 0; + u16 buf_sz, start_idx = 0, cur_idx, count = 0, tag = 0; struct ffa_partition_info *buf = buffer; ffa_value_t partition_info; do { __le64 *regs; - int idx; - - start_idx = prev_idx ? prev_idx + 1 : 0; + int idx, nr_desc, buf_idx; invoke_ffa_fn((ffa_value_t){ .a0 = FFA_PARTITION_INFO_GET_REGS, @@ -354,15 +358,28 @@ __ffa_partition_info_get_regs(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, count = PARTITION_COUNT(partition_info.a2); if (!buffer || !num_parts) /* count only */ return count; + if (count > num_parts) + return -EINVAL; cur_idx = CURRENT_INDEX(partition_info.a2); + if (cur_idx < start_idx || cur_idx >= count) + return -EINVAL; + + nr_desc = cur_idx - start_idx + 1; + if (nr_desc > FFA_PART_INFO_GET_REGS_MAX_DESC) + return -EINVAL; + + buf_idx = buf - buffer; + if (buf_idx + nr_desc > num_parts) + return -EINVAL; + tag = UUID_INFO_TAG(partition_info.a2); buf_sz = PARTITION_INFO_SZ(partition_info.a2); if (buf_sz > sizeof(*buffer)) buf_sz = sizeof(*buffer); regs = (void *)&partition_info.a3; - for (idx = 0; idx < cur_idx - start_idx + 1; idx++, buf++) { + for (idx = 0; idx < nr_desc; idx++, buf++) { union { uuid_t uuid; u64 regs[2]; @@ -380,7 +397,7 @@ __ffa_partition_info_get_regs(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, uuid_copy(&buf->uuid, &uuid_regs.uuid); regs += 3; } - prev_idx = cur_idx; + start_idx = cur_idx + 1; } while (cur_idx < (count - 1)); -- cgit v1.2.3 From 2af18f8e36b277730527cacc2256b1332f56aa28 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 28 Apr 2026 19:33:31 +0100 Subject: firmware: arm_ffa: Keep framework RX release under lock The framework notification handler drops rx_lock before issuing FFA_RX_RELEASE, leaving a window where another RX-buffer user can start a new FF-A transaction before ownership has actually been returned to firmware. Move the FFA_RX_RELEASE calls so they execute while rx_lock is still held on both the kmemdup() failure path and the normal success path. While doing that, switch the handler to scoped_guard() to keep the critical section explicit. Fixes: 285a5ea0f542 ("firmware: arm_ffa: Add support for handling framework notifications") Link: https://patch.msgid.link/20260428-ffa_fixes-v2-7-8595ae450034@kernel.org Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 33b417e78684..d1e70866a425 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1492,25 +1492,22 @@ static void handle_fwk_notif_callbacks(u32 bitmap) if (!(bitmap & FRAMEWORK_NOTIFY_RX_BUFFER_FULL)) return; - mutex_lock(&drv_info->rx_lock); + scoped_guard(mutex, &drv_info->rx_lock) { + msg = drv_info->rx_buffer; + buf = kmemdup((void *)msg + msg->offset, msg->size, GFP_KERNEL); + if (!buf) { + ffa_rx_release(); + return; + } - msg = drv_info->rx_buffer; - buf = kmemdup((void *)msg + msg->offset, msg->size, GFP_KERNEL); - if (!buf) { - mutex_unlock(&drv_info->rx_lock); - return; + target = SENDER_ID(msg->send_recv_id); + if (msg->offset >= sizeof(*msg)) + uuid_copy(&uuid, &msg->uuid); + else + uuid_copy(&uuid, &uuid_null); + ffa_rx_release(); } - target = SENDER_ID(msg->send_recv_id); - if (msg->offset >= sizeof(*msg)) - uuid_copy(&uuid, &msg->uuid); - else - uuid_copy(&uuid, &uuid_null); - - mutex_unlock(&drv_info->rx_lock); - - ffa_rx_release(); - read_lock(&drv_info->notify_lock); cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, target, &uuid); read_unlock(&drv_info->notify_lock); -- cgit v1.2.3 From 4a1cc9e96b311d2609a6f963a5e35bd4ae730d97 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 28 Apr 2026 19:33:32 +0100 Subject: firmware: arm_ffa: Validate framework notification message layout Framework notifications carry an indirect message in the shared RX buffer. Validate the reported offset and size before using them, reject zero-length payloads, and ensure that any non-header payload starts at the UUID field rather than in the middle of the message header. Use the validated offset and size values for both kmemdup() and the UUID parsing path so malformed firmware data cannot drive an out-of-bounds read or an oversized allocation. Fixes: 285a5ea0f542 ("firmware: arm_ffa: Add support for handling framework notifications") Link: https://patch.msgid.link/20260428-ffa_fixes-v2-8-8595ae450034@kernel.org Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index d1e70866a425..7287423faceb 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1487,21 +1487,35 @@ static void handle_fwk_notif_callbacks(u32 bitmap) int notify_id = 0, target; struct ffa_indirect_msg_hdr *msg; struct notifier_cb_info *cb_info = NULL; + size_t min_offset = offsetof(struct ffa_indirect_msg_hdr, uuid); /* Only one framework notification defined and supported for now */ if (!(bitmap & FRAMEWORK_NOTIFY_RX_BUFFER_FULL)) return; scoped_guard(mutex, &drv_info->rx_lock) { + u32 offset, size; + msg = drv_info->rx_buffer; - buf = kmemdup((void *)msg + msg->offset, msg->size, GFP_KERNEL); + offset = msg->offset; + size = msg->size; + + if (!size || (offset != min_offset && offset < sizeof(*msg)) || + offset > drv_info->rxtx_bufsz || + size > drv_info->rxtx_bufsz - offset) { + pr_err("invalid framework notification message\n"); + ffa_rx_release(); + return; + } + + buf = kmemdup((void *)msg + offset, size, GFP_KERNEL); if (!buf) { ffa_rx_release(); return; } target = SENDER_ID(msg->send_recv_id); - if (msg->offset >= sizeof(*msg)) + if (offset >= sizeof(*msg)) uuid_copy(&uuid, &msg->uuid); else uuid_copy(&uuid, &uuid_null); -- cgit v1.2.3 From 0399e3f872ca3d78044bb715a73ea645806d2c7b Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 28 Apr 2026 19:33:33 +0100 Subject: firmware: arm_ffa: Align RxTx buffer size before mapping Commit 83210251fd70 ("firmware: arm_ffa: Use the correct buffer size during RXTX_MAP") advertises PAGE_ALIGN(rxtx_bufsz) to firmware when mapping the buffers but the driver continues to stores the minimum FF-A buffer size in drv_info->rxtx_bufsz which is used elsewhere in the driver. Align the size before storing it so that the allocation, validation and FFA_RXTX_MAP all use the same buffer size. Fixes: 83210251fd70 ("firmware: arm_ffa: Use the correct buffer size during RXTX_MAP") Cc: Sebastian Ene Link: https://sashiko.dev/#/patchset/20260402113939.930221-1-sebastianene@google.com Reviewed-by: Sebastian Ene Link: https://patch.msgid.link/20260428-ffa_fixes-v2-9-8595ae450034@kernel.org Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 7287423faceb..66ed98e32bd6 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -2109,6 +2109,7 @@ static int __init ffa_init(void) rxtx_bufsz = SZ_4K; } + rxtx_bufsz = PAGE_ALIGN(rxtx_bufsz); drv_info->rxtx_bufsz = rxtx_bufsz; drv_info->rx_buffer = alloc_pages_exact(rxtx_bufsz, GFP_KERNEL); if (!drv_info->rx_buffer) { @@ -2124,7 +2125,7 @@ static int __init ffa_init(void) ret = ffa_rxtx_map(virt_to_phys(drv_info->tx_buffer), virt_to_phys(drv_info->rx_buffer), - PAGE_ALIGN(rxtx_bufsz) / FFA_PAGE_SIZE); + rxtx_bufsz / FFA_PAGE_SIZE); if (ret) { pr_err("failed to register FFA RxTx buffers\n"); goto free_pages; -- cgit v1.2.3 From 38290b180a4d5746baed796d49f88d56d2f336cd Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 28 Apr 2026 19:33:34 +0100 Subject: firmware: arm_ffa: Snapshot notifier callbacks under lock Both notification handlers currently look up a notifier callback under notify_lock, drop the lock, and then dereference the returned notifier entry. A concurrent unregister can delete and free that entry in the gap, leaving the handler to dereference stale memory. Copy the callback pointer and callback data while notify_lock is still held and invoke the callback only after the lock is dropped. This keeps the existing callback execution model while removing the use-after-free window in both the framework and non-framework notification paths. Fixes: 285a5ea0f542 ("firmware: arm_ffa: Add support for handling framework notifications") Link: https://patch.msgid.link/20260428-ffa_fixes-v2-10-8595ae450034@kernel.org Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 66ed98e32bd6..98ead7ed28ca 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1463,20 +1463,25 @@ static int ffa_notify_send(struct ffa_device *dev, int notify_id, static void handle_notif_callbacks(u64 bitmap, enum notify_type type) { + ffa_notifier_cb cb; + void *cb_data; int notify_id; - struct notifier_cb_info *cb_info = NULL; for (notify_id = 0; notify_id <= FFA_MAX_NOTIFICATIONS && bitmap; notify_id++, bitmap >>= 1) { if (!(bitmap & 1)) continue; - read_lock(&drv_info->notify_lock); - cb_info = notifier_hnode_get_by_type(notify_id, type); - read_unlock(&drv_info->notify_lock); + scoped_guard(read_lock, &drv_info->notify_lock) { + struct notifier_cb_info *cb_info; + + cb_info = notifier_hnode_get_by_type(notify_id, type); + cb = cb_info ? cb_info->cb : NULL; + cb_data = cb_info ? cb_info->cb_data : NULL; + } - if (cb_info && cb_info->cb) - cb_info->cb(notify_id, cb_info->cb_data); + if (cb) + cb(notify_id, cb_data); } } @@ -1484,9 +1489,10 @@ static void handle_fwk_notif_callbacks(u32 bitmap) { void *buf; uuid_t uuid; + void *fwk_cb_data; int notify_id = 0, target; + ffa_fwk_notifier_cb fwk_cb; struct ffa_indirect_msg_hdr *msg; - struct notifier_cb_info *cb_info = NULL; size_t min_offset = offsetof(struct ffa_indirect_msg_hdr, uuid); /* Only one framework notification defined and supported for now */ @@ -1522,12 +1528,17 @@ static void handle_fwk_notif_callbacks(u32 bitmap) ffa_rx_release(); } - read_lock(&drv_info->notify_lock); - cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, target, &uuid); - read_unlock(&drv_info->notify_lock); + scoped_guard(read_lock, &drv_info->notify_lock) { + struct notifier_cb_info *cb_info; + + cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, target, + &uuid); + fwk_cb = cb_info ? cb_info->fwk_cb : NULL; + fwk_cb_data = cb_info ? cb_info->cb_data : NULL; + } - if (cb_info && cb_info->fwk_cb) - cb_info->fwk_cb(notify_id, cb_info->cb_data, buf); + if (fwk_cb) + fwk_cb(notify_id, fwk_cb_data, buf); kfree(buf); } -- cgit v1.2.3 From a6848a50404eefb6f0b131c21881a2d8d21b31a9 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 28 Apr 2026 19:33:35 +0100 Subject: firmware: arm_ffa: Fix sched-recv callback partition lookup ffa_sched_recv_cb_update() used list_for_each_entry_safe() to search for a matching partition and then tested the iterator against NULL. That is not a valid end-of-list check for circular lists and can fall through with an invalid pointer. Use a normal iterator and detect the not-found case correctly before touching the partition state. Fixes: be61da938576 ("firmware: arm_ffa: Allow multiple UUIDs per partition to register SRI callback") Link: https://patch.msgid.link/20260428-ffa_fixes-v2-11-8595ae450034@kernel.org Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 98ead7ed28ca..b9f17fda7243 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1207,7 +1207,7 @@ static int ffa_sched_recv_cb_update(struct ffa_device *dev, ffa_sched_recv_cb callback, void *cb_data, bool is_registration) { - struct ffa_dev_part_info *partition = NULL, *tmp; + struct ffa_dev_part_info *partition = NULL; struct list_head *phead; bool cb_valid; @@ -1220,11 +1220,11 @@ ffa_sched_recv_cb_update(struct ffa_device *dev, ffa_sched_recv_cb callback, return -EINVAL; } - list_for_each_entry_safe(partition, tmp, phead, node) + list_for_each_entry(partition, phead, node) if (partition->dev == dev) break; - if (!partition) { + if (&partition->node == phead) { pr_err("%s: No such partition ID 0x%x\n", __func__, dev->vm_id); return -EINVAL; } -- cgit v1.2.3 From ac8eb3e18f41e2cc8492cc1d358bcb786c850270 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 5 May 2026 15:15:40 +0200 Subject: wifi: mac80211: use safe list iteration in radar detect work The call to ieee80211_dfs_cac_cancel can cause the iterated chanctx to be freed and removed from the list. Guard against this to avoid a slab-use-after-free error. Cc: stable@vger.kernel.org Fixes: bca8bc0399ac ("wifi: mac80211: handle ieee80211_radar_detected() for MLO") Signed-off-by: Benjamin Berg Link: https://patch.msgid.link/20260505151539.236d63a1b736.I35dbb9e96a2d4a480be208770fdd99ba3b817b79@changeid Signed-off-by: Johannes Berg --- net/mac80211/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b093bc203c81..2529b01e2cd5 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3700,11 +3700,11 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, struct ieee80211_local *local = container_of(work, struct ieee80211_local, radar_detected_work); struct cfg80211_chan_def chandef; - struct ieee80211_chanctx *ctx; + struct ieee80211_chanctx *ctx, *tmp; lockdep_assert_wiphy(local->hw.wiphy); - list_for_each_entry(ctx, &local->chanctx_list, list) { + list_for_each_entry_safe(ctx, tmp, &local->chanctx_list, list) { if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER) continue; -- cgit v1.2.3 From 644132a48f4e28a1d949d162160869286f3e75de Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 5 May 2026 08:49:48 -0400 Subject: selinux: prune /sys/fs/selinux/checkreqprot commit a7e4676e8e2cb ("selinux: remove the 'checkreqprot' functionality") removed the ability to modify the checkreqprot setting but left everything except the updating of the checkreqprot value intact. Aside from unnecessary processing, this could produce a local DoS from log spam and incorrectly calls selinux_ima_measure_state() on each write even though no state has changed. Prune it to just log an error message once and return count (i.e. all bytes written successfully) so that userspace never breaks. Cc: stable@vger.kernel.org Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/selinuxfs.c | 47 +++++++------------------------------------- 1 file changed, 7 insertions(+), 40 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 83aa765a09f9..6f74f87cb2b0 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -689,46 +689,13 @@ static ssize_t sel_read_checkreqprot(struct file *filp, char __user *buf, static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - char *page; - ssize_t length; - unsigned int new_value; - - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, - SECCLASS_SECURITY, SECURITY__SETCHECKREQPROT, - NULL); - if (length) - return length; - - if (count >= PAGE_SIZE) - return -ENOMEM; - - /* No partial writes. */ - if (*ppos != 0) - return -EINVAL; - - page = memdup_user_nul(buf, count); - if (IS_ERR(page)) - return PTR_ERR(page); - - if (sscanf(page, "%u", &new_value) != 1) { - length = -EINVAL; - goto out; - } - length = count; - - if (new_value) { - char comm[sizeof(current->comm)]; - - strscpy(comm, current->comm); - pr_err("SELinux: %s (%d) set checkreqprot to 1. This is no longer supported.\n", - comm, current->pid); - } - - selinux_ima_measure_state(); - -out: - kfree(page); - return length; + /* + * Setting checkreqprot is no longer supported, see + * https://github.com/SELinuxProject/selinux-kernel/wiki/DEPRECATE-checkreqprot + */ + pr_err_once("SELinux: %s (%d) wrote to checkreqprot. This is no longer supported.\n", + current->comm, current->pid); + return count; } static const struct file_operations sel_checkreqprot_ops = { .read = sel_read_checkreqprot, -- cgit v1.2.3 From 19cfa0099024bb9cd40f6d950caa7f47ff8e77f6 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 5 May 2026 08:49:49 -0400 Subject: selinux: prune /sys/fs/selinux/disable Commit f22f9aaf6c3d ("selinux: remove the runtime disable functionality") removed the underlying SELinux runtime disable functionality but left everything else intact and started logging an error message to warn any residual users. Prune it to just log an error message once and to return count (i.e. all bytes written successfully) to avoid breaking userspace. This also fixes a local DoS from logspam. Cc: stable@vger.kernel.org Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/selinuxfs.c | 36 +++++++----------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 6f74f87cb2b0..343303b73d6f 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -272,35 +272,13 @@ static ssize_t sel_write_disable(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - char *page; - ssize_t length; - int new_value; - - if (count >= PAGE_SIZE) - return -ENOMEM; - - /* No partial writes. */ - if (*ppos != 0) - return -EINVAL; - - page = memdup_user_nul(buf, count); - if (IS_ERR(page)) - return PTR_ERR(page); - - if (sscanf(page, "%d", &new_value) != 1) { - length = -EINVAL; - goto out; - } - length = count; - - if (new_value) { - pr_err("SELinux: https://github.com/SELinuxProject/selinux-kernel/wiki/DEPRECATE-runtime-disable\n"); - pr_err("SELinux: Runtime disable is not supported, use selinux=0 on the kernel cmdline.\n"); - } - -out: - kfree(page); - return length; + /* + * Setting disable is no longer supported, see + * https://github.com/SELinuxProject/selinux-kernel/wiki/DEPRECATE-runtime-disable + */ + pr_err_once("SELinux: %s (%d) wrote to disable. This is no longer supported.\n", + current->comm, current->pid); + return count; } static const struct file_operations sel_disable_ops = { -- cgit v1.2.3 From ad1ac3d740cc6b858a99ab9c45c8c0574be7d1d3 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 5 May 2026 08:49:50 -0400 Subject: selinux: prune /sys/fs/selinux/user Remove the previously deprecated /sys/fs/selinux/user interface aside from a residual stub for userspace compatibility. Commit d7b6918e22c7 ("selinux: Deprecate /sys/fs/selinux/user") started the deprecation process for /sys/fs/selinux/user: The selinuxfs "user" node allows userspace to request a list of security contexts that can be reached for a given SELinux user from a given starting context. This was used by libselinux when various login-style programs requested contexts for users, but libselinux stopped using it in 2020. Kernel support will be removed no sooner than Dec 2025. A pr_warn() message has been in place since Linux v6.13, and a 5 second sleep was introduced since Linux v6.17 to help make it more noticeable. We are now past the stated deadline of Dec 2025, so remove the underlying functionality and replace it with a stub that returns a '0\0' buffer to avoid breaking userspace. This also avoids a local DoS from logspam and an uninterruptible sleep delay. Cc: stable@vger.kernel.org Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore --- Documentation/ABI/obsolete/sysfs-selinux-user | 12 --- Documentation/ABI/removed/sysfs-selinux-user | 12 +++ security/selinux/include/security.h | 2 - security/selinux/selinuxfs.c | 68 ++------------ security/selinux/ss/services.c | 125 -------------------------- 5 files changed, 17 insertions(+), 202 deletions(-) delete mode 100644 Documentation/ABI/obsolete/sysfs-selinux-user create mode 100644 Documentation/ABI/removed/sysfs-selinux-user diff --git a/Documentation/ABI/obsolete/sysfs-selinux-user b/Documentation/ABI/obsolete/sysfs-selinux-user deleted file mode 100644 index 8ab7557f283f..000000000000 --- a/Documentation/ABI/obsolete/sysfs-selinux-user +++ /dev/null @@ -1,12 +0,0 @@ -What: /sys/fs/selinux/user -Date: April 2005 (predates git) -KernelVersion: 2.6.12-rc2 (predates git) -Contact: selinux@vger.kernel.org -Description: - - The selinuxfs "user" node allows userspace to request a list - of security contexts that can be reached for a given SELinux - user from a given starting context. This was used by libselinux - when various login-style programs requested contexts for - users, but libselinux stopped using it in 2020. - Kernel support will be removed no sooner than Dec 2025. diff --git a/Documentation/ABI/removed/sysfs-selinux-user b/Documentation/ABI/removed/sysfs-selinux-user new file mode 100644 index 000000000000..8ab7557f283f --- /dev/null +++ b/Documentation/ABI/removed/sysfs-selinux-user @@ -0,0 +1,12 @@ +What: /sys/fs/selinux/user +Date: April 2005 (predates git) +KernelVersion: 2.6.12-rc2 (predates git) +Contact: selinux@vger.kernel.org +Description: + + The selinuxfs "user" node allows userspace to request a list + of security contexts that can be reached for a given SELinux + user from a given starting context. This was used by libselinux + when various login-style programs requested contexts for + users, but libselinux stopped using it in 2020. + Kernel support will be removed no sooner than Dec 2025. diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index d1f16d7f684d..0babb8992181 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -312,8 +312,6 @@ int security_context_to_sid_default(const char *scontext, u32 scontext_len, int security_context_to_sid_force(const char *scontext, u32 scontext_len, u32 *sid); -int security_get_user_sids(u32 fromsid, const char *username, u32 **sids, u32 *nel); - int security_port_sid(u8 protocol, u16 port, u32 *out_sid); int security_ib_pkey_sid(u64 subnet_prefix, u16 pkey_num, u32 *out_sid); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 343303b73d6f..6ed669309132 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1018,69 +1018,11 @@ out: static ssize_t sel_write_user(struct file *file, char *buf, size_t size) { - char *con = NULL, *user = NULL, *ptr; - u32 sid, *sids = NULL; - ssize_t length; - char *newcon; - int rc; - u32 i, len, nsids; - - pr_warn_ratelimited("SELinux: %s (%d) wrote to /sys/fs/selinux/user!" - " This will not be supported in the future; please update your" - " userspace.\n", current->comm, current->pid); - ssleep(5); - - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, - SECCLASS_SECURITY, SECURITY__COMPUTE_USER, - NULL); - if (length) - goto out; - - length = -ENOMEM; - con = kzalloc(size + 1, GFP_KERNEL); - if (!con) - goto out; - - length = -ENOMEM; - user = kzalloc(size + 1, GFP_KERNEL); - if (!user) - goto out; - - length = -EINVAL; - if (sscanf(buf, "%s %s", con, user) != 2) - goto out; - - length = security_context_str_to_sid(con, &sid, GFP_KERNEL); - if (length) - goto out; - - length = security_get_user_sids(sid, user, &sids, &nsids); - if (length) - goto out; - - length = sprintf(buf, "%u", nsids) + 1; - ptr = buf + length; - for (i = 0; i < nsids; i++) { - rc = security_sid_to_context(sids[i], &newcon, &len); - if (rc) { - length = rc; - goto out; - } - if ((length + len) >= SIMPLE_TRANSACTION_LIMIT) { - kfree(newcon); - length = -ERANGE; - goto out; - } - memcpy(ptr, newcon, len); - kfree(newcon); - ptr += len; - length += len; - } -out: - kfree(sids); - kfree(user); - kfree(con); - return length; + pr_err_once("SELinux: %s (%d) wrote to user. This is no longer supported.\n", + current->comm, current->pid); + buf[0] = '0'; + buf[1] = 0; + return 2; } static ssize_t sel_write_member(struct file *file, char *buf, size_t size) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index e8e7ccbd1e44..143021c5e326 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2746,131 +2746,6 @@ out: return rc; } -#define SIDS_NEL 25 - -/** - * security_get_user_sids - Obtain reachable SIDs for a user. - * @fromsid: starting SID - * @username: username - * @sids: array of reachable SIDs for user - * @nel: number of elements in @sids - * - * Generate the set of SIDs for legal security contexts - * for a given user that can be reached by @fromsid. - * Set *@sids to point to a dynamically allocated - * array containing the set of SIDs. Set *@nel to the - * number of elements in the array. - */ - -int security_get_user_sids(u32 fromsid, - const char *username, - u32 **sids, - u32 *nel) -{ - struct selinux_policy *policy; - struct policydb *policydb; - struct sidtab *sidtab; - struct context *fromcon, usercon; - u32 *mysids = NULL, *mysids2, sid; - u32 i, j, mynel, maxnel = SIDS_NEL; - struct user_datum *user; - struct role_datum *role; - struct ebitmap_node *rnode, *tnode; - int rc; - - *sids = NULL; - *nel = 0; - - if (!selinux_initialized()) - return 0; - - mysids = kcalloc(maxnel, sizeof(*mysids), GFP_KERNEL); - if (!mysids) - return -ENOMEM; - -retry: - mynel = 0; - rcu_read_lock(); - policy = rcu_dereference(selinux_state.policy); - policydb = &policy->policydb; - sidtab = policy->sidtab; - - context_init(&usercon); - - rc = -EINVAL; - fromcon = sidtab_search(sidtab, fromsid); - if (!fromcon) - goto out_unlock; - - rc = -EINVAL; - user = symtab_search(&policydb->p_users, username); - if (!user) - goto out_unlock; - - usercon.user = user->value; - - ebitmap_for_each_positive_bit(&user->roles, rnode, i) { - role = policydb->role_val_to_struct[i]; - usercon.role = i + 1; - ebitmap_for_each_positive_bit(&role->types, tnode, j) { - usercon.type = j + 1; - - if (mls_setup_user_range(policydb, fromcon, user, - &usercon)) - continue; - - rc = sidtab_context_to_sid(sidtab, &usercon, &sid); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out_unlock; - if (mynel < maxnel) { - mysids[mynel++] = sid; - } else { - rc = -ENOMEM; - maxnel += SIDS_NEL; - mysids2 = kcalloc(maxnel, sizeof(*mysids2), GFP_ATOMIC); - if (!mysids2) - goto out_unlock; - memcpy(mysids2, mysids, mynel * sizeof(*mysids2)); - kfree(mysids); - mysids = mysids2; - mysids[mynel++] = sid; - } - } - } - rc = 0; -out_unlock: - rcu_read_unlock(); - if (rc || !mynel) { - kfree(mysids); - return rc; - } - - rc = -ENOMEM; - mysids2 = kcalloc(mynel, sizeof(*mysids2), GFP_KERNEL); - if (!mysids2) { - kfree(mysids); - return rc; - } - for (i = 0, j = 0; i < mynel; i++) { - struct av_decision dummy_avd; - rc = avc_has_perm_noaudit(fromsid, mysids[i], - SECCLASS_PROCESS, /* kernel value */ - PROCESS__TRANSITION, AVC_STRICT, - &dummy_avd); - if (!rc) - mysids2[j++] = mysids[i]; - cond_resched(); - } - kfree(mysids); - *sids = mysids2; - *nel = j; - return 0; -} - /** * __security_genfs_sid - Helper to obtain a SID for a file in a filesystem * @policy: policy -- cgit v1.2.3 From a02cd6805562305f936e807da83e253b719dd965 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 5 May 2026 10:06:38 -0400 Subject: selinux: allow multiple opens of /sys/fs/selinux/policy Currently there can only be a single open of /sys/fs/selinux/policy at any time. This allows any process to block any other process from reading the kernel policy. The original motivation seems to have been a mix of preventing an inconsistent view of the policy size and preventing userspace from allocating kernel memory without bound, but this is arguably equally bad. Eliminate the policy_opened flag and shrink the critical section that the policy mutex is held. While we are making changes here, drop a couple of extraneous BUG_ONs. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/selinux/20100726193414.19538.64028.stgit@paris.rdu.redhat.com/ Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/selinuxfs.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 6ed669309132..a43a38a3ae25 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -76,7 +76,6 @@ struct selinux_fs_info { int *bool_pending_values; struct dentry *class_dir; unsigned long last_class_ino; - bool policy_opened; unsigned long last_ino; struct super_block *sb; }; @@ -340,44 +339,31 @@ struct policy_load_memory { static int sel_open_policy(struct inode *inode, struct file *filp) { - struct selinux_fs_info *fsi = inode->i_sb->s_fs_info; struct policy_load_memory *plm = NULL; int rc; - BUG_ON(filp->private_data); - - mutex_lock(&selinux_state.policy_mutex); - rc = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__READ_POLICY, NULL); if (rc) - goto err; - - rc = -EBUSY; - if (fsi->policy_opened) - goto err; + return rc; - rc = -ENOMEM; plm = kzalloc_obj(*plm); if (!plm) - goto err; + return -ENOMEM; + mutex_lock(&selinux_state.policy_mutex); rc = security_read_policy(&plm->data, &plm->len); if (rc) goto err; - if ((size_t)i_size_read(inode) != plm->len) { inode_lock(inode); i_size_write(inode, plm->len); inode_unlock(inode); } - - fsi->policy_opened = 1; + mutex_unlock(&selinux_state.policy_mutex); filp->private_data = plm; - mutex_unlock(&selinux_state.policy_mutex); - return 0; err: mutex_unlock(&selinux_state.policy_mutex); @@ -390,13 +376,8 @@ err: static int sel_release_policy(struct inode *inode, struct file *filp) { - struct selinux_fs_info *fsi = inode->i_sb->s_fs_info; struct policy_load_memory *plm = filp->private_data; - BUG_ON(!plm); - - fsi->policy_opened = 0; - vfree(plm->data); kfree(plm); -- cgit v1.2.3 From 868f31e4061eca8c3cd607d79d954d5e54f204aa Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 30 Apr 2026 14:36:52 -0400 Subject: selinux: shrink critical section in sel_write_load() Currently sel_write_load() takes the policy mutex earlier than necessary. Move the taking of the mutex later. This avoids holding it unnecessarily across the vmalloc() and copy_from_user() of the policy data. Cc: stable@vger.kernel.org Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/selinuxfs.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index a43a38a3ae25..25ca7d714014 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -553,34 +553,31 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, if (!count) return -EINVAL; - mutex_lock(&selinux_state.policy_mutex); - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__LOAD_POLICY, NULL); if (length) - goto out; + return length; data = vmalloc(count); - if (!data) { - length = -ENOMEM; - goto out; - } + if (!data) + return -ENOMEM; if (copy_from_user(data, buf, count) != 0) { length = -EFAULT; goto out; } + mutex_lock(&selinux_state.policy_mutex); length = security_load_policy(data, count, &load_state); if (length) { pr_warn_ratelimited("SELinux: failed to load policy\n"); - goto out; + goto out_unlock; } fsi = file_inode(file)->i_sb->s_fs_info; length = sel_make_policy_nodes(fsi, load_state.policy); if (length) { pr_warn_ratelimited("SELinux: failed to initialize selinuxfs\n"); selinux_policy_cancel(&load_state); - goto out; + goto out_unlock; } selinux_policy_commit(&load_state); @@ -590,8 +587,9 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); -out: +out_unlock: mutex_unlock(&selinux_state.policy_mutex); +out: vfree(data); return length; } -- cgit v1.2.3 From 60a1e131a811b68703da58fd805ab359b704ab03 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 16 Apr 2026 15:17:19 -0300 Subject: drm/xe/hdcp: Add NULL check for media_gt in intel_hdcp_gsc_check_status() When media GT is disabled via configfs, there is no allocation for media_gt, which is kept as NULL. In such scenario, intel_hdcp_gsc_check_status() results in a kernel pagefault error due to >->uc.gsc being evaluated as an invalid memory address. Fix that by introducing a NULL check on media_gt and bailing out early if so. While at it, also drop the NULL check for gsc, since it can't be NULL if media_gt is not NULL. v2: - Get address for gsc only after checking that gt is not NULL. (Shuicheng) - Drop the NULL check for gsc. (Shuicheng) v3: - Add "Fixes" and "Cc: " tags. (Matt) Fixes: 4af50beb4e0f ("drm/xe: Use gsc_proxy_init_done to check proxy status") Cc: # v6.10+ Reviewed-by: Matt Roper Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260416-check-for-null-media_gt-in-intel_hdcp_gsc_check_status-v2-1-9adb9fd3b621@intel.com Signed-off-by: Gustavo Sousa (cherry picked from commit bfaf87e84ca3ca3f6e275f9ae56da47a8b55ffd1) Signed-off-by: Matthew Brost --- drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 29c72aa4b0d2..33494b86205d 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -37,9 +37,17 @@ static bool intel_hdcp_gsc_check_status(struct drm_device *drm) struct xe_device *xe = to_xe_device(drm); struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *gt = tile->media_gt; - struct xe_gsc *gsc = >->uc.gsc; + struct xe_gsc *gsc; + + if (!gt) { + drm_dbg_kms(&xe->drm, + "not checking GSC status for HDCP2.x: media GT not present or disabled\n"); + return false; + } + + gsc = >->uc.gsc; - if (!gsc || !xe_uc_fw_is_available(&gsc->fw)) { + if (!xe_uc_fw_is_available(&gsc->fw)) { drm_dbg_kms(&xe->drm, "GSC Components not ready for HDCP2.x\n"); return false; -- cgit v1.2.3 From d01012c740bbb298b957e30cc0848e482c6f486f Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Tue, 28 Apr 2026 20:14:48 +0000 Subject: drm/xe/pf: Fix EAGAIN sign in pf_migration_consume() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PTR_ERR() returns a negative value, so comparing against the positive EAGAIN is always true for ERR_PTR(-EAGAIN), causing pf_migration_consume() to bail out instead of continuing to the remaining GTs. On multi-GT platforms this can skip GTs that already have data ready. Compare against -EAGAIN to match the intent (and the following line that correctly uses -EAGAIN). While at it, gate PTR_ERR() with IS_ERR(). v2: add IS_ERR() guard before PTR_ERR(). (Gustavo) Fixes: 67df4a5cbc58 ("drm/xe/pf: Add data structures and handlers for migration rings") Cc: Michał Winiarski Reviewed-by: Gustavo Sousa Link: https://patch.msgid.link/20260428201448.3999428-1-shuicheng.lin@intel.com Signed-off-by: Shuicheng Lin (cherry picked from commit 9d770e72e1edb54beacfce5f402edb51632811e3) Signed-off-by: Matthew Brost --- drivers/gpu/drm/xe/xe_sriov_pf_migration.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c index 6c4b16409cc9..150a241110fb 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c @@ -149,10 +149,11 @@ pf_migration_consume(struct xe_device *xe, unsigned int vfid) for_each_gt(gt, xe, gt_id) { data = xe_gt_sriov_pf_migration_save_consume(gt, vfid); - if (data && PTR_ERR(data) != EAGAIN) + if (!data) + continue; + if (!IS_ERR(data) || PTR_ERR(data) != -EAGAIN) return data; - if (PTR_ERR(data) == -EAGAIN) - more_data = true; + more_data = true; } if (!more_data) -- cgit v1.2.3 From b87951a0ae9f95ca6590bf0939edced7d36929dd Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 29 Apr 2026 19:22:59 +0000 Subject: drm/xe/pf: Fix MMIO access using PF view instead of VF view during migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pf_migration_mmio_save() and pf_migration_mmio_restore() initialize a local VF-specific MMIO view via xe_mmio_init_vf_view() but then pass >->mmio (the PF base) to all xe_mmio_read32()/xe_mmio_write32() calls instead of the local &mmio. This causes the PF own SW flag registers to be saved/restored rather than the target VF registers, silently corrupting migration state. Use the VF MMIO view for all register accesses, matching the correct pattern used in pf_clear_vf_scratch_regs(). Fixes: b7c1b990f719 ("drm/xe/pf: Handle MMIO migration data as part of PF control") Cc: Michał Winiarski Assisted-by: Claude:claude-opus-4.6 Reviewed-by: Michal Wajdeczko Reviewed-by: Stuart Summers Link: https://patch.msgid.link/20260429192259.4009211-1-shuicheng.lin@intel.com Signed-off-by: Shuicheng Lin (cherry picked from commit 7d9c39cfb31ff389490ca1308767c2807a9829a6) Signed-off-by: Matthew Brost --- drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c index 87a164efcc33..01fe03b9efe8 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c @@ -385,10 +385,10 @@ static int pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid, void *buf if (xe_gt_is_media_type(gt)) for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) - regs[n] = xe_mmio_read32(>->mmio, MED_VF_SW_FLAG(n)); + regs[n] = xe_mmio_read32(&mmio, MED_VF_SW_FLAG(n)); else for (n = 0; n < VF_SW_FLAG_COUNT; n++) - regs[n] = xe_mmio_read32(>->mmio, VF_SW_FLAG(n)); + regs[n] = xe_mmio_read32(&mmio, VF_SW_FLAG(n)); return 0; } @@ -407,10 +407,10 @@ static int pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, if (xe_gt_is_media_type(gt)) for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) - xe_mmio_write32(>->mmio, MED_VF_SW_FLAG(n), regs[n]); + xe_mmio_write32(&mmio, MED_VF_SW_FLAG(n), regs[n]); else for (n = 0; n < VF_SW_FLAG_COUNT; n++) - xe_mmio_write32(>->mmio, VF_SW_FLAG(n), regs[n]); + xe_mmio_write32(&mmio, VF_SW_FLAG(n), regs[n]); return 0; } -- cgit v1.2.3 From b29987dfd943e655df6e3b641ecffad5cc1509c2 Mon Sep 17 00:00:00 2001 From: Satyanarayana K V P Date: Mon, 4 May 2026 09:49:26 +0000 Subject: drm/xe/guc: Exclude indirect ring state page from ADS engine state size The engine state size reported to GuC via ADS should only include the engine state portion and should not include the indirect ring state page that comes after it in the context image. The GuC uses this size to overwrite the engine state in the LRC on watchdog resets and we don't want it to overwrite the indirect ring state as well. Fixes: d6219e1cd5e3 ("drm/xe: Add Indirect Ring State support") Suggested-by: Daniele Ceraolo Spurio Signed-off-by: Satyanarayana K V P Cc: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patch.msgid.link/20260504094924.3760713-4-satyanarayana.k.v.p@intel.com (cherry picked from commit 3ec5f003f6c377beda8bd5438941f5a7795e1848) Signed-off-by: Matthew Brost --- drivers/gpu/drm/xe/xe_guc_ads.c | 5 +---- drivers/gpu/drm/xe/xe_lrc.c | 11 +++++++++-- drivers/gpu/drm/xe/xe_lrc.h | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 81b5f01b1f65..2b835d48b565 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -512,12 +512,9 @@ static void guc_golden_lrc_init(struct xe_guc_ads *ads) * that starts after the execlists LRC registers. This is * required to allow the GuC to restore just the engine state * when a watchdog reset occurs. - * We calculate the engine state size by removing the size of - * what comes before it in the context image (which is identical - * on all engines). */ ads_blob_write(ads, ads.eng_state_size[guc_class], - real_size - xe_lrc_skip_size(xe)); + xe_lrc_engine_state_size(gt, class)); ads_blob_write(ads, ads.golden_context_lrca[guc_class], addr_ggtt); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index c725cde4508d..4af9f0d7c6f3 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -746,9 +746,16 @@ size_t xe_lrc_reg_size(struct xe_device *xe) return 80 * sizeof(u32); } -size_t xe_lrc_skip_size(struct xe_device *xe) +/** + * xe_lrc_engine_state_size() - Get size of the engine state within LRC + * @gt: the &xe_gt struct instance + * @class: Hardware engine class + * + * Returns: Size of the engine state + */ +size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class) { - return LRC_PPHWSP_SIZE + xe_lrc_reg_size(xe); + return xe_gt_lrc_hang_replay_size(gt, class) - xe_lrc_reg_size(gt_to_xe(gt)); } static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index e7c975f9e2d9..5440663183f6 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -130,7 +130,7 @@ u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc); struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc); size_t xe_lrc_reg_size(struct xe_device *xe); -size_t xe_lrc_skip_size(struct xe_device *xe); +size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class); void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, -- cgit v1.2.3 From 901a7d9e2f280a9e76e6c58406a519cb11ad5ff8 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Sun, 3 May 2026 21:25:16 +0200 Subject: ipv6: default IPV6_SIT to m This basically defaulted to m until recently, since IPV6 defaulted to m. Since IPV6 was changed to a boolean with a default of y, IPV6_SIT started defaulting to built-in as well. This results in a surprise sit0 device by default for defconfig (and defconfig-derived config) users at boot. For me, this broke an (admittedly non-robust) script. Preserve the behaviour of most configs by avoiding building this module, that's probably overall seldom used compared to IPv6 as a whole, into the kernel. Fixes: 309b905deee59 ("ipv6: convert CONFIG_IPV6 to built-in only and clean up Kconfigs") Signed-off-by: Alyssa Ross Reviewed-by: Fernando Fernandez Mancera Link: https://patch.msgid.link/20260503192515.290900-2-hi@alyssa.is Signed-off-by: Jakub Kicinski --- net/ipv6/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index c024aa77f25b..c3806c6ac96f 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -164,7 +164,7 @@ config IPV6_SIT select INET_TUNNEL select NET_IP_TUNNEL select IPV6_NDISC_NODETYPE - default y + default m help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -172,7 +172,7 @@ config IPV6_SIT into IPv4 packets. This is useful if you want to connect two IPv6 networks over an IPv4-only path. - Saying M here will produce a module called sit. If unsure, say Y. + Saying M here will produce a module called sit. If unsure, say M. config IPV6_SIT_6RD bool "IPv6: IPv6 Rapid Deployment (6RD)" -- cgit v1.2.3 From 5ad509c1fdad4bf0993b72d1b3d462f036d8a0d8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 4 May 2026 06:43:13 +0000 Subject: ipv6: Fix null-ptr-deref in fib6_mtu(). syzbot reported null-ptr-deref in fib6_mtu(). [0] When res->f6i->fib6_pmtu is 0 in fib6_mtu(), it fetches MTU from __in6_dev_get(nh->fib_nh_dev)->cnf.mtu6. However, __in6_dev_get() could return NULL when the device is being unregistered. Let's return 0 MTU if __in6_dev_get() returns NULL in fib6_mtu(). [0]: Oops: general protection fault, probably for non-canonical address 0xdffffc00000000bc: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x00000000000005e0-0x00000000000005e7] CPU: 0 UID: 0 PID: 7890 Comm: syz.2.502 Tainted: G L syzkaller #0 PREEMPT(full) Tainted: [L]=SOFTLOCKUP Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:fib6_mtu net/ipv6/route.c:1648 [inline] RIP: 0010:rt6_insert_exception+0x9eb/0x10a0 net/ipv6/route.c:1753 Code: 3b 14 cf f7 45 85 f6 0f 85 1d 02 00 00 e8 7d 19 cf f7 48 8d bb e0 05 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 89 RSP: 0000:ffffc9000610f120 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffc9000c001000 RDX: 00000000000000bc RSI: ffffffff8a38bc83 RDI: 00000000000005e0 RBP: ffff888052f06000 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffff888042d16c00 R13: ffff888042d16cc8 R14: 0000000000000001 R15: 0000000000000500 FS: 0000000000000000(0000) GS:ffff88809717d000(0063) knlGS:00000000f540db40 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: 00000000f73c6d50 CR3: 000000006eff0000 CR4: 0000000000352ef0 Call Trace: __ip6_rt_update_pmtu+0x555/0xd60 net/ipv6/route.c:2982 ip6_update_pmtu+0x34f/0x3b0 net/ipv6/route.c:3014 icmpv6_err+0x2a2/0x3f0 net/ipv6/icmp.c:82 icmpv6_notify+0x35e/0x820 net/ipv6/icmp.c:1087 icmpv6_rcv+0x10bf/0x1ae0 net/ipv6/icmp.c:1228 ip6_protocol_deliver_rcu+0xf97/0x1500 net/ipv6/ip6_input.c:478 ip6_input_finish+0x1e4/0x4a0 net/ipv6/ip6_input.c:529 NF_HOOK include/linux/netfilter.h:318 [inline] NF_HOOK include/linux/netfilter.h:312 [inline] ip6_input+0x105/0x2f0 net/ipv6/ip6_input.c:540 ip6_mc_input+0x513/0xf50 net/ipv6/ip6_input.c:630 dst_input include/net/dst.h:480 [inline] ip6_rcv_finish net/ipv6/ip6_input.c:119 [inline] NF_HOOK include/linux/netfilter.h:318 [inline] NF_HOOK include/linux/netfilter.h:312 [inline] ipv6_rcv+0x34c/0x3d0 net/ipv6/ip6_input.c:351 __netif_receive_skb_one_core+0x12d/0x1e0 net/core/dev.c:6202 __netif_receive_skb+0x1f/0x120 net/core/dev.c:6315 netif_receive_skb_internal net/core/dev.c:6401 [inline] netif_receive_skb+0x13b/0x7f0 net/core/dev.c:6460 tun_rx_batched.isra.0+0x3f6/0x750 drivers/net/tun.c:1511 tun_get_user+0x1e31/0x3c20 drivers/net/tun.c:1955 tun_chr_write_iter+0xdc/0x200 drivers/net/tun.c:2001 new_sync_write fs/read_write.c:595 [inline] vfs_write+0x6ac/0x1070 fs/read_write.c:688 ksys_write+0x12a/0x250 fs/read_write.c:740 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] do_int80_emulation+0x141/0x700 arch/x86/entry/syscall_32.c:172 asm_int80_emulation+0x1a/0x20 arch/x86/include/asm/idtentry.h:621 RIP: 0023:0xf715616b Code: 57 56 53 8b 44 24 14 f6 00 08 75 23 8b 44 24 18 8b 5c 24 1c 8b 4c 24 20 8b 54 24 24 8b 74 24 28 8b 7c 24 2c 8b 6c 24 30 cd 80 <5b> 5e 5f 5d c3 5b 5e 5f 5d e9 f7 a1 ff ff 66 90 66 90 66 90 90 53 RSP: 002b:00000000f540d44c EFLAGS: 00000246 ORIG_RAX: 0000000000000004 RAX: ffffffffffffffda RBX: 00000000000000c8 RCX: 0000000080000640 RDX: 000000000000007a RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Fixes: dcd1f572954f ("net/ipv6: Remove fib6_idev") Reported-by: syzbot+01f005f9c6387ca6f6dd@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/69f83f22.170a0220.13cc2.0004.GAE@google.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260504064316.3820775-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0dc0316530ca..e3d355d1fbd6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1645,6 +1645,10 @@ static unsigned int fib6_mtu(const struct fib6_result *res) rcu_read_lock(); idev = __in6_dev_get(dev); + if (!idev) { + rcu_read_unlock(); + return 0; + } mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); } -- cgit v1.2.3 From 07f44433355f70fa97d4c44b4c0d2e86adc082fb Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 4 May 2026 14:06:08 +0530 Subject: bnxt_en: Delay for 5 seconds after AER DPC for all chips The FW on all chips is requiring a 5-second delay after Downstream Port Containment (DPC) AER. The previously added 900 msec delay was not long enough in all cases because the chip's CRS (Configuration Request Retry Status) mechanism is not always reliable. Fixes: d5ab32e9b02d ("bnxt_en: Add delay to handle Downstream Port Containment (DPC) AER") Reviewed-by: Kalesh AP Signed-off-by: Michael Chan Signed-off-by: Pavan Chebbi Link: https://patch.msgid.link/20260504083611.1383776-2-pavan.chebbi@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8c55874f44ca..3db951d0c690 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -17360,9 +17360,14 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) netdev_info(bp->dev, "PCI Slot Reset\n"); - if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && - test_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state)) - msleep(900); + if (test_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state)) { + /* After DPC, the chip should return CRS when the vendor ID + * config register is read until it is ready. On all chips, + * this is not happening reliably so add a 5-second delay as a + * workaround. + */ + msleep(5000); + } netdev_lock(netdev); -- cgit v1.2.3 From 54c28fab2fa5afd681c9c4b10f4f6da1efdd397a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 4 May 2026 14:06:09 +0530 Subject: bnxt_en: Set bp->max_tpa according to what the FW supports Fix the logic to set bp->max_tpa no higher than what the FW supports. On P5 chips, some older FW sets max_tpa very low so we override it to prevent performance regressions with the older FW. Fixes: 79632e9ba386 ("bnxt_en: Expand bnxt_tpa_info struct to support 57500 chips.") Reviewed-by: Kalesh AP Reviewed-by: Colin Winegarden Reviewed-by: Rukhsana Ansari Signed-off-by: Michael Chan Signed-off-by: Pavan Chebbi Link: https://patch.msgid.link/20260504083611.1383776-3-pavan.chebbi@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3db951d0c690..008c34cff7b4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3825,7 +3825,10 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp) if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { if (!bp->max_tpa_v2) return 0; - bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5); + bp->max_tpa = min_t(u16, bp->max_tpa_v2, MAX_TPA_P5); + /* Older P5 FW sets max_tpa_v2 low by mistake except NPAR */ + if (bp->max_tpa <= 32 && BNXT_CHIP_P5(bp) && !BNXT_NPAR(bp)) + bp->max_tpa = MAX_TPA_P5; } for (i = 0; i < bp->rx_nr_rings; i++) { -- cgit v1.2.3 From 16517bc98a56004274472cc9949194cb4d2ad0b7 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 4 May 2026 14:06:10 +0530 Subject: bnxt_en: Check return value of bnxt_hwrm_vnic_cfg When the bnxt RDMA driver is loaded, it calls bnxt_register_dev(). As part of this, driver sends HWRM_VNIC_CFG firmware command to configure the VNIC to operate in dual VNIC mode. Currently the driver ignores the result of this firmware command. The RDMA driver must know the result since it affects its functioning. Check return value of call to bnxt_hwrm_vnic_cfg() in bnxt_register_dev() and return failure on error. Fixes: a588e4580a7e ("bnxt_en: Add interface to support RDMA driver.") Reviewed-by: Michael Chan Signed-off-by: Kalesh AP Signed-off-by: Pavan Chebbi Link: https://patch.msgid.link/20260504083611.1383776-4-pavan.chebbi@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 052bf69cfa4c..5c751933da6a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -175,8 +175,14 @@ int bnxt_register_dev(struct bnxt_en_dev *edev, ulp->handle = handle; rcu_assign_pointer(ulp->ulp_ops, ulp_ops); - if (test_bit(BNXT_STATE_OPEN, &bp->state)) - bnxt_hwrm_vnic_cfg(bp, &bp->vnic_info[BNXT_VNIC_DEFAULT]); + if (test_bit(BNXT_STATE_OPEN, &bp->state)) { + rc = bnxt_hwrm_vnic_cfg(bp, &bp->vnic_info[BNXT_VNIC_DEFAULT]); + if (rc) { + netdev_err(dev, "Failed to configure dual VNIC mode\n"); + RCU_INIT_POINTER(ulp->ulp_ops, NULL); + goto exit; + } + } edev->ulp_tbl->msix_requested = bnxt_get_ulp_msix_num(bp); -- cgit v1.2.3 From bd279e104e5f5400307d56116a36756b35ab345a Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 4 May 2026 14:06:11 +0530 Subject: bnxt_en: Use absolute target ns from ptp_clock_request There is no need to calculate the target PHC cycles required to make phase adjustment on the PPS OUT signal. This is because the application supplies absolute n_sec value in the future and is already the actual desired target value. Remove the unnecessary code. Fixes: 9e518f25802c ("bnxt_en: 1PPS functions to configure TSIO pins") Reviewed-by: Kalesh AP Cc: Richard Cochran Signed-off-by: Pavan Chebbi Reviewed-by: Vadim Fedorenko Tested-by: Vadim Fedorenko Link: https://patch.msgid.link/20260504083611.1383776-5-pavan.chebbi@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 29 +++++---------------------- 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 53f336db4fcc..5d41dc1bc782 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -419,31 +419,13 @@ void bnxt_ptp_reapply_pps(struct bnxt *bp) } } -static int bnxt_get_target_cycles(struct bnxt_ptp_cfg *ptp, u64 target_ns, - u64 *cycles_delta) -{ - u64 cycles_now; - u64 nsec_now, nsec_delta; - int rc; - - rc = bnxt_refclk_read(ptp->bp, NULL, &cycles_now); - if (rc) - return rc; - - nsec_now = bnxt_timecounter_cyc2time(ptp, cycles_now); - - nsec_delta = target_ns - nsec_now; - *cycles_delta = div64_u64(nsec_delta << ptp->cc.shift, ptp->cc.mult); - return 0; -} - static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp, struct ptp_clock_request *rq) { struct hwrm_func_ptp_cfg_input *req; struct bnxt *bp = ptp->bp; struct timespec64 ts; - u64 target_ns, delta; + u64 target_ns; u16 enables; int rc; @@ -451,10 +433,6 @@ static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp, ts.tv_nsec = rq->perout.start.nsec; target_ns = timespec64_to_ns(&ts); - rc = bnxt_get_target_cycles(ptp, target_ns, &delta); - if (rc) - return rc; - rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG); if (rc) return rc; @@ -468,7 +446,10 @@ static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp, req->ptp_freq_adj_dll_phase = 0; req->ptp_freq_adj_ext_period = cpu_to_le32(NSEC_PER_SEC); req->ptp_freq_adj_ext_up = 0; - req->ptp_freq_adj_ext_phase_lower = cpu_to_le32(delta); + req->ptp_freq_adj_ext_phase_lower = + cpu_to_le32(lower_32_bits(target_ns)); + req->ptp_freq_adj_ext_phase_upper = + cpu_to_le32(upper_32_bits(target_ns)); return hwrm_req_send(bp, req); } -- cgit v1.2.3 From f83e07b29246f468bc7c99f98ca1897843fa8167 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 May 2026 16:38:42 +0000 Subject: net/sched: sch_fq_codel: annotate data-races from fq_codel_dump_class_stats() fq_codel_dump_class_stats() acquires qdisc spinlock only when requested to follow flow->head chain. As we did in sch_cake recently, add the missing READ_ONCE()/WRITE_ONCE() annotations. Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump") Signed-off-by: Eric Dumazet Reviewed-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260504163842.1162001-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_fq_codel.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 0664b2f2d6f2..24db54684e8a 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -117,7 +117,7 @@ static inline struct sk_buff *dequeue_head(struct fq_codel_flow *flow) { struct sk_buff *skb = flow->head; - flow->head = skb->next; + WRITE_ONCE(flow->head, skb->next); skb_mark_not_on_list(skb); return skb; } @@ -127,7 +127,7 @@ static inline void flow_queue_add(struct fq_codel_flow *flow, struct sk_buff *skb) { if (flow->head == NULL) - flow->head = skb; + WRITE_ONCE(flow->head, skb); else flow->tail->next = skb; flow->tail = skb; @@ -173,8 +173,8 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, } while (++i < max_packets && len < threshold); /* Tell codel to increase its signal strength also */ - flow->cvars.count += i; - q->backlogs[idx] -= len; + WRITE_ONCE(flow->cvars.count, flow->cvars.count + i); + WRITE_ONCE(q->backlogs[idx], q->backlogs[idx] - len); q->memory_usage -= mem; sch->qstats.drops += i; sch->qstats.backlog -= len; @@ -204,13 +204,13 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, codel_set_enqueue_time(skb); flow = &q->flows[idx]; flow_queue_add(flow, skb); - q->backlogs[idx] += qdisc_pkt_len(skb); + WRITE_ONCE(q->backlogs[idx], q->backlogs[idx] + qdisc_pkt_len(skb)); qdisc_qstats_backlog_inc(sch, skb); if (list_empty(&flow->flowchain)) { list_add_tail(&flow->flowchain, &q->new_flows); q->new_flow_count++; - flow->deficit = q->quantum; + WRITE_ONCE(flow->deficit, q->quantum); } get_codel_cb(skb)->mem_usage = skb->truesize; q->memory_usage += get_codel_cb(skb)->mem_usage; @@ -263,7 +263,8 @@ static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) flow = container_of(vars, struct fq_codel_flow, cvars); if (flow->head) { skb = dequeue_head(flow); - q->backlogs[flow - q->flows] -= qdisc_pkt_len(skb); + WRITE_ONCE(q->backlogs[flow - q->flows], + q->backlogs[flow - q->flows] - qdisc_pkt_len(skb)); q->memory_usage -= get_codel_cb(skb)->mem_usage; sch->q.qlen--; sch->qstats.backlog -= qdisc_pkt_len(skb); @@ -296,7 +297,7 @@ begin: flow = list_first_entry(head, struct fq_codel_flow, flowchain); if (flow->deficit <= 0) { - flow->deficit += q->quantum; + WRITE_ONCE(flow->deficit, flow->deficit + q->quantum); list_move_tail(&flow->flowchain, &q->old_flows); goto begin; } @@ -314,7 +315,7 @@ begin: goto begin; } qdisc_bstats_update(sch, skb); - flow->deficit -= qdisc_pkt_len(skb); + WRITE_ONCE(flow->deficit, flow->deficit - qdisc_pkt_len(skb)); if (q->cstats.drop_count) { qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, @@ -328,7 +329,7 @@ begin: static void fq_codel_flow_purge(struct fq_codel_flow *flow) { rtnl_kfree_skbs(flow->head, flow->tail); - flow->head = NULL; + WRITE_ONCE(flow->head, NULL); } static void fq_codel_reset(struct Qdisc *sch) @@ -656,21 +657,21 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, memset(&xstats, 0, sizeof(xstats)); xstats.type = TCA_FQ_CODEL_XSTATS_CLASS; - xstats.class_stats.deficit = flow->deficit; + xstats.class_stats.deficit = READ_ONCE(flow->deficit); xstats.class_stats.ldelay = - codel_time_to_us(flow->cvars.ldelay); - xstats.class_stats.count = flow->cvars.count; - xstats.class_stats.lastcount = flow->cvars.lastcount; - xstats.class_stats.dropping = flow->cvars.dropping; - if (flow->cvars.dropping) { - codel_tdiff_t delta = flow->cvars.drop_next - + codel_time_to_us(READ_ONCE(flow->cvars.ldelay)); + xstats.class_stats.count = READ_ONCE(flow->cvars.count); + xstats.class_stats.lastcount = READ_ONCE(flow->cvars.lastcount); + xstats.class_stats.dropping = READ_ONCE(flow->cvars.dropping); + if (xstats.class_stats.dropping) { + codel_tdiff_t delta = READ_ONCE(flow->cvars.drop_next) - codel_get_time(); xstats.class_stats.drop_next = (delta >= 0) ? codel_time_to_us(delta) : -codel_time_to_us(-delta); } - if (flow->head) { + if (READ_ONCE(flow->head)) { sch_tree_lock(sch); skb = flow->head; while (skb) { @@ -679,7 +680,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, } sch_tree_unlock(sch); } - qs.backlog = q->backlogs[idx]; + qs.backlog = READ_ONCE(q->backlogs[idx]); qs.drops = 0; } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) -- cgit v1.2.3 From 54d54f33813d7911555226b4220737177a2ba8d6 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sat, 14 Mar 2026 18:54:30 +0530 Subject: powerpc/pseries/htmdump: Free the global buffers in htmdump module exit htmdump modules uses global memory buffers to capture details like capabilities, status of specified HTM, read the trace buffer. These are initialized during module init and hence needs to be freed in module exit. Patch adds freeing of the memory in module exit. The change also includes minor clean up for the variable name. The read call back for the debugfs interface file saves filp->private_data to local variable name which is same as global variable name for the memory buffers. Rename these local variable names. Signed-off-by: Athira Rajeev Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260314132432.25581-1-atrajeev@linux.ibm.com --- arch/powerpc/platforms/pseries/htmdump.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/platforms/pseries/htmdump.c b/arch/powerpc/platforms/pseries/htmdump.c index 742ec52c9d4d..93f0cc2dc7fb 100644 --- a/arch/powerpc/platforms/pseries/htmdump.c +++ b/arch/powerpc/platforms/pseries/htmdump.c @@ -86,7 +86,7 @@ static ssize_t htm_return_check(long rc) static ssize_t htmdump_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { - void *htm_buf = filp->private_data; + void *htm_buf_data = filp->private_data; unsigned long page, read_size, available; loff_t offset; long rc, ret; @@ -100,7 +100,7 @@ static ssize_t htmdump_read(struct file *filp, char __user *ubuf, * - last three values are address, size and offset */ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, - htmtype, H_HTM_OP_DUMP_DATA, virt_to_phys(htm_buf), + htmtype, H_HTM_OP_DUMP_DATA, virt_to_phys(htm_buf_data), PAGE_SIZE, page); ret = htm_return_check(rc); @@ -112,7 +112,7 @@ static ssize_t htmdump_read(struct file *filp, char __user *ubuf, available = PAGE_SIZE; read_size = min(count, available); *ppos += read_size; - return simple_read_from_buffer(ubuf, count, &offset, htm_buf, available); + return simple_read_from_buffer(ubuf, count, &offset, htm_buf_data, available); } static const struct file_operations htmdump_fops = { @@ -226,7 +226,7 @@ static int htmstart_get(void *data, u64 *val) static ssize_t htmstatus_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { - void *htm_status_buf = filp->private_data; + void *htm_status_data = filp->private_data; long rc, ret; u64 *num_entries; u64 to_copy; @@ -238,7 +238,7 @@ static ssize_t htmstatus_read(struct file *filp, char __user *ubuf, * - last three values as addr, size and offset */ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, - htmtype, H_HTM_OP_STATUS, virt_to_phys(htm_status_buf), + htmtype, H_HTM_OP_STATUS, virt_to_phys(htm_status_data), PAGE_SIZE, 0); ret = htm_return_check(rc); @@ -255,13 +255,13 @@ static ssize_t htmstatus_read(struct file *filp, char __user *ubuf, * So total count to copy is: * 32 bytes (for first 7 fields) + (number of HTM entries * entry size) */ - num_entries = htm_status_buf + 0x10; + num_entries = htm_status_data + 0x10; if (htmtype == 0x2) htmstatus_flag = 0x8; else htmstatus_flag = 0x6; to_copy = 32 + (be64_to_cpu(*num_entries) * htmstatus_flag); - return simple_read_from_buffer(ubuf, count, ppos, htm_status_buf, to_copy); + return simple_read_from_buffer(ubuf, count, ppos, htm_status_data, to_copy); } static const struct file_operations htmstatus_fops = { @@ -273,7 +273,7 @@ static const struct file_operations htmstatus_fops = { static ssize_t htminfo_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { - void *htm_info_buf = filp->private_data; + void *htm_info_data = filp->private_data; long rc, ret; u64 *num_entries; u64 to_copy; @@ -284,7 +284,7 @@ static ssize_t htminfo_read(struct file *filp, char __user *ubuf, * - last three values as addr, size and offset */ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, - htmtype, H_HTM_OP_DUMP_SYSPROC_CONF, virt_to_phys(htm_info_buf), + htmtype, H_HTM_OP_DUMP_SYSPROC_CONF, virt_to_phys(htm_info_data), PAGE_SIZE, 0); ret = htm_return_check(rc); @@ -301,15 +301,15 @@ static ssize_t htminfo_read(struct file *filp, char __user *ubuf, * So total count to copy is: * 32 bytes (for first 5 fields) + (number of HTM entries * entry size) */ - num_entries = htm_info_buf + 0x10; + num_entries = htm_info_data + 0x10; to_copy = 32 + (be64_to_cpu(*num_entries) * 16); - return simple_read_from_buffer(ubuf, count, ppos, htm_info_buf, to_copy); + return simple_read_from_buffer(ubuf, count, ppos, htm_info_data, to_copy); } static ssize_t htmcaps_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { - void *htm_caps_buf = filp->private_data; + void *htm_caps_data = filp->private_data; long rc, ret; /* @@ -319,7 +319,7 @@ static ssize_t htmcaps_read(struct file *filp, char __user *ubuf, * and zero */ rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, - htmtype, H_HTM_OP_CAPABILITIES, virt_to_phys(htm_caps_buf), + htmtype, H_HTM_OP_CAPABILITIES, virt_to_phys(htm_caps_data), 0x80, 0); ret = htm_return_check(rc); @@ -328,7 +328,7 @@ static ssize_t htmcaps_read(struct file *filp, char __user *ubuf, return ret; } - return simple_read_from_buffer(ubuf, count, ppos, htm_caps_buf, 0x80); + return simple_read_from_buffer(ubuf, count, ppos, htm_caps_data, 0x80); } static const struct file_operations htminfo_fops = { @@ -482,6 +482,9 @@ static void __exit htmdump_exit(void) { debugfs_remove_recursive(htmdump_debugfs_dir); kfree(htm_buf); + kfree(htm_status_buf); + kfree(htm_info_buf); + kfree(htm_caps_buf); } module_init(htmdump_init); -- cgit v1.2.3 From 4ec6ade73f1626a74d46e61ff247cf2b1b96446b Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sat, 14 Mar 2026 18:54:31 +0530 Subject: powerpc/pseries/htmdump: Fix the offset value used in processor configuration dump H_HTM call is invoked using three parameters specifying the address of the buffer, size of the buffer and offset where to read from. offset used was always zero. "offset" is value from output buffer header that points to next entry to dump. zero is the first entry to dump. next entry is read from the output bufferbyte offset 0x8. Update htminfo_read() function to use right offset. Return when offset points to -1 Fixes: dea7384e14e7 ("powerpc/pseries/htmdump: Add htm info support to htmdump module") Signed-off-by: Athira Rajeev Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260314132432.25581-2-atrajeev@linux.ibm.com --- arch/powerpc/platforms/pseries/htmdump.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/htmdump.c b/arch/powerpc/platforms/pseries/htmdump.c index 93f0cc2dc7fb..34978a794eba 100644 --- a/arch/powerpc/platforms/pseries/htmdump.c +++ b/arch/powerpc/platforms/pseries/htmdump.c @@ -277,15 +277,26 @@ static ssize_t htminfo_read(struct file *filp, char __user *ubuf, long rc, ret; u64 *num_entries; u64 to_copy; + loff_t offset = 0; + u64 info_offset = 0; /* * Invoke H_HTM call with: * - operation as htm status (H_HTM_OP_STATUS) * - last three values as addr, size and offset + * "offset" is value from output buffer header + * that points to next entry to dump. 0 is the first + * entry to dump. next entry is read from the output + * bufferbyte offset 0x8. */ + if (*ppos) { + info_offset = *(u64 *)(htm_info_data + 0x8); + if (info_offset == -1) + return 0; + } rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, htmtype, H_HTM_OP_DUMP_SYSPROC_CONF, virt_to_phys(htm_info_data), - PAGE_SIZE, 0); + PAGE_SIZE, be64_to_cpu(info_offset)); ret = htm_return_check(rc); if (ret <= 0) { @@ -303,7 +314,9 @@ static ssize_t htminfo_read(struct file *filp, char __user *ubuf, */ num_entries = htm_info_data + 0x10; to_copy = 32 + (be64_to_cpu(*num_entries) * 16); - return simple_read_from_buffer(ubuf, count, ppos, htm_info_data, to_copy); + + *ppos += to_copy; + return simple_read_from_buffer(ubuf, count, &offset, htm_info_data, to_copy); } static ssize_t htmcaps_read(struct file *filp, char __user *ubuf, -- cgit v1.2.3 From 0031424cbc0a6eafc56a8f9201a4a69d3649981e Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sat, 14 Mar 2026 18:54:32 +0530 Subject: powerpc/pseries/htmdump: Fix the offset value used in htm status dump H_HTM call is invoked using three parameters specifying the address of the buffer, size of the buffer and offset where to read from. offset used was always zero. "offset" is value from output buffer header that points to next entry to dump. zero is the first entry to dump. next entry is read from the output bufferbyte offset 0x8. Update htmstatus_read() function to use right offset. Return when offset points to -1 Fixes: 627cf584f4c3 ("powerpc/pseries/htmdump: Add htm status support to htmdump module") Signed-off-by: Athira Rajeev Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260314132432.25581-3-atrajeev@linux.ibm.com --- arch/powerpc/platforms/pseries/htmdump.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/htmdump.c b/arch/powerpc/platforms/pseries/htmdump.c index 34978a794eba..76444c6c5cc1 100644 --- a/arch/powerpc/platforms/pseries/htmdump.c +++ b/arch/powerpc/platforms/pseries/htmdump.c @@ -231,15 +231,26 @@ static ssize_t htmstatus_read(struct file *filp, char __user *ubuf, u64 *num_entries; u64 to_copy; int htmstatus_flag; + loff_t offset = 0; + u64 status_offset = 0; /* * Invoke H_HTM call with: * - operation as htm status (H_HTM_OP_STATUS) - * - last three values as addr, size and offset + * - last three values as addr, size and offset. + * "offset" is value from output buffer header + * that points to next entry to dump. 0 is the first + * entry to dump. next entry is read from the output + * bufferbyte offset 0x8. */ + if (*ppos) { + status_offset = *(u64 *)(htm_status_data + 0x8); + if (status_offset == -1) + return 0; + } rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, htmtype, H_HTM_OP_STATUS, virt_to_phys(htm_status_data), - PAGE_SIZE, 0); + PAGE_SIZE, be64_to_cpu(status_offset)); ret = htm_return_check(rc); if (ret <= 0) { @@ -261,7 +272,9 @@ static ssize_t htmstatus_read(struct file *filp, char __user *ubuf, else htmstatus_flag = 0x6; to_copy = 32 + (be64_to_cpu(*num_entries) * htmstatus_flag); - return simple_read_from_buffer(ubuf, count, ppos, htm_status_data, to_copy); + *ppos += to_copy; + + return simple_read_from_buffer(ubuf, count, &offset, htm_status_data, to_copy); } static const struct file_operations htmstatus_fops = { -- cgit v1.2.3 From 0342545a29bc2edfbe132c68c68b51c92a12444c Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Sat, 14 Mar 2026 18:59:53 +0530 Subject: powerpc/pseries/htmdump: Add memory configuration dump support to htmdump module H_HTM (Hardware Trace Macro) hypervisor call has capability to capture SystemMemory Configuration. This information helps to understand the address mapping for the partitions in the system. Support dumping system memory configuration from Hardware Trace Macro (HTM) function via debugfs interface. Under debugfs folder "/sys/kernel/debug/powerpc/htmdump", add file "htmsystem_mem". The interface allows only read of this file which will present the content of HTM buffer from the hcall. The 16th offset of HTM buffer has value for the number of entries for array of processors. Use this information to copy data to the debugfs file Signed-off-by: Athira Rajeev Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260314132953.27269-1-atrajeev@linux.ibm.com --- arch/powerpc/platforms/pseries/htmdump.c | 70 ++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/arch/powerpc/platforms/pseries/htmdump.c b/arch/powerpc/platforms/pseries/htmdump.c index 76444c6c5cc1..489a80e87082 100644 --- a/arch/powerpc/platforms/pseries/htmdump.c +++ b/arch/powerpc/platforms/pseries/htmdump.c @@ -16,6 +16,7 @@ static void *htm_buf; static void *htm_status_buf; static void *htm_info_buf; static void *htm_caps_buf; +static void *htm_mem_buf; static u32 nodeindex; static u32 nodalchipindex; static u32 coreindexonchip; @@ -115,12 +116,72 @@ static ssize_t htmdump_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, count, &offset, htm_buf_data, available); } +static ssize_t htmsystem_mem_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + void *htm_mem_data = filp->private_data; + long rc, ret; + u64 *num_entries; + u64 to_copy = 0; + loff_t offset = 0; + u64 mem_offset = 0; + + /* + * Invoke H_HTM call with: + * - operation as htm status (H_HTM_OP_STATUS) + * - last three values as addr, size and offset. "offset" + * is value from output buffer header that points to next + * entry to dump. 0 is the first entry to dump. next entry + * is read from the output bufferbyte offset 0x8. + * + * When first time hcall is invoked, mem_offset should be + * zero because zero is the first entry. + * In the next hcall, offset of next entry to read from is + * picked from output buffer header itself. So don't fill + * mem_offset for first read. + * + * If there is no further data to read in next iteration, + * offset value from output buffer header will point to -1. + */ + if (*ppos) { + mem_offset = *(u64 *)(htm_mem_data + 0x8); + if (mem_offset == -1) + return 0; + } + rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, + htmtype, H_HTM_OP_DUMP_SYSMEM_CONF, virt_to_phys(htm_mem_data), + PAGE_SIZE, be64_to_cpu(mem_offset)); + ret = htm_return_check(rc); + if (ret <= 0) { + pr_debug("H_HTM hcall returned for op: H_HTM_OP_DUMP_SYSMEM_CONF with hcall returning %ld\n", ret); + return ret; + } + + /* + * HTM system mem buffer, start of buffer + 0x10 gives the + * number of HTM entries in the buffer. + * So total count to copy is: + * 32 bytes (for first 5 fields) + (number of HTM entries * entry size) + */ + num_entries = htm_mem_data + 0x10; + to_copy = 32 + (be64_to_cpu(*num_entries) * 32); + + *ppos += to_copy; + return simple_read_from_buffer(ubuf, count, &offset, htm_mem_data, to_copy); +} + static const struct file_operations htmdump_fops = { .llseek = NULL, .read = htmdump_read, .open = simple_open, }; +static const struct file_operations htmsystem_mem_fops = { + .llseek = NULL, + .read = htmsystem_mem_read, + .open = simple_open, +}; + static int htmconfigure_set(void *data, u64 val) { long rc, ret; @@ -483,9 +544,17 @@ static int htmdump_init_debugfs(void) return -ENOMEM; } + /* Memory to present HTM system memory configuration */ + htm_mem_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!htm_mem_buf) { + pr_err("Failed to allocate htm mem buf\n"); + return -ENOMEM; + } + debugfs_create_file("htmstatus", 0400, htmdump_debugfs_dir, htm_status_buf, &htmstatus_fops); debugfs_create_file("htminfo", 0400, htmdump_debugfs_dir, htm_info_buf, &htminfo_fops); debugfs_create_file("htmcaps", 0400, htmdump_debugfs_dir, htm_caps_buf, &htmcaps_fops); + debugfs_create_file("htmsystem_mem", 0400, htmdump_debugfs_dir, htm_mem_buf, &htmsystem_mem_fops); return 0; } @@ -511,6 +580,7 @@ static void __exit htmdump_exit(void) kfree(htm_status_buf); kfree(htm_info_buf); kfree(htm_caps_buf); + kfree(htm_mem_buf); } module_init(htmdump_init); -- cgit v1.2.3 From 7a4f0846ee6cc8cf44ae0046ed42e3259d1dd45b Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 1 May 2026 09:41:40 +0530 Subject: pseries/papr-hvpipe: Fix race with interrupt handler While executing ->ioctl handler or ->release handler, if an interrupt fires on the same cpu, then we can enter into a deadlock. This patch fixes both these handlers to take spin_lock_irq{save|restore} versions of the lock to prevent this deadlock. Cc: stable@vger.kernel.org Fixes: 814ef095f12c9 ("powerpc/pseries: Add papr-hvpipe char driver for HVPIPE interfaces") Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/e4ed435c44fc191f2eb23c7907ba6f72f193e6aa.1777606826.git.ritesh.list@gmail.com --- arch/powerpc/platforms/pseries/papr-hvpipe.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index 14ae480d060a..c41d45e1986d 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c @@ -444,13 +444,14 @@ static int papr_hvpipe_handle_release(struct inode *inode, struct file *file) { struct hvpipe_source_info *src_info; + unsigned long flags; /* * Hold the lock, remove source from src_list, reset the * hvpipe status and release the lock to prevent any race * with message event IRQ. */ - spin_lock(&hvpipe_src_list_lock); + spin_lock_irqsave(&hvpipe_src_list_lock, flags); src_info = file->private_data; list_del(&src_info->list); file->private_data = NULL; @@ -461,10 +462,10 @@ static int papr_hvpipe_handle_release(struct inode *inode, */ if (src_info->hvpipe_status & HVPIPE_MSG_AVAILABLE) { src_info->hvpipe_status = 0; - spin_unlock(&hvpipe_src_list_lock); + spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); hvpipe_rtas_recv_msg(NULL, 0); } else - spin_unlock(&hvpipe_src_list_lock); + spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); kfree(src_info); return 0; @@ -480,20 +481,21 @@ static const struct file_operations papr_hvpipe_handle_ops = { static int papr_hvpipe_dev_create_handle(u32 srcID) { struct hvpipe_source_info *src_info __free(kfree) = NULL; + unsigned long flags; - spin_lock(&hvpipe_src_list_lock); + spin_lock_irqsave(&hvpipe_src_list_lock, flags); /* * Do not allow more than one process communicates with * each source. */ src_info = hvpipe_find_source(srcID); if (src_info) { - spin_unlock(&hvpipe_src_list_lock); + spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); pr_err("pid(%d) is already using the source(%d)\n", src_info->tsk->pid, srcID); return -EALREADY; } - spin_unlock(&hvpipe_src_list_lock); + spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); src_info = kzalloc_obj(*src_info, GFP_KERNEL_ACCOUNT); if (!src_info) @@ -510,18 +512,18 @@ static int papr_hvpipe_dev_create_handle(u32 srcID) return fdf.err; retain_and_null_ptr(src_info); - spin_lock(&hvpipe_src_list_lock); + spin_lock_irqsave(&hvpipe_src_list_lock, flags); /* * If two processes are executing ioctl() for the same * source ID concurrently, prevent the second process to * acquire FD. */ if (hvpipe_find_source(srcID)) { - spin_unlock(&hvpipe_src_list_lock); + spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); return -EALREADY; } list_add(&src_info->list, &hvpipe_src_list); - spin_unlock(&hvpipe_src_list_lock); + spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); return fd_publish(fdf); } -- cgit v1.2.3 From cefeed44296261173a806bef988b26bc565da4be Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 1 May 2026 09:41:41 +0530 Subject: pseries/papr-hvpipe: Prevent kernel stack memory leak to userspace The hdr variable is allocated on the stack and only hdr.version and hdr.flags are initialized explicitly. Because the struct papr_hvpipe_hdr contains reserved padding bytes (reserved[3] and reserved2[40]), these could leak the uninitialized bytes to userspace after copy_to_user(). This patch fixes that by initializing the whole struct to 0. Cc: stable@vger.kernel.org Fixes: cebdb522fd3ed ("powerpc/pseries: Receive payload with ibm,receive-hvpipe-msg RTAS") Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/7bfe03b65a282c856ed8182d1871bb973c0b78f2.1777606826.git.ritesh.list@gmail.com --- arch/powerpc/platforms/pseries/papr-hvpipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index c41d45e1986d..3392874ebdf6 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c @@ -327,7 +327,7 @@ static ssize_t papr_hvpipe_handle_read(struct file *file, { struct hvpipe_source_info *src_info = file->private_data; - struct papr_hvpipe_hdr hdr; + struct papr_hvpipe_hdr hdr = {}; long ret; /* -- cgit v1.2.3 From 1b9f7aafa44f5ce852c00509104d10fd9eb0f402 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 1 May 2026 09:41:42 +0530 Subject: pseries/papr-hvpipe: Fix null ptr deref in papr_hvpipe_dev_create_handle() commit 6d3789d347a7 ("papr-hvpipe: convert papr_hvpipe_dev_create_handle() to FD_PREPARE()"), changed the create handle to FD_PREPARE(), but it caused kernel null-ptr-deref because after call to retain_and_null_ptr(src_info), src_info is re-used for adding it to the global list. Getting the following kernel panic in papr_hvpipe_dev_create_handle() when trying to add src_info to the list. Kernel attempted to write user page (0) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on write at 0x00000000 Faulting instruction address: 0xc0000000001b44a0 Oops: Kernel access of bad area, sig: 11 [#1] ... Call Trace: papr_hvpipe_dev_ioctl+0x1f4/0x48c (unreliable) sys_ioctl+0x528/0x1064 system_call_exception+0x128/0x360 system_call_vectored_common+0x15c/0x2ec Now, the error handling with FD_PREPARE's file cleanup and __free(kfree) auto cleanup is getting too convoluted. This is mainly because we need to ensure only 1 user get the srcID handle. To simplify this, we allocate prepare the src_info in the beginning and add it to the global list under a spinlock after checking that no duplicates exist. This simplify the error handling where if the FD_ADD fails, we can simply remove the src_info from the list and consume any pending msg in hvpipe to be cleared, after src_info became visible in the global list. Cc: stable@vger.kernel.org Fixes: 6d3789d347a7 ("papr-hvpipe: convert papr_hvpipe_dev_create_handle() to FD_PREPARE()") Reported-by: Haren Myneni Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/31ad94bc89d44156ee700c5bd006cb47a748e3cb.1777606826.git.ritesh.list@gmail.com --- arch/powerpc/platforms/pseries/papr-hvpipe.c | 57 +++++++++++++++------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index 3392874ebdf6..402781299497 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c @@ -480,23 +480,10 @@ static const struct file_operations papr_hvpipe_handle_ops = { static int papr_hvpipe_dev_create_handle(u32 srcID) { - struct hvpipe_source_info *src_info __free(kfree) = NULL; + struct hvpipe_source_info *src_info; + int fd; unsigned long flags; - spin_lock_irqsave(&hvpipe_src_list_lock, flags); - /* - * Do not allow more than one process communicates with - * each source. - */ - src_info = hvpipe_find_source(srcID); - if (src_info) { - spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); - pr_err("pid(%d) is already using the source(%d)\n", - src_info->tsk->pid, srcID); - return -EALREADY; - } - spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); - src_info = kzalloc_obj(*src_info, GFP_KERNEL_ACCOUNT); if (!src_info) return -ENOMEM; @@ -505,26 +492,42 @@ static int papr_hvpipe_dev_create_handle(u32 srcID) src_info->tsk = current; init_waitqueue_head(&src_info->recv_wqh); - FD_PREPARE(fdf, O_RDONLY | O_CLOEXEC, - anon_inode_getfile("[papr-hvpipe]", &papr_hvpipe_handle_ops, - (void *)src_info, O_RDWR)); - if (fdf.err) - return fdf.err; - - retain_and_null_ptr(src_info); - spin_lock_irqsave(&hvpipe_src_list_lock, flags); /* - * If two processes are executing ioctl() for the same - * source ID concurrently, prevent the second process to - * acquire FD. + * Do not allow more than one process communicates with + * each source. */ + spin_lock_irqsave(&hvpipe_src_list_lock, flags); if (hvpipe_find_source(srcID)) { spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); + pr_err("pid(%d) could not get the source(%d)\n", + src_info->tsk->pid, srcID); + kfree(src_info); return -EALREADY; } list_add(&src_info->list, &hvpipe_src_list); spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); - return fd_publish(fdf); + + fd = FD_ADD(O_RDONLY | O_CLOEXEC, + anon_inode_getfile("[papr-hvpipe]", &papr_hvpipe_handle_ops, + (void *)src_info, O_RDWR)); + if (fd < 0) { + spin_lock_irqsave(&hvpipe_src_list_lock, flags); + list_del(&src_info->list); + spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); + /* + * if we fail to add FD, that means no userspace program is + * polling. In that case if there is a msg pending because the + * interrupt was fired after the src_info was added to the + * global list, then let's consume it here, to unblock the + * hvpipe + */ + if (src_info->hvpipe_status & HVPIPE_MSG_AVAILABLE) + hvpipe_rtas_recv_msg(NULL, 0); + kfree(src_info); + return fd; + } + + return fd; } /* -- cgit v1.2.3 From 713e468cdbc2277db6ce949c32c1acbd83501733 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 1 May 2026 09:41:43 +0530 Subject: pseries/papr-hvpipe: Fix & simplify error handling in papr_hvpipe_init() Remove such 3 levels of nesting patterns to check success return values from function calls. ret = enable_hvpipe_IRQ() if (!ret) ret = set_hvpipe_sys_param(1) if (!ret) ret = misc_register() Instead just bail out to "out*:" labels, in case of any error. This simplifies the init flow. While at it let's also fix the following error handling logic: We have already enabled interrupt sources and enabled hvpipe to received interrupts, if misc_register() fails, we will destroy the workqueue, but the HMC might send us a msg via hvpipe which will call, queue work on the workqueue which might be destroyed. So instead, let's reverse the order of enabling set_hvpipe_sys_param(1) and in case of an error let's remove the misc dev by calling misc_deregister(). Cc: stable@vger.kernel.org Fixes: 39a08a4f94980 ("powerpc/pseries: Enable hvpipe with ibm,set-system-parameter RTAS") Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/f2141eafb80e7780395e03aa9a22e8a37be80513.1777606826.git.ritesh.list@gmail.com --- arch/powerpc/platforms/pseries/papr-hvpipe.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index 402781299497..800649f309a5 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c @@ -780,23 +780,29 @@ static int __init papr_hvpipe_init(void) } ret = enable_hvpipe_IRQ(); - if (!ret) { - ret = set_hvpipe_sys_param(1); - if (!ret) - ret = misc_register(&papr_hvpipe_dev); - } + if (ret) + goto out_wq; - if (!ret) { - pr_info("hvpipe feature is enabled\n"); - hvpipe_feature = true; - return 0; - } + ret = misc_register(&papr_hvpipe_dev); + if (ret) + goto out_wq; - pr_err("hvpipe feature is not enabled %d\n", ret); + ret = set_hvpipe_sys_param(1); + if (ret) + goto out_misc; + + pr_info("hvpipe feature is enabled\n"); + hvpipe_feature = true; + return 0; + +out_misc: + misc_deregister(&papr_hvpipe_dev); +out_wq: destroy_workqueue(papr_hvpipe_wq); out: kfree(papr_hvpipe_work); papr_hvpipe_work = NULL; + pr_err("hvpipe feature is not enabled %d\n", ret); return ret; } machine_device_initcall(pseries, papr_hvpipe_init); -- cgit v1.2.3 From d48654bd8b1a75f662e224d257db54de475120dc Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 1 May 2026 09:41:44 +0530 Subject: pseries/papr-hvpipe: Fix the usage of copy_to_user() copy_to_user() return bytes_not_copied to the user buffer. If there was an error writing bytes into the user buffer, i.e. if copy_to_user returns a non-zero value, then we should simply return -EFAULT from the ->read() call. Otherwise, in the non-patched version, we may end up mixing "bytes_not_copied + bytes_copied (HVPIPE_HDR_LEN)" as the return value to the user in ->read() call Also let's make sure we clear the hvpipe_status flag, if we have consumed the hvpipe msg by making the rtas call. ret = -EFAULT means copy_to_user has failed but that still means that the msg was read from the hvpipe, hence for both cases, success & -EFAULT, we should clear the HVPIPE_MSG_AVAILABLE flag in hvpipe_status. Cc: stable@vger.kernel.org Fixes: cebdb522fd3edd1 ("powerpc/pseries: Receive payload with ibm,receive-hvpipe-msg RTAS") Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/8fda3212a1ad48879c174e92f67472d9b9f1c3b7.1777606826.git.ritesh.list@gmail.com --- arch/powerpc/platforms/pseries/papr-hvpipe.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index 800649f309a5..c007560d2d8c 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c @@ -206,10 +206,11 @@ static int hvpipe_rtas_recv_msg(char __user *buf, int size) bytes_written, size); bytes_written = size; } - ret = copy_to_user(buf, + if (copy_to_user(buf, rtas_work_area_raw_buf(work_area), - bytes_written); - if (!ret) + bytes_written)) + ret = -EFAULT; + else ret = bytes_written; } } else { @@ -328,7 +329,7 @@ static ssize_t papr_hvpipe_handle_read(struct file *file, struct hvpipe_source_info *src_info = file->private_data; struct papr_hvpipe_hdr hdr = {}; - long ret; + ssize_t ret = 0; /* * Return -ENXIO during migration @@ -376,7 +377,7 @@ static ssize_t papr_hvpipe_handle_read(struct file *file, ret = copy_to_user(buf, &hdr, HVPIPE_HDR_LEN); if (ret) - return ret; + return -EFAULT; /* * Message event has payload, so get the payload with @@ -385,19 +386,23 @@ static ssize_t papr_hvpipe_handle_read(struct file *file, if (hdr.flags & HVPIPE_MSG_AVAILABLE) { ret = hvpipe_rtas_recv_msg(buf + HVPIPE_HDR_LEN, size - HVPIPE_HDR_LEN); - if (ret > 0) { + /* + * Always clear MSG_AVAILABLE once the RTAS call has drained + * the message, regardless of whether copy_to_user succeeded. + */ + if (ret >= 0 || ret == -EFAULT) src_info->hvpipe_status &= ~HVPIPE_MSG_AVAILABLE; - ret += HVPIPE_HDR_LEN; - } } else if (hdr.flags & HVPIPE_LOST_CONNECTION) { /* * Hypervisor is closing the pipe for the specific * source. So notify user space. */ src_info->hvpipe_status &= ~HVPIPE_LOST_CONNECTION; - ret = HVPIPE_HDR_LEN; } + if (ret >= 0) + ret += HVPIPE_HDR_LEN; + return ret; } -- cgit v1.2.3 From 2eeac577480848801b35885b3a8201aa35f46236 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 1 May 2026 09:41:45 +0530 Subject: pseries/papr-hvpipe: Simplify spin unlock usage in papr_hvpipe_handle_release() Once the src_info is removed from the global list, no one can access it. This simplies the usage of spin_unlock_irqrestore() in papr_hvpipe_handle_release() Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/4a980331557af3d10aada8576aaa16cddc691c65.1777606826.git.ritesh.list@gmail.com --- arch/powerpc/platforms/pseries/papr-hvpipe.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index c007560d2d8c..5aa37f6ad8c9 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c @@ -460,6 +460,7 @@ static int papr_hvpipe_handle_release(struct inode *inode, src_info = file->private_data; list_del(&src_info->list); file->private_data = NULL; + spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); /* * If the pipe for this specific source has any pending * payload, issue recv HVPIPE RTAS so that pipe will not @@ -467,10 +468,8 @@ static int papr_hvpipe_handle_release(struct inode *inode, */ if (src_info->hvpipe_status & HVPIPE_MSG_AVAILABLE) { src_info->hvpipe_status = 0; - spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); hvpipe_rtas_recv_msg(NULL, 0); - } else - spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); + } kfree(src_info); return 0; -- cgit v1.2.3 From 4e2d83c80495a9327141e8636f25dde13155f14f Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 1 May 2026 09:41:46 +0530 Subject: pseries/papr-hvpipe: Kill task_struct pointer from struct hvpipe_source_info We don't really use task_struct pointer for anything meaningful. So just kill it for now, and we can bring back later if we need this for any future debug purposes. Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/895e061e45cdc95db36fa7f27aa1922b81eed867.1777606826.git.ritesh.list@gmail.com --- arch/powerpc/platforms/pseries/papr-hvpipe.c | 5 ++--- arch/powerpc/platforms/pseries/papr-hvpipe.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index 5aa37f6ad8c9..46159f1c1cf1 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c @@ -493,7 +493,6 @@ static int papr_hvpipe_dev_create_handle(u32 srcID) return -ENOMEM; src_info->srcID = srcID; - src_info->tsk = current; init_waitqueue_head(&src_info->recv_wqh); /* @@ -503,8 +502,8 @@ static int papr_hvpipe_dev_create_handle(u32 srcID) spin_lock_irqsave(&hvpipe_src_list_lock, flags); if (hvpipe_find_source(srcID)) { spin_unlock_irqrestore(&hvpipe_src_list_lock, flags); - pr_err("pid(%d) could not get the source(%d)\n", - src_info->tsk->pid, srcID); + pr_err("pid(%s:%d) could not get the source(%d)\n", + current->comm, task_pid_nr(current), srcID); kfree(src_info); return -EALREADY; } diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.h b/arch/powerpc/platforms/pseries/papr-hvpipe.h index c343f4230865..4bdf7bb2fc4d 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.h +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.h @@ -21,7 +21,6 @@ struct hvpipe_source_info { u32 srcID; u32 hvpipe_status; wait_queue_head_t recv_wqh; /* wake up poll() waitq */ - struct task_struct *tsk; }; /* -- cgit v1.2.3 From fe53d2ae82c06aa2d6402624af01e8f8ddfcd5b3 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 1 May 2026 09:41:47 +0530 Subject: pseries/papr-hvpipe: Refactor and simplify hvpipe_rtas_recv_msg() Simplify hvpipe_rtas_recv_msg() by removing three levels of nesting... if (!ret) if (buf) if (size < bytes_written) ... this refactoring of the function bails out to "out:" label first, in case of any error. This simplifies the init flow. Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/bbe7ddf8b8e25c9be8fc5e2c4aea9e5fca128bf4.1777606826.git.ritesh.list@gmail.com --- arch/powerpc/platforms/pseries/papr-hvpipe.c | 52 ++++++++++++++-------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index 46159f1c1cf1..3688b2be0445 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c @@ -190,34 +190,34 @@ static int hvpipe_rtas_recv_msg(char __user *buf, int size) return -ENOMEM; } - ret = rtas_ibm_receive_hvpipe_msg(work_area, &srcID, - &bytes_written); - if (!ret) { - /* - * Recv HVPIPE RTAS is successful. - * When releasing FD or no one is waiting on the - * specific source, issue recv HVPIPE RTAS call - * so that pipe is not blocked - this func is called - * with NULL buf. - */ - if (buf) { - if (size < bytes_written) { - pr_err("Received the payload size = %d, but the buffer size = %d\n", - bytes_written, size); - bytes_written = size; - } - if (copy_to_user(buf, - rtas_work_area_raw_buf(work_area), - bytes_written)) - ret = -EFAULT; - else - ret = bytes_written; - } - } else { - pr_err("ibm,receive-hvpipe-msg failed with %d\n", - ret); + /* + * Recv HVPIPE RTAS is successful. + * When releasing FD or no one is waiting on the + * specific source, issue recv HVPIPE RTAS call + * so that pipe is not blocked - this func is called + * with NULL buf. + */ + ret = rtas_ibm_receive_hvpipe_msg(work_area, &srcID, &bytes_written); + if (ret) { + pr_err("ibm,receive-hvpipe-msg failed with %d\n", ret); + goto out; } + if (!buf) + goto out; + + if (size < bytes_written) { + pr_err("Received the payload size = %d, but the buffer size = %d\n", + bytes_written, size); + bytes_written = size; + } + + if (copy_to_user(buf, rtas_work_area_raw_buf(work_area), bytes_written)) + ret = -EFAULT; + else + ret = bytes_written; + +out: rtas_work_area_free(work_area); return ret; } -- cgit v1.2.3 From 629d1a901de57490d29a495273df11e64993ec04 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 1 May 2026 09:41:48 +0530 Subject: pseries/papr-hvpipe: Fix style and checkpatch issues in enable_hvpipe_IRQ() While at it let's also fix the similar style issue in enable_hvpipe_IRQ() function. This also fixes a minor checkpatch warning which I got due to an extra space before " ==". Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/1174f60d0ae128e773dbefd11dd8d46d69e7f50e.1777606826.git.ritesh.list@gmail.com --- arch/powerpc/platforms/pseries/papr-hvpipe.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr-hvpipe.c b/arch/powerpc/platforms/pseries/papr-hvpipe.c index 3688b2be0445..0c40bdde45e2 100644 --- a/arch/powerpc/platforms/pseries/papr-hvpipe.c +++ b/arch/powerpc/platforms/pseries/papr-hvpipe.c @@ -693,20 +693,19 @@ static int __init enable_hvpipe_IRQ(void) struct device_node *np; hvpipe_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION); - if (hvpipe_check_exception_token == RTAS_UNKNOWN_SERVICE) + if (hvpipe_check_exception_token == RTAS_UNKNOWN_SERVICE) return -ENODEV; /* hvpipe events */ np = of_find_node_by_path("/event-sources/ibm,hvpipe-msg-events"); - if (np != NULL) { - request_event_sources_irqs(np, hvpipe_event_interrupt, - "HPIPE_EVENT"); - of_node_put(np); - } else { - pr_err("Can not enable hvpipe event IRQ\n"); + if (!np) { + pr_err("No device node found, could not enable hvpipe event IRQ\n"); return -ENODEV; } + request_event_sources_irqs(np, hvpipe_event_interrupt, "HPIPE_EVENT"); + of_node_put(np); + return 0; } -- cgit v1.2.3 From b3a97f9484080c6e71db9e803e3cc1bb372a9bc7 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Tue, 7 Apr 2026 18:13:44 +0530 Subject: powerpc/kdump: fix KASAN sanitization flag for core_$(BITS).o MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KASAN instrumentation is intended to be disabled for the kexec core code, but the existing Makefile entry misses the object suffix. As a result, the flag is not applied correctly to core_$(BITS).o. So when KASAN is enabled, kexec_copy_flush and copy_segments in kexec/core_64.c are instrumented, which can result in accesses to shadow memory via normal address translation paths. Since these run with the MMU disabled, such accesses may trigger page faults (bad_page_fault) that cannot be handled in the kdump path, ultimately causing a hang and preventing the kdump kernel from booting. The same is true for kexec as well, since the same functions are used there. Update the entry to include the “.o” suffix so that KASAN instrumentation is properly disabled for this object file. Fixes: 2ab2d5794f14 ("powerpc/kasan: Disable address sanitization in kexec paths") Reported-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/all/1dee8891-8bcc-46b4-93f3-fc3a774abd5b@linux.ibm.com/ Cc: stable@vger.kernel.org Reviewed-by: Ritesh Harjani (IBM) Tested-by: Venkat Rao Bagalkote Acked-by: Mahesh Salgaonkar Reviewed-by: Aboorva Devarajan Tested-by: Aboorva Devarajan Signed-off-by: Sourabh Jain Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260407124349.1698552-1-sourabhjain@linux.ibm.com --- arch/powerpc/kexec/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile index 470eb0453e17..ec7a0eed75dc 100644 --- a/arch/powerpc/kexec/Makefile +++ b/arch/powerpc/kexec/Makefile @@ -16,4 +16,4 @@ GCOV_PROFILE_core_$(BITS).o := n KCOV_INSTRUMENT_core_$(BITS).o := n UBSAN_SANITIZE_core_$(BITS).o := n KASAN_SANITIZE_core.o := n -KASAN_SANITIZE_core_$(BITS) := n +KASAN_SANITIZE_core_$(BITS).o := n -- cgit v1.2.3 From 38e989d504fc52900a3786b7144fb53cd67e0389 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Tue, 7 Apr 2026 18:13:45 +0530 Subject: powerpc/vmx: avoid KASAN instrumentation in enter_vmx_ops() for kexec The kexec sequence invokes enter_vmx_ops() via copy_page() with the MMU disabled. In this context, code must not rely on normal virtual address translations or trigger page faults. With KASAN enabled, functions get instrumented and may access shadow memory using regular address translation. When executed with the MMU off, this can lead to page faults (bad_page_fault) from which the kernel cannot recover in the kexec path, resulting in a hang. The kexec path sets preempt_count to HARDIRQ_OFFSET before entering the MMU-off copy sequence. current_thread_info()->preempt_count = HARDIRQ_OFFSET kexec_sequence(..., copy_with_mmu_off = 1) -> kexec_copy_flush(image) copy_segments() -> copy_page(dest, addr) bl enter_vmx_ops() if (in_interrupt()) return 0 beq .Lnonvmx_copy Since kexec sets preempt_count to HARDIRQ_OFFSET, in_interrupt() evaluates to true and enter_vmx_ops() returns early. As in_interrupt() (and preempt_count()) are always inlined, mark enter_vmx_ops() with __no_sanitize_address to avoid KASAN instrumentation and shadow memory access with MMU disabled, helping kexec boot fine with KASAN enabled. Reported-by: Aboorva Devarajan Reviewed-by: Aboorva Devarajan Tested-by: Aboorva Devarajan Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Sourabh Jain Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260407124349.1698552-2-sourabhjain@linux.ibm.com --- arch/powerpc/lib/vmx-helper.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 554b248002b4..57e897b60db8 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -52,7 +52,14 @@ int exit_vmx_usercopy(void) } EXPORT_SYMBOL(exit_vmx_usercopy); -int enter_vmx_ops(void) +/* + * Can be called from kexec copy_page() path with MMU off. The kexec + * code sets preempt_count to HARDIRQ_OFFSET so we return early here. + * Since in_interrupt() is always inline, __no_sanitize_address on this + * function is sufficient to avoid KASAN shadow memory accesses in real + * mode. + */ +int __no_sanitize_address enter_vmx_ops(void) { if (in_interrupt()) return 0; -- cgit v1.2.3 From 0e7c074cfcd9bd93765505f9eb8b42f03ed2a744 Mon Sep 17 00:00:00 2001 From: Pavitra Jha Date: Fri, 1 May 2026 07:07:12 -0400 Subject: net: wwan: t7xx: validate port_count against message length in t7xx_port_enum_msg_handler t7xx_port_enum_msg_handler() uses the modem-supplied port_count field as a loop bound over port_msg->data[] without checking that the message buffer contains sufficient data. A modem sending port_count=65535 in a 12-byte buffer triggers a slab-out-of-bounds read of up to 262140 bytes. Add a sizeof(*port_msg) check before accessing the port message header fields to guard against undersized messages. Add a struct_size() check after extracting port_count and before the loop. In t7xx_parse_host_rt_data(), guard the rt_feature header read with a remaining-buffer check before accessing data_len, validate feat_data_len against the actual remaining buffer to prevent OOB reads and signed integer overflow on offset. Pass msg_len from both call sites: skb->len at the DPMAIF path after skb_pull(), and the validated feat_data_len at the handshake path. Fixes: da45d2566a1d ("net: wwan: t7xx: Add control port") Cc: stable@vger.kernel.org Signed-off-by: Pavitra Jha Link: https://patch.msgid.link/20260501110713.145563-1-jhapavitra98@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/wwan/t7xx/t7xx_modem_ops.c | 20 +++++++++++++++++--- drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c | 18 ++++++++++++++++-- drivers/net/wwan/t7xx/t7xx_port_proxy.h | 2 +- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/net/wwan/t7xx/t7xx_modem_ops.c b/drivers/net/wwan/t7xx/t7xx_modem_ops.c index 7968e208dd37..adb29d30c63f 100644 --- a/drivers/net/wwan/t7xx/t7xx_modem_ops.c +++ b/drivers/net/wwan/t7xx/t7xx_modem_ops.c @@ -457,8 +457,20 @@ static int t7xx_parse_host_rt_data(struct t7xx_fsm_ctl *ctl, struct t7xx_sys_inf offset = sizeof(struct feature_query); for (i = 0; i < FEATURE_COUNT && offset < data_length; i++) { + size_t remaining = data_length - offset; + size_t feat_data_len, feat_total; + + if (remaining < sizeof(*rt_feature)) + break; + rt_feature = data + offset; - offset += sizeof(*rt_feature) + le32_to_cpu(rt_feature->data_len); + feat_data_len = le32_to_cpu(rt_feature->data_len); + + if (feat_data_len > remaining - sizeof(*rt_feature)) + break; + + feat_total = sizeof(*rt_feature) + feat_data_len; + offset += feat_total; ft_spt_cfg = FIELD_GET(FEATURE_MSK, core->feature_set[i]); if (ft_spt_cfg != MTK_FEATURE_MUST_BE_SUPPORTED) @@ -468,8 +480,10 @@ static int t7xx_parse_host_rt_data(struct t7xx_fsm_ctl *ctl, struct t7xx_sys_inf if (ft_spt_st != MTK_FEATURE_MUST_BE_SUPPORTED) return -EINVAL; - if (i == RT_ID_MD_PORT_ENUM || i == RT_ID_AP_PORT_ENUM) - t7xx_port_enum_msg_handler(ctl->md, rt_feature->data); + if (i == RT_ID_MD_PORT_ENUM || i == RT_ID_AP_PORT_ENUM) { + t7xx_port_enum_msg_handler(ctl->md, rt_feature->data, + feat_data_len); + } } return 0; diff --git a/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c b/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c index ae632ef96698..f869e4ed9ee9 100644 --- a/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c +++ b/drivers/net/wwan/t7xx/t7xx_port_ctrl_msg.c @@ -117,6 +117,7 @@ static int fsm_ee_message_handler(struct t7xx_port *port, struct t7xx_fsm_ctl *c * t7xx_port_enum_msg_handler() - Parse the port enumeration message to create/remove nodes. * @md: Modem context. * @msg: Message. + * @msg_len: Length of @msg in bytes. * * Used to control create/remove device node. * @@ -124,12 +125,18 @@ static int fsm_ee_message_handler(struct t7xx_port *port, struct t7xx_fsm_ctl *c * * 0 - Success. * * -EFAULT - Message check failure. */ -int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg) +int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg, size_t msg_len) { struct device *dev = &md->t7xx_dev->pdev->dev; unsigned int version, port_count, i; struct port_msg *port_msg = msg; + if (msg_len < sizeof(*port_msg)) { + dev_err(dev, "Port enum msg too short for header: need %zu, have %zu\n", + sizeof(*port_msg), msg_len); + return -EINVAL; + } + version = FIELD_GET(PORT_MSG_VERSION, le32_to_cpu(port_msg->info)); if (version != PORT_ENUM_VER || le32_to_cpu(port_msg->head_pattern) != PORT_ENUM_HEAD_PATTERN || @@ -141,6 +148,13 @@ int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg) } port_count = FIELD_GET(PORT_MSG_PRT_CNT, le32_to_cpu(port_msg->info)); + + if (msg_len < struct_size(port_msg, data, port_count)) { + dev_err(dev, "Port enum msg too short: need %zu, have %zu\n", + struct_size(port_msg, data, port_count), msg_len); + return -EINVAL; + } + for (i = 0; i < port_count; i++) { u32 port_info = le32_to_cpu(port_msg->data[i]); unsigned int ch_id; @@ -191,7 +205,7 @@ static int control_msg_handler(struct t7xx_port *port, struct sk_buff *skb) case CTL_ID_PORT_ENUM: skb_pull(skb, sizeof(*ctrl_msg_h)); - ret = t7xx_port_enum_msg_handler(ctl->md, (struct port_msg *)skb->data); + ret = t7xx_port_enum_msg_handler(ctl->md, (struct port_msg *)skb->data, skb->len); if (!ret) ret = port_ctl_send_msg_to_md(port, CTL_ID_PORT_ENUM, 0); else diff --git a/drivers/net/wwan/t7xx/t7xx_port_proxy.h b/drivers/net/wwan/t7xx/t7xx_port_proxy.h index f0918b36e899..7c3190bf0fcf 100644 --- a/drivers/net/wwan/t7xx/t7xx_port_proxy.h +++ b/drivers/net/wwan/t7xx/t7xx_port_proxy.h @@ -103,7 +103,7 @@ void t7xx_port_proxy_reset(struct port_proxy *port_prox); void t7xx_port_proxy_uninit(struct port_proxy *port_prox); int t7xx_port_proxy_init(struct t7xx_modem *md); void t7xx_port_proxy_md_status_notify(struct port_proxy *port_prox, unsigned int state); -int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg); +int t7xx_port_enum_msg_handler(struct t7xx_modem *md, void *msg, size_t msg_len); int t7xx_port_proxy_chl_enable_disable(struct port_proxy *port_prox, unsigned int ch_id, bool en_flag); void t7xx_port_proxy_set_cfg(struct t7xx_modem *md, enum port_cfg_id cfg_id); -- cgit v1.2.3 From da107152c43915211e1fc0319b9526f4241abca2 Mon Sep 17 00:00:00 2001 From: "Christophe Leroy (CS GROUP)" Date: Tue, 21 Apr 2026 08:26:08 +0200 Subject: powerpc/8xx: Fix interrupt mask in cpm1_gpiochip_add16() Allthough fsl,cpm1-gpio-irq-mask always contains a 16 bits value, it is a standard u32 OF property as documented in Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt The driver erroneously uses of_property_read_u16() leading to a mask which is always 0. Fix it by using of_property_read_u32() instead. Fixes: 726bd223105c ("powerpc/8xx: Adding support of IRQ in MPC8xx GPIO") Signed-off-by: Christophe Leroy (CS GROUP) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/bb0b6d6c4543238c38d5d29a776d0674a8c0c180.1776752750.git.chleroy@kernel.org --- arch/powerpc/platforms/8xx/cpm1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index 7433be7d66ee..f00734f0590c 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -477,7 +477,7 @@ int cpm1_gpiochip_add16(struct device *dev) struct device_node *np = dev->of_node; struct cpm1_gpio16_chip *cpm1_gc; struct gpio_chip *gc; - u16 mask; + u32 mask; cpm1_gc = devm_kzalloc(dev, sizeof(*cpm1_gc), GFP_KERNEL); if (!cpm1_gc) @@ -485,7 +485,7 @@ int cpm1_gpiochip_add16(struct device *dev) spin_lock_init(&cpm1_gc->lock); - if (!of_property_read_u16(np, "fsl,cpm1-gpio-irq-mask", &mask)) { + if (!of_property_read_u32(np, "fsl,cpm1-gpio-irq-mask", &mask)) { int i, j; for (i = 0, j = 0; i < 16; i++) -- cgit v1.2.3 From 131717e656b379addb95af2dcb2d90c723bae24b Mon Sep 17 00:00:00 2001 From: Shivani Nittor Date: Tue, 21 Apr 2026 20:36:28 +0530 Subject: powerpc/perf: Update check for PERF_SAMPLE_DATA_SRC marked events The core-book3s PMU sampling code validates the SIER TYPE field when PERF_SAMPLE_DATA_SRC is requested. The SIER TYPE field indicates the instruction type and is only valid for random sampling (marked events). To handle cases observed where SIER TYPE could be zero even for marked events,validation was added to drop such samples and increment event->lost_samples. However, this validation was applied to all samples, including continuous sampling. In continuous sampling mode, the PMU does not set the SIER TYPE field, so it remains zero. As a result, valid continuous samples were incorrectly treated as invalid and dropped. Fixed this by gating the SIER TYPE validation with mark_event, so the check runs only for marked (random) events. Continuous samples now skip this check and are recorded normally in the final data recording path. Fixes: 2ffb26afa642 ("arch/powerpc/perf: Check the instruction type before creating sample with perf_mem_data_src") Signed-off-by: Shivani Nittor Reviewed-by: Mukesh Kumar Chaurasiya (IBM) Reviewed-by: Athira Rajeev [Maddy: Fixed reviewed-by tag] Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260421150628.96500-1-shivani@linux.ibm.com --- arch/powerpc/perf/core-book3s.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 8b0081441f85..2e6adf5b95c4 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2242,6 +2242,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, const u64 last_period = event->hw.last_period; s64 prev, delta, left; int record = 0; + int mark_event = regs->dsisr & MMCRA_SAMPLE_ENABLE; if (event->hw.state & PERF_HES_STOPPED) { write_pmc(event->hw.idx, 0); @@ -2304,9 +2305,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * In ISA v3.0 and before values "0" and "7" are considered reserved. * In ISA v3.1, value "7" has been used to indicate "larx/stcx". * Drop the sample if "type" has reserved values for this field with a - * ISA version check. + * ISA version check for marked events. */ - if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC && + if (mark_event && event->attr.sample_type & PERF_SAMPLE_DATA_SRC && ppmu->get_mem_data_src) { val = (regs->dar & SIER_TYPE_MASK) >> SIER_TYPE_SHIFT; if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31))) { -- cgit v1.2.3 From ae9582cd0b9ccc4a121af300df68fd27f72e9822 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Mon, 4 May 2026 21:10:58 +0300 Subject: net/mlx5e: psp: Fix invalid access on PSP dev registration fail priv->psp->psp is initialized with the PSP device as returned by psp_dev_create(). This could also return an error, in which case a future psp_dev_unregister() will result in unpleasantness. Avoid that by using a local variable and only saving the PSP device when registration succeeds. In case psp_dev_create() fails, priv->psp and steering structs are left in place, but they will be inert. The unchecked access of priv->psp in mlx5e_psp_offload_handle_rx_skb() won't happen because without a PSP device, there can be no SAs added and therefore no packets will be successfully decrypted and be handed off to the SW handler. Fixes: 89ee2d92f66c ("net/mlx5e: Support PSP offload functionality") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260504181100.269334-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_accel/psp.c | 26 ++++++++++++++-------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c index 6a50b6dec0fa..1ff818fb48df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c @@ -1070,29 +1070,37 @@ static struct psp_dev_ops mlx5_psp_ops = { void mlx5e_psp_unregister(struct mlx5e_priv *priv) { - if (!priv->psp || !priv->psp->psp) + struct mlx5e_psp *psp = priv->psp; + + if (!psp || !psp->psp) return; - psp_dev_unregister(priv->psp->psp); + psp_dev_unregister(psp->psp); + psp->psp = NULL; } void mlx5e_psp_register(struct mlx5e_priv *priv) { + struct mlx5e_psp *psp = priv->psp; + struct psp_dev *psd; + /* FW Caps missing */ if (!priv->psp) return; - priv->psp->caps.assoc_drv_spc = sizeof(u32); - priv->psp->caps.versions = 1 << PSP_VERSION_HDR0_AES_GCM_128; + psp->caps.assoc_drv_spc = sizeof(u32); + psp->caps.versions = 1 << PSP_VERSION_HDR0_AES_GCM_128; if (MLX5_CAP_PSP(priv->mdev, psp_crypto_esp_aes_gcm_256_encrypt) && MLX5_CAP_PSP(priv->mdev, psp_crypto_esp_aes_gcm_256_decrypt)) - priv->psp->caps.versions |= 1 << PSP_VERSION_HDR0_AES_GCM_256; + psp->caps.versions |= 1 << PSP_VERSION_HDR0_AES_GCM_256; - priv->psp->psp = psp_dev_create(priv->netdev, &mlx5_psp_ops, - &priv->psp->caps, NULL); - if (IS_ERR(priv->psp->psp)) + psd = psp_dev_create(priv->netdev, &mlx5_psp_ops, &psp->caps, NULL); + if (IS_ERR(psd)) { mlx5_core_err(priv->mdev, "PSP failed to register due to %pe\n", - priv->psp->psp); + psd); + return; + } + psp->psp = psd; } int mlx5e_psp_init(struct mlx5e_priv *priv) -- cgit v1.2.3 From 50690733db59fbb3de9fa811b606af324eeb4e37 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Mon, 4 May 2026 21:10:59 +0300 Subject: net/mlx5e: psp: Expose only a fully initialized priv->psp Currently, during PSP init, priv->psp is initialized to an incompletely built psp struct. Additionally, on fs init failure priv->psp is reset to NULL. Change this so that only a fully initialized priv->psp is set, which makes the code easier to reason about in failure scenarios. Fixes: af2196f49480 ("net/mlx5e: Implement PSP operations .assoc_add and .assoc_del") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260504181100.269334-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c index 1ff818fb48df..d9adb993e64d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c @@ -1139,22 +1139,18 @@ int mlx5e_psp_init(struct mlx5e_priv *priv) if (!psp) return -ENOMEM; - priv->psp = psp; fs = mlx5e_accel_psp_fs_init(priv); if (IS_ERR(fs)) { err = PTR_ERR(fs); - goto out_err; + kfree(psp); + return err; } psp->fs = fs; + priv->psp = psp; mlx5_core_dbg(priv->mdev, "PSP attached to netdevice\n"); return 0; - -out_err: - priv->psp = NULL; - kfree(psp); - return err; } void mlx5e_psp_cleanup(struct mlx5e_priv *priv) -- cgit v1.2.3 From c4a5c46199b5addf0157934da3aa89c33eb02a6d Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Mon, 4 May 2026 21:11:00 +0300 Subject: net/mlx5e: psp: Hook PSP dev reg/unreg to profile enable/disable devlink reload while PSP connections are active does: mlx5_unload_one_devl_locked() -> mlx5_detach_device() -> _mlx5e_suspend() -> mlx5e_detach_netdev() -> profile->cleanup_rx -> profile->cleanup_tx -> mlx5e_destroy_mdev_resources() -> mlx5_core_dealloc_pd() fails: ... mlx5_core 0000:08:00.0: mlx5_cmd_out_err:821:(pid 19722): DEALLOC_PD(0x801) op_mod(0x0) failed, status bad resource state(0x9), syndrome (0xef0c8a), err(-22) ... The reason for failure is the existence of TX keys, which are removed by the PSP dev unregistration happening in: profile->cleanup() -> mlx5e_psp_unregister() -> mlx5e_psp_cleanup() -> psp_dev_unregister() ...but this isn't invoked in the devlink reload flow, only when changing the NIC profile (e.g. when transitioning to switchdev mode) or on dev teardown. Move PSP device registration into mlx5e_nic_enable(), and unregistration into the corresponding mlx5e_nic_disable(). These functions are called during netdev attach/detach after RX & TX are set up. This ensures that the keys will be gone by the time the PD is destroyed. Fixes: 89ee2d92f66c ("net/mlx5e: Support PSP offload functionality") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260504181100.269334-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5a46870c4b74..8e9443caa933 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -6023,7 +6023,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (take_rtnl) rtnl_lock(); - mlx5e_psp_register(priv); /* update XDP supported features */ mlx5e_set_xdp_feature(priv); @@ -6036,7 +6035,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_health_destroy_reporters(priv); - mlx5e_psp_unregister(priv); mlx5e_ktls_cleanup(priv); mlx5e_psp_cleanup(priv); mlx5e_fs_cleanup(priv->fs); @@ -6160,6 +6158,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_fs_init_l2_addr(priv->fs, netdev); mlx5e_ipsec_init(priv); + mlx5e_psp_register(priv); err = mlx5e_macsec_init(priv); if (err) @@ -6230,6 +6229,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5_lag_remove_netdev(mdev, priv->netdev); mlx5_vxlan_reset_to_default(mdev->vxlan); mlx5e_macsec_cleanup(priv); + mlx5e_psp_unregister(priv); mlx5e_ipsec_cleanup(priv); } -- cgit v1.2.3 From 4052b932041614675fa2dbf48f725557c23ebf05 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 1 Apr 2026 10:30:03 +0200 Subject: arch/powerpc: Drop CONFIG_FIRMWARE_EDID from defconfig files CONFIG_FIRMWARE_EDID=y depends on X86 or EFI_GENERIC_STUB. Neither is true here, so drop the lines from the defconfig files. Signed-off-by: Thomas Zimmermann Reviewed-by: Christophe Leroy (CS GROUP) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260401083023.214426-1-tzimmermann@suse.de --- arch/powerpc/configs/amigaone_defconfig | 1 - arch/powerpc/configs/chrp32_defconfig | 1 - arch/powerpc/configs/g5_defconfig | 1 - arch/powerpc/configs/pasemi_defconfig | 1 - arch/powerpc/configs/powernv_defconfig | 1 - arch/powerpc/configs/ppc64_defconfig | 1 - arch/powerpc/configs/ppc64e_defconfig | 1 - arch/powerpc/configs/skiroot_defconfig | 1 - 8 files changed, 8 deletions(-) diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig index 69ef3dc31c4b..7a515390646b 100644 --- a/arch/powerpc/configs/amigaone_defconfig +++ b/arch/powerpc/configs/amigaone_defconfig @@ -76,7 +76,6 @@ CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_RADEON=y CONFIG_FB_3DFX=y diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig index b799c95480ae..66eae5b7e16c 100644 --- a/arch/powerpc/configs/chrp32_defconfig +++ b/arch/powerpc/configs/chrp32_defconfig @@ -76,7 +76,6 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_NVRAM=y # CONFIG_HWMON is not set CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y CONFIG_FB_OF=y CONFIG_FB_MATROX=y CONFIG_FB_MATROX_MILLENIUM=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 04bbb37f5978..e9996711b362 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -121,7 +121,6 @@ CONFIG_I2C_CHARDEV=y CONFIG_AGP=m CONFIG_AGP_UNINORTH=m CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_OF=y CONFIG_FB_NVIDIA=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 8bbf51b38480..89bcbeb05067 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -98,7 +98,6 @@ CONFIG_SENSORS_LM85=y CONFIG_SENSORS_LM90=y CONFIG_DRM=y CONFIG_DRM_RADEON=y -CONFIG_FIRMWARE_EDID=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_VGA16=y CONFIG_FB_NVIDIA=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index cc9802420237..5d32c2767a65 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -196,7 +196,6 @@ CONFIG_I2C_CHARDEV=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_DRM=y CONFIG_DRM_AST=y -CONFIG_FIRMWARE_EDID=y CONFIG_FB_OF=y CONFIG_FB_MATROX=m CONFIG_FB_MATROX_MILLENIUM=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 3bf518e3a573..6316ca4df25d 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -249,7 +249,6 @@ CONFIG_I2C_CHARDEV=y CONFIG_I2C_AMD8111=y CONFIG_I2C_PASEMI=y CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y CONFIG_FB_OF=y CONFIG_FB_MATROX=y CONFIG_FB_MATROX_MILLENIUM=y diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 0fd49f67331f..20cc17dce94d 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -118,7 +118,6 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_AMD8111=y CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y CONFIG_FB_OF=y CONFIG_FB_MATROX=y CONFIG_FB_MATROX_MILLENIUM=y diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index ff1bed4b6d2c..005536ee75bb 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -214,7 +214,6 @@ CONFIG_SENSORS_IBMPOWERNV=m CONFIG_DRM=m CONFIG_DRM_AST=m CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y -- cgit v1.2.3 From 3abcedfdfd3125431ed404fa75724118beac630b Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 4 May 2026 21:02:03 +0300 Subject: net/mlx5: SD: Serialize init/cleanup mlx5_sd_init() / mlx5_sd_cleanup() may run from multiple PFs in the same Socket-Direct group. This can cause the SD bring-up/tear-down sequence to be executed more than once or interleaved across PFs. Protect SD init/cleanup with mlx5_devcom_comp_lock() and track the SD group state on the primary device. Skip init if the primary is already UP, and skip cleanup unless the primary is UP. The state check on cleanup is needed because sd_register() drops the devcom comp lock between marking the comp ready and assigning primary_dev on each peer. A concurrent cleanup that acquires the lock in this window would observe devcom_is_ready==true while primary_dev is still NULL (causing mlx5_sd_get_primary() to return NULL) or while the FW alias setup performed by mlx5_sd_init()'s body has not yet run (causing sd_cmd_unset_primary() to dereference a NULL tx_ft). Gate the cleanup body on primary_sd->state == MLX5_SD_STATE_UP, which is set only at the very end of mlx5_sd_init() under the same comp lock - so observing UP guarantees primary_dev, secondaries[], tx_ft, and dfs are all populated. Also bail explicitly if mlx5_sd_get_primary() returns NULL, in case state is checked on a peer whose primary_dev hasn't been assigned yet. In addition, move mlx5_devcom_comp_set_ready(false) from sd_unregister() into the cleanup's locked section, including the !primary and state != UP early-exit paths, so the device cannot unregister and free its struct mlx5_sd while devcom is still marked ready. A concurrent init acquiring the devcom lock will now observe devcom is no longer ready and bail out immediately. Fixes: 381978d28317 ("net/mlx5e: Create single netdev per SD group") Signed-off-by: Shay Drory Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260504180206.268568-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c | 56 +++++++++++++++++++++--- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index 762c783156b4..ec42685bdece 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -18,6 +18,7 @@ struct mlx5_sd { u8 host_buses; struct mlx5_devcom_comp_dev *devcom; struct dentry *dfs; + u8 state; bool primary; union { struct { /* primary */ @@ -31,6 +32,11 @@ struct mlx5_sd { }; }; +enum mlx5_sd_state { + MLX5_SD_STATE_DOWN = 0, + MLX5_SD_STATE_UP, +}; + static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev) { struct mlx5_sd *sd = mlx5_get_sd(dev); @@ -270,9 +276,6 @@ static void sd_unregister(struct mlx5_core_dev *dev) { struct mlx5_sd *sd = mlx5_get_sd(dev); - mlx5_devcom_comp_lock(sd->devcom); - mlx5_devcom_comp_set_ready(sd->devcom, false); - mlx5_devcom_comp_unlock(sd->devcom); mlx5_devcom_unregister_component(sd->devcom); } @@ -426,6 +429,7 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) struct mlx5_core_dev *primary, *pos, *to; struct mlx5_sd *sd = mlx5_get_sd(dev); u8 alias_key[ACCESS_KEY_LEN]; + struct mlx5_sd *primary_sd; int err, i; err = sd_init(dev); @@ -440,10 +444,17 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) if (err) goto err_sd_cleanup; + mlx5_devcom_comp_lock(sd->devcom); if (!mlx5_devcom_comp_is_ready(sd->devcom)) - return 0; + goto out; primary = mlx5_sd_get_primary(dev); + if (!primary) + goto out; + + primary_sd = mlx5_get_sd(primary); + if (primary_sd->state != MLX5_SD_STATE_DOWN) + goto out; for (i = 0; i < ACCESS_KEY_LEN; i++) alias_key[i] = get_random_u8(); @@ -472,6 +483,9 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) sd->group_id, mlx5_devcom_comp_get_size(sd->devcom)); sd_print_group(primary); + primary_sd->state = MLX5_SD_STATE_UP; +out: + mlx5_devcom_comp_unlock(sd->devcom); return 0; err_unset_secondaries: @@ -481,6 +495,15 @@ err_unset_secondaries: sd_cmd_unset_primary(primary); debugfs_remove_recursive(sd->dfs); err_sd_unregister: + mlx5_sd_for_each_secondary(i, primary, pos) { + struct mlx5_sd *peer_sd = mlx5_get_sd(pos); + + primary_sd->secondaries[i - 1] = NULL; + peer_sd->primary_dev = NULL; + } + primary_sd->primary = false; + mlx5_devcom_comp_set_ready(sd->devcom, false); + mlx5_devcom_comp_unlock(sd->devcom); sd_unregister(dev); err_sd_cleanup: sd_cleanup(dev); @@ -491,22 +514,43 @@ void mlx5_sd_cleanup(struct mlx5_core_dev *dev) { struct mlx5_sd *sd = mlx5_get_sd(dev); struct mlx5_core_dev *primary, *pos; + struct mlx5_sd *primary_sd; int i; if (!sd) return; + mlx5_devcom_comp_lock(sd->devcom); if (!mlx5_devcom_comp_is_ready(sd->devcom)) - goto out; + goto out_unlock; primary = mlx5_sd_get_primary(dev); + if (!primary) + goto out_ready_false; + + primary_sd = mlx5_get_sd(primary); + if (primary_sd->state != MLX5_SD_STATE_UP) + goto out_clear_peers; + mlx5_sd_for_each_secondary(i, primary, pos) sd_cmd_unset_secondary(pos); sd_cmd_unset_primary(primary); debugfs_remove_recursive(sd->dfs); sd_info(primary, "group id %#x, uncombined\n", sd->group_id); -out: + primary_sd->state = MLX5_SD_STATE_DOWN; +out_clear_peers: + mlx5_sd_for_each_secondary(i, primary, pos) { + struct mlx5_sd *peer_sd = mlx5_get_sd(pos); + + primary_sd->secondaries[i - 1] = NULL; + peer_sd->primary_dev = NULL; + } + primary_sd->primary = false; +out_ready_false: + mlx5_devcom_comp_set_ready(sd->devcom, false); +out_unlock: + mlx5_devcom_comp_unlock(sd->devcom); sd_unregister(dev); sd_cleanup(dev); } -- cgit v1.2.3 From 05217e4ffbb229e7218cf318e0033780abadb624 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 4 May 2026 21:02:04 +0300 Subject: net/mlx5: SD, Keep multi-pf debugfs entries on primary mlx5_sd_init() creates the "multi-pf" debugfs directory under the primary device debugfs root, but stored the dentry in the calling device's sd struct. When sd_cleanup() run on a different PF, this leads to using the wrong sd->dfs for removing entries, which results in memory leak and an error in when re-creating the SD.[1] Fix it by explicitly storing the debugfs dentry in the primary device sd struct and use it for all per-group files. [1] debugfs: 'multi-pf' already exists in '0000:08:00.1' Fixes: 4375130bf527 ("net/mlx5: SD, Add debugfs") Signed-off-by: Shay Drory Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260504180206.268568-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index ec42685bdece..89b7e4d67303 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -463,9 +463,13 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) if (err) goto err_sd_unregister; - sd->dfs = debugfs_create_dir("multi-pf", mlx5_debugfs_get_dev_root(primary)); - debugfs_create_x32("group_id", 0400, sd->dfs, &sd->group_id); - debugfs_create_file("primary", 0400, sd->dfs, primary, &dev_fops); + primary_sd->dfs = + debugfs_create_dir("multi-pf", + mlx5_debugfs_get_dev_root(primary)); + debugfs_create_x32("group_id", 0400, primary_sd->dfs, + &primary_sd->group_id); + debugfs_create_file("primary", 0400, primary_sd->dfs, primary, + &dev_fops); mlx5_sd_for_each_secondary(i, primary, pos) { char name[32]; @@ -475,7 +479,8 @@ int mlx5_sd_init(struct mlx5_core_dev *dev) goto err_unset_secondaries; snprintf(name, sizeof(name), "secondary_%d", i - 1); - debugfs_create_file(name, 0400, sd->dfs, pos, &dev_fops); + debugfs_create_file(name, 0400, primary_sd->dfs, pos, + &dev_fops); } @@ -493,7 +498,8 @@ err_unset_secondaries: mlx5_sd_for_each_secondary_to(i, primary, to, pos) sd_cmd_unset_secondary(pos); sd_cmd_unset_primary(primary); - debugfs_remove_recursive(sd->dfs); + debugfs_remove_recursive(primary_sd->dfs); + primary_sd->dfs = NULL; err_sd_unregister: mlx5_sd_for_each_secondary(i, primary, pos) { struct mlx5_sd *peer_sd = mlx5_get_sd(pos); @@ -535,7 +541,8 @@ void mlx5_sd_cleanup(struct mlx5_core_dev *dev) mlx5_sd_for_each_secondary(i, primary, pos) sd_cmd_unset_secondary(pos); sd_cmd_unset_primary(primary); - debugfs_remove_recursive(sd->dfs); + debugfs_remove_recursive(primary_sd->dfs); + primary_sd->dfs = NULL; sd_info(primary, "group id %#x, uncombined\n", sd->group_id); primary_sd->state = MLX5_SD_STATE_DOWN; -- cgit v1.2.3 From 3564222cfdde83a2d760b80192155a3ada1c9bdd Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 4 May 2026 21:02:05 +0300 Subject: net/mlx5e: SD, Fix missing cleanup on probe error When _mlx5e_probe() fails, the preceding successful mlx5_sd_init() is not undone. Auxiliary bus probe failure skips binding, so mlx5e_remove() is never called for that adev and the matching mlx5_sd_cleanup() never runs - leaking the per-dev SD struct. Call mlx5_sd_cleanup() on the probe error path to balance mlx5_sd_init(). A similar gap exists on the resume path: mlx5_sd_init() and mlx5_sd_cleanup() are currently bundled with both probe/remove and suspend/resume, even though only the FW alias state actually needs to follow the suspend/resume lifecycle - the sd struct allocation and devcom membership are software state that should track the full bound lifetime. As a result, a failed resume can leave a still-bound device with sd == NULL, which mlx5_sd_get_adev() can't distinguish from a non-SD device. Fixing this requires sd_suspend/resume APIs which will only destroy FW resources and is left for a follow-up series. Fixes: 381978d28317 ("net/mlx5e: Create single netdev per SD group") Signed-off-by: Shay Drory Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260504180206.268568-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8e9443caa933..62b70334a13d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -6775,8 +6775,8 @@ static int mlx5e_resume(struct auxiliary_device *adev) actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); if (actual_adev) - return _mlx5e_resume(actual_adev); - return 0; + err = _mlx5e_resume(actual_adev); + return err; } static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg) @@ -6912,9 +6912,16 @@ static int mlx5e_probe(struct auxiliary_device *adev, return err; actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); - if (actual_adev) - return _mlx5e_probe(actual_adev); + if (actual_adev) { + err = _mlx5e_probe(actual_adev); + if (err) + goto sd_cleanup; + } return 0; + +sd_cleanup: + mlx5_sd_cleanup(mdev); + return err; } static void _mlx5e_remove(struct auxiliary_device *adev) -- cgit v1.2.3 From d466ddda5500b6b8ae060909d2317811f2c32a6a Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 4 May 2026 21:02:06 +0300 Subject: net/mlx5e: SD, Fix race condition in secondary device probe/remove When utilizing Socket-Direct single netdev functionality the driver resolves the actual auxiliary device using mlx5_sd_get_adev(). However, the current implementation returns the primary ETH auxiliary device without holding the device lock, leading to a potential race condition where the ETH device could be unbound or removed concurrently during probe, suspend, resume, or remove operations.[1] Fix this by introducing mlx5_sd_put_adev() and updating mlx5_sd_get_adev() so that secondaries devices would get a ref and acquire the device lock of the returned auxiliary device. After the lock is acquired, a second devcom check is needed[2]. In addition, update The callers to pair the get operation with the new put operation, ensuring the lock is held while the auxiliary device is being operated on and released afterwards. The "primary" designation is determined once in sd_register(). It's set before devcom is marked ready, and it never changes after that. In Addition, The primary path never locks a secondary: When the primary device invoke mlx5_sd_get_adev(), it sees dev == primary and returns. no additional lock is taken. Therefore lock ordering is always: secondary_lock -> primary_lock. The reverse never happens, so ABBA deadlock is impossible. [1] for example: BUG: kernel NULL pointer dereference, address: 0000000000000370 PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP CPU: 4 UID: 0 PID: 3945 Comm: bash Not tainted 6.19.0-rc3+ #1 NONE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:mlx5e_dcbnl_dscp_app+0x23/0x100 [mlx5_core] Call Trace: mlx5e_remove+0x82/0x12a [mlx5_core] device_release_driver_internal+0x194/0x1f0 bus_remove_device+0xc6/0x140 device_del+0x159/0x3c0 ? devl_param_driverinit_value_get+0x29/0x80 mlx5_rescan_drivers_locked+0x92/0x160 [mlx5_core] mlx5_unregister_device+0x34/0x50 [mlx5_core] mlx5_uninit_one+0x43/0xb0 [mlx5_core] remove_one+0x4e/0xc0 [mlx5_core] pci_device_remove+0x39/0xa0 device_release_driver_internal+0x194/0x1f0 unbind_store+0x99/0xa0 kernfs_fop_write_iter+0x12e/0x1e0 vfs_write+0x215/0x3d0 ksys_write+0x5f/0xd0 do_syscall_64+0x55/0xe90 entry_SYSCALL_64_after_hwframe+0x4b/0x53 [2] CPU0 (primary) CPU1 (secondary) ========================================================================== mlx5e_remove() (device_lock held) mlx5e_remove() (2nd device_lock held) mlx5_sd_get_adev() mlx5_devcom_comp_is_ready() => true device_lock(primary) mlx5_sd_get_adev() ==> ret adev _mlx5e_remove() mlx5_sd_cleanup() // mlx5e_remove finished // releasing device_lock //need another check here... mlx5_devcom_comp_is_ready() => false Fixes: 381978d28317 ("net/mlx5e: Create single netdev per SD group") Signed-off-by: Shay Drory Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260504180206.268568-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 ++++++- drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c | 39 +++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h | 2 ++ 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 62b70334a13d..8f2b3abe0092 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -6774,8 +6774,10 @@ static int mlx5e_resume(struct auxiliary_device *adev) return err; actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); - if (actual_adev) + if (actual_adev) { err = _mlx5e_resume(actual_adev); + mlx5_sd_put_adev(actual_adev, adev); + } return err; } @@ -6815,6 +6817,8 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) err = _mlx5e_suspend(actual_adev, false); mlx5_sd_cleanup(mdev); + if (actual_adev) + mlx5_sd_put_adev(actual_adev, adev); return err; } @@ -6916,11 +6920,14 @@ static int mlx5e_probe(struct auxiliary_device *adev, err = _mlx5e_probe(actual_adev); if (err) goto sd_cleanup; + mlx5_sd_put_adev(actual_adev, adev); } return 0; sd_cleanup: mlx5_sd_cleanup(mdev); + if (actual_adev) + mlx5_sd_put_adev(actual_adev, adev); return err; } @@ -6973,6 +6980,8 @@ static void mlx5e_remove(struct auxiliary_device *adev) _mlx5e_remove(actual_adev); mlx5_sd_cleanup(mdev); + if (actual_adev) + mlx5_sd_put_adev(actual_adev, adev); } static const struct auxiliary_device_id mlx5e_id_table[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index 89b7e4d67303..6e199161b008 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -562,22 +562,55 @@ out_unlock: sd_cleanup(dev); } +/* Lock order: + * primary: actual_adev_lock -> SD devcom comp lock + * secondary: SD devcom comp lock -> (drop) -> actual_adev_lock + * The two locks are never held together, so no ABBA. + */ struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev, struct auxiliary_device *adev, int idx) { struct mlx5_sd *sd = mlx5_get_sd(dev); struct mlx5_core_dev *primary; + struct mlx5_adev *primary_adev; if (!sd) return adev; - if (!mlx5_devcom_comp_is_ready(sd->devcom)) + mlx5_devcom_comp_lock(sd->devcom); + if (!mlx5_devcom_comp_is_ready(sd->devcom)) { + mlx5_devcom_comp_unlock(sd->devcom); return NULL; + } primary = mlx5_sd_get_primary(dev); - if (dev == primary) + if (!primary || dev == primary) { + mlx5_devcom_comp_unlock(sd->devcom); return adev; + } + + primary_adev = primary->priv.adev[idx]; + get_device(&primary_adev->adev.dev); + mlx5_devcom_comp_unlock(sd->devcom); - return &primary->priv.adev[idx]->adev; + device_lock(&primary_adev->adev.dev); + /* Primary may have completed remove between dropping devcom and + * acquiring device_lock; recheck. + */ + if (!mlx5_devcom_comp_is_ready(sd->devcom)) { + device_unlock(&primary_adev->adev.dev); + put_device(&primary_adev->adev.dev); + return NULL; + } + return &primary_adev->adev; +} + +void mlx5_sd_put_adev(struct auxiliary_device *actual_adev, + struct auxiliary_device *adev) +{ + if (actual_adev != adev) { + device_unlock(&actual_adev->dev); + put_device(&actual_adev->dev); + } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h index 137efaf9aabc..9bfd5b9756b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h @@ -15,6 +15,8 @@ struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int c struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev, struct auxiliary_device *adev, int idx); +void mlx5_sd_put_adev(struct auxiliary_device *actual_adev, + struct auxiliary_device *adev); int mlx5_sd_init(struct mlx5_core_dev *dev); void mlx5_sd_cleanup(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 525cb7ba6661074c1c5cc3772bccc6afab6791ef Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Tue, 5 May 2026 05:34:03 +0000 Subject: platform/chrome: cros_ec_typec: Init mutex in Thunderbolt registration cros_typec_register_thunderbolt() missed initializing the `adata->lock` mutex. This leads to a NULL dereference when the mutex is later acquired (e.g. in cros_typec_altmode_work()). Initialize the mutex in cros_typec_register_thunderbolt() to fix the issue. Cc: stable@vger.kernel.org Fixes: 3b00be26b16a ("platform/chrome: cros_ec_typec: Thunderbolt support") Reviewed-by: Benson Leung Reviewed-by: Abhishek Pandit-Subedi Link: https://lore.kernel.org/r/20260505053403.3335740-1-tzungbi@kernel.org Signed-off-by: Tzung-Bi Shih --- drivers/platform/chrome/cros_typec_altmode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/chrome/cros_typec_altmode.c b/drivers/platform/chrome/cros_typec_altmode.c index 557340b53af0..66c546bf89b5 100644 --- a/drivers/platform/chrome/cros_typec_altmode.c +++ b/drivers/platform/chrome/cros_typec_altmode.c @@ -359,6 +359,7 @@ cros_typec_register_thunderbolt(struct cros_typec_port *port, } INIT_WORK(&adata->work, cros_typec_altmode_work); + mutex_init(&adata->lock); adata->alt = alt; adata->port = port; adata->ap_mode_entry = true; -- cgit v1.2.3 From 60c71369ee356d11ad845ddeb28ceb70ec6cd70e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 11 Mar 2026 17:39:56 -0700 Subject: powerpc/vdso: Drop -DCC_USING_PATCHABLE_FUNCTION_ENTRY from 32-bit flags with clang After commit 73cdf24e81e4 ("powerpc64: make clang cross-build friendly"), building 64-bit little endian + CONFIG_COMPAT=y with clang results in many warnings along the lines of: $ cat arch/powerpc/configs/compat.config CONFIG_COMPAT=y $ make -skj"$(nproc)" ARCH=powerpc LLVM=1 ppc64le_defconfig compat.config arch/powerpc/kernel/vdso/ ... In file included from :4: In file included from lib/vdso/gettimeofday.c:6: In file included from include/vdso/datapage.h:15: In file included from include/vdso/cache.h:5: arch/powerpc/include/asm/cache.h:77:8: warning: unknown attribute 'patchable_function_entry' ignored [-Wunknown-attributes] 77 | static inline u32 l1_icache_bytes(void) | ^~~~~~ include/linux/compiler_types.h:235:58: note: expanded from macro 'inline' 235 | #define inline inline __gnu_inline __inline_maybe_unused notrace | ^~~~~~~ include/linux/compiler_types.h:215:34: note: expanded from macro 'notrace' 215 | #define notrace __attribute__((patchable_function_entry(0, 0))) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ... arch/powerpc/Makefile adds -DCC_USING_PATCHABLE_FUNCTION_ENTRY to KBUILD_CPPFLAGS, which is inherited by the 32-bit vDSO. However, the 32-bit little endian target does not support '-fpatchable-function-entry', resulting in the warnings above. Remove -DCC_USING_PATCHABLE_FUNCTION_ENTRY from the 32-bit vDSO flags when building with clang to avoid the warnings. Fixes: 73cdf24e81e4 ("powerpc64: make clang cross-build friendly") Signed-off-by: Nathan Chancellor Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260311-ppc-vdso-drop-cc-using-pfe-define-clang-v1-1-66c790e22650@kernel.org --- arch/powerpc/kernel/vdso/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 8834dfe9d727..368759f81708 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -62,6 +62,12 @@ CC32FLAGSREMOVE += -fno-stack-clash-protection # 32-bit one. clang validates the values passed to these arguments during # parsing, even when -fno-stack-protector is passed afterwards. CC32FLAGSREMOVE += -mstack-protector-guard% +# ftrace is disabled for the vdso but arch/powerpc/Makefile adds this define to +# KBUILD_CPPFLAGS, which enables use of the 'patchable_function_entry' +# attribute in the 'inline' define via 'notrace'. This attribute is not +# supported for the powerpcle target, resulting in many instances of +# -Wunknown-attributes. +CC32FLAGSREMOVE += -DCC_USING_PATCHABLE_FUNCTION_ENTRY endif LD32FLAGS := -Wl,-soname=linux-vdso32.so.1 AS32FLAGS := -D__VDSO32__ -- cgit v1.2.3 From 8333e4916040e529bd5b56b82d573aba51e88a14 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 17 Mar 2026 14:08:24 +0100 Subject: powerpc/ps3: Drop redundant result assignment Return value of ps3_start_probe_thread() is not used, so code can be simplified to fix W=1 clang warnings: arch/powerpc/platforms/ps3/device-init.c:953:6: error: variable 'result' set but not used [-Werror,-Wunused-but-set-variable] Signed-off-by: Krzysztof Kozlowski Reviewed-by: Geert Uytterhoeven Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260317130823.240279-3-krzysztof.kozlowski@oss.qualcomm.com --- arch/powerpc/platforms/ps3/device-init.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index 12c473768c39..9109c218a060 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -950,8 +950,6 @@ static int __init ps3_start_probe_thread(enum ps3_bus_type bus_type) static int __init ps3_register_devices(void) { - int result; - if (!firmware_has_feature(FW_FEATURE_PS3_LV1)) return -ENODEV; @@ -959,7 +957,7 @@ static int __init ps3_register_devices(void) /* ps3_repository_dump_bus_info(); */ - result = ps3_start_probe_thread(PS3_BUS_TYPE_STORAGE); + ps3_start_probe_thread(PS3_BUS_TYPE_STORAGE); ps3_register_vuart_devices(); -- cgit v1.2.3 From f583bd5f64d40e083dde5bb22846c4d93e59d471 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 17 Mar 2026 14:08:25 +0100 Subject: powerpc/pasemi: Drop redundant res assignment Return value of pas_add_bridge() is not used, so code can be simplified to fix W=1 clang warnings: arch/powerpc/platforms/pasemi/pci.c:275:6: error: variable 'res' set but not used [-Werror,-Wunused-but-set-variable] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260317130823.240279-4-krzysztof.kozlowski@oss.qualcomm.com --- arch/powerpc/platforms/pasemi/pci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c index 60f990a336c4..2df955274652 100644 --- a/arch/powerpc/platforms/pasemi/pci.c +++ b/arch/powerpc/platforms/pasemi/pci.c @@ -272,13 +272,12 @@ void __init pas_pci_init(void) { struct device_node *root = of_find_node_by_path("/"); struct device_node *np; - int res; pci_set_flags(PCI_SCAN_ALL_PCIE_DEVS); np = of_find_compatible_node(root, NULL, "pasemi,rootbus"); if (np) { - res = pas_add_bridge(np); + pas_add_bridge(np); of_node_put(np); } of_node_put(root); -- cgit v1.2.3 From d73a9a63f9f7f7c17637731fd28daf3665992d1e Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Sat, 2 May 2026 23:07:15 +0300 Subject: xsk: reject sw-csum UMEM binding to IFF_TX_SKB_NO_LINEAR devices skb_checksum_help() is a common helper that writes the folded 16-bit checksum back via skb->data + csum_start + csum_offset, i.e. it relies on the skb's linear head and fails (with WARN_ONCE and -EINVAL) when skb_headlen() is 0. AF_XDP generic xmit takes two very different paths depending on the netdev. Drivers that advertise IFF_TX_SKB_NO_LINEAR (e.g. virtio_net) skip the "copy payload into a linear head" step on purpose as a performance optimisation: xsk_build_skb_zerocopy() only attaches UMEM pages as frags and never calls skb_put(), so skb_headlen() stays 0 for the whole skb. For these skbs there is simply no linear area for skb_checksum_help() to write the csum into - the sw-csum fallback is structurally inapplicable. The patch tries to catch this and reject the combination with error at setup time. Rejecting at bind() converts this silent per-packet failure into a synchronous, actionable -EOPNOTSUPP at setup time. HW csum and launch_time metadata on IFF_TX_SKB_NO_LINEAR drivers are unaffected because they do not call skb_checksum_help(). Without the patch, every descriptor carrying 'XDP_TX_METADATA | XDP_TXMD_FLAGS_CHECKSUM' produces: 1) a WARN_ONCE "offset (N) >= skb_headlen() (0)" from skb_checksum_help(), 2) sendmsg() returning -EINVAL without consuming the descriptor (invalid_descs is not incremented), 3) a wedged TX ring: __xsk_generic_xmit() does not advance the consumer on non-EOVERFLOW errors, so the next sendmsg() re-reads the same descriptor and re-hits the same WARN until the socket is closed. Closes: https://lore.kernel.org/all/20260419045822.843BFC2BCAF@smtp.kernel.org/#t Acked-by: Stanislav Fomichev Signed-off-by: Jason Xing Acked-by: Stanislav Fomichev Signed-off-by: Jason Xing Reviewed-by: Alexander Lobakin Fixes: 30c3055f9c0d ("xsk: wrap generic metadata handling onto separate function") Link: https://patch.msgid.link/20260502200722.53960-2-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk_buff_pool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index cd7bc50872f6..d981cfdd8535 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -175,6 +175,9 @@ int xp_assign_dev(struct xsk_buff_pool *pool, if (force_zc && force_copy) return -EINVAL; + if (pool->tx_sw_csum && (netdev->priv_flags & IFF_TX_SKB_NO_LINEAR)) + return -EOPNOTSUPP; + if (xsk_get_pool_from_qid(netdev, queue_id)) return -EBUSY; -- cgit v1.2.3 From 0bb7a9caf5c1d6e25ba376ea6b39261ad28550f4 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Sat, 2 May 2026 23:07:16 +0300 Subject: xsk: free the skb when hitting the upper bound MAX_SKB_FRAGS Fix it by explicitly adding kfree_skb() before returning back to its caller. How to reproduce it in virtio_net: 1. the current skb is the first one (which means xs->skb is NULL) and hit the limit MAX_SKB_FRAGS. 2. xsk_build_skb_zerocopy() returns -EOVERFLOW. 3. the caller xsk_build_skb() clears skb by using 'skb = NULL;'. This is why bug can be triggered. 4. there is no chance to free this skb anymore. Note that if in this case the xs->skb is not NULL, xsk_build_skb() will call xsk_drop_skb(xs->skb) to do the right thing. Fixes: cf24f5a5feea ("xsk: add support for AF_XDP multi-buffer on Tx path") Acked-by: Stanislav Fomichev Signed-off-by: Jason Xing Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20260502200722.53960-3-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 887abed25466..d706b1e0bf60 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -856,8 +856,11 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, addr = buffer - pool->addrs; for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) { - if (unlikely(i >= MAX_SKB_FRAGS)) + if (unlikely(i >= MAX_SKB_FRAGS)) { + if (!xs->skb) + kfree_skb(skb); return ERR_PTR(-EOVERFLOW); + } page = pool->umem->pgs[addr >> PAGE_SHIFT]; get_page(page); -- cgit v1.2.3 From 8cd3c1c6e7d9a1f0954159ec5f2fdaa7f6a48bd8 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Sat, 2 May 2026 23:07:17 +0300 Subject: xsk: handle NULL dereference of the skb without frags issue When a first descriptor (xs->skb == NULL) triggers -EOVERFLOW in xsk_build_skb_zerocopy() (e.g., MAX_SKB_FRAGS exceeded), the free_err -EOVERFLOW handler unconditionally dereferences xs->skb via xsk_inc_num_desc(xs->skb) and xsk_drop_skb(xs->skb), causing a NULL pointer dereference. Fix this by guarding the existing xsk_inc_num_desc()/xsk_drop_skb() calls with an xs->skb check (for the continuation case), and add an else branch for the first-descriptor case that manually cancels the one reserved CQ slot and increments invalid_descs by one to account for the single invalid descriptor. Fixes: cf24f5a5feea ("xsk: add support for AF_XDP multi-buffer on Tx path") Acked-by: Stanislav Fomichev Signed-off-by: Jason Xing Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20260502200722.53960-4-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index d706b1e0bf60..06ee260f3afc 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -976,9 +976,14 @@ free_err: kfree_skb(skb); if (err == -EOVERFLOW) { - /* Drop the packet */ - xsk_inc_num_desc(xs->skb); - xsk_drop_skb(xs->skb); + if (xs->skb) { + /* Drop the packet */ + xsk_inc_num_desc(xs->skb); + xsk_drop_skb(xs->skb); + } else { + xsk_cq_cancel_locked(xs->pool, 1); + xs->tx->invalid_descs++; + } xskq_cons_release(xs->tx); } else { /* Let application retry */ -- cgit v1.2.3 From 0f3776583d282550dbafe6082a914efcf9094d59 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Sat, 2 May 2026 23:07:18 +0300 Subject: xsk: fix use-after-free of xs->skb in xsk_build_skb() free_err path When xsk_build_skb() processes multi-buffer packets in copy mode, the first descriptor stores data into the skb linear area without adding any frags, so nr_frags stays at 0. The caller then sets xs->skb = skb to accumulate subsequent descriptors. If a continuation descriptor fails (e.g. alloc_page returns NULL with -EAGAIN), we jump to free_err where the condition: if (skb && !skb_shinfo(skb)->nr_frags) kfree_skb(skb); evaluates to true because nr_frags is still 0 (the first descriptor used the linear area, not frags). This frees the skb while xs->skb still points to it, creating a dangling pointer. On the next transmit attempt or socket close, xs->skb is dereferenced, causing a use-after-free or double-free. Fix by using a !xs->skb check to handle first frag situation, ensuring we only free skbs that were freshly allocated in this call (xs->skb is NULL) and never free an in-progress multi-buffer skb that the caller still references. Closes: https://lore.kernel.org/all/20260415082654.21026-4-kerneljasonxing@gmail.com/ Fixes: 6b9c129c2f93 ("xsk: remove @first_frag from xsk_build_skb()") Acked-by: Stanislav Fomichev Signed-off-by: Jason Xing Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20260502200722.53960-5-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 06ee260f3afc..55378c3855d5 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -972,7 +972,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, return skb; free_err: - if (skb && !skb_shinfo(skb)->nr_frags) + if (skb && !xs->skb) kfree_skb(skb); if (err == -EOVERFLOW) { -- cgit v1.2.3 From 3dec153ae484e3b2ddac841156e197ba54c8df94 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Sat, 2 May 2026 23:07:19 +0300 Subject: xsk: prevent CQ desync when freeing half-built skbs in xsk_build_skb() Once xsk_skb_init_misc() has been called on an skb, its destructor is set to xsk_destruct_skb(), which submits the descriptor address(es) to the completion queue and advances the CQ producer. If such an skb is subsequently freed via kfree_skb() along an error path - before the skb has ever been handed to the driver - the destructor still runs and submits a bogus, half-initialized address to the CQ. Postpone the init phase when we believe the allocation of first frag is successfully completed. Before this init, skb can be safely freed by kfree_skb(). Closes: https://lore.kernel.org/all/20260419045822.843BFC2BCAF@smtp.kernel.org/ Fixes: c30d084960cf ("xsk: avoid overwriting skb fields for multi-buffer traffic") Acked-by: Stanislav Fomichev Signed-off-by: Jason Xing Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20260502200722.53960-6-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 55378c3855d5..af3c5752bb63 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -819,8 +819,6 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, return ERR_PTR(err); skb_reserve(skb, hr); - - xsk_skb_init_misc(skb, xs, desc->addr); if (desc->options & XDP_TX_METADATA) { err = xsk_skb_metadata(skb, buffer, desc, pool, hr); if (unlikely(err)) @@ -917,7 +915,6 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, if (unlikely(err)) goto free_err; - xsk_skb_init_misc(skb, xs, desc->addr); if (desc->options & XDP_TX_METADATA) { err = xsk_skb_metadata(skb, buffer, desc, xs->pool, hr); @@ -967,6 +964,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, } } + if (!xs->skb) + xsk_skb_init_misc(skb, xs, desc->addr); xsk_inc_num_desc(skb); return skb; -- cgit v1.2.3 From 8c2cff50afdd2b53c7cc2ca2297301c0ffd3e802 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Sat, 2 May 2026 23:07:20 +0300 Subject: xsk: avoid skb leak in XDP_TX_METADATA case Fix it by explicitly adding kfree_skb() before returning back to its caller. How to reproduce it in virtio_net: 1. the current skb is the first one (which means no frag and xs->skb is NULL) and users enable metadata feature. 2. xsk_skb_metadata() returns a error code. 3. the caller xsk_build_skb() clears skb by using 'skb = NULL;'. 4. there is no chance to free this skb anymore. Closes: https://lore.kernel.org/all/20260415085204.3F87AC19424@smtp.kernel.org/ Fixes: 30c3055f9c0d ("xsk: wrap generic metadata handling onto separate function") Acked-by: Stanislav Fomichev Signed-off-by: Jason Xing Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20260502200722.53960-7-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index af3c5752bb63..770ba4695a9d 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -821,8 +821,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, skb_reserve(skb, hr); if (desc->options & XDP_TX_METADATA) { err = xsk_skb_metadata(skb, buffer, desc, pool, hr); - if (unlikely(err)) + if (unlikely(err)) { + kfree_skb(skb); return ERR_PTR(err); + } } } else { struct xsk_addrs *xsk_addr; -- cgit v1.2.3 From e0f229025a8e774a695017a376c4a01279c0e66e Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Sat, 2 May 2026 23:07:21 +0300 Subject: xsk: fix xsk_addrs slab leak on multi-buffer error path When xsk_build_skb() / xsk_build_skb_zerocopy() sees the first continuation descriptor, it promotes destructor_arg from an inlined address to a freshly allocated xsk_addrs (num_descs = 1). The counter is bumped to >= 2 only at the very end of a successful build (by calling xsk_inc_num_desc()). If the build fails in between (e.g. alloc_page() returns NULL with -EAGAIN, or the MAX_SKB_FRAGS overflow hits), we jump to free_err, skip calling xsk_inc_num_desc() to increment num_descs and leave the half-built skb attached to xs->skb for the app to retry. The skb now has 1) destructor_arg = a real xsk_addrs pointer, 2) num_descs = 1 If the app never retries and just close()s the socket, xsk_release() calls xsk_drop_skb() -> xsk_consume_skb(), which decides whether to free xsk_addrs by testing num_descs > 1: if (unlikely(num_descs > 1)) kmem_cache_free(xsk_tx_generic_cache, destructor_arg); Because num_descs is exactly 1 the branch is skipped and the xsk_addrs object is leaked to the xsk_tx_generic_cache slab. Fix it by directly testing if destructor_arg is still addr. Or else it is modified and used to store the newly allocated memory from xsk_tx_generic_cache regardless of increment of num_desc, which we need to handle. Closes: https://lore.kernel.org/all/20260419045824.D9E5EC2BCAF@smtp.kernel.org/ Fixes: 0ebc27a4c67d ("xsk: avoid data corruption on cq descriptor number") Acked-by: Stanislav Fomichev Signed-off-by: Jason Xing Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20260502200722.53960-8-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 770ba4695a9d..079abd4bcb69 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -685,7 +685,7 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool, spin_lock_irqsave(&pool->cq_prod_lock, flags); idx = xskq_get_prod(pool->cq); - if (unlikely(num_descs > 1)) { + if (unlikely(!xsk_skb_destructor_is_addr(skb))) { xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; for (i = 0; i < num_descs; i++) { @@ -740,7 +740,7 @@ static void xsk_consume_skb(struct sk_buff *skb) u32 num_descs = xsk_get_num_desc(skb); struct xsk_addrs *xsk_addr; - if (unlikely(num_descs > 1)) { + if (unlikely(!xsk_skb_destructor_is_addr(skb))) { xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; kmem_cache_free(xsk_tx_generic_cache, xsk_addr); } -- cgit v1.2.3 From 203cee647f551abc87b992045cd920b117ff990a Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Sat, 2 May 2026 23:07:22 +0300 Subject: xsk: fix u64 descriptor address truncation on 32-bit architectures In copy mode TX, xsk_skb_destructor_set_addr() stores the 64-bit descriptor address into skb_shinfo(skb)->destructor_arg (void *) via a uintptr_t cast: skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL); On 32-bit architectures uintptr_t is 32 bits, so the upper 32 bits of the descriptor address are silently dropped. In XDP_ZEROCOPY unaligned mode the chunk offset is encoded in bits 48-63 of the descriptor address (XSK_UNALIGNED_BUF_OFFSET_SHIFT = 48), meaning the offset is lost entirely. The completion queue then returns a truncated address to userspace, making buffer recycling impossible. Fix this by handling the 32-bit case directly in xsk_skb_destructor_set_addr(): when !CONFIG_64BIT, allocate an xsk_addrs struct (the same path already used for multi-descriptor SKBs) to store the full u64 address. The existing tagged-pointer logic in xsk_skb_destructor_is_addr() stays unchanged: slab pointers returned from kmem_cache_zalloc() are always word-aligned and therefore have bit 0 clear, which correctly identifies them as a struct pointer rather than an inline tagged address on every architecture. Factor the shared kmem_cache_zalloc + destructor_arg assignment into __xsk_addrs_alloc() and add a wrapper xsk_addrs_alloc() that handles the inline-to-list upgrade (is_addr check + get_addr + num_descs = 1). The three former open-coded kmem_cache_zalloc call sites now reduce to a single call each. Propagate the -ENOMEM from xsk_skb_destructor_set_addr() through xsk_skb_init_misc() so the caller can clean up the skb via kfree_skb() before skb->destructor is installed. The overhead is one extra kmem_cache_zalloc per first descriptor on 32-bit only; 64-bit builds are completely unchanged. Closes: https://lore.kernel.org/all/20260419045824.D9E5EC2BCAF@smtp.kernel.org/ Fixes: 0ebc27a4c67d ("xsk: avoid data corruption on cq descriptor number") Signed-off-by: Jason Xing Acked-by: Stanislav Fomichev Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20260502200722.53960-9-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk.c | 88 +++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 32 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 079abd4bcb69..5e5786cd9af5 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -646,9 +646,42 @@ static u64 xsk_skb_destructor_get_addr(struct sk_buff *skb) return (u64)((uintptr_t)skb_shinfo(skb)->destructor_arg & ~0x1UL); } -static void xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr) +static struct xsk_addrs *__xsk_addrs_alloc(struct sk_buff *skb, u64 addr) { - skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL); + struct xsk_addrs *xsk_addr; + + xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL); + if (unlikely(!xsk_addr)) + return NULL; + + xsk_addr->addrs[0] = addr; + skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; + return xsk_addr; +} + +static struct xsk_addrs *xsk_addrs_alloc(struct sk_buff *skb) +{ + struct xsk_addrs *xsk_addr; + + if (!xsk_skb_destructor_is_addr(skb)) + return (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + + xsk_addr = __xsk_addrs_alloc(skb, xsk_skb_destructor_get_addr(skb)); + if (likely(xsk_addr)) + xsk_addr->num_descs = 1; + return xsk_addr; +} + +static int xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr) +{ + if (IS_ENABLED(CONFIG_64BIT)) { + skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL); + return 0; + } + + if (unlikely(!__xsk_addrs_alloc(skb, addr))) + return -ENOMEM; + return 0; } static void xsk_inc_num_desc(struct sk_buff *skb) @@ -724,14 +757,20 @@ void xsk_destruct_skb(struct sk_buff *skb) sock_wfree(skb); } -static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs, - u64 addr) +static int xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs, + u64 addr) { + int err; + + err = xsk_skb_destructor_set_addr(skb, addr); + if (unlikely(err)) + return err; + skb->dev = xs->dev; skb->priority = READ_ONCE(xs->sk.sk_priority); skb->mark = READ_ONCE(xs->sk.sk_mark); skb->destructor = xsk_destruct_skb; - xsk_skb_destructor_set_addr(skb, addr); + return 0; } static void xsk_consume_skb(struct sk_buff *skb) @@ -829,18 +868,9 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, } else { struct xsk_addrs *xsk_addr; - if (xsk_skb_destructor_is_addr(skb)) { - xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, - GFP_KERNEL); - if (!xsk_addr) - return ERR_PTR(-ENOMEM); - - xsk_addr->num_descs = 1; - xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb); - skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; - } else { - xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; - } + xsk_addr = xsk_addrs_alloc(skb); + if (!xsk_addr) + return ERR_PTR(-ENOMEM); /* in case of -EOVERFLOW that could happen below, * xsk_consume_skb() will release this node as whole skb @@ -929,19 +959,10 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, struct page *page; u8 *vaddr; - if (xsk_skb_destructor_is_addr(skb)) { - xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, - GFP_KERNEL); - if (!xsk_addr) { - err = -ENOMEM; - goto free_err; - } - - xsk_addr->num_descs = 1; - xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb); - skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; - } else { - xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + xsk_addr = xsk_addrs_alloc(skb); + if (!xsk_addr) { + err = -ENOMEM; + goto free_err; } if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) { @@ -966,8 +987,11 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, } } - if (!xs->skb) - xsk_skb_init_misc(skb, xs, desc->addr); + if (!xs->skb) { + err = xsk_skb_init_misc(skb, xs, desc->addr); + if (unlikely(err)) + goto free_err; + } xsk_inc_num_desc(skb); return skb; -- cgit v1.2.3 From bd3c45dd01283ada23b0a388c578dcf5600deb8a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 23 Apr 2026 18:53:49 +0200 Subject: timers/migration: Fix another hotplug activation race The hotplug control CPU is assumed to be active in the hierarchy but that doesn't imply that the root is active. If the current CPU is not the one that activated the current hierarchy, and the CPU performing this duty is still halfway through the tree, the root may still be observed inactive. And this can break the activation of a new root as in the following scenario: 1) Initially, the whole system has 64 CPUs and only CPU 63 is awake. [GRP1:0] active / | \ / | \ [GRP0:0] [...] [GRP0:7] idle idle active / | \ | CPU 0 CPU 1 ... CPU 63 idle idle active 2) CPU 63 goes idle _but_ due to a #VMEXIT it hasn't yet reached the [GRP1:0]->parent dereference (that would be NULL and stop the walk) in __walk_groups_from(). [GRP1:0] idle / | \ / | \ [GRP0:0] [...] [GRP0:7] idle idle idle / | \ | CPU 0 CPU 1 ... CPU 63 idle idle idle 3) CPU 1 wakes up, activates GRP0:0 but didn't yet manage to propagate up to GRP1:0 due to yet another #VMEXIT. [GRP1:0] idle / | \ / | \ [GRP0:0] [...] [GRP0:7] active idle idle / | \ | CPU 0 CPU 1 ... CPU 63 idle active idle 3) CPU 0 wakes up and doesn't need to walk above GRP0:0 as it's CPU 1 role. [GRP1:0] idle / | \ / | \ [GRP0:0] [...] [GRP0:7] active idle idle / | \ | CPU 0 CPU 1 ... CPU 63 active active idle 4) CPU 0 boots CPU 64. It creates a new root for it. [GRP2:0] idle / \ / \ [GRP1:0] [GRP1:1] idle idle / | \ \ / | \ \ [GRP0:0] [...] [GRP0:7] [GRP0:8] active idle idle idle / | \ | | CPU 0 CPU 1 ... CPU 63 CPU 64 active active idle offline 5) CPU 0 activates the new root, but note that GRP1:0 is still idle, waiting for CPU 1 to resume from #VMEXIT and activate it. [GRP2:0] active / \ / \ [GRP1:0] [GRP1:1] idle idle / | \ \ / | \ \ [GRP0:0] [...] [GRP0:7] [GRP0:8] active idle idle idle / | \ | | CPU 0 CPU 1 ... CPU 63 CPU 64 active active idle offline 6) CPU 63 resumes after #VMEXIT and sees the new GRP1:0 parent. Therefore it propagates the stale inactive state of GRP1:0 up to GRP2:0. [GRP2:0] idle / \ / \ [GRP1:0] [GRP1:1] idle idle / | \ \ / | \ \ [GRP0:0] [...] [GRP0:7] [GRP0:8] active idle idle idle / | \ | | CPU 0 CPU 1 ... CPU 63 CPU 64 active active idle offline 7) CPU 1 resumes after #VMEXIT and finally activates GRP1:0. But it doesn't observe its parent link because no ordering enforced that. Therefore GRP2:0 is spuriously left idle. [GRP2:0] idle / \ / \ [GRP1:0] [GRP1:1] active idle / | \ \ / | \ \ [GRP0:0] [...] [GRP0:7] [GRP0:8] active idle idle idle / | \ | | CPU 0 CPU 1 ... CPU 63 CPU 64 active active idle offline Such races are highly theoretical and the problem would solve itself once the old root ever becomes idle again. But it still leaves a taste of discomfort. Fix it with enforcing a fully ordered atomic read of the old root state before propagating the activate state up to the new root. It has a two directions ordering effect: * Acquire + release of the latest old root state: If the hotplug control CPU is not the one that woke up the old root, make sure to acquire its active state and propagate it upwards through the ordered chain of activation (the acquire pairs with the cmpxchg() in tmigr_active_up() and subsequent releases will pair with atomic_read_acquire() and smp_mb__after_atomic() in tmigr_inactive_up()). * Release: If the hotplug control CPU is not the one that must wake up the old root, but the CPU covering that is lagging behind its duty, publish the links from the old root to the new parents. This way the lagging CPU will propagate the active state itself. Fixes: 7ee988770326 ("timers: Implement the hierarchical pull model") Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260423165354.95152-2-frederic@kernel.org --- kernel/time/timer_migration.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c index 155eeaea4113..1d0d3a4058d5 100644 --- a/kernel/time/timer_migration.c +++ b/kernel/time/timer_migration.c @@ -1860,19 +1860,37 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node, * child to the new parents. So tmigr_active_up() activates the * new parents while walking up from the old root to the new. * - * * It is ensured that @start is active, as this setup path is - * executed in hotplug prepare callback. This is executed by an - * already connected and !idle CPU. Even if all other CPUs go idle, - * the CPU executing the setup will be responsible up to current top - * level group. And the next time it goes inactive, it will release - * the new childmask and parent to subsequent walkers through this - * @child. Therefore propagate active state unconditionally. + * * It is ensured that @start is active, (or on the way to be activated + * by another CPU that woke up before the current one) as this setup path + * is executed in hotplug prepare callback. This is executed by an already + * connected and !idle CPU in the hierarchy. + * + * * The below RmW atomic operation ensures that: + * + * 1) If the old root has been completely activated, the latest state is + * acquired (the below implicit acquire pairs with the implicit release + * from cmpxchg() in tmigr_active_up()). + * + * 2) If the old root is still on the way to be activated, the lagging behind + * CPU performing the activation will acquire the links up to the new root. + * (The below implicit release pairs with the implicit acquire from cmpxchg() + * in tmigr_active_up()). + * + * 3) Every subsequent CPU below the old root will acquire the new links while + * walking through the old root (The below implicit release pairs with the + * implicit acquire from cmpxchg() in either tmigr_active_up()) or + * tmigr_inactive_up(). */ - state.state = atomic_read(&start->migr_state); - WARN_ON_ONCE(!state.active); + state.state = atomic_fetch_or(0, &start->migr_state); WARN_ON_ONCE(!start->parent); - data.childmask = start->groupmask; - __walk_groups_from(tmigr_active_up, &data, start, start->parent); + /* + * If the state of the old root is inactive, another CPU is on its way to activate + * it and propagate to the new root. + */ + if (state.active) { + data.childmask = start->groupmask; + __walk_groups_from(tmigr_active_up, &data, start, start->parent); + } } /* Root update */ -- cgit v1.2.3 From 92429ca999db99febced82f23362a71b2ba4c1d8 Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Wed, 6 May 2026 00:15:48 -0300 Subject: ALSA: seq: Fix UMP group 16 filtering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sequencer UAPI defines group_filter as an unsigned int bitmap. Bit 0 filters groupless messages and bits 1-16 filter UMP groups 1-16. The internal snd_seq_client storage is only unsigned short, so bit 16 is truncated when userspace sets the filter. The same truncation affects the automatic UMP client filter used to avoid delivery to inactive groups, so events for group 16 cannot be filtered. Store the internal bitmap as unsigned int and keep both userspace-provided and automatically generated values limited to the defined UAPI bits. Fixes: d2b706077792 ("ALSA: seq: Add UMP group filter") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260506-alsa-seq-ump-group16-filter-v1-1-b75160bf6993@gmail.com Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 2 +- sound/core/seq/seq_clientmgr.h | 5 ++++- sound/core/seq/seq_ump_client.c | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 75a7a2af9d8c..5719637575a9 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1253,7 +1253,7 @@ static int snd_seq_ioctl_set_client_info(struct snd_seq_client *client, if (client->user_pversion >= SNDRV_PROTOCOL_VERSION(1, 0, 3)) client->midi_version = client_info->midi_version; memcpy(client->event_filter, client_info->event_filter, 32); - client->group_filter = client_info->group_filter; + client->group_filter = client_info->group_filter & SND_SEQ_GROUP_FILTER_MASK; /* notify the change */ snd_seq_system_client_ev_client_change(client->number); diff --git a/sound/core/seq/seq_clientmgr.h b/sound/core/seq/seq_clientmgr.h index ece02c58db70..feea8bb7d987 100644 --- a/sound/core/seq/seq_clientmgr.h +++ b/sound/core/seq/seq_clientmgr.h @@ -14,6 +14,9 @@ /* client manager */ +#define SND_SEQ_GROUP_FILTER_MASK GENMASK(SNDRV_UMP_MAX_GROUPS, 0) +#define SND_SEQ_GROUP_FILTER_GROUPS GENMASK(SNDRV_UMP_MAX_GROUPS, 1) + struct snd_seq_user_client { struct file *file; /* file struct of client */ /* ... */ @@ -40,7 +43,7 @@ struct snd_seq_client { int number; /* client number */ unsigned int filter; /* filter flags */ DECLARE_BITMAP(event_filter, 256); - unsigned short group_filter; + unsigned int group_filter; snd_use_lock_t use_lock; int event_lost; /* ports */ diff --git a/sound/core/seq/seq_ump_client.c b/sound/core/seq/seq_ump_client.c index fdc76f23e03f..9079ccfdc866 100644 --- a/sound/core/seq/seq_ump_client.c +++ b/sound/core/seq/seq_ump_client.c @@ -369,7 +369,7 @@ static void setup_client_group_filter(struct seq_ump_client *client) cptr = snd_seq_kernel_client_get(client->seq_client); if (!cptr) return; - filter = ~(1U << 0); /* always allow groupless messages */ + filter = SND_SEQ_GROUP_FILTER_GROUPS; /* always allow groupless messages */ for (p = 0; p < SNDRV_UMP_MAX_GROUPS; p++) { if (client->ump->groups[p].active) filter &= ~(1U << (p + 1)); -- cgit v1.2.3 From 01801e20d69346e1e6cec0d908f1cea3a49e51b5 Mon Sep 17 00:00:00 2001 From: Rodrigo Faria Date: Tue, 5 May 2026 19:55:18 +0100 Subject: ALSA: hda/realtek: Add mute LED fixup for HP Pavilion 15-cs1xxx Add a new fixup for the mute LED on the HP Pavilion 15-cs1xxx series using the VREF on NID 0x1b. The BIOS on these models (tested up to F.32) incorrectly reports the mute LED on NID 0x18 via DMI OEM strings, which lacks VREF capabilities. This fixup overrides the LED pin to the correct NID 0x1b. Signed-off-by: Rodrigo Faria Link: https://patch.msgid.link/20260505185518.23625-1-rodrigofilipefaria@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index a7525ed83e2b..11d0ea8ed859 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -1669,6 +1669,21 @@ static void alc295_fixup_hp_mute_led_coefbit11(struct hda_codec *codec, } } +/* Override wrong pin to NID 0x1b (F.32 BIOS reports 0x18 via DMI OEM string) + * on HP pavilion 15-cs1xxx laptops + */ +static void alc295_fixup_hp_pavilion_mute_led_1b(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + struct alc_spec *spec = codec->spec; + + alc269_fixup_hp_mute_led(codec, fix, action); + + if (action == HDA_FIXUP_ACT_PRE_PROBE) + spec->mute_led_nid = 0x1b; +} + static void alc233_fixup_lenovo_coef_micmute_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -3870,6 +3885,7 @@ enum { ALC290_FIXUP_SUBWOOFER, ALC290_FIXUP_SUBWOOFER_HSJACK, ALC295_FIXUP_HP_MUTE_LED_COEFBIT11, + ALC295_FIXUP_HP_PAVILION_MUTE_LED_1B, ALC269_FIXUP_THINKPAD_ACPI, ALC269_FIXUP_LENOVO_XPAD_ACPI, ALC269_FIXUP_DMIC_THINKPAD_ACPI, @@ -5715,6 +5731,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc295_fixup_hp_mute_led_coefbit11, }, + [ALC295_FIXUP_HP_PAVILION_MUTE_LED_1B] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc295_fixup_hp_pavilion_mute_led_1b, + }, [ALC298_FIXUP_SAMSUNG_AMP] = { .type = HDA_FIXUP_FUNC, .v.func = alc298_fixup_samsung_amp, @@ -6931,6 +6951,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8537, "HP ProBook 440 G6", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8548, "HP EliteBook x360 830 G6", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x854a, "HP EliteBook 830 G6", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x856a, "HP Pavilion 15-cs1xxx", ALC295_FIXUP_HP_PAVILION_MUTE_LED_1B), SND_PCI_QUIRK(0x103c, 0x85c6, "HP Pavilion x360 Convertible 14-dy1xxx", ALC295_FIXUP_HP_MUTE_LED_COEFBIT11), SND_PCI_QUIRK(0x103c, 0x85de, "HP Envy x360 13-ar0xxx", ALC285_FIXUP_HP_ENVY_X360), SND_PCI_QUIRK(0x103c, 0x8603, "HP Omen 17-cb0xxx", ALC285_FIXUP_HP_MUTE_LED), -- cgit v1.2.3 From 5337213381df578058e2e41da93cbd0e4639935f Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Wed, 6 May 2026 00:34:47 -0300 Subject: ALSA: core: Serialize deferred fasync state checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit snd_fasync_helper() updates fasync->on under snd_fasync_lock, and snd_fasync_work_fn() now also evaluates fasync->on under the same lock. snd_kill_fasync() still tests the flag before taking the lock, leaving an unsynchronized read against FASYNC enable/disable updates. Move the enabled-state check into the locked section. Also clear fasync->on under snd_fasync_lock in snd_fasync_free() before unlinking the pending entry. Together with the locked sender-side check, this publishes teardown before flushing the deferred work and prevents a racing sender from requeueing the entry after free has started. Fixes: ef34a0ae7a26 ("ALSA: core: Add async signal helpers") Fixes: 8146cd333d23 ("ALSA: core: Fix potential data race at fasync handling") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260506-alsa-core-fasync-on-lock-v1-1-ea48c77d6ca4@gmail.com Signed-off-by: Takashi Iwai --- sound/core/misc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/core/misc.c b/sound/core/misc.c index 5aca09edf971..833124c8e4fa 100644 --- a/sound/core/misc.c +++ b/sound/core/misc.c @@ -148,9 +148,11 @@ EXPORT_SYMBOL_GPL(snd_fasync_helper); void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll) { - if (!fasync || !fasync->on) + if (!fasync) return; guard(spinlock_irqsave)(&snd_fasync_lock); + if (!fasync->on) + return; fasync->signal = signal; fasync->poll = poll; list_move(&fasync->list, &snd_fasync_list); @@ -163,8 +165,10 @@ void snd_fasync_free(struct snd_fasync *fasync) if (!fasync) return; - scoped_guard(spinlock_irq, &snd_fasync_lock) + scoped_guard(spinlock_irq, &snd_fasync_lock) { + fasync->on = 0; list_del_init(&fasync->list); + } flush_work(&snd_fasync_work); kfree(fasync); -- cgit v1.2.3 From 2bcbb163162789d3488562073dbb99d9bd71a762 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Tue, 5 May 2026 20:18:54 -0700 Subject: ALSA: sparc/dbri: add missing fallthrough Fixes compiler error with probably newer compilers: sound/sparc/dbri.c:595:2: error: unannotated fall-through between switch labels [-Werror,-Wimplicit-fallthrough] 595 | case 1: | ^ sound/sparc/dbri.c:595:2: note: insert 'break;' to avoid fall-through 595 | case 1: | ^ | break; Signed-off-by: Rosen Penev Link: https://patch.msgid.link/20260506031854.780411-1-rosenp@gmail.com Signed-off-by: Takashi Iwai --- sound/sparc/dbri.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 75f82a92ff44..2f5f62079fa4 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -592,6 +592,7 @@ static __u32 reverse_bytes(__u32 b, int len) fallthrough; case 2: b = ((b & 0xaaaaaaaa) >> 1) | ((b & 0x55555555) << 1); + fallthrough; case 1: case 0: break; -- cgit v1.2.3 From 283fc9e44ff5b5ac967439b4951b80bd4299f4e4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 May 2026 15:15:34 +0200 Subject: wifi: mac80211: remove station if connection prep fails If connection preparation fails for MLO connections, then the interface is completely reset to non-MLD. In this case, we must not keep the station since it's related to the link of the vif being removed. Delete an existing station. Any "new_sta" is already being removed, so that doesn't need changes. This fixes a use-after-free/double-free in debugfs if that's enabled, because a vif going from MLD (and to MLD, but that's not relevant here) recreates its entire debugfs. Cc: stable@vger.kernel.org Fixes: 81151ce462e5 ("wifi: mac80211: support MLO authentication/association with one link") Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20260505151533.c4e52deb06ad.Iafe56cec7de8512626169496b134bce3a6c17010@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 298ebff6bbf8..0a0f27836d57 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -9149,7 +9149,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss = (void *)cbss->priv; struct sta_info *new_sta = NULL; struct ieee80211_link_data *link; - bool have_sta = false; + struct sta_info *have_sta = NULL; bool mlo; int err; u16 new_links; @@ -9168,11 +9168,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, mlo = false; } - if (assoc) { - rcu_read_lock(); + if (assoc) have_sta = sta_info_get(sdata, ap_mld_addr); - rcu_read_unlock(); - } if (mlo && !have_sta && WARN_ON(sdata->vif.valid_links || sdata->vif.active_links)) @@ -9336,6 +9333,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, out_release_chan: ieee80211_link_release_channel(link); out_err: + if (mlo && have_sta) + WARN_ON(__sta_info_destroy(have_sta)); ieee80211_vif_set_links(sdata, 0, 0); return err; } -- cgit v1.2.3 From 0f3c0a197309717d74729568f88957d448847937 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 May 2026 13:38:37 +0200 Subject: wifi: nl80211: fix NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST usage This is documented as a u8 and has a policy of NLA_U8, but uses nla_get_u32() which means it's completely broken on big-endian. Fix it to use nla_get_u8(). Fixes: 9bb7e0f24e7e ("cfg80211: add peer measurement with FTM initiator API") Link: https://patch.msgid.link/20260505113837.260159-2-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- net/wireless/pmsr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 4c8ea0583f94..d6cd0de64d1f 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -88,7 +88,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.ftms_per_burst = 0; if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]) out->ftm.ftms_per_burst = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]); + nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]); if (capa->ftm.max_ftms_per_burst && (out->ftm.ftms_per_burst > capa->ftm.max_ftms_per_burst || -- cgit v1.2.3 From 15994bb0cbb8fc4879da7552ddd08c1896261c39 Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Wed, 6 May 2026 14:48:53 +0800 Subject: wifi: nl80211: require CAP_NET_ADMIN over the target netns in SET_WIPHY_NETNS NL80211_CMD_SET_WIPHY_NETNS dispatches with GENL_UNS_ADMIN_PERM, which verifies that the caller has CAP_NET_ADMIN for the source netns. It doesn't verify that the caller has CAP_NET_ADMIN over the target netns selected by NL80211_ATTR_NETNS_FD or NL80211_ATTR_PID. This diverges from the convention enforced in net/core/rtnetlink.c::rtnl_get_net_ns_capable(): /* For now, the caller is required to have CAP_NET_ADMIN in * the user namespace owning the target net ns. */ if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) return ERR_PTR(-EACCES); A user with CAP_NET_ADMIN in their own user namespace can therefore push a wiphy into an arbitrary netns (including init_net) over which they have no privilege. Mirror the rtnetlink convention by requiring CAP_NET_ADMIN in the target netns before calling cfg80211_switch_netns(). Signed-off-by: Maoyi Xie Link: https://patch.msgid.link/20260506064854.2207105-2-maoyixie.tju@gmail.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 67088804dcc7..db546dd93d08 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13867,6 +13867,19 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(net)) return PTR_ERR(net); + /* + * The caller already has CAP_NET_ADMIN over the source netns + * (enforced by GENL_UNS_ADMIN_PERM on the genl op). Mirror the + * convention used by net/core/rtnetlink.c::rtnl_get_net_ns_capable() + * and require CAP_NET_ADMIN over the target netns as well, so that + * a caller that is privileged in their own user namespace cannot + * push a wiphy into a netns where they have no privilege. + */ + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + put_net(net); + return -EPERM; + } + err = 0; /* check if anything to do */ -- cgit v1.2.3 From 79240f3f6d766b342b57c32397d643e1cfa26b81 Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Wed, 6 May 2026 14:48:54 +0800 Subject: wifi: nl80211: re-check wiphy netns in nl80211_prepare_wdev_dump() continuation NL80211_CMD_GET_SCAN is implemented as a multi-call dumpit. The first invocation of nl80211_prepare_wdev_dump() validates the requested wdev against the caller's netns via __cfg80211_wdev_from_attrs(). Subsequent invocations look up the same wiphy by its global index and do not check that the wiphy is still in the caller's netns. Add the same filter to the continuation path. If the wiphy's netns no longer matches the caller's, return -ENODEV and the netlink dump machinery terminates the walk cleanly. Signed-off-by: Maoyi Xie Link: https://patch.msgid.link/20260506064854.2207105-3-maoyixie.tju@gmail.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index db546dd93d08..7db9cd433801 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1276,6 +1276,18 @@ static int nl80211_prepare_wdev_dump(struct netlink_callback *cb, rtnl_unlock(); return -ENODEV; } + + /* + * The first invocation validated the wdev's netns against + * the caller via __cfg80211_wdev_from_attrs(). The wiphy + * may have moved netns between dumpit invocations (via + * NL80211_CMD_SET_WIPHY_NETNS), so re-check here. + */ + if (!net_eq(wiphy_net(wiphy), sock_net(cb->skb->sk))) { + rtnl_unlock(); + return -ENODEV; + } + *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; -- cgit v1.2.3 From 86f33ca9bea30cf011f2b1edad4593faea9c6e98 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 6 May 2026 16:22:38 +0800 Subject: ublk: validate physical_bs_shift, io_min_shift and io_opt_shift ublk_validate_params() checks logical_bs_shift is within [9, PAGE_SHIFT] but has no upper bound for physical_bs_shift, io_min_shift, or io_opt_shift. A malicious userspace can set any of these to a large value (e.g., 44), causing undefined behavior from `1 << shift` in ublk_ctrl_start_dev() since the result is stored in 32-bit unsigned int. Cap all three at ilog2(SZ_256M) (28). 256M is big enough to cover all practical block sizes, and originates from the maximum physical block size possible in NVMe (lba_size * (1 + npwg), where npwg is 16-bit). Also zero out ub->params with memset() when copy_from_user() fails or ublk_validate_params() returns error, so that no stale or partial params survive for a subsequent START_DEV to consume. Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver") Signed-off-by: Ming Lei Link: https://patch.msgid.link/20260506082238.22363-1-tom.leiming@gmail.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index d10460d29e4a..57ec900f0ce0 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -900,6 +900,20 @@ static int ublk_validate_params(const struct ublk_device *ub) if (p->logical_bs_shift > PAGE_SHIFT || p->logical_bs_shift < 9) return -EINVAL; + /* + * 256M is a reasonable upper bound for physical block size, + * io_min and io_opt; it aligns with the maximum physical + * block size possible in NVMe. + */ + if (p->physical_bs_shift > ilog2(SZ_256M)) + return -EINVAL; + + if (p->io_min_shift > ilog2(SZ_256M)) + return -EINVAL; + + if (p->io_opt_shift > ilog2(SZ_256M)) + return -EINVAL; + if (p->logical_bs_shift > p->physical_bs_shift) return -EINVAL; @@ -4992,13 +5006,15 @@ static int ublk_ctrl_set_params(struct ublk_device *ub, */ ret = -EACCES; } else if (copy_from_user(&ub->params, argp, ph.len)) { + /* zero out partial copy so no stale params survive */ + memset(&ub->params, 0, sizeof(ub->params)); ret = -EFAULT; } else { /* clear all we don't support yet */ ub->params.types &= UBLK_PARAM_TYPE_ALL; ret = ublk_validate_params(ub); if (ret) - ub->params.types = 0; + memset(&ub->params, 0, sizeof(ub->params)); } mutex_unlock(&ub->mutex); -- cgit v1.2.3 From 9cc6bac1bebf8310d2950d1411a91479e86d69a1 Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Mon, 4 May 2026 23:37:54 +0800 Subject: io_uring/timeout: honour caller's time namespace for IORING_TIMEOUT_ABS io_uring's IORING_OP_TIMEOUT and IORING_OP_LINK_TIMEOUT accept a timespec from the caller via io_parse_user_time(). With IORING_TIMEOUT_ABS, the timestamp is an absolute deadline on the selected clock. The clock is CLOCK_MONOTONIC by default. CLOCK_BOOTTIME and CLOCK_REALTIME are also selectable. A submitter inside a CLONE_NEWTIME time namespace observes CLOCK_MONOTONIC and CLOCK_BOOTTIME shifted by the namespace's offsets relative to the host. Every other ABS timer interface in the kernel converts the caller's absolute time to host view via timens_ktime_to_host() before arming an hrtimer: kernel/time/posix-timers.c -- timer_settime(TIMER_ABSTIME) kernel/time/posix-stubs.c -- clock_nanosleep(TIMER_ABSTIME) kernel/time/alarmtimer.c -- alarm_timer_nsleep(TIMER_ABSTIME) fs/timerfd.c -- timerfd_settime(TFD_TIMER_ABSTIME) io_parse_user_time() does not. As a result, an absolute timeout submitted from within a time namespace is interpreted in host view. That is generally a different point in time. It may already be in the past, causing the timer to fire immediately, or far in the future, causing the timer not to fire when expected. Reproducer: in unshare --user --time, with a -10s monotonic offset, submit IORING_OP_TIMEOUT with IORING_TIMEOUT_ABS and deadline = now + 1s. The CQE is delivered after <1ms instead of the expected ~1s. Apply timens_ktime_to_host() to the parsed time when IORING_TIMEOUT_ABS is set. Split the existing clock id resolver in io_timeout_get_clock() into a flags only helper io_flags_to_clock(), so io_parse_user_time() can resolve the clock without a struct io_timeout_data. timens_ktime_to_host() is a no-op for clocks not affected by time namespaces, e.g. CLOCK_REALTIME. It is also a no-op for callers in the initial time namespace. The fast path is unchanged. SQPOLL is also covered. The SQPOLL kernel thread is created via create_io_thread() with CLONE_THREAD and no CLONE_NEW* flag. copy_namespaces() therefore shares the submitter's nsproxy by reference. Inside the SQPOLL kthread, current->nsproxy->time_ns is the submitter's time_ns. timens_ktime_to_host() resolves correctly. Suggested-by: Pavel Begunkov Suggested-by: Jens Axboe Signed-off-by: Maoyi Xie Link: https://patch.msgid.link/20260504153755.1293932-2-maoyi.xie@ntu.edu.sg Signed-off-by: Jens Axboe --- io_uring/timeout.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 4cfdfc519770..e2595cae2b07 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -35,6 +36,22 @@ struct io_timeout_rem { bool ltimeout; }; +static clockid_t io_flags_to_clock(unsigned flags) +{ + switch (flags & IORING_TIMEOUT_CLOCK_MASK) { + case IORING_TIMEOUT_BOOTTIME: + return CLOCK_BOOTTIME; + case IORING_TIMEOUT_REALTIME: + return CLOCK_REALTIME; + default: + /* can't happen, vetted at prep time */ + WARN_ON_ONCE(1); + fallthrough; + case 0: + return CLOCK_MONOTONIC; + } +} + static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags) { struct timespec64 ts; @@ -43,7 +60,7 @@ static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags) *time = ns_to_ktime(arg); if (*time < 0) return -EINVAL; - return 0; + goto out; } if (get_timespec64(&ts, u64_to_user_ptr(arg))) @@ -51,6 +68,9 @@ static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags) if (ts.tv_sec < 0 || ts.tv_nsec < 0) return -EINVAL; *time = timespec64_to_ktime(ts); +out: + if (flags & IORING_TIMEOUT_ABS) + *time = timens_ktime_to_host(io_flags_to_clock(flags), *time); return 0; } @@ -399,18 +419,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) static clockid_t io_timeout_get_clock(struct io_timeout_data *data) { - switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) { - case IORING_TIMEOUT_BOOTTIME: - return CLOCK_BOOTTIME; - case IORING_TIMEOUT_REALTIME: - return CLOCK_REALTIME; - default: - /* can't happen, vetted at prep time */ - WARN_ON_ONCE(1); - fallthrough; - case 0: - return CLOCK_MONOTONIC; - } + return io_flags_to_clock(data->flags); } static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, -- cgit v1.2.3 From 45d2b37a37ab98484693533496395c610a2cab96 Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Mon, 4 May 2026 23:37:55 +0800 Subject: io_uring/wait: honour caller's time namespace for IORING_ENTER_ABS_TIMER io_uring_enter() with IORING_ENTER_ABS_TIMER takes an absolute timespec from the caller via ext_arg->ts. It arms an ABS mode hrtimer in __io_cqring_wait_schedule(). The conversion path in io_uring/wait.c parses ext_arg->ts inline rather than going through io_parse_user_time(). It therefore does not pick up the time namespace conversion added by the previous patch. Apply timens_ktime_to_host() to the parsed time on the IORING_ENTER_ABS_TIMER branch. This mirrors the IORING_TIMEOUT_ABS fix in io_parse_user_time(). Use ctx->clockid as the clock id. ctx->clockid is set either at ring creation or via IORING_REGISTER_CLOCK. timens_ktime_to_host() is a no-op for clocks not affected by time namespaces. It is also a no-op for callers in the initial time namespace. The fast path is unchanged. Reproducer: in unshare --user --time, with a -10s monotonic offset, call io_uring_enter with min_complete=1, IORING_ENTER_ABS_TIMER, and ts = now + 1s. The call returns -ETIME after <1ms instead of after the expected ~1s. Suggested-by: Pavel Begunkov Suggested-by: Jens Axboe Signed-off-by: Maoyi Xie Link: https://patch.msgid.link/20260504153755.1293932-3-maoyi.xie@ntu.edu.sg Signed-off-by: Jens Axboe --- io_uring/wait.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/io_uring/wait.c b/io_uring/wait.c index 91df86ce0d18..ec01e78a216d 100644 --- a/io_uring/wait.c +++ b/io_uring/wait.c @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -229,7 +230,10 @@ int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, if (ext_arg->ts_set) { iowq.timeout = timespec64_to_ktime(ext_arg->ts); - if (!(flags & IORING_ENTER_ABS_TIMER)) + if (flags & IORING_ENTER_ABS_TIMER) + iowq.timeout = timens_ktime_to_host(ctx->clockid, + iowq.timeout); + else iowq.timeout = ktime_add(iowq.timeout, start_time); } -- cgit v1.2.3 From 5cbb61bf4168859d97c068d88d364f4f1f440325 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 5 May 2026 09:02:13 -0700 Subject: arm64/fpsimd: ptrace: zero target's fpsimd_state, not the tracer's sve_set_common() is the backend for PTRACE_SETREGSET(NT_ARM_SVE) and PTRACE_SETREGSET(NT_ARM_SSVE). Every write in the function operates on the tracee (target) - except a single memset that uses current instead, zeroing the tracer's saved V0-V31 / FPSR / FPCR shadow on every ptrace SETREGSET call. The memset is meant to give the tracee a defined zero register image before the user-supplied payload is copied in (for partial writes, header-only writes, and FPSIMD<->SVE format switches). Aiming it at current both denies the tracee that clean slate and silently corrupts the tracer. The corruption of the tracer's saved FPSIMD state is not always observable. Where the tracer's state is live on a CPU, this may be reused without loading the corrupted state from memory, and will eventually be written back over the corrupted state. Where the tracer's state is saved in SVE_PT_REGS_SVE format, only the FPSR and FPCR are clobbered, and the effective copy of the vectors is in the task's sve_state. Reproducible on an arm64 kernel with SVE: a single-threaded tracer that loads a known pattern into V0-V31, issues PTRACE_SETREGSET(NT_ARM_SVE) on a child, and reads V0-V31 back observes them all zeroed within tens of thousands of iterations when a sibling thread keeps stealing the FPSIMD CPU binding. Fixes: 316283f276eb ("arm64/fpsimd: ptrace: Consistently handle partial writes to NT_ARM_(S)SVE") Cc: Signed-off-by: Breno Leitao Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index ba5eab23fd90..4d08598e2891 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -983,8 +983,8 @@ static int sve_set_common(struct task_struct *target, } /* Always zero V regs, FPSR, and FPCR */ - memset(¤t->thread.uw.fpsimd_state, 0, - sizeof(current->thread.uw.fpsimd_state)); + memset(&target->thread.uw.fpsimd_state, 0, + sizeof(target->thread.uw.fpsimd_state)); /* Registers: FPSIMD-only case */ -- cgit v1.2.3 From bee87cf0f1248c0f20710d7a79df41fe892d9f88 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 5 May 2026 17:11:23 +0100 Subject: ASoC: cs35l56: Don't use devres to unregister component Manually call snd_soc_unregister_component() from cs35l56_remove() instead of using devres cleanup. This ensures that the component is destroyed before cs35l56_remove() starts cleanup of anything the component code could be using. Devres cleanup happens after the driver remove() callback, so if snd_soc_register_component() is used, it will not be destroyed until after cs35l56_remove() has returned. But there is some cleanup that must be done in cs35l56_remove(), or wrapped in a custom devres cleanup handler to ensure correct ordering. The simplest option is to call snd_soc_unregister_component() at the start of cs35l56_remove(). Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Closes: https://sashiko.dev/#/patchset/20260501103002.2843735-1-rf%40opensource.cirrus.com Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260505161124.3621000-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 378017fcea10..e847bb32af2e 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1956,9 +1956,9 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56) goto err; } - ret = devm_snd_soc_register_component(cs35l56->base.dev, - &soc_component_dev_cs35l56, - cs35l56_dai, ARRAY_SIZE(cs35l56_dai)); + ret = snd_soc_register_component(cs35l56->base.dev, + &soc_component_dev_cs35l56, + cs35l56_dai, ARRAY_SIZE(cs35l56_dai)); if (ret < 0) { dev_err_probe(cs35l56->base.dev, ret, "Register codec failed\n"); goto err; @@ -2057,6 +2057,8 @@ EXPORT_SYMBOL_NS_GPL(cs35l56_init, "SND_SOC_CS35L56_CORE"); void cs35l56_remove(struct cs35l56_private *cs35l56) { + snd_soc_unregister_component(cs35l56->base.dev); + cs35l56->base.init_done = false; /* -- cgit v1.2.3 From fd4d83e1437d6395021b21531e187c8a67ac21b0 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 5 May 2026 17:11:24 +0100 Subject: ASoC: cs35l56: Destroy workqueue in probe error path The error path in cs35l56_common_probe() should call destroy_workqueue() on the workqueue that was created by cs35l56_dsp_init(). Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260505161124.3621000-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index e847bb32af2e..849d70ca23d6 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1970,6 +1970,9 @@ err: gpiod_set_value_cansleep(cs35l56->base.reset_gpio, 0); regulator_bulk_disable(ARRAY_SIZE(cs35l56->supplies), cs35l56->supplies); + if (cs35l56->dsp_wq) + destroy_workqueue(cs35l56->dsp_wq); + return ret; } EXPORT_SYMBOL_NS_GPL(cs35l56_common_probe, "SND_SOC_CS35L56_CORE"); -- cgit v1.2.3 From 628497e6d925d43efb56e3ffecef0a9d217926b3 Mon Sep 17 00:00:00 2001 From: Fenglin Wu Date: Wed, 6 May 2026 02:28:51 -0700 Subject: regulator: qcom-rpmh: Fix index for pmh0101 ldo16 The wrong index is assigned to pmh0101 ldo16, which results incorrect rpmh resource being used when the regulator device is voted. Fix it. Fixes: 65efe5404d15 ("regulator: rpmh-regulator: Add RPMH regulator support for Glymur") Signed-off-by: Fenglin Wu Reviewed-by: Konrad Dybcio Link: https://patch.msgid.link/20260506-fix_pmh0101_ldo16_index-v1-1-cdc8708b01f4@oss.qualcomm.com Signed-off-by: Mark Brown --- drivers/regulator/qcom-rpmh-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index 6e4cb2871fca..0dcb50bf5c35 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c @@ -1512,7 +1512,7 @@ static const struct rpmh_vreg_init_data pmh0101_vreg_data[] = { RPMH_VREG("ldo13", LDO, 13, &pmic5_pldo530_mvp150, "vdd-l2-l13-l14"), RPMH_VREG("ldo14", LDO, 14, &pmic5_pldo530_mvp150, "vdd-l2-l13-l14"), RPMH_VREG("ldo15", LDO, 15, &pmic5_nldo530, "vdd-l15"), - RPMH_VREG("ldo16", LDO, 15, &pmic5_pldo530_mvp600, "vdd-l5-l16"), + RPMH_VREG("ldo16", LDO, 16, &pmic5_pldo530_mvp600, "vdd-l5-l16"), RPMH_VREG("ldo17", LDO, 17, &pmic5_pldo515_mv, "vdd-l17"), RPMH_VREG("ldo18", LDO, 18, &pmic5_nldo530, "vdd-l18"), RPMH_VREG("bob1", BOB, 1, &pmic5_bob, "vdd-bob1"), -- cgit v1.2.3 From 4bacec2317527ba04b7172145848f1c206999ea1 Mon Sep 17 00:00:00 2001 From: Jiawei Liu Date: Wed, 6 May 2026 14:24:12 +0800 Subject: spi: ch341: correct company name in MODULE_DESCRIPTION The company name "QiHeng Electronics" is incorrect. The correct legal name is "Nanjing Qinheng Microelectronics Co., Ltd.". Update the module description accordingly. Signed-off-by: Jiawei Liu Link: https://patch.msgid.link/20260506062412.371034-1-ljw@wch.cn Signed-off-by: Mark Brown --- drivers/spi/spi-ch341.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-ch341.c b/drivers/spi/spi-ch341.c index 3eaa8f176f63..6448a44a8b67 100644 --- a/drivers/spi/spi-ch341.c +++ b/drivers/spi/spi-ch341.c @@ -250,5 +250,5 @@ static struct usb_driver ch341a_usb_driver = { module_usb_driver(ch341a_usb_driver); MODULE_AUTHOR("Johannes Thumshirn "); -MODULE_DESCRIPTION("QiHeng Electronics ch341 USB2SPI"); +MODULE_DESCRIPTION("Nanjing Qinheng Microelectronics CH341 USB2SPI driver"); MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 0c8c88b8eb82a2a41bec5f17c076d6312dc40316 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 5 May 2026 15:42:57 -0700 Subject: ovl: fix verity lazy-load guard broken by fsverity_active() semantic change Commit f77f281b6118 ("fsverity: use a hashtable to find the fsverity_info") made fsverity_active() check whether the inode has the verity flag, rather than whether the inode's fsverity_info is loaded. This broke ovl_ensure_verity_loaded(), which wants to load the fsverity_info for any verity inodes that haven't had it loaded yet. Therefore, to check that the fsverity_info hasn't yet been loaded, use fsverity_get_info(inode) == NULL instead of !fsverity_active(inode). Also, since fsverity_get_info() now involves a hash table lookup, put the more lightweight IS_VERITY() flag check first. Fixes: f77f281b6118 ("fsverity: use a hashtable to find the fsverity_info") Cc: stable@vger.kernel.org Link: https://github.com/bootc-dev/bootc/issues/2174 Signed-off-by: Colin Walters Acked-by: Amir Goldstein Link: https://patch.msgid.link/20260505224257.23213-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/overlayfs/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 7b86a6bac644..b41f4788e4f0 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -1354,7 +1354,7 @@ int ovl_ensure_verity_loaded(const struct path *datapath) struct inode *inode = d_inode(datapath->dentry); struct file *filp; - if (!fsverity_active(inode) && IS_VERITY(inode)) { + if (IS_VERITY(inode) && fsverity_get_info(inode) == NULL) { /* * If this inode was not yet opened, the verity info hasn't been * loaded yet, so we need to do that here to force it into memory. -- cgit v1.2.3 From fdf4eb632683bfc2840acebe62716cb468d43e10 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 26 Apr 2026 17:51:07 +0200 Subject: selftests/rseq: Validate legacy behavior The RSEQ legacy mode behavior requires that the ID fields in the rseq region are unconditionally updated on every context switch and before signal delivery even if not required by the ABI specification. To ensure that this behavior is preserved for legacy users in the future, add a test which validates that with a sleep() and a signal sent to self. Provide a run script which prevents GLIBC from registering a RSEQ region, so that the test can register it's own legacy sized region. Fixes: 566d8015f7ee ("rseq: Avoid CPU/MM CID updates when no event pending") Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Link: https://patch.msgid.link/20260428224427.764705536%40kernel.org Cc: stable@vger.kernel.org --- tools/testing/selftests/rseq/Makefile | 5 +- tools/testing/selftests/rseq/legacy_check.c | 65 ++++++++++++++++++++++++ tools/testing/selftests/rseq/run_legacy_check.sh | 4 ++ 3 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/rseq/legacy_check.c create mode 100755 tools/testing/selftests/rseq/run_legacy_check.sh diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index 0d1947c0d623..0293a2f17f51 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -22,9 +22,10 @@ TEST_GEN_PROGS_EXTENDED = librseq.so \ param_test_compare_twice \ param_test_mm_cid \ param_test_mm_cid_compare_twice \ - syscall_errors_test + syscall_errors_test \ + legacy_check -TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh +TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh run_legacy_check.sh TEST_FILES := settings diff --git a/tools/testing/selftests/rseq/legacy_check.c b/tools/testing/selftests/rseq/legacy_check.c new file mode 100644 index 000000000000..3f7de4e28303 --- /dev/null +++ b/tools/testing/selftests/rseq/legacy_check.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include +#include +#include + +#include "rseq.h" + +#include "../kselftest_harness.h" + +FIXTURE(legacy) +{ +}; + +static int cpu_id_in_sigfn = -1; + +static void sigfn(int sig) +{ + struct rseq_abi *rs = rseq_get_abi(); + + cpu_id_in_sigfn = rs->cpu_id_start; +} + +FIXTURE_SETUP(legacy) +{ + int res = __rseq_register_current_thread(true, true); + + switch (res) { + case -ENOSYS: + SKIP(return, "RSEQ not enabled\n"); + case -EBUSY: + SKIP(return, "GLIBC owns RSEQ. Disable GLIBC RSEQ registration\n"); + default: + ASSERT_EQ(res, 0); + } + + ASSERT_NE(signal(SIGUSR1, sigfn), SIG_ERR); +} + +FIXTURE_TEARDOWN(legacy) +{ +} + +TEST_F(legacy, legacy_test) +{ + struct rseq_abi *rs = rseq_get_abi(); + + ASSERT_NE(rs, NULL); + + /* Overwrite rs::cpu_id_start */ + rs->cpu_id_start = -1; + sleep(1); + ASSERT_NE(rs->cpu_id_start, -1); + + rs->cpu_id_start = -1; + ASSERT_EQ(raise(SIGUSR1), 0); + ASSERT_NE(rs->cpu_id_start, -1); + ASSERT_NE(cpu_id_in_sigfn, -1); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/rseq/run_legacy_check.sh b/tools/testing/selftests/rseq/run_legacy_check.sh new file mode 100755 index 000000000000..5577b46ea092 --- /dev/null +++ b/tools/testing/selftests/rseq/run_legacy_check.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" ./legacy_check -- cgit v1.2.3 From 82f572449cfe75f12ea985986da60e11f308f77d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 26 Apr 2026 16:21:02 +0200 Subject: rseq: Implement read only ABI enforcement for optimized RSEQ V2 mode The optimized RSEQ V2 mode requires that user space adheres to the ABI specification and does not modify the read-only fields cpu_id_start, cpu_id, node_id and mm_cid behind the kernel's back. While the kernel does not rely on these fields, the adherence to this is a fundamental prerequisite to allow multiple entities, e.g. libraries, in an application to utilize the full potential of RSEQ without stepping on each other toes. Validate this adherence on every update of these fields. If the kernel detects that user space modified the fields, the application is force terminated. Fixes: d6200245c75e ("rseq: Allow registering RSEQ with slice extension") Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Link: https://patch.msgid.link/20260428224427.845230956%40kernel.org Cc: stable@vger.kernel.org --- include/linux/rseq_entry.h | 83 +++++++++++++++++----------------------------- include/linux/rseq_types.h | 4 ++- kernel/rseq.c | 5 ++- 3 files changed, 35 insertions(+), 57 deletions(-) diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h index 934db41ec782..2d0295df5107 100644 --- a/include/linux/rseq_entry.h +++ b/include/linux/rseq_entry.h @@ -248,7 +248,6 @@ static __always_inline bool rseq_grant_slice_extension(unsigned long ti_work, un #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */ bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr); -bool rseq_debug_validate_ids(struct task_struct *t); static __always_inline void rseq_note_user_irq_entry(void) { @@ -368,43 +367,6 @@ efault: return false; } -/* - * On debug kernels validate that user space did not mess with it if the - * debug branch is enabled. - */ -bool rseq_debug_validate_ids(struct task_struct *t) -{ - struct rseq __user *rseq = t->rseq.usrptr; - u32 cpu_id, uval, node_id; - - /* - * On the first exit after registering the rseq region CPU ID is - * RSEQ_CPU_ID_UNINITIALIZED and node_id in user space is 0! - */ - node_id = t->rseq.ids.cpu_id != RSEQ_CPU_ID_UNINITIALIZED ? - cpu_to_node(t->rseq.ids.cpu_id) : 0; - - scoped_user_read_access(rseq, efault) { - unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault); - if (cpu_id != t->rseq.ids.cpu_id) - goto die; - unsafe_get_user(uval, &rseq->cpu_id, efault); - if (uval != cpu_id) - goto die; - unsafe_get_user(uval, &rseq->node_id, efault); - if (uval != node_id) - goto die; - unsafe_get_user(uval, &rseq->mm_cid, efault); - if (uval != t->rseq.ids.mm_cid) - goto die; - } - return true; -die: - t->rseq.event.fatal = true; -efault: - return false; -} - #endif /* RSEQ_BUILD_SLOW_PATH */ /* @@ -514,20 +476,32 @@ efault: * faults in task context are fatal too. */ static rseq_inline -bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, - u32 node_id, u64 *csaddr) +bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, u64 *csaddr) { struct rseq __user *rseq = t->rseq.usrptr; - if (static_branch_unlikely(&rseq_debug_enabled)) { - if (!rseq_debug_validate_ids(t)) - return false; - } - scoped_user_rw_access(rseq, efault) { + /* Validate the R/O fields for debug and optimized mode */ + if (static_branch_unlikely(&rseq_debug_enabled) || rseq_v2(t)) { + u32 cpu_id, uval; + + unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault); + if (cpu_id != t->rseq.ids.cpu_id) + goto die; + unsafe_get_user(uval, &rseq->cpu_id, efault); + if (uval != cpu_id) + goto die; + unsafe_get_user(uval, &rseq->node_id, efault); + if (uval != t->rseq.ids.node_id) + goto die; + unsafe_get_user(uval, &rseq->mm_cid, efault); + if (uval != t->rseq.ids.mm_cid) + goto die; + } + unsafe_put_user(ids->cpu_id, &rseq->cpu_id_start, efault); unsafe_put_user(ids->cpu_id, &rseq->cpu_id, efault); - unsafe_put_user(node_id, &rseq->node_id, efault); + unsafe_put_user(ids->node_id, &rseq->node_id, efault); unsafe_put_user(ids->mm_cid, &rseq->mm_cid, efault); if (csaddr) unsafe_get_user(*csaddr, &rseq->rseq_cs, efault); @@ -539,10 +513,13 @@ bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, rseq_slice_clear_grant(t); /* Cache the new values */ - t->rseq.ids.cpu_cid = ids->cpu_cid; + t->rseq.ids = *ids; rseq_stat_inc(rseq_stats.ids); rseq_trace_update(t, ids); return true; + +die: + t->rseq.event.fatal = true; efault: return false; } @@ -552,11 +529,11 @@ efault: * is in a critical section. */ static rseq_inline bool rseq_update_usr(struct task_struct *t, struct pt_regs *regs, - struct rseq_ids *ids, u32 node_id) + struct rseq_ids *ids) { u64 csaddr; - if (!rseq_set_ids_get_csaddr(t, ids, node_id, &csaddr)) + if (!rseq_set_ids_get_csaddr(t, ids, &csaddr)) return false; /* @@ -659,12 +636,12 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t } struct rseq_ids ids = { - .cpu_id = task_cpu(t), - .mm_cid = task_mm_cid(t), + .cpu_id = task_cpu(t), + .mm_cid = task_mm_cid(t), + .node_id = cpu_to_node(ids.cpu_id), }; - u32 node_id = cpu_to_node(ids.cpu_id); - return rseq_update_usr(t, regs, &ids, node_id); + return rseq_update_usr(t, regs, &ids); efault: return false; } diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h index a469c1870849..85739a63e85e 100644 --- a/include/linux/rseq_types.h +++ b/include/linux/rseq_types.h @@ -66,8 +66,9 @@ struct rseq_event { * compiler emit a single compare on 64-bit * @cpu_id: The CPU ID which was written last to user space * @mm_cid: The MM CID which was written last to user space + * @node_id: The node ID which was written last to user space * - * @cpu_id and @mm_cid are updated when the data is written to user space. + * @cpu_id, @mm_cid and @node_id are updated when the data is written to user space. */ struct rseq_ids { union { @@ -77,6 +78,7 @@ struct rseq_ids { u32 mm_cid; }; }; + u32 node_id; }; /** diff --git a/kernel/rseq.c b/kernel/rseq.c index aa25753ea135..101612027f6a 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -263,7 +263,6 @@ static void rseq_slowpath_update_usr(struct pt_regs *regs) }; struct task_struct *t = current; struct rseq_ids ids; - u32 node_id; bool event; if (unlikely(t->flags & PF_EXITING)) @@ -299,9 +298,9 @@ static void rseq_slowpath_update_usr(struct pt_regs *regs) if (!event) return; - node_id = cpu_to_node(ids.cpu_id); + ids.node_id = cpu_to_node(ids.cpu_id); - if (unlikely(!rseq_update_usr(t, regs, &ids, node_id))) { + if (unlikely(!rseq_update_usr(t, regs, &ids))) { /* * Clear the errors just in case this might survive magically, but * leave the rest intact. -- cgit v1.2.3 From 99428157dcf32fdac97355aa1cc1364dbc9e073c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 26 Apr 2026 10:01:56 +0200 Subject: rseq: Reenable performance optimizations conditionally Due to the incompatibility with TCMalloc the RSEQ optimizations and extended features (time slice extensions) have been disabled and made run-time conditional. The original RSEQ implementation, which TCMalloc depends on, registers a 32 byte region (ORIG_RSEG_SIZE). This region has a 32 byte alignment requirement. The extension safe newer variant exposes the kernel RSEQ feature size via getauxval(AT_RSEQ_FEATURE_SIZE) and the alignment requirement via getauxval(AT_RSEQ_ALIGN). The alignment requirement is that the registered RSEQ region is aligned to the next power of two of the feature size. The kernel currently has a feature size of 33 bytes, which means the alignment requirement is 64 bytes. The TCMalloc RSEQ region is embedded into a cache line aligned data structure starting at offset 32 bytes so that bytes 28-31 and the cpu_id_start field at bytes 32-35 form a 64-bit little endian pointer with the top-most bit (63 set) to check whether the kernel has overwritten cpu_id_start with an actual CPU id value, which is guaranteed to not have the top most bit set. As this is part of their performance tuned magic, it's a pretty safe assumption, that TCMalloc won't use a larger RSEQ size. This allows the kernel to declare that registrations with a size greater than the original size of 32 bytes, which is the cases since time slice extensions got introduced, as RSEQ ABI v2 with the following differences to the original behaviour: 1) Unconditional updates of the user read only fields (CPU, node, MMCID) are removed. Those fields are only updated on registration, task migration and MMCID changes. 2) Unconditional evaluation of the criticial section pointer is removed. It's only evaluated when user space was interrupted and was scheduled out or before delivering a signal in the interrupted context. 3) The read/only requirement of the ID fields is enforced. When the kernel detects that userspace manipulated the fields, the process is terminated. This ensures that multiple entities (libraries) can utilize RSEQ without interfering. 4) Todays extended RSEQ feature (time slice extensions) and future extensions are only enabled in the v2 enabled mode. Registrations with the original size of 32 bytes operate in backwards compatible legacy mode without performance improvements and extended features. Unfortunately that also affects users of older GLIBC versions which register the original size of 32 bytes and do not evaluate the kernel required size in the auxiliary vector AT_RSEQ_FEATURE_SIZE. That's the result of the lack of enforcement in the original implementation and the unwillingness of a single entity to cooperate with the larger ecosystem for many years. Implement the required registration changes by restructuring the spaghetti code and adding the size/version check. Also add documentation about the differences of legacy and optimized RSEQ V2 mode. Thanks to Mathieu for pointing out the ORIG_RSEQ_SIZE constraints! Fixes: d6200245c75e ("rseq: Allow registering RSEQ with slice extension") Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Link: https://patch.msgid.link/20260428224427.927160119%40kernel.org Cc: stable@vger.kernel.org --- Documentation/userspace-api/rseq.rst | 94 ++++++++++++++++++++++- kernel/rseq.c | 144 +++++++++++++++++++++-------------- 2 files changed, 178 insertions(+), 60 deletions(-) diff --git a/Documentation/userspace-api/rseq.rst b/Documentation/userspace-api/rseq.rst index 3cd27a3c7c7e..8549a6c61531 100644 --- a/Documentation/userspace-api/rseq.rst +++ b/Documentation/userspace-api/rseq.rst @@ -24,6 +24,97 @@ Quick access to CPU number, node ID Allows to implement per CPU data efficiently. Documentation is in code and selftests. :( +Optimized RSEQ V2 +----------------- + +On architectures which utilize the generic entry code and generic TIF bits +the kernel supports runtime optimizations for RSEQ, which also enable +enhanced features like scheduler time slice extensions. + +To enable them a task has to register the RSEQ region with at least the +length advertised by getauxval(AT_RSEQ_FEATURE_SIZE). + +If existing binaries register with RSEQ_ORIG_SIZE (32 bytes), the kernel +keeps the legacy low performance mode enabled to fulfil the expectations +of existing users regarding the original RSEQ implementation behaviour. + +The following table documents the ABI and behavioral guarantees of the +legacy and the optimized V2 mode. + +.. list-table:: RSEQ modes + :header-rows: 1 + + * - Nr + - What + + - Legacy + - Optimized V2 + + * - 1 + - The cpu_id_start, cpu_id, node_id and mm_cid fields (User mode read + only) + .. Legacy + - Updated by the kernel unconditionally after each context switch and + before signal delivery + .. Optimized V2 + - Updated by the kernel if and only if they change, i.e. if the task + is migrated or mm_cid changes + + * - 2 + - The rseq_cs critical section field + .. Legacy + - Evaluated and handled unconditionally after each context switch and + before signal delivery + .. Optimized V2 + - Evaluated and handled conditionally only when user space was + interrupted and was scheduled out or before delivering a signal in + the interrupted context. + + * - 3 + - Read only fields + .. Legacy + - No strict enforcement except in debug mode + .. Optimized V2 + - Strict enforcement + + * - 4 + - membarrier(...RSEQ) + .. Legacy + - All running threads of the process are interrupted and the ID fields + are rewritten and eventually active critical sections are aborted + before they return to user space. All threads which are scheduled + out whether voluntary or not are covered by #1/#2 above. + .. Optimized V2 + - All running threads of the process are interrupted and eventually + active critical sections are aborted before these threads return to + user space. The ID fields are only updated if changed as a + consequence of the interrupt. All threads which are scheduled out + whether voluntary or not are covered by #1/#2 above. + + * - 5 + - Time slice extensions + .. Legacy + - Not supported + .. Optimized V2 + - Supported + +The legacy mode is obviously less performant as it does unconditional +updates and critical section checks even if not strictly required by the +ABI contract. That can't be changed anymore as some users depend on that +observed behavior, which in turn enables them to violate the ABI and +overwrite the cpu_id_start field for their own purposes. This is obviously +discouraged as it renders RSEQ incompatible with the intended usage and +breaks the expectation of other libraries in the same application. + +The ABI compliant optimized v2 mode, which respects the read only fields, +does not require unconditional updates and therefore is way more +performant. The kernel validates the read only fields for compliance. If +user space modifies them, the process is killed. Compliant usage allows +multiple libraries in the same application to benefit from the RSEQ +functionality without disturbing each other. The ABI compliant optimized v2 +mode also enables extended RSEQ features like time slice extensions. + + Scheduler time slice extensions ------------------------------- @@ -37,7 +128,8 @@ The prerequisites for this functionality are: * Enabled at boot time (default is enabled) - * A rseq userspace pointer has been registered for the thread + * A rseq userspace pointer has been registered for the thread in + optimized V2 mode The thread has to enable the functionality via prctl(2):: diff --git a/kernel/rseq.c b/kernel/rseq.c index 101612027f6a..e75e3a5e312c 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -412,70 +412,23 @@ efault: /* The original rseq structure size (including padding) is 32 bytes. */ #define ORIG_RSEQ_SIZE 32 -/* - * sys_rseq - setup restartable sequences for caller thread. - */ -SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig) +static long rseq_register(struct rseq __user * rseq, u32 rseq_len, int flags, u32 sig) { u32 rseqfl = 0; u8 version = 1; - if (flags & RSEQ_FLAG_UNREGISTER) { - if (flags & ~RSEQ_FLAG_UNREGISTER) - return -EINVAL; - /* Unregister rseq for current thread. */ - if (current->rseq.usrptr != rseq || !current->rseq.usrptr) - return -EINVAL; - if (rseq_len != current->rseq.len) - return -EINVAL; - if (current->rseq.sig != sig) - return -EPERM; - if (!rseq_reset_ids()) - return -EFAULT; - rseq_reset(current); - return 0; - } - - if (unlikely(flags & ~(RSEQ_FLAG_SLICE_EXT_DEFAULT_ON))) - return -EINVAL; - - if (current->rseq.usrptr) { - /* - * If rseq is already registered, check whether - * the provided address differs from the prior - * one. - */ - if (current->rseq.usrptr != rseq || rseq_len != current->rseq.len) - return -EINVAL; - if (current->rseq.sig != sig) - return -EPERM; - /* Already registered. */ - return -EBUSY; - } - - /* - * If there was no rseq previously registered, ensure the provided rseq - * is properly aligned, as communcated to user-space through the ELF - * auxiliary vector AT_RSEQ_ALIGN. If rseq_len is the original rseq - * size, the required alignment is the original struct rseq alignment. - * - * The rseq_len is required to be greater or equal to the original rseq - * size. In order to be valid, rseq_len is either the original rseq size, - * or large enough to contain all supported fields, as communicated to - * user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE. - */ - if (rseq_len < ORIG_RSEQ_SIZE || - (rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) || - (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, rseq_alloc_align()) || - rseq_len < offsetof(struct rseq, end)))) - return -EINVAL; if (!access_ok(rseq, rseq_len)) return -EFAULT; /* - * The version check effectivly disables time slice extensions until the - * RSEQ ABI V2 registration are implemented. + * Architectures, which use the generic IRQ entry code (at least) enable + * registrations with a size greater than the original v1 fixed sized + * @rseq_len, which has been validated already to utilize the optimized + * v2 ABI mode which also enables extended RSEQ features beyond MMCID. */ + if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY) && rseq_len > ORIG_RSEQ_SIZE) + version = 2; + if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION) && version > 1) { if (rseq_slice_extension_enabled()) { rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE; @@ -523,11 +476,10 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 #endif /* - * If rseq was previously inactive, and has just been - * registered, ensure the cpu_id_start and cpu_id fields - * are updated before returning to user-space. + * Ensure the cpu_id_start and cpu_id fields are updated before + * returning to user-space. */ - current->rseq.event.has_rseq = true; + current->rseq.event.has_rseq = version; rseq_force_update(); return 0; @@ -535,6 +487,80 @@ efault: return -EFAULT; } +static long rseq_unregister(struct rseq __user * rseq, u32 rseq_len, int flags, u32 sig) +{ + if (flags & ~RSEQ_FLAG_UNREGISTER) + return -EINVAL; + if (current->rseq.usrptr != rseq || !current->rseq.usrptr) + return -EINVAL; + if (rseq_len != current->rseq.len) + return -EINVAL; + if (current->rseq.sig != sig) + return -EPERM; + if (!rseq_reset_ids()) + return -EFAULT; + rseq_reset(current); + return 0; +} + +static long rseq_reregister(struct rseq __user * rseq, u32 rseq_len, u32 sig) +{ + /* + * If rseq is already registered, check whether the provided address + * differs from the prior one. + */ + if (current->rseq.usrptr != rseq || rseq_len != current->rseq.len) + return -EINVAL; + if (current->rseq.sig != sig) + return -EPERM; + /* Already registered. */ + return -EBUSY; +} + +static bool rseq_length_valid(struct rseq __user *rseq, unsigned int rseq_len) +{ + /* + * Ensure the provided rseq is properly aligned, as communicated to + * user-space through the ELF auxiliary vector AT_RSEQ_ALIGN. If + * rseq_len is the original rseq size, the required alignment is the + * original struct rseq alignment. + * + * In order to be valid, rseq_len is either the original rseq size, or + * large enough to contain all supported fields, as communicated to + * user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE. + */ + if (rseq_len < ORIG_RSEQ_SIZE) + return false; + + if (rseq_len == ORIG_RSEQ_SIZE) + return IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE); + + return IS_ALIGNED((unsigned long)rseq, rseq_alloc_align()) && + rseq_len >= offsetof(struct rseq, end); +} + +#define RSEQ_FLAGS_SUPPORTED (RSEQ_FLAG_SLICE_EXT_DEFAULT_ON) + +/* + * sys_rseq - Register or unregister restartable sequences for the caller thread. + */ +SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig) +{ + if (flags & RSEQ_FLAG_UNREGISTER) + return rseq_unregister(rseq, rseq_len, flags, sig); + + if (unlikely(flags & ~RSEQ_FLAGS_SUPPORTED)) + return -EINVAL; + + if (current->rseq.usrptr) + return rseq_reregister(rseq, rseq_len, sig); + + if (!rseq_length_valid(rseq, rseq_len)) + return -EINVAL; + + return rseq_register(rseq, rseq_len, flags, sig); +} + #ifdef CONFIG_RSEQ_SLICE_EXTENSION struct slice_timer { struct hrtimer timer; -- cgit v1.2.3 From e744060076871eebc2647b24420b550ff44b2b65 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Apr 2026 14:48:23 +0200 Subject: selftests/rseq: Expand for optimized RSEQ ABI v2 Update the selftests so they are executed for legacy (32 bytes RSEQ region) and optimized RSEQ ABI v2 mode. Fixes: d6200245c75e ("rseq: Allow registering RSEQ with slice extension") Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Link: https://patch.msgid.link/20260428224428.009121296%40kernel.org Cc: stable@vger.kernel.org --- tools/testing/selftests/rseq/Makefile | 11 ++++-- tools/testing/selftests/rseq/check_optimized.c | 17 ++++++++++ tools/testing/selftests/rseq/param_test.c | 25 +++++++++----- tools/testing/selftests/rseq/run_param_test.sh | 39 ++++++++++++++++++++++ tools/testing/selftests/rseq/run_timeslice_test.sh | 14 ++++++++ tools/testing/selftests/rseq/slice_test.c | 2 +- 6 files changed, 95 insertions(+), 13 deletions(-) create mode 100644 tools/testing/selftests/rseq/check_optimized.c create mode 100755 tools/testing/selftests/rseq/run_timeslice_test.sh diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index 0293a2f17f51..50d69e22ee7a 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -15,7 +15,7 @@ LDLIBS += -lpthread -ldl OVERRIDE_TARGETS = 1 TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test \ - param_test_benchmark param_test_mm_cid_benchmark slice_test + param_test_benchmark param_test_mm_cid_benchmark TEST_GEN_PROGS_EXTENDED = librseq.so \ param_test \ @@ -23,9 +23,11 @@ TEST_GEN_PROGS_EXTENDED = librseq.so \ param_test_mm_cid \ param_test_mm_cid_compare_twice \ syscall_errors_test \ - legacy_check + legacy_check \ + slice_test \ + check_optimized -TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh run_legacy_check.sh +TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh run_legacy_check.sh run_timeslice_test.sh TEST_FILES := settings @@ -66,3 +68,6 @@ $(OUTPUT)/syscall_errors_test: syscall_errors_test.c $(TEST_GEN_PROGS_EXTENDED) $(OUTPUT)/slice_test: slice_test.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@ + +$(OUTPUT)/check_optimized: check_optimized.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h + $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@ diff --git a/tools/testing/selftests/rseq/check_optimized.c b/tools/testing/selftests/rseq/check_optimized.c new file mode 100644 index 000000000000..a13e3f2c8fc6 --- /dev/null +++ b/tools/testing/selftests/rseq/check_optimized.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: LGPL-2.1 +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include + +#include "rseq.h" + +int main(int argc, char **argv) +{ + if (__rseq_register_current_thread(true, false)) + return -1; + return 0; +} diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 05d03e679e06..e1e98dbabe4b 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -38,7 +38,7 @@ static int opt_modulo, verbose; static int opt_yield, opt_signal, opt_sleep, opt_disable_rseq, opt_threads = 200, opt_disable_mod = 0, opt_test = 's'; - +static bool opt_rseq_legacy; static long long opt_reps = 5000; static __thread __attribute__((tls_model("initial-exec"))) @@ -281,9 +281,12 @@ unsigned int yield_mod_cnt, nr_abort; } \ } +#define rseq_no_glibc true + #else #define printf_verbose(fmt, ...) +#define rseq_no_glibc false #endif /* BENCHMARK */ @@ -481,7 +484,7 @@ void *test_percpu_spinlock_thread(void *arg) long long i, reps; if (!opt_disable_rseq && thread_data->reg && - rseq_register_current_thread()) + __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) abort(); reps = thread_data->reps; for (i = 0; i < reps; i++) { @@ -558,7 +561,7 @@ void *test_percpu_inc_thread(void *arg) long long i, reps; if (!opt_disable_rseq && thread_data->reg && - rseq_register_current_thread()) + __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) abort(); reps = thread_data->reps; for (i = 0; i < reps; i++) { @@ -712,7 +715,7 @@ void *test_percpu_list_thread(void *arg) long long i, reps; struct percpu_list *list = (struct percpu_list *)arg; - if (!opt_disable_rseq && rseq_register_current_thread()) + if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) abort(); reps = opt_reps; @@ -895,7 +898,7 @@ void *test_percpu_buffer_thread(void *arg) long long i, reps; struct percpu_buffer *buffer = (struct percpu_buffer *)arg; - if (!opt_disable_rseq && rseq_register_current_thread()) + if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) abort(); reps = opt_reps; @@ -1105,7 +1108,7 @@ void *test_percpu_memcpy_buffer_thread(void *arg) long long i, reps; struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg; - if (!opt_disable_rseq && rseq_register_current_thread()) + if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) abort(); reps = opt_reps; @@ -1258,7 +1261,7 @@ void *test_membarrier_worker_thread(void *arg) const int iters = opt_reps; int i; - if (rseq_register_current_thread()) { + if (__rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) { fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", errno, strerror(errno)); abort(); @@ -1323,7 +1326,7 @@ void *test_membarrier_manager_thread(void *arg) intptr_t expect_a = 0, expect_b = 0; int cpu_a = 0, cpu_b = 0; - if (rseq_register_current_thread()) { + if (__rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) { fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", errno, strerror(errno)); abort(); @@ -1475,6 +1478,7 @@ static void show_usage(int argc, char **argv) printf(" [-D M] Disable rseq for each M threads\n"); printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n"); printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); + printf(" [-O] Test with optimized RSEQ\n"); printf(" [-v] Verbose output.\n"); printf(" [-h] Show this help.\n"); printf("\n"); @@ -1602,6 +1606,9 @@ int main(int argc, char **argv) case 'M': opt_mo = RSEQ_MO_RELEASE; break; + case 'L': + opt_rseq_legacy = true; + break; default: show_usage(argc, argv); goto error; @@ -1618,7 +1625,7 @@ int main(int argc, char **argv) if (set_signal_handler()) goto error; - if (!opt_disable_rseq && rseq_register_current_thread()) + if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) goto error; if (!opt_disable_rseq && !rseq_validate_cpu_id()) { fprintf(stderr, "Error: cpu id getter unavailable\n"); diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh index 8d31426ab41f..69a3fa049929 100755 --- a/tools/testing/selftests/rseq/run_param_test.sh +++ b/tools/testing/selftests/rseq/run_param_test.sh @@ -34,6 +34,11 @@ REPS=1000 SLOW_REPS=100 NR_THREADS=$((6*${NR_CPUS})) +# Prevent GLIBC from registering RSEQ so the selftest can run in legacy and +# performance optimized mode. +GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" +export GLIBC_TUNABLES + function do_tests() { local i=0 @@ -103,6 +108,40 @@ function inject_blocking() NR_LOOPS= } +echo "Testing in legacy RSEQ mode" +echo "Yield injection (25%)" +inject_blocking -m 4 -y -L + +echo "Yield injection (50%)" +inject_blocking -m 2 -y -L + +echo "Yield injection (100%)" +inject_blocking -m 1 -y -L + +echo "Kill injection (25%)" +inject_blocking -m 4 -k -L + +echo "Kill injection (50%)" +inject_blocking -m 2 -k -L + +echo "Kill injection (100%)" +inject_blocking -m 1 -k -L + +echo "Sleep injection (1ms, 25%)" +inject_blocking -m 4 -s 1 -L + +echo "Sleep injection (1ms, 50%)" +inject_blocking -m 2 -s 1 -L + +echo "Sleep injection (1ms, 100%)" +inject_blocking -m 1 -s 1 -L + +./check_optimized || { + echo "Skipping optimized RSEQ mode test. Not supported"; + exit 0 +} + +echo "Testing in optimized RSEQ mode" echo "Yield injection (25%)" inject_blocking -m 4 -y diff --git a/tools/testing/selftests/rseq/run_timeslice_test.sh b/tools/testing/selftests/rseq/run_timeslice_test.sh new file mode 100755 index 000000000000..551ebed71ec6 --- /dev/null +++ b/tools/testing/selftests/rseq/run_timeslice_test.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ + +# Prevent GLIBC from registering RSEQ so the selftest can run in legacy +# and performance optimized mode. +GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" +export GLIBC_TUNABLES + +./check_optimized || { + echo "Skipping optimized RSEQ mode test. Not supported"; + exit 0 +} + +./slice_test diff --git a/tools/testing/selftests/rseq/slice_test.c b/tools/testing/selftests/rseq/slice_test.c index 77e668ff74d7..e402d4440bc2 100644 --- a/tools/testing/selftests/rseq/slice_test.c +++ b/tools/testing/selftests/rseq/slice_test.c @@ -124,7 +124,7 @@ FIXTURE_SETUP(slice_ext) { cpu_set_t affinity; - if (rseq_register_current_thread()) + if (__rseq_register_current_thread(true, false)) SKIP(return, "RSEQ not supported\n"); if (prctl(PR_RSEQ_SLICE_EXTENSION, PR_RSEQ_SLICE_EXTENSION_SET, -- cgit v1.2.3 From b6eee96843e8d088200f01b035da98e72067c5fe Mon Sep 17 00:00:00 2001 From: Zhan Xusheng Date: Fri, 1 May 2026 12:40:06 +0200 Subject: sched/fair: Fix overflow in vruntime_eligible() Zhan Xusheng reported running into sporadic a s64 mult overflow in vruntime_eligible(). When constructing a worst case scenario: If you have cgroups, then you can have an entity of weight 2 (per calc_group_shares()), and its vlag should then be bounded by: (slice+TICK_NSEC) * NICE_0_LOAD, which is around 44 bits as per the comment on entity_key(). The other extreme is 100*NICE_0_LOAD, thus you get: {key, weight}[] := { puny: { (slice + TICK_NSEC) * NICE_0_LOAD, 2 }, max: { 0, 100*NICE_0_LOAD }, } The avg_vruntime() would end up being very close to 0 (which is zero_vruntime), so no real help making that more accurate. vruntime_eligible(puny) ends up with: avg = 2 * puny.key (+ 0) load = 2 + 100 * NICE_0_LOAD avg >= puny.key * load And that is: (slice + TICK_NSEC) * NICE_0_LOAD * NICE_0_LOAD * 100, which will overflow s64. Zhan suggested using __builtin_mul_overflow(), however after staring at compiler output for various architectures using godbolt, it seems that using an __int128 multiplication often results in better code. Specifically, a number of architectures already compute the __int128 product to determine the overflow. Eg. arm64 already has the 'smulh' instruction used. By explicitly doing an __int128 multiply, it will emit the 'mul; smulh' pattern, which modern cores can fuse (armv8-a clang-22.1.0). x86_64 has less branches (no OF handling). Since Linux has ARCH_SUPPORTS_INT128 to gate __int128 usage, also provide the __builtin_mul_overflow() variant as a fallback. [peterz: Changelog and __int128 bits] Fixes: 556146ce5e94 ("sched/fair: Avoid overflow in enqueue_entity()") Reported-by: Zhan Xusheng Closes: https://patch.msgid.link/20260415145742.10359-1-zhanxusheng%40xiaomi.com Signed-off-by: Zhan Xusheng Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260505103155.GN3102924%40noisy.programming.kicks-ass.net --- kernel/sched/fair.c | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 728965851842..b91c8b294229 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -882,11 +882,11 @@ bool update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) * * lag_i >= 0 -> V >= v_i * - * \Sum (v_i - v)*w_i - * V = ------------------ + v + * \Sum (v_i - v0)*w_i + * V = ------------------- + v0 * \Sum w_i * - * lag_i >= 0 -> \Sum (v_i - v)*w_i >= (v_i - v)*(\Sum w_i) + * lag_i >= 0 -> \Sum (v_i - v0)*w_i >= (v_i - v0)*(\Sum w_i) * * Note: using 'avg_vruntime() > se->vruntime' is inaccurate due * to the loss in precision caused by the division. @@ -894,7 +894,7 @@ bool update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime) { struct sched_entity *curr = cfs_rq->curr; - s64 avg = cfs_rq->sum_w_vruntime; + s64 key, avg = cfs_rq->sum_w_vruntime; long load = cfs_rq->sum_weight; if (curr && curr->on_rq) { @@ -904,7 +904,36 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime) load += weight; } - return avg >= vruntime_op(vruntime, "-", cfs_rq->zero_vruntime) * load; + key = vruntime_op(vruntime, "-", cfs_rq->zero_vruntime); + + /* + * The worst case term for @key includes 'NSEC_TICK * NICE_0_LOAD' + * and @load obviously includes NICE_0_LOAD. NSEC_TICK is around 24 + * bits, while NICE_0_LOAD is 20 on 64bit and 10 otherwise. + * + * This gives that on 64bit the product will be at least 64bit which + * overflows s64, while on 32bit it will only be 44bits and should fit + * comfortably. + */ +#ifdef CONFIG_64BIT +#ifdef CONFIG_ARCH_SUPPORTS_INT128 + /* This often results in simpler code than __builtin_mul_overflow(). */ + return avg >= (__int128)key * load; +#else + s64 rhs; + /* + * On overflow, the sign of key tells us the correct answer: a large + * positive key means vruntime >> V, so not eligible; a large negative + * key means vruntime << V, so eligible. + */ + if (check_mul_overflow(key, load, &rhs)) + return key <= 0; + + return avg >= rhs; +#endif +#else /* 32bit */ + return avg >= key * load; +#endif } int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se) -- cgit v1.2.3 From 9f6d929ee2c6f0266edb564bcd2bd47fd6e884a8 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Sun, 3 May 2026 12:45:03 +0200 Subject: sched/fair: Fix wakeup_preempt_fair() for not waking up task Make sure to only call pick_next_entity() on an non-empty cfs_rq. The assumption that p is always enqueued and not delayed, is only true for wakeup. If p was moved while delayed, pick_next_entity() will dequeue it and the cfs might become empty. Test if there are still queued tasks before trying again to determine if p could be the next one to be picked. There are at least 2 cases: When cfs becomes idle, it tries to pull tasks but if those pulled tasks are delayed, they will be dequeued when attached to cfs. attach_tasks() -> attach_task() -> wakeup_preempt(rq, p, 0); A misfit task running on cfs A triggers a load balance to be pulled on a better cpu, the load balance on cfs B starts an active load balance to pulled the running misfit task. If there is a delayed dequeue task on cfs A, it can be pulled instead of the previously running misfit task. attach_one_task() -> attach_task() -> wakeup_preempt(rq, p, 0); Fixes: ac8e69e69363 ("sched/fair: Fix wakeup_preempt_fair() vs delayed dequeue") Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260503104503.1732682-1-vincent.guittot@linaro.org --- kernel/sched/fair.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b91c8b294229..3ebec186f982 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9174,9 +9174,10 @@ pick: /* * Because p is enqueued, nse being null can only mean that we - * dequeued a delayed task. + * dequeued a delayed task. If there are still entities queued in + * cfs, check if the next one will be p. */ - if (!nse) + if (!nse && cfs_rq->nr_queued) goto pick; if (sched_feat(RUN_TO_PARITY)) -- cgit v1.2.3 From 1f7305d87aa23db2579df222eba504a333c2c978 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 5 May 2026 17:52:03 +0100 Subject: KVM: arm64: Work around C1-Pro erratum 4193714 for protected guests C1-Pro cores with SME have an erratum where TLBI+DSB does not complete all outstanding SME accesses. Instead a DSB needs to be executed on the affected CPUs. The implication is that pages cannot be unmapped from the host Stage 2 and then provided to a protected guest or to the hypervisor. Host SME accesses may still complete after this point. This erratum breaks pKVM's guarantees, and the workaround is hard to implement as EL2 and EL1 share a security state meaning EL1 can mask IPIs sent by EL2, leading to interrupt blackouts. Instead, do this in EL3. This has the advantage of a separate security state, meaning lower EL cannot mask the IPI. It is also simpler for EL3 to know about CPUs that are off or in PSCI's CPU_SUSPEND. Add the needed hook to host_stage2_set_owner_metadata_locked(). This covers the cases where the host loses access to a page: __pkvm_host_donate_guest() __pkvm_guest_unshare_host() host_stage2_set_owner_locked() when owner_id == PKVM_ID_HYP Since pKVM relies on the firmware call for correctness, check for the firmware counterpart during protected KVM initialisation and fail the pKVM initialisation if it is missing. Signed-off-by: James Morse Co-developed-by: Catalin Marinas Signed-off-by: Catalin Marinas Cc: Mark Rutland Cc: Marc Zyngier Cc: Oliver Upton Cc: Will Deacon Cc: Vincent Donnefort Cc: Lorenzo Pieralisi Cc: Sudeep Holla Link: https://patch.msgid.link/20260505165205.2690919-1-catalin.marinas@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arm.c | 21 +++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 23 ++++++++++++++++++++++- include/linux/arm-smccc.h | 6 ++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 8bb2c7422cc8..34c9950884d5 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -4,6 +4,7 @@ * Author: Christoffer Dall */ +#include #include #include #include @@ -2638,6 +2639,22 @@ static int init_pkvm_host_sve_state(void) return 0; } +static int pkvm_check_sme_dvmsync_fw_call(void) +{ + struct arm_smccc_res res; + + if (!cpus_have_final_cap(ARM64_WORKAROUND_4193714)) + return 0; + + arm_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res); + if (res.a0) { + kvm_err("pKVM requires firmware support for C1-Pro erratum 4193714\n"); + return -ENODEV; + } + + return 0; +} + /* * Finalizes the initialization of hyp mode, once everything else is initialized * and the initialziation process cannot fail. @@ -2838,6 +2855,10 @@ static int __init init_hyp_mode(void) if (err) goto out_err; + err = pkvm_check_sme_dvmsync_fw_call(); + if (err) + goto out_err; + err = kvm_hyp_init_protection(hyp_va_bits); if (err) { kvm_err("Failed to init hyp memory protection\n"); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 28a471d1927c..a3050e2b65b1 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -5,6 +5,7 @@ */ #include + #include #include #include @@ -14,6 +15,7 @@ #include +#include #include #include #include @@ -29,6 +31,19 @@ static struct hyp_pool host_s2_pool; static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm); #define current_vm (*this_cpu_ptr(&__current_vm)) +static void pkvm_sme_dvmsync_fw_call(void) +{ + if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714)) { + struct arm_smccc_res res; + + /* + * Ignore the return value. Probing for the workaround + * availability took place in init_hyp_mode(). + */ + hyp_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res); + } +} + static void guest_lock_component(struct pkvm_hyp_vm *vm) { hyp_spin_lock(&vm->lock); @@ -574,8 +589,14 @@ static int host_stage2_set_owner_metadata_locked(phys_addr_t addr, u64 size, ret = host_stage2_try(kvm_pgtable_stage2_annotate, &host_mmu.pgt, addr, size, &host_s2_pool, KVM_HOST_INVALID_PTE_TYPE_DONATION, annotation); - if (!ret) + if (!ret) { + /* + * After stage2 maintenance has happened, but before the page + * owner has changed. + */ + pkvm_sme_dvmsync_fw_call(); __host_update_page_state(addr, size, PKVM_NOPAGE); + } return ret; } diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 50b47eba7d01..e7195750d21b 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -105,6 +105,12 @@ ARM_SMCCC_SMC_32, \ 0, 0x3fff) +/* C1-Pro erratum 4193714: SME DVMSync early acknowledgement */ +#define ARM_SMCCC_CPU_WORKAROUND_4193714 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_CPU, 0x10) + #define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ARM_SMCCC_SMC_32, \ -- cgit v1.2.3 From 8d9b9d985ad3a81c751a6b97edaf1d3c0780af7c Mon Sep 17 00:00:00 2001 From: Wei-Lin Chang Date: Tue, 5 May 2026 15:47:35 +0100 Subject: KVM: arm64: nv: Consider the DS bit when translating TCR_EL2 When running an nVHE L1, TCR_EL2 is mapped to TCR_EL1. Writes to the register are trapped and written to TCR_EL1 after a translation. Booting an nVHE L1 with 52-bit VA isn't working because the translation was ignoring the DS bit set by the guest, hence causing repeating level 0 faults. Add it in the translation function. Signed-off-by: Wei-Lin Chang Link: https://patch.msgid.link/20260505144735.1496530-1-weilin.chang@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_nested.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 091544e6af44..dc2957662ff2 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -23,6 +23,7 @@ static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2) static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr) { return TCR_EPD1_MASK | /* disable TTBR1_EL1 */ + ((tcr & TCR_EL2_DS) ? TCR_DS : 0) | ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) | tcr_el2_ps_to_tcr_el1_ips(tcr) | (tcr & TCR_EL2_TG0_MASK) | -- cgit v1.2.3 From 9be19df816dea9eb7dfe1661b3690bed6a2cb146 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 5 May 2026 10:49:13 +0100 Subject: KVM: arm64: Handle permission faults with guest_memfd gmem_abort() calls kvm_pgtable_stage2_map() to make changes to stage 2. It does this for both relaxing permissions on an existing mapping and to install a missing mapping. kvm_pgtable_stage2_map() doesn't make changes to stage 2 if there is an existing, valid entry and the new entry modifies only the permissions. This is checked in: kvm_pgtable_stage2_map() stage2_map_walk_leaf() stage2_map_walker_try_leaf() stage2_pte_needs_update() and if only the permissions differ, kvm_pgtable_stage2_map() returns -EAGAIN and KVM returns to the guest to replay the instruction. The assumption is that a concurrent fault on a different VCPU already mapped the faulting IPA, and replaying the instruction will either succeed, or cause a permission fault, which should be handled with kvm_pgtable_stage2_relax_perms(). gmem_abort(), on a read or write fault on a system without DIC (instruction cache invalidation required for data to instruction coherence), installs a valid entry with read and write permissions, but without executable permissions. On an execution fault on the same page, gmem_abort() attempts to relax the permissions to allow execution, but calls kvm_pgtable_stage2_map() to change the existing, valid, entry. kvm_pgtable_stage2_map() returns -EAGAIN and KVM resumes execution from the faulting instruction, which leads to an infinite loop of permission faults on the same instruction. Allow the guest to make progress by using kvm_pgtable_stage2_relax_perms() to relax permissions. Fixes: a7b57e099592 ("KVM: arm64: Handle guest_memfd-backed guest page faults") Signed-off-by: Alexandru Elisei Reviewed-by: Fuad Tabba Link: https://patch.msgid.link/20260505094913.75317-1-alexandru.elisei@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/mmu.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index d089c107d9b7..4da9281312eb 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1576,21 +1576,24 @@ struct kvm_s2_fault_desc { static int gmem_abort(const struct kvm_s2_fault_desc *s2fd) { bool write_fault, exec_fault; + bool perm_fault = kvm_vcpu_trap_is_permission_fault(s2fd->vcpu); enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; struct kvm_pgtable *pgt = s2fd->vcpu->arch.hw_mmu->pgt; unsigned long mmu_seq; struct page *page; struct kvm *kvm = s2fd->vcpu->kvm; - void *memcache; + void *memcache = NULL; kvm_pfn_t pfn; gfn_t gfn; int ret; - memcache = get_mmu_memcache(s2fd->vcpu); - ret = topup_mmu_memcache(s2fd->vcpu, memcache); - if (ret) - return ret; + if (!perm_fault) { + memcache = get_mmu_memcache(s2fd->vcpu); + ret = topup_mmu_memcache(s2fd->vcpu, memcache); + if (ret) + return ret; + } if (s2fd->nested) gfn = kvm_s2_trans_output(s2fd->nested) >> PAGE_SHIFT; @@ -1631,9 +1634,19 @@ static int gmem_abort(const struct kvm_s2_fault_desc *s2fd) goto out_unlock; } - ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE, - __pfn_to_phys(pfn), prot, - memcache, flags); + if (perm_fault) { + /* + * Drop the SW bits in favour of those stored in the + * PTE, which will be preserved. + */ + prot &= ~KVM_NV_GUEST_MAP_SZ; + ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, s2fd->fault_ipa, + prot, flags); + } else { + ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE, + __pfn_to_phys(pfn), prot, + memcache, flags); + } out_unlock: kvm_release_faultin_page(kvm, page, !!ret, prot & KVM_PGTABLE_PROT_W); -- cgit v1.2.3 From fc240715fc5003538ff530e3cfb985e7769b7171 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 4 May 2026 13:28:08 +0200 Subject: KVM: selftests: arm64: Fix steal_time test after UAPI refactoring Fix the following failure to the steal_time test on arm64 by making the timer address known to the guest. ==== Test Assertion Failure ==== steal_time.c:229: !ret pid=18514 tid=18514 errno=22 - Invalid argument 1 0x000000000040252f: check_steal_time_uapi at steal_time.c:229 (discriminator 20) 2 (inlined by) main at steal_time.c:537 (discriminator 20) 3 0x0000ffffa23d621b: ?? ??:0 4 0x0000ffffa23d62fb: ?? ??:0 5 0x0000000000402b6f: _start at ??:? KVM_SET_DEVICE_ATTR failed, rc: -1 errno: 22 (Invalid argument) Fixes: 40351ed924dd ("KVM: selftests: Refactor UAPI tests into dedicated function") Signed-off-by: Sebastian Ott Link: https://patch.msgid.link/20260504112808.21276-1-sebott@redhat.com Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/steal_time.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index 7df2bc8eec02..76fcdd1fd3cb 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -220,6 +220,8 @@ static void check_steal_time_uapi(void) }; vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev); + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, 1, 0); + virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, 1); st_ipa = (ulong)ST_GPA_BASE | 1; ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); -- cgit v1.2.3 From 9a624ea3f26f40c76bd2c7f77cde30659d42efbd Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Thu, 30 Apr 2026 10:37:24 +0000 Subject: KVM: arm64: Remove potential UB on nvhe tracing clock update Sashiko(locally) reports possiblity of division by zero and out-of-bounds bitwise shift in trace_clock_update(). Although the clock update is untrusted, we should at least have some basic checks to avoid undefined behaviours. Reviewed-by: Vincent Donnefort Signed-off-by: Mostafa Saleh Link: https://patch.msgid.link/20260430103724.2151625-1-smostafa@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/clock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/clock.c b/arch/arm64/kvm/hyp/nvhe/clock.c index 32fc4313fe43..a7fc61976fd0 100644 --- a/arch/arm64/kvm/hyp/nvhe/clock.c +++ b/arch/arm64/kvm/hyp/nvhe/clock.c @@ -35,6 +35,9 @@ void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc) struct clock_data *clock = &trace_clock_data; u64 bank = clock->cur ^ 1; + if (!mult || shift >= 64) + return; + clock->data[bank].mult = mult; clock->data[bank].shift = shift; clock->data[bank].epoch_ns = epoch_ns; -- cgit v1.2.3 From 54ea22273eef67196f238263bc79f27da04d2114 Mon Sep 17 00:00:00 2001 From: Steffen Eiden Date: Tue, 28 Apr 2026 18:05:12 +0200 Subject: MAINTAINERS: Add Steffen as reviewer for KVM/arm64 KVM/arm64 and KVM/s390 will eventually share some code. Add me as a cross-reviewer from the s390 team to arm64 to help to keep both architectures in sync. Signed-off-by: Steffen Eiden Link: https://patch.msgid.link/20260428160527.1378085-16-seiden@linux.ibm.com [maz: rephrase commit message to use future tense, since this is merged ahead of the code] Signed-off-by: Marc Zyngier --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 882214b0e7db..3fe4ea59199c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14052,6 +14052,7 @@ KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) M: Marc Zyngier M: Oliver Upton R: Joey Gouly +R: Steffen Eiden R: Suzuki K Poulose R: Zenghui Yu L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -- cgit v1.2.3 From b819db93d73f4593636299e229914052b89e3ef2 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sun, 12 Apr 2026 21:47:42 +0300 Subject: Bluetooth: SCO: fix sleeping under spinlock in sco_conn_ready sco_conn_ready calls sleeping functions under conn->lock spinlock. The critical section can be reduced: conn->hcon is modified only with hdev->lock held. It is guaranteed to be held in sco_conn_ready, so conn->lock is not needed to guard it. Move taking conn->lock after lock_sock(parent). This also follows the lock ordering lock_sock() > conn->lock elsewhere in the file. Fixes: 27c24fda62b60 ("Bluetooth: switch to lock_sock in SCO") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/sco.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 18826d4b9c0b..3a5479538e85 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1377,26 +1377,24 @@ static void sco_conn_ready(struct sco_conn *conn) sk->sk_state_change(sk); release_sock(sk); } else { - sco_conn_lock(conn); - - if (!conn->hcon) { - sco_conn_unlock(conn); + if (!conn->hcon) return; - } + + lockdep_assert_held(&conn->hcon->hdev->lock); parent = sco_get_sock_listen(&conn->hcon->src); - if (!parent) { - sco_conn_unlock(conn); + if (!parent) return; - } lock_sock(parent); + sco_conn_lock(conn); + sk = sco_sock_alloc(sock_net(parent), NULL, BTPROTO_SCO, GFP_ATOMIC, 0); if (!sk) { - release_sock(parent); sco_conn_unlock(conn); + release_sock(parent); return; } @@ -1417,9 +1415,9 @@ static void sco_conn_ready(struct sco_conn *conn) /* Wake up parent */ parent->sk_data_ready(parent); - release_sock(parent); - sco_conn_unlock(conn); + + release_sock(parent); } } -- cgit v1.2.3 From 0beddb0c380bed5f5b8e61ddbe14635bb73d0b41 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sun, 12 Apr 2026 21:29:16 +0100 Subject: Bluetooth: hci_conn: fix potential UAF in create_big_sync Add hci_conn_valid() check in create_big_sync() to detect stale connections before proceeding with BIG creation. Handle the resulting -ECANCELED in create_big_complete() and re-validate the connection under hci_dev_lock() before dereferencing, matching the pattern used by create_le_conn_complete() and create_pa_complete(). Keep the hci_conn object alive across the async boundary by taking a reference via hci_conn_get() when queueing create_big_sync(), and dropping it in the completion callback. The refcount and the lock are complementary: the refcount keeps the object allocated, while hci_dev_lock() serializes hci_conn_hash_del()'s list_del_rcu() on hdev->conn_hash, as required by hci_conn_del(). hci_conn_put() is called outside hci_dev_unlock() so the final put (which resolves to kfree() via bt_link_release) does not run under hdev->lock, though the release path would be safe either way. Without this, create_big_complete() would unconditionally dereference the conn pointer on error, causing a use-after-free via hci_connect_cfm() and hci_conn_del(). Fixes: eca0ae4aea66 ("Bluetooth: Add initial implementation of BIS connections") Cc: stable@vger.kernel.org Co-developed-by: Luiz Augusto von Dentz Signed-off-by: Luiz Augusto von Dentz Signed-off-by: David Carlier Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 3a0592599086..96e345fcf303 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2130,6 +2130,9 @@ static int create_big_sync(struct hci_dev *hdev, void *data) u32 flags = 0; int err; + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; + if (qos->bcast.out.phys == BIT(1)) flags |= MGMT_ADV_FLAG_SEC_2M; @@ -2204,11 +2207,24 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "conn %p", conn); + if (err == -ECANCELED) + goto done; + + hci_dev_lock(hdev); + + if (!hci_conn_valid(hdev, conn)) + goto unlock; + if (err) { bt_dev_err(hdev, "Unable to create BIG: %d", err); hci_connect_cfm(conn, err); hci_conn_del(conn); } + +unlock: + hci_dev_unlock(hdev); +done: + hci_conn_put(conn); } struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid, @@ -2336,10 +2352,11 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, BT_BOUND, &data); /* Queue start periodic advertising and create BIG */ - err = hci_cmd_sync_queue(hdev, create_big_sync, conn, + err = hci_cmd_sync_queue(hdev, create_big_sync, hci_conn_get(conn), create_big_complete); if (err < 0) { hci_conn_drop(conn); + hci_conn_put(conn); return ERR_PTR(err); } -- cgit v1.2.3 From 5ddb8014261137cadaf83ab5617a588d80a22586 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 10 Apr 2026 15:29:52 -0400 Subject: Bluetooth: hci_event: Fix OOB read and infinite loop in hci_le_create_big_complete_evt hci_le_create_big_complete_evt() iterates over BT_BOUND connections for a BIG handle using a while loop, accessing ev->bis_handle[i++] on each iteration. However, there is no check that i stays within ev->num_bis before the array access. When a controller sends a LE_Create_BIG_Complete event with fewer bis_handle entries than there are BT_BOUND connections for that BIG, or with num_bis=0, the loop reads beyond the valid bis_handle[] flex array into adjacent heap memory. Since the out-of-bounds values typically exceed HCI_CONN_HANDLE_MAX (0x0EFF), hci_conn_set_handle() rejects them and the connection remains in BT_BOUND state. The same connection is then found again by hci_conn_hash_lookup_big_state(), creating an infinite loop with hci_dev_lock held. Fix this by terminating the BIG if in case not all BIS could be setup properly. Fixes: a0bfde167b50 ("Bluetooth: ISO: Add support for connecting multiple BISes") Cc: stable@vger.kernel.org Signed-off-by: ZhiTao Ou Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b2ee6b6a0f56..1b3b9131affa 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -7118,9 +7118,29 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, continue; } + if (ev->num_bis <= i) { + bt_dev_err(hdev, + "Not enough BIS handles for BIG 0x%2.2x", + ev->handle); + ev->status = HCI_ERROR_UNSPECIFIED; + hci_connect_cfm(conn, ev->status); + hci_conn_del(conn); + continue; + } + if (hci_conn_set_handle(conn, - __le16_to_cpu(ev->bis_handle[i++]))) + __le16_to_cpu(ev->bis_handle[i++]))) { + bt_dev_err(hdev, + "Failed to set BIS handle for BIG 0x%2.2x", + ev->handle); + /* Force error so BIG gets terminated as not all BIS + * could be connected. + */ + ev->status = HCI_ERROR_UNSPECIFIED; + hci_connect_cfm(conn, ev->status); + hci_conn_del(conn); continue; + } conn->state = BT_CONNECTED; set_bit(HCI_CONN_BIG_CREATED, &conn->flags); @@ -7129,7 +7149,10 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, hci_iso_setup_path(conn); } - if (!ev->status && !i) + /* If there is an unexpected error or if no BISes have been connected + * for the BIG, terminate it. + */ + if (ev->status == HCI_ERROR_UNSPECIFIED || (!ev->status && !i)) /* If no BISes have been connected for the BIG, * terminate. This is in case all bound connections * have been closed before the BIG creation -- cgit v1.2.3 From 72b8deccff17a7644e0367e1aaf1a36cfb014324 Mon Sep 17 00:00:00 2001 From: Dudu Lu Date: Wed, 15 Apr 2026 17:39:53 +0800 Subject: Bluetooth: bnep: fix incorrect length parsing in bnep_rx_frame() extension handling In bnep_rx_frame(), the BNEP_FILTER_NET_TYPE_SET and BNEP_FILTER_MULTI_ADDR_SET extension header parsing has two bugs: 1) The 2-byte length field is read with *(u16 *)(skb->data + 1), which performs a native-endian read. The BNEP protocol specifies this field in big-endian (network byte order), and the same file correctly uses get_unaligned_be16() for the identical fields in bnep_ctrl_set_netfilter() and bnep_ctrl_set_mcfilter(). 2) The length is multiplied by 2, but unlike BNEP_SETUP_CONN_REQ where the length byte counts UUID pairs (requiring * 2 for two UUIDs per entry), the filter extension length field already represents the total data size in bytes. This is confirmed by bnep_ctrl_set_netfilter() which reads the same field as a byte count and divides by 4 to get the number of filter entries. The bogus * 2 means skb_pull advances twice as far as it should, either dropping valid data from the next header or causing the pull to fail entirely when the doubled length exceeds the remaining skb. Fix by splitting the pull into two steps: first use skb_pull_data() to safely pull and validate the 3-byte fixed header (ctrl type + length), then pull the variable-length data using the properly decoded length. Fixes: bf8b9a9cb77b ("Bluetooth: bnep: Add support to extended headers of control frames") Signed-off-by: Dudu Lu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/bnep/core.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index d44987d4515c..853c8d7644b5 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -330,11 +330,18 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) goto badframe; break; case BNEP_FILTER_MULTI_ADDR_SET: - case BNEP_FILTER_NET_TYPE_SET: - /* Pull: ctrl type (1 b), len (2 b), data (len bytes) */ - if (!skb_pull(skb, 3 + *(u16 *)(skb->data + 1) * 2)) + case BNEP_FILTER_NET_TYPE_SET: { + u8 *hdr; + + /* Pull ctrl type (1 b) + len (2 b) */ + hdr = skb_pull_data(skb, 3); + if (!hdr) + goto badframe; + /* Pull data (len bytes); length is big-endian */ + if (!skb_pull(skb, get_unaligned_be16(&hdr[1]))) goto badframe; break; + } default: kfree_skb(skb); return 0; -- cgit v1.2.3 From 4f42363c814f28fe3f59847c35acf1ed033bedd4 Mon Sep 17 00:00:00 2001 From: Dudu Lu Date: Wed, 15 Apr 2026 18:43:55 +0800 Subject: Bluetooth: l2cap: fix MPS check in l2cap_ecred_reconf_req The L2CAP specification states that if more than one channel is being reconfigured, the MPS shall not be decreased. The current check has two issues: 1) The comparison uses >= (greater-than-or-equal), which incorrectly rejects reconfiguration requests where the MPS stays the same. Since the spec says MPS "shall be greater than or equal to the current MPS", only a strict decrease (remote_mps > mps) should be rejected. Keeping the same MPS is valid. 2) The multi-channel guard uses `&& i` (loop index) to approximate "more than one channel", but this incorrectly allows MPS decrease for the first channel (i==0) even when multiple channels are being reconfigured. Replace with `&& num_scid > 1` which correctly checks whether the request covers more than one channel. Fixes: 7accb1c4321a ("Bluetooth: L2CAP: Fix invalid response to L2CAP_ECRED_RECONF_REQ") Signed-off-by: Dudu Lu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 77dec104a9c3..b15374b951fa 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5428,7 +5428,7 @@ static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, * configured, the MPS field may be less than the current MPS * of that channel. */ - if (chan[i]->remote_mps >= mps && i) { + if (chan[i]->remote_mps > mps && num_scid > 1) { BT_ERR("chan %p decreased MPS %u -> %u", chan[i], chan[i]->remote_mps, mps); result = L2CAP_RECONF_INVALID_MPS; -- cgit v1.2.3 From 91b5a598b5285da794b72619f31777b62dd336f8 Mon Sep 17 00:00:00 2001 From: Mikhail Gavrilov Date: Wed, 15 Apr 2026 02:52:37 +0500 Subject: Bluetooth: l2cap: defer conn param update to avoid conn->lock/hdev->lock inversion When a BLE peripheral sends an L2CAP Connection Parameter Update Request the processing path is: process_pending_rx() [takes conn->lock] l2cap_le_sig_channel() l2cap_conn_param_update_req() hci_le_conn_update() [takes hdev->lock] Meanwhile other code paths take the locks in the opposite order: l2cap_chan_connect() [takes hdev->lock] ... mutex_lock(&conn->lock) l2cap_conn_ready() [hdev->lock via hci_cb_list_lock] ... mutex_lock(&conn->lock) This is a classic AB/BA deadlock which lockdep reports as a circular locking dependency when connecting a BLE MIDI keyboard (Carry-On FC-49). Fix this by making hci_le_conn_update() defer the HCI command through hci_cmd_sync_queue() so it no longer needs to take hdev->lock in the caller context. The sync callback uses __hci_cmd_sync_status_sk() to wait for the HCI_EV_LE_CONN_UPDATE_COMPLETE event, then updates the stored connection parameters (hci_conn_params) and notifies userspace (mgmt_new_conn_param) only after the controller has confirmed the update. A reference on hci_conn is held via hci_conn_get()/hci_conn_put() for the lifetime of the queued work to prevent use-after-free, and hci_conn_valid() is checked before proceeding in case the connection was removed while the work was pending. The hci_dev_lock is held across hci_conn_valid() and all conn field accesses to prevent a concurrent disconnect from invalidating the connection mid-use. Fixes: f044eb0524a0 ("Bluetooth: Store latency and supervision timeout in connection params") Signed-off-by: Mikhail Gavrilov Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_conn.c | 105 ++++++++++++++++++++++++++++++++------- net/bluetooth/l2cap_core.c | 12 +---- 3 files changed, 89 insertions(+), 30 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a7bffb908c1e..aa600fbf9a53 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2495,7 +2495,7 @@ void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); int hci_abort_conn(struct hci_conn *conn, u8 reason); -u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, +void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, __u8 ltk[16], __u8 key_size); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 96e345fcf303..17b46ad6a349 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -480,40 +480,107 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle) return hci_setup_sync_conn(conn, handle); } -u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, - u16 to_multiplier) +struct le_conn_update_data { + struct hci_conn *conn; + u16 min; + u16 max; + u16 latency; + u16 to_multiplier; +}; + +static int le_conn_update_sync(struct hci_dev *hdev, void *data) { - struct hci_dev *hdev = conn->hdev; + struct le_conn_update_data *d = data; + struct hci_conn *conn = d->conn; struct hci_conn_params *params; struct hci_cp_le_conn_update cp; + u16 timeout; + u8 store_hint; + int err; + /* Verify connection is still alive and read conn fields under + * the same lock to prevent a concurrent disconnect from freeing + * or reusing the connection while we build the HCI command. + */ hci_dev_lock(hdev); - params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); - if (params) { - params->conn_min_interval = min; - params->conn_max_interval = max; - params->conn_latency = latency; - params->supervision_timeout = to_multiplier; + if (!hci_conn_valid(hdev, conn)) { + hci_dev_unlock(hdev); + return -ECANCELED; } - hci_dev_unlock(hdev); - memset(&cp, 0, sizeof(cp)); cp.handle = cpu_to_le16(conn->handle); - cp.conn_interval_min = cpu_to_le16(min); - cp.conn_interval_max = cpu_to_le16(max); - cp.conn_latency = cpu_to_le16(latency); - cp.supervision_timeout = cpu_to_le16(to_multiplier); + cp.conn_interval_min = cpu_to_le16(d->min); + cp.conn_interval_max = cpu_to_le16(d->max); + cp.conn_latency = cpu_to_le16(d->latency); + cp.supervision_timeout = cpu_to_le16(d->to_multiplier); cp.min_ce_len = cpu_to_le16(0x0000); cp.max_ce_len = cpu_to_le16(0x0000); + timeout = conn->conn_timeout; - hci_send_cmd(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp); + hci_dev_unlock(hdev); - if (params) - return 0x01; + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CONN_UPDATE, + sizeof(cp), &cp, + HCI_EV_LE_CONN_UPDATE_COMPLETE, + timeout, NULL); + if (err) + return err; - return 0x00; + /* Update stored connection parameters after the controller has + * confirmed the update via the LE Connection Update Complete event. + */ + hci_dev_lock(hdev); + + params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); + if (params) { + params->conn_min_interval = d->min; + params->conn_max_interval = d->max; + params->conn_latency = d->latency; + params->supervision_timeout = d->to_multiplier; + store_hint = 0x01; + } else { + store_hint = 0x00; + } + + hci_dev_unlock(hdev); + + mgmt_new_conn_param(hdev, &conn->dst, conn->dst_type, store_hint, + d->min, d->max, d->latency, d->to_multiplier); + + return 0; +} + +static void le_conn_update_complete(struct hci_dev *hdev, void *data, int err) +{ + struct le_conn_update_data *d = data; + + hci_conn_put(d->conn); + kfree(d); +} + +void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, + u16 to_multiplier) +{ + struct le_conn_update_data *d; + + d = kzalloc_obj(*d); + if (!d) + return; + + hci_conn_get(conn); + d->conn = conn; + d->min = min; + d->max = max; + d->latency = latency; + d->to_multiplier = to_multiplier; + + if (hci_cmd_sync_queue(conn->hdev, le_conn_update_sync, d, + le_conn_update_complete) < 0) { + hci_conn_put(conn); + kfree(d); + } } void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index b15374b951fa..7701528f1167 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4706,16 +4706,8 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP, sizeof(rsp), &rsp); - if (!err) { - u8 store_hint; - - store_hint = hci_le_conn_update(hcon, min, max, latency, - to_multiplier); - mgmt_new_conn_param(hcon->hdev, &hcon->dst, hcon->dst_type, - store_hint, min, max, latency, - to_multiplier); - - } + if (!err) + hci_le_conn_update(hcon, min, max, latency, to_multiplier); return 0; } -- cgit v1.2.3 From 2ff1a41a912de8517b4482e946dd951b7d80edbf Mon Sep 17 00:00:00 2001 From: Siwei Zhang Date: Wed, 15 Apr 2026 16:51:36 -0400 Subject: Bluetooth: L2CAP: Fix null-ptr-deref in l2cap_sock_state_change_cb() Add the same NULL guard already present in l2cap_sock_resume_cb() and l2cap_sock_ready_cb(). Fixes: 89bc500e41fc ("Bluetooth: Add state tracking to struct l2cap_chan") Cc: stable@kernel.org Signed-off-by: Siwei Zhang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_sock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 71e8c1b45bce..fb3cb70a5a39 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1657,6 +1657,9 @@ static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state, { struct sock *sk = chan->data; + if (!sk) + return; + sk->sk_state = state; if (err) -- cgit v1.2.3 From 78a88d43dab8d23aeef934ed8ce34d40e6b3d613 Mon Sep 17 00:00:00 2001 From: Siwei Zhang Date: Wed, 15 Apr 2026 16:53:36 -0400 Subject: Bluetooth: L2CAP: Fix null-ptr-deref in l2cap_sock_get_sndtimeo_cb() Add the same NULL guard already present in l2cap_sock_resume_cb() and l2cap_sock_ready_cb(). Fixes: 8d836d71e222 ("Bluetooth: Access sk_sndtimeo indirectly in l2cap_core.c") Cc: stable@kernel.org Signed-off-by: Siwei Zhang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_sock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index fb3cb70a5a39..879c9f90269a 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1761,6 +1761,9 @@ static long l2cap_sock_get_sndtimeo_cb(struct l2cap_chan *chan) { struct sock *sk = chan->data; + if (!sk) + return 0; + return READ_ONCE(sk->sk_sndtimeo); } -- cgit v1.2.3 From 0a120d96166301d7a95be75b52f843837dbd1219 Mon Sep 17 00:00:00 2001 From: Siwei Zhang Date: Wed, 15 Apr 2026 16:49:59 -0400 Subject: Bluetooth: L2CAP: Fix null-ptr-deref in l2cap_sock_new_connection_cb() Add the same NULL guard already present in l2cap_sock_resume_cb() and l2cap_sock_ready_cb(). Fixes: 80808e431e1e ("Bluetooth: Add l2cap_chan_ops abstraction") Cc: stable@kernel.org Signed-off-by: Siwei Zhang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_sock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 879c9f90269a..cf590a67d364 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1498,6 +1498,9 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) { struct sock *sk, *parent = chan->data; + if (!parent) + return NULL; + lock_sock(parent); /* Check for backlog size */ -- cgit v1.2.3 From 4e37f6452d586b95c346a9abdd2fb80b67794f39 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 18 Apr 2026 18:41:12 +0300 Subject: Bluetooth: SCO: hold sk properly in sco_conn_ready sk deref in sco_conn_ready must be done either under conn->lock, or holding a refcount, to avoid concurrent close. conn->sk and parent sk is currently accessed without either, and without checking parent->sk_state: [Task 1] [Task 2] sco_sock_release sco_conn_ready sk = conn->sk lock_sock(sk) conn->sk = NULL lock_sock(sk) release_sock(sk) sco_sock_kill(sk) UAF on sk deref and similarly for access to sco_get_sock_listen() return value. Fix possible UAF by holding sk refcount in sco_conn_ready() and making sco_get_sock_listen() increase refcount. Also recheck after lock_sock that the socket is still valid. Adjust conn->sk locking so it's protected also by lock_sock() of the associated socket if any. Fixes: 27c24fda62b60 ("Bluetooth: switch to lock_sock in SCO") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/sco.c | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 3a5479538e85..eba44525d41d 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -472,9 +472,13 @@ static struct sock *sco_get_sock_listen(bdaddr_t *src) sk1 = sk; } + sk = sk ? sk : sk1; + if (sk) + sock_hold(sk); + read_unlock(&sco_sk_list.lock); - return sk ? sk : sk1; + return sk; } static void sco_sock_destruct(struct sock *sk) @@ -515,11 +519,13 @@ static void sco_sock_kill(struct sock *sk) BT_DBG("sk %p state %d", sk, sk->sk_state); /* Sock is dead, so set conn->sk to NULL to avoid possible UAF */ + lock_sock(sk); if (sco_pi(sk)->conn) { sco_conn_lock(sco_pi(sk)->conn); sco_pi(sk)->conn->sk = NULL; sco_conn_unlock(sco_pi(sk)->conn); } + release_sock(sk); /* Kill poor orphan */ bt_sock_unlink(&sco_sk_list, sk); @@ -1365,17 +1371,28 @@ static int sco_sock_release(struct socket *sock) static void sco_conn_ready(struct sco_conn *conn) { - struct sock *parent; - struct sock *sk = conn->sk; + struct sock *parent, *sk; + + sco_conn_lock(conn); + sk = sco_sock_hold(conn); + sco_conn_unlock(conn); BT_DBG("conn %p", conn); if (sk) { lock_sock(sk); - sco_sock_clear_timer(sk); - sk->sk_state = BT_CONNECTED; - sk->sk_state_change(sk); + + /* conn->sk may have become NULL if racing with sk close, but + * due to held hdev->lock, it can't become different sk. + */ + if (conn->sk) { + sco_sock_clear_timer(sk); + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); + } + release_sock(sk); + sock_put(sk); } else { if (!conn->hcon) return; @@ -1390,13 +1407,15 @@ static void sco_conn_ready(struct sco_conn *conn) sco_conn_lock(conn); + /* hdev->lock guarantees conn->sk == NULL still here */ + + if (parent->sk_state != BT_LISTEN) + goto release; + sk = sco_sock_alloc(sock_net(parent), NULL, BTPROTO_SCO, GFP_ATOMIC, 0); - if (!sk) { - sco_conn_unlock(conn); - release_sock(parent); - return; - } + if (!sk) + goto release; sco_sock_init(sk, parent); @@ -1415,9 +1434,10 @@ static void sco_conn_ready(struct sco_conn *conn) /* Wake up parent */ parent->sk_data_ready(parent); +release: sco_conn_unlock(conn); - release_sock(parent); + sock_put(parent); } } -- cgit v1.2.3 From 5917dd39db2bfc8b1b4c6ea8ed99adb4badef707 Mon Sep 17 00:00:00 2001 From: Sai Teja Aluvala Date: Mon, 20 Apr 2026 23:07:35 +0530 Subject: Bluetooth: btintel_pcie: treat boot stage bit 12 as warning CSR boot stage register bit 12 is documented as a device warning, not a fatal error. Rename the bit definition accordingly and stop including it in btintel_pcie_in_error(). This keeps warning-only boot stage values from being classified as errors while preserving abort-handler state as the actual error condition. Fixes: 190377500fde ("Bluetooth: btintel_pcie: Dump debug registers on error") Signed-off-by: Kiran K Signed-off-by: Sai Teja Aluvala Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 13 ++++++++++--- drivers/bluetooth/btintel_pcie.h | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 2f59c0d6f9ec..a3643e67b33f 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -289,6 +289,9 @@ static inline void btintel_pcie_dump_debug_registers(struct hci_dev *hdev) skb_put_data(skb, buf, strlen(buf)); data->boot_stage_cache = reg; + if (reg & BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_WARNING) + bt_dev_warn(hdev, "Controller device warning (boot_stage: 0x%8.8x)", reg); + reg = btintel_pcie_rd_reg32(data, BTINTEL_PCIE_CSR_IPC_STATUS_REG); snprintf(buf, sizeof(buf), "ipc status: 0x%8.8x", reg); skb_put_data(skb, buf, strlen(buf)); @@ -880,8 +883,11 @@ static inline bool btintel_pcie_in_lockdown(struct btintel_pcie_data *data) static inline bool btintel_pcie_in_error(struct btintel_pcie_data *data) { - return (data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_ERR) || - (data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_ABORT_HANDLER); + if (data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_WARNING) + bt_dev_warn(data->hdev, "Controller device warning (boot_stage: 0x%8.8x)", + data->boot_stage_cache); + + return data->boot_stage_cache & BTINTEL_PCIE_CSR_BOOT_STAGE_ABORT_HANDLER; } static void btintel_pcie_msix_gp1_handler(struct btintel_pcie_data *data) @@ -914,7 +920,8 @@ static void btintel_pcie_msix_gp0_handler(struct btintel_pcie_data *data) data->img_resp_cache = reg; if (btintel_pcie_in_error(data)) { - bt_dev_err(data->hdev, "Controller in error state"); + bt_dev_err(data->hdev, "Controller in error state (boot_stage: 0x%8.8x)", + data->boot_stage_cache); btintel_pcie_dump_debug_registers(data->hdev); return; } diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h index 3c7bb708362d..f922abd1e7d8 100644 --- a/drivers/bluetooth/btintel_pcie.h +++ b/drivers/bluetooth/btintel_pcie.h @@ -48,7 +48,7 @@ #define BTINTEL_PCIE_CSR_BOOT_STAGE_OPFW (BIT(2)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_ROM_LOCKDOWN (BIT(10)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_IML_LOCKDOWN (BIT(11)) -#define BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_ERR (BIT(12)) +#define BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_WARNING (BIT(12)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_ABORT_HANDLER (BIT(13)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_DEVICE_HALTED (BIT(14)) #define BTINTEL_PCIE_CSR_BOOT_STAGE_MAC_ACCESS_ON (BIT(16)) -- cgit v1.2.3 From 902fe40bce7059722f7ffa1c378e577675cf1918 Mon Sep 17 00:00:00 2001 From: Aurelien DESBRIERES Date: Tue, 21 Apr 2026 15:53:31 +0200 Subject: Bluetooth: hci_uart: Fix NULL deref in recv callbacks when priv is uninitialized When a fault is injected during hci_uart line discipline setup, the proto open() callback may fail leaving hu->priv as NULL. A subsequent TIOCSTI ioctl can trigger the recv() callback before priv is initialized, causing a NULL pointer dereference. Fix all four affected HCI UART protocol drivers by adding a NULL check on hu->priv at the start of their recv() callbacks: h4, h5, ath and bcsp. Reported-by: syzbot+ff30eeab8e07b37d524e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=ff30eeab8e07b37d524e Signed-off-by: Aurelien DESBRIERES Assisted-by: Claude:claude-sonnet-4-6 Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_ath.c | 3 +++ drivers/bluetooth/hci_bcsp.c | 3 +++ drivers/bluetooth/hci_h4.c | 3 +++ drivers/bluetooth/hci_h5.c | 3 +++ 4 files changed, 12 insertions(+) diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c index fa679ad0acdf..8201fa7f61e8 100644 --- a/drivers/bluetooth/hci_ath.c +++ b/drivers/bluetooth/hci_ath.c @@ -191,6 +191,9 @@ static int ath_recv(struct hci_uart *hu, const void *data, int count) { struct ath_struct *ath = hu->priv; + if (!ath) + return -ENODEV; + ath->rx_skb = h4_recv_buf(hu, ath->rx_skb, data, count, ath_recv_pkts, ARRAY_SIZE(ath_recv_pkts)); if (IS_ERR(ath->rx_skb)) { diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index b386f91d8b46..db56eead27ce 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -585,6 +585,9 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; + if (!bcsp) + return -ENODEV; + BT_DBG("hu %p count %d rx_state %d rx_count %ld", hu, count, bcsp->rx_state, bcsp->rx_count); diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index a889a66a326f..767372707498 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -109,6 +109,9 @@ static int h4_recv(struct hci_uart *hu, const void *data, int count) { struct h4_struct *h4 = hu->priv; + if (!h4) + return -ENODEV; + h4->rx_skb = h4_recv_buf(hu, h4->rx_skb, data, count, h4_recv_pkts, ARRAY_SIZE(h4_recv_pkts)); if (IS_ERR(h4->rx_skb)) { diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index cfdf75dc2847..d35383718212 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -587,6 +587,9 @@ static int h5_recv(struct hci_uart *hu, const void *data, int count) struct h5 *h5 = hu->priv; const unsigned char *ptr = data; + if (!h5) + return -ENODEV; + BT_DBG("%s pending %zu count %d", hu->hdev->name, h5->rx_pending, count); -- cgit v1.2.3 From ca40d481079c05c6891a14a798c79596fd2d5f0c Mon Sep 17 00:00:00 2001 From: SeungJu Cheon Date: Tue, 21 Apr 2026 11:51:21 +0900 Subject: Bluetooth: ISO: Fix data-race on dst in iso_sock_connect() iso_sock_connect() copies the destination address into iso_pi(sk)->dst under lock_sock, then releases the lock and reads it back with bacmp() to decide between the CIS and BIS connect paths: lock_sock(sk); bacpy(&iso_pi(sk)->dst, &sa->iso_bdaddr); iso_pi(sk)->dst_type = sa->iso_bdaddr_type; release_sock(sk); if (bacmp(&iso_pi(sk)->dst, BDADDR_ANY)) // <- no lock held This read after release_sock() races with any concurrent write to iso_pi(sk)->dst on the same socket. Fix by reading the destination address directly from the local sockaddr argument (sa->iso_bdaddr) instead of iso_pi(sk)->dst. Since sa is a function-local argument, reading it requires no locking and avoids the race. This patch addresses only the bacmp() race in iso_sock_connect(); other unprotected iso_pi(sk) accesses are fixed separately in the next patch. KCSAN report: BUG: KCSAN: data-race in memcmp+0x39/0xb0 race at unknown origin, with read to 0xffff8f96ea66dde3 of 1 bytes by task 549 on cpu 1: memcmp+0x39/0xb0 iso_sock_connect+0x275/0xb40 __sys_connect_file+0xbd/0xe0 __sys_connect+0xe0/0x110 __x64_sys_connect+0x40/0x50 x64_sys_call+0xcad/0x1c60 do_syscall_64+0x133/0x590 entry_SYSCALL_64_after_hwframe+0x77/0x7f value changed: 0x00 -> 0xee Reported by Kernel Concurrency Sanitizer on: CPU: 1 UID: 0 PID: 549 Comm: iso_race_combin Not tainted 7.0.0-08391-g1d51b370a0f8 #40 PREEMPT(lazy) Fixes: ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type") Signed-off-by: SeungJu Cheon Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index be145e2736b7..290a1b9a9daa 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1193,7 +1193,7 @@ static int iso_sock_connect(struct socket *sock, struct sockaddr_unsized *addr, release_sock(sk); - if (bacmp(&iso_pi(sk)->dst, BDADDR_ANY)) + if (bacmp(&sa->iso_bdaddr, BDADDR_ANY)) err = iso_connect_cis(sk); else err = iso_connect_bis(sk); -- cgit v1.2.3 From f958c7805b18e9d69f6b322b231ecee46ec6f331 Mon Sep 17 00:00:00 2001 From: SeungJu Cheon Date: Tue, 21 Apr 2026 11:51:22 +0900 Subject: Bluetooth: ISO: Fix data-race on iso_pi(sk) in socket and HCI event paths Several iso_pi(sk) fields (qos, qos_user_set, bc_sid, base, base_len, sync_handle, bc_num_bis) are written under lock_sock in iso_sock_setsockopt() and iso_sock_bind(), but read and written under hci_dev_lock only in two other paths: - iso_connect_bis() / iso_connect_cis(), invoked from connect(2), read qos/base/bc_sid and reset qos to default_qos on the qos_user_set validation failure -- all without lock_sock. - iso_connect_ind(), invoked from hci_rx_work, writes sync_handle, bc_sid, qos.bcast.encryption, bc_num_bis, base and base_len on PA_SYNC_ESTABLISHED / PAST_RECEIVED / BIG_INFO_ADV_REPORT / PER_ADV_REPORT events. The BIG_INFO handler additionally passes &iso_pi(sk)->qos together with sync_handle / bc_num_bis / bc_bis to hci_conn_big_create_sync() while setsockopt may be mutating them. Acquire lock_sock around the affected accesses in both paths. The locking order hci_dev_lock -> lock_sock matches the existing iso_conn_big_sync() precedent, whose comment documents the same requirement for hci_conn_big_create_sync(). The HCI connect/bind helpers do not wait for command completion -- they enqueue work via hci_cmd_sync_queue{,_once}() / hci_le_create_cis_pending() and return -- so the added hold time is comparable to iso_conn_big_sync(). KCSAN report: BUG: KCSAN: data-race in iso_connect_cis / iso_sock_setsockopt read to 0xffffa3ae8ce3cdc8 of 1 bytes by task 335 on cpu 0: iso_connect_cis+0x49f/0xa20 iso_sock_connect+0x60e/0xb40 __sys_connect_file+0xbd/0xe0 __sys_connect+0xe0/0x110 __x64_sys_connect+0x40/0x50 x64_sys_call+0xcad/0x1c60 do_syscall_64+0x133/0x590 entry_SYSCALL_64_after_hwframe+0x77/0x7f write to 0xffffa3ae8ce3cdc8 of 60 bytes by task 334 on cpu 1: iso_sock_setsockopt+0x69a/0x930 do_sock_setsockopt+0xc3/0x170 __sys_setsockopt+0xd1/0x130 __x64_sys_setsockopt+0x64/0x80 x64_sys_call+0x1547/0x1c60 do_syscall_64+0x133/0x590 entry_SYSCALL_64_after_hwframe+0x77/0x7f Reported by Kernel Concurrency Sanitizer on: CPU: 1 UID: 0 PID: 334 Comm: iso_setup_race Not tainted 7.0.0-10949-g8541d8f725c6 #44 PREEMPT(lazy) The iso_connect_ind() races were found by inspection. Fixes: ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type") Signed-off-by: SeungJu Cheon Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 54 +++++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 290a1b9a9daa..7cb2864fe872 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -347,6 +347,7 @@ static int iso_connect_bis(struct sock *sk) return -EHOSTUNREACH; hci_dev_lock(hdev); + lock_sock(sk); if (!bis_capable(hdev)) { err = -EOPNOTSUPP; @@ -399,13 +400,9 @@ static int iso_connect_bis(struct sock *sk) goto unlock; } - lock_sock(sk); - err = iso_chan_add(conn, sk, NULL); - if (err) { - release_sock(sk); + if (err) goto unlock; - } /* Update source addr of the socket */ bacpy(&iso_pi(sk)->src, &hcon->src); @@ -421,9 +418,8 @@ static int iso_connect_bis(struct sock *sk) iso_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo)); } - release_sock(sk); - unlock: + release_sock(sk); hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -444,6 +440,7 @@ static int iso_connect_cis(struct sock *sk) return -EHOSTUNREACH; hci_dev_lock(hdev); + lock_sock(sk); if (!cis_central_capable(hdev)) { err = -EOPNOTSUPP; @@ -498,13 +495,9 @@ static int iso_connect_cis(struct sock *sk) goto unlock; } - lock_sock(sk); - err = iso_chan_add(conn, sk, NULL); - if (err) { - release_sock(sk); + if (err) goto unlock; - } /* Update source addr of the socket */ bacpy(&iso_pi(sk)->src, &hcon->src); @@ -520,9 +513,8 @@ static int iso_connect_cis(struct sock *sk) iso_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo)); } - release_sock(sk); - unlock: + release_sock(sk); hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -2256,8 +2248,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) sk = iso_get_sock(hdev, &hdev->bdaddr, bdaddr, BT_LISTEN, iso_match_sid, ev1); if (sk && !ev1->status) { + lock_sock(sk); iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle); iso_pi(sk)->bc_sid = ev1->sid; + release_sock(sk); } goto done; @@ -2268,8 +2262,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) sk = iso_get_sock(hdev, &hdev->bdaddr, bdaddr, BT_LISTEN, iso_match_sid_past, ev1a); if (sk && !ev1a->status) { + lock_sock(sk); iso_pi(sk)->sync_handle = le16_to_cpu(ev1a->sync_handle); iso_pi(sk)->bc_sid = ev1a->sid; + release_sock(sk); } goto done; @@ -2296,27 +2292,35 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) ev2); if (sk) { - int err; - struct hci_conn *hcon = iso_pi(sk)->conn->hcon; + int err = 0; + bool big_sync; + struct hci_conn *hcon; + lock_sock(sk); + + hcon = iso_pi(sk)->conn->hcon; iso_pi(sk)->qos.bcast.encryption = ev2->encryption; if (ev2->num_bis < iso_pi(sk)->bc_num_bis) iso_pi(sk)->bc_num_bis = ev2->num_bis; - if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) && - !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) { + big_sync = !test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) && + !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags); + + if (big_sync) err = hci_conn_big_create_sync(hdev, hcon, &iso_pi(sk)->qos, iso_pi(sk)->sync_handle, iso_pi(sk)->bc_num_bis, iso_pi(sk)->bc_bis); - if (err) { - bt_dev_err(hdev, "hci_le_big_create_sync: %d", - err); - sock_put(sk); - sk = NULL; - } + + release_sock(sk); + + if (big_sync && err) { + bt_dev_err(hdev, "hci_le_big_create_sync: %d", + err); + sock_put(sk); + sk = NULL; } } @@ -2370,8 +2374,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) if (!base || base_len > BASE_MAX_LENGTH) goto done; + lock_sock(sk); memcpy(iso_pi(sk)->base, base, base_len); iso_pi(sk)->base_len = base_len; + release_sock(sk); } else { /* This is a PA data fragment. Keep pa_data_len set to 0 * until all data has been reassembled. -- cgit v1.2.3 From 634a4408c0615c523cf7531790f4f14a422b9206 Mon Sep 17 00:00:00 2001 From: Tristan Madani Date: Tue, 21 Apr 2026 11:14:54 +0000 Subject: Bluetooth: btmtk: validate WMT event SKB length before struct access btmtk_usb_hci_wmt_sync() casts the WMT event response SKB data to struct btmtk_hci_wmt_evt (7 bytes) and struct btmtk_hci_wmt_evt_funcc (9 bytes) without first checking that the SKB contains enough data. A short firmware response causes out-of-bounds reads from SKB tailroom. Use skb_pull_data() to validate and advance past the base WMT event header. For the FUNC_CTRL case, pull the additional status field bytes before accessing them. Fixes: d019930b0049 ("Bluetooth: btmtk: move btusb_mtk_hci_wmt_sync to btmtk.c") Cc: stable@vger.kernel.org Signed-off-by: Tristan Madani Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index 6fb6ca274808..f70c1b0f8990 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -695,8 +695,13 @@ static int btmtk_usb_hci_wmt_sync(struct hci_dev *hdev, if (data->evt_skb == NULL) goto err_free_wc; - /* Parse and handle the return WMT event */ - wmt_evt = (struct btmtk_hci_wmt_evt *)data->evt_skb->data; + wmt_evt = skb_pull_data(data->evt_skb, sizeof(*wmt_evt)); + if (!wmt_evt) { + bt_dev_err(hdev, "WMT event too short (%u bytes)", + data->evt_skb->len); + err = -EINVAL; + goto err_free_skb; + } if (wmt_evt->whdr.op != hdr->op) { bt_dev_err(hdev, "Wrong op received %d expected %d", wmt_evt->whdr.op, hdr->op); @@ -712,6 +717,12 @@ static int btmtk_usb_hci_wmt_sync(struct hci_dev *hdev, status = BTMTK_WMT_PATCH_DONE; break; case BTMTK_WMT_FUNC_CTRL: + if (!skb_pull_data(data->evt_skb, + sizeof(wmt_evt_funcc->status))) { + err = -EINVAL; + goto err_free_skb; + } + wmt_evt_funcc = (struct btmtk_hci_wmt_evt_funcc *)wmt_evt; if (be16_to_cpu(wmt_evt_funcc->status) == 0x404) status = BTMTK_WMT_ON_DONE; -- cgit v1.2.3 From 21bd244b6de5d2fe1063c23acc93fbdd2b20d112 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Tue, 21 Apr 2026 13:08:44 -0400 Subject: Bluetooth: virtio_bt: clamp rx length before skb_put virtbt_rx_work() calls skb_put(skb, len) where len comes directly from virtqueue_get_buf() with no validation against the buffer we posted to the device. The RX skb is allocated in virtbt_add_inbuf() and exposed to virtio as exactly 1000 bytes via sg_init_one(). Checking len against skb_tailroom(skb) is not sufficient because alloc_skb() can leave more tailroom than the 1000 bytes actually handed to the device. A malicious or buggy backend can therefore report used.len between 1001 and skb_tailroom(skb), causing skb_put() to include uninitialized kernel heap bytes that were never written by the device. The same path also accepts len == 0, in which case skb_put(skb, 0) leaves the skb empty but virtbt_rx_handle() still reads the pkt_type byte from skb->data, consuming uninitialized memory. Define VIRTBT_RX_BUF_SIZE once and reuse it in alloc_skb() and sg_init_one(), and gate virtbt_rx_work() on that same constant so the bound checked matches the buffer actually exposed to the device. Reject used.len == 0 in the same gate so an empty completion can no longer reach virtbt_rx_handle(). Use bt_dev_err_ratelimited() because the length value comes from an untrusted backend that can otherwise flood the kernel log. Same class of bug as commit c04db81cd028 ("net/9p: Fix buffer overflow in USB transport layer"), which hardened the USB 9p transport against unchecked device-reported length. Fixes: 160fbcf3bfb9 ("Bluetooth: virtio_bt: Use skb_put to set length") Cc: stable@vger.kernel.org Cc: Soenke Huster Signed-off-by: Michael Bommarito Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/virtio_bt.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c index 76d61af8a275..2c5c39356a1c 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c @@ -12,6 +12,7 @@ #include #define VERSION "0.1" +#define VIRTBT_RX_BUF_SIZE 1000 enum { VIRTBT_VQ_TX, @@ -33,11 +34,11 @@ static int virtbt_add_inbuf(struct virtio_bluetooth *vbt) struct sk_buff *skb; int err; - skb = alloc_skb(1000, GFP_KERNEL); + skb = alloc_skb(VIRTBT_RX_BUF_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; - sg_init_one(sg, skb->data, 1000); + sg_init_one(sg, skb->data, VIRTBT_RX_BUF_SIZE); err = virtqueue_add_inbuf(vq, sg, 1, skb, GFP_KERNEL); if (err < 0) { @@ -227,8 +228,15 @@ static void virtbt_rx_work(struct work_struct *work) if (!skb) return; - skb_put(skb, len); - virtbt_rx_handle(vbt, skb); + if (!len || len > VIRTBT_RX_BUF_SIZE) { + bt_dev_err_ratelimited(vbt->hdev, + "rx reply len %u outside [1, %u]\n", + len, VIRTBT_RX_BUF_SIZE); + kfree_skb(skb); + } else { + skb_put(skb, len); + virtbt_rx_handle(vbt, skb); + } if (virtbt_add_inbuf(vbt) < 0) return; -- cgit v1.2.3 From daf23014e5d975e72ea9c02b5160d3fcf070ea47 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Tue, 21 Apr 2026 13:08:45 -0400 Subject: Bluetooth: virtio_bt: validate rx pkt_type header length virtbt_rx_handle() reads the leading pkt_type byte from the RX skb and forwards the remainder to hci_recv_frame() for every event/ACL/SCO/ISO type, without checking that the remaining payload is at least the fixed HCI header for that type. After the preceding patch bounds the backend-supplied used.len to [1, VIRTBT_RX_BUF_SIZE], a one-byte completion still reaches hci_recv_frame() with skb->len already pulled to 0. If the byte happened to be HCI_ACLDATA_PKT, the ACL-vs-ISO classification fast-path in hci_dev_classify_pkt_type() dereferences hci_acl_hdr(skb)->handle whenever the HCI device has an active CIS_LINK, BIS_LINK, or PA_LINK connection, reading two bytes of uninitialized RX-buffer data. The same hazard exists for every packet type the driver accepts because none of the switch cases in virtbt_rx_handle() check skb->len against the per-type minimum HCI header size before handing the frame to the core. After stripping pkt_type, require skb->len to cover the fixed header size for the selected type (event 2, ACL 4, SCO 3, ISO 4) before calling hci_recv_frame(); drop ratelimited otherwise. Unknown pkt_type values still take the original kfree_skb() default path. Use bt_dev_err_ratelimited() because both the length and pkt_type values come from an untrusted backend that can otherwise flood the kernel log. Fixes: 160fbcf3bfb9 ("Bluetooth: virtio_bt: Use skb_put to set length") Cc: stable@vger.kernel.org Cc: Soenke Huster Signed-off-by: Michael Bommarito Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/virtio_bt.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c index 2c5c39356a1c..140ab55c9fc5 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c @@ -198,6 +198,7 @@ static int virtbt_shutdown_generic(struct hci_dev *hdev) static void virtbt_rx_handle(struct virtio_bluetooth *vbt, struct sk_buff *skb) { + size_t min_hdr; __u8 pkt_type; pkt_type = *((__u8 *) skb->data); @@ -205,16 +206,32 @@ static void virtbt_rx_handle(struct virtio_bluetooth *vbt, struct sk_buff *skb) switch (pkt_type) { case HCI_EVENT_PKT: + min_hdr = sizeof(struct hci_event_hdr); + break; case HCI_ACLDATA_PKT: + min_hdr = sizeof(struct hci_acl_hdr); + break; case HCI_SCODATA_PKT: + min_hdr = sizeof(struct hci_sco_hdr); + break; case HCI_ISODATA_PKT: - hci_skb_pkt_type(skb) = pkt_type; - hci_recv_frame(vbt->hdev, skb); + min_hdr = sizeof(struct hci_iso_hdr); break; default: kfree_skb(skb); - break; + return; } + + if (skb->len < min_hdr) { + bt_dev_err_ratelimited(vbt->hdev, + "rx pkt_type 0x%02x payload %u < hdr %zu\n", + pkt_type, skb->len, min_hdr); + kfree_skb(skb); + return; + } + + hci_skb_pkt_type(skb) = pkt_type; + hci_recv_frame(vbt->hdev, skb); } static void virtbt_rx_work(struct work_struct *work) -- cgit v1.2.3 From 8f59d17b18a78fdfdbb67d693b3d3eb03db184e0 Mon Sep 17 00:00:00 2001 From: Pengpeng Hou Date: Thu, 23 Apr 2026 23:31:00 +0800 Subject: Bluetooth: RFCOMM: pull credit byte with skb_pull_data() rfcomm_recv_data() treats the first payload byte as a credit field when the UIH frame carries PF and credit-based flow control is enabled. After the header has been stripped, the PF/CFC path consumes that byte with a direct skb->data dereference followed by skb_pull(). A malformed short frame can reach this path without a byte available. Use skb_pull_data() so the length check and pull happen together before the returned credit byte is consumed. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Pengpeng Hou Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/rfcomm/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 611a9a94151e..d11bd5337d57 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1715,9 +1715,12 @@ static int rfcomm_recv_data(struct rfcomm_session *s, u8 dlci, int pf, struct sk } if (pf && d->cfc) { - u8 credits = *(u8 *) skb->data; skb_pull(skb, 1); + u8 *credits = skb_pull_data(skb, 1); - d->tx_credits += credits; + if (!credits) + goto drop; + + d->tx_credits += *credits; if (d->tx_credits) clear_bit(RFCOMM_TX_THROTTLED, &d->flags); } -- cgit v1.2.3 From 72d97cae2a83cecf6f47208646675ecd066d0a3e Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 29 Apr 2026 15:40:46 +0200 Subject: Bluetooth: hci_event: fix memset typo hci_le_big_sync_established_evt() currently does: conn->num_bis = 0; memset(conn->bis, 0, sizeof(conn->num_bis)); sizeof(conn->num_bis) is wrong - it would make sense to either use conn->num_bis (before setting that to 0) or sizeof(conn->bis). Fix it by using sizeof(conn->bis), the least intrusive change. Luckily, nothing actually depends on this memset() working properly: Nothing seems to ever read from conn->bis beyond conn->num_bis, and when conn->num_bis is increased, the corresponding elements of conn->bis are initialized. So I think this line could also just be removed. This is a purely theoretical fix and should have no impact on actual behavior. Fixes: 42ecf1947135 ("Bluetooth: ISO: Do not emit LE BIG Create Sync if previous is pending") Signed-off-by: Jann Horn Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1b3b9131affa..eea2f810aafa 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -7191,7 +7191,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, clear_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); conn->num_bis = 0; - memset(conn->bis, 0, sizeof(conn->num_bis)); + memset(conn->bis, 0, sizeof(conn->bis)); for (i = 0; i < ev->num_bis; i++) { u16 handle = le16_to_cpu(ev->bis[i]); -- cgit v1.2.3 From c5d415596cb6fbdf6334b06cc87a1a5a268d8725 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sat, 2 May 2026 12:43:03 -0400 Subject: Bluetooth: HIDP: serialise l2cap_unregister_user via hidp_session_sem Commit dbf666e4fc9b ("Bluetooth: HIDP: Fix possible UAF") made hidp_session_remove() drop the L2CAP reference and set session->conn = NULL once the session is considered removed, and added a bare if (session->conn) guard around the kthread-exit l2cap_unregister_user() call in hidp_session_thread(). The sibling ioctl site in hidp_connection_del() still reads session->conn unlocked and unguarded, and the kthread-exit guard itself is a lockless double-read. hidp_session_find() drops hidp_session_sem before returning, so hidp_session_remove() can null session->conn between the lookup and the call in hidp_connection_del(). Worse, since commit 752a6c9596dd ("Bluetooth: L2CAP: Fix use-after-free in l2cap_unregister_user") takes mutex_lock(&conn->lock) inside l2cap_unregister_user(), a stale non-NULL snapshot also UAFs on conn->lock. v1 only added an if (session->conn) guard at the ioctl site, which doesn't address either race; Luiz suggested snapshotting session->conn under the sem and clearing it before the call. Taking hidp_session_sem across l2cap_unregister_user() would be wrong: l2cap_conn_del() already establishes the lock order conn->lock -> hidp_session_sem via l2cap_unregister_all_users() -> user->remove == hidp_session_remove(), so taking hidp_session_sem before conn->lock would AB/BA deadlock. Factor a helper hidp_session_unregister_conn() that under down_write(&hidp_session_sem) snapshots session->conn and clears the member, then outside the sem calls l2cap_unregister_user() and l2cap_conn_put() on the snapshot. Call it from both hidp_connection_del() and hidp_session_thread()'s exit path. At most one consumer wins the write-sem; later callers observe session->conn == NULL and skip the unregister and put, so the reference hidp_session_new() took via l2cap_conn_get() is consumed exactly once. session_free() already tolerates a NULL session->conn. Fixes: dbf666e4fc9b ("Bluetooth: HIDP: Fix possible UAF") Suggested-by: Luiz Augusto von Dentz Link: https://lore.kernel.org/all/20260422011437.176643-1-michael.bommarito@gmail.com/ Signed-off-by: Michael Bommarito Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hidp/core.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 7bcf8c5ceaee..976f91eeb745 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1035,6 +1035,28 @@ static struct hidp_session *hidp_session_find(const bdaddr_t *bdaddr) return session; } +/* + * Consume session->conn: clear the member under hidp_session_sem, then + * l2cap_unregister_user() and l2cap_conn_put() the snapshot outside the + * sem. At most one caller wins; later callers see NULL and skip. The + * reference is the one hidp_session_new() took via l2cap_conn_get(). + */ +static void hidp_session_unregister_conn(struct hidp_session *session) +{ + struct l2cap_conn *conn; + + down_write(&hidp_session_sem); + conn = session->conn; + if (conn) + session->conn = NULL; + up_write(&hidp_session_sem); + + if (conn) { + l2cap_unregister_user(conn, &session->user); + l2cap_conn_put(conn); + } +} + /* * Start session synchronously * This starts a session thread and waits until initialization @@ -1311,8 +1333,7 @@ static int hidp_session_thread(void *arg) * Instead, this call has the same semantics as if user-space tried to * delete the session. */ - if (session->conn) - l2cap_unregister_user(session->conn, &session->user); + hidp_session_unregister_conn(session); hidp_session_put(session); @@ -1418,7 +1439,7 @@ int hidp_connection_del(struct hidp_conndel_req *req) HIDP_CTRL_VIRTUAL_CABLE_UNPLUG, NULL, 0); else - l2cap_unregister_user(session->conn, &session->user); + hidp_session_unregister_conn(session); hidp_session_put(session); -- cgit v1.2.3 From 0e1368a28dd5231ae0dbe240dfe0ff2657de5647 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 May 2026 17:22:05 -0700 Subject: selftests: drv-net: fix sort order of makefile and config Recent changes added configs and tests in the wrong spot. Link: https://lore.kernel.org/20260506170435.34984dfc@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/Makefile | 2 +- tools/testing/selftests/drivers/net/hw/config | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 3b6ff4708005..82809d5b2478 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -30,8 +30,8 @@ TEST_PROGS = \ gro_hw.py \ hw_stats_l3.sh \ hw_stats_l3_gre.sh \ - ipsec_vxlan.py \ iou-zcrx.py \ + ipsec_vxlan.py \ irq.py \ loopback.sh \ nic_timestamp.py \ diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config index ae0168c2bbe6..8c132ace2b8d 100644 --- a/tools/testing/selftests/drivers/net/hw/config +++ b/tools/testing/selftests/drivers/net/hw/config @@ -3,6 +3,10 @@ CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION=y CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FUNCTION_ERROR_INJECTION=y +CONFIG_INET6_ESP=y +CONFIG_INET6_ESP_OFFLOAD=y +CONFIG_INET_ESP=y +CONFIG_INET_ESP_OFFLOAD=y CONFIG_IO_URING=y CONFIG_IPV6=y CONFIG_IPV6_GRE=y @@ -12,10 +16,6 @@ CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y CONFIG_NETKIT=y CONFIG_NET_SCH_INGRESS=y -CONFIG_INET6_ESP=y -CONFIG_INET6_ESP_OFFLOAD=y -CONFIG_INET_ESP=y -CONFIG_INET_ESP_OFFLOAD=y CONFIG_UDMABUF=y CONFIG_VXLAN=y CONFIG_XFRM_USER=y -- cgit v1.2.3 From 7aaa8f5e45a92678256c1e17f1fa2c2f45c61dd1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 May 2026 13:00:56 +0000 Subject: ipv6: fix potential UAF caused by ip6_forward_proxy_check() ip6_forward_proxy_check() calls pskb_may_pull() which might re-allocate skb->head. Reload ipv6_hdr() after the pskb_may_pull() call to avoid using the freed memory. Fixes: e21e0b5f19ac ("[IPV6] NDISC: Handle NDP messages to proxied addresses.") Reported-by: Damiano Melotti Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260505130056.2927197-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1f2a33fbed6e..c14adcdd4396 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -468,6 +468,7 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) default: break; } + hdr = ipv6_hdr(skb); } /* @@ -582,6 +583,8 @@ int ip6_forward(struct sk_buff *skb) if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev)) { int proxied = ip6_forward_proxy_check(skb); + + hdr = ipv6_hdr(skb); if (proxied > 0) { /* It's tempting to decrease the hop limit * here by 1, as we do at the end of the -- cgit v1.2.3 From 7ce3f1bedaac88880594720ba0f687da3bd7fc8a Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Tue, 5 May 2026 03:42:23 -0700 Subject: netdevsim: psp: only call nsim_psp_uninit() on PFs VFs go through nsim_init_netdevsim_vf() which never calls nsim_psp_init(), so ns->psp.dev stays NULL. nsim_psp_uninit() guards with !IS_ERR(ns->psp.dev), so destroying a VF reaches psp_dev_unregister(NULL) and dereferences NULL on the first mutex_lock(&psd->lock): BUG: kernel NULL pointer dereference, address: 0000000000000020 RIP: 0010:mutex_lock+0x1c/0x30 Call Trace: psp_dev_unregister+0x2a/0x1a0 nsim_psp_uninit+0x1f/0x40 [netdevsim] nsim_destroy+0x61/0x1e0 [netdevsim] __nsim_dev_port_del+0x47/0x90 [netdevsim] nsim_drv_configure_vfs+0xc9/0x130 [netdevsim] nsim_bus_dev_numvfs_store+0x79/0xb0 [netdevsim] Gate nsim_psp_uninit() on nsim_dev_port_is_pf(), matching the pattern already used for nsim_exit_netdevsim() and the bpf/ipsec/macsec/queue teardowns. Reproducer: modprobe netdevsim echo "10 1" > /sys/bus/netdevsim/new_device echo 1 > /sys/bus/netdevsim/devices/netdevsim10/sriov_numvfs devlink dev eswitch set netdevsim/netdevsim10 mode switchdev echo 0 > /sys/bus/netdevsim/devices/netdevsim10/sriov_numvfs Fixes: f857478d6206 ("netdevsim: a basic test PSP implementation") Signed-off-by: Daniel Zahka Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260505-psd-rcu-v1-1-a8f69ec1ab96@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index a05af192caf3..a750768912b5 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -1182,7 +1182,8 @@ void nsim_destroy(struct netdevsim *ns) unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb, &ns->nn); - nsim_psp_uninit(ns); + if (nsim_dev_port_is_pf(ns->nsim_dev_port)) + nsim_psp_uninit(ns); rtnl_lock(); peer = rtnl_dereference(ns->peer); -- cgit v1.2.3 From 24c96a42006ee27a078ec8c631c906dea8a3ca6d Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Tue, 5 May 2026 03:42:24 -0700 Subject: netdevsim: psp: serialize calls to nsim_psp_uninit() The debugfs write handler, nsim_psp_rereg_write(), can race against nsim_destroy() and against itself, causing nsim_psp_uninit() to run more than once concurrently. Two complementary changes serialize all callers: 1. Delete the psp_rereg debugfs file from nsim_psp_uninit() before doing the actual teardown. debugfs_remove() drains any in-flight writers and prevents new ones from starting. 2. Add a mutex around the body of nsim_psp_rereg_write() so that two concurrent userspace writers cannot both enter the teardown path at once. The teardown work itself is moved into a new __nsim_psp_uninit() that the rereg handler calls under the mutex, while the public nsim_psp_uninit() wraps it with the debugfs_remove()/mutex_destroy() pair so nsim_destroy() doesn't have to know about the psp internals. Fixes: f857478d6206 ("netdevsim: a basic test PSP implementation") Signed-off-by: Daniel Zahka Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260505-psd-rcu-v1-2-a8f69ec1ab96@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdevsim.h | 2 ++ drivers/net/netdevsim/psp.c | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 7e129dddbbe7..e373ffc26b0c 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -121,6 +121,8 @@ struct netdevsim { u64_stats_t tx_bytes; struct u64_stats_sync syncp; struct psp_dev *dev; + struct dentry *rereg; + struct mutex rereg_lock; u32 spi; u32 assoc_cnt; } psp; diff --git a/drivers/net/netdevsim/psp.c b/drivers/net/netdevsim/psp.c index 0b4d717253b0..86d84b7e566b 100644 --- a/drivers/net/netdevsim/psp.c +++ b/drivers/net/netdevsim/psp.c @@ -209,13 +209,20 @@ static struct psp_dev_caps nsim_psp_caps = { .assoc_drv_spc = sizeof(void *), }; -void nsim_psp_uninit(struct netdevsim *ns) +static void __nsim_psp_uninit(struct netdevsim *ns) { if (!IS_ERR(ns->psp.dev)) psp_dev_unregister(ns->psp.dev); WARN_ON(ns->psp.assoc_cnt); } +void nsim_psp_uninit(struct netdevsim *ns) +{ + debugfs_remove(ns->psp.rereg); + mutex_destroy(&ns->psp.rereg_lock); + __nsim_psp_uninit(ns); +} + static ssize_t nsim_psp_rereg_write(struct file *file, const char __user *data, size_t count, loff_t *ppos) @@ -223,11 +230,13 @@ nsim_psp_rereg_write(struct file *file, const char __user *data, size_t count, struct netdevsim *ns = file->private_data; int err; - nsim_psp_uninit(ns); + mutex_lock(&ns->psp.rereg_lock); + __nsim_psp_uninit(ns); ns->psp.dev = psp_dev_create(ns->netdev, &nsim_psp_ops, &nsim_psp_caps, ns); err = PTR_ERR_OR_ZERO(ns->psp.dev); + mutex_unlock(&ns->psp.rereg_lock); return err ?: count; } @@ -249,6 +258,8 @@ int nsim_psp_init(struct netdevsim *ns) if (err) return err; - debugfs_create_file("psp_rereg", 0200, ddir, ns, &nsim_psp_rereg_fops); + mutex_init(&ns->psp.rereg_lock); + ns->psp.rereg = debugfs_create_file("psp_rereg", 0200, ddir, ns, + &nsim_psp_rereg_fops); return 0; } -- cgit v1.2.3 From 07bdec3fc737aac7f4c273aafa803d353174c43e Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Tue, 5 May 2026 03:42:25 -0700 Subject: netdevsim: psp: rcu protect psp_dev reference There are two issues with the way psp_dev is used in nsim_do_psp(): 1. There is no check for IS_ERR() on the peers psp_dev, before dereferencing. 2. The refcount on this psp_dev can be dropped by nsim_psp_rereg_write() To fix this, we can make netdevsim's reference to its psp_dev an rcu reference, and then nsim_do_psp() can read the fields it needs from an rcu critical section. Fixes: f857478d6206 ("netdevsim: a basic test PSP implementation") Signed-off-by: Daniel Zahka Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260505-psd-rcu-v1-3-a8f69ec1ab96@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdevsim.h | 2 +- drivers/net/netdevsim/psp.c | 54 +++++++++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index e373ffc26b0c..d909c4160ea1 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -120,7 +120,7 @@ struct netdevsim { u64_stats_t tx_packets; u64_stats_t tx_bytes; struct u64_stats_sync syncp; - struct psp_dev *dev; + struct psp_dev __rcu *dev; struct dentry *rereg; struct mutex rereg_lock; u32 spi; diff --git a/drivers/net/netdevsim/psp.c b/drivers/net/netdevsim/psp.c index 86d84b7e566b..6936ecb8173e 100644 --- a/drivers/net/netdevsim/psp.c +++ b/drivers/net/netdevsim/psp.c @@ -19,6 +19,7 @@ nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, struct netdevsim *peer_ns, struct skb_ext **psp_ext) { enum skb_drop_reason rc = 0; + struct psp_dev *peer_psd; struct psp_assoc *pas; struct net *net; void **ptr; @@ -48,7 +49,8 @@ nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, } /* Now pretend we just received this frame */ - if (peer_ns->psp.dev->config.versions & (1 << pas->version)) { + peer_psd = rcu_dereference(peer_ns->psp.dev); + if (peer_psd && peer_psd->config.versions & (1 << pas->version)) { bool strip_icv = false; u8 generation; @@ -61,8 +63,7 @@ nsim_do_psp(struct sk_buff *skb, struct netdevsim *ns, skb_ext_reset(skb); skb->mac_len = ETH_HLEN; - if (psp_dev_rcv(skb, peer_ns->psp.dev->id, generation, - strip_icv)) { + if (psp_dev_rcv(skb, peer_psd->id, generation, strip_icv)) { rc = SKB_DROP_REASON_PSP_OUTPUT; goto out_unlock; } @@ -209,10 +210,18 @@ static struct psp_dev_caps nsim_psp_caps = { .assoc_drv_spc = sizeof(void *), }; -static void __nsim_psp_uninit(struct netdevsim *ns) +static void __nsim_psp_uninit(struct netdevsim *ns, bool teardown) { - if (!IS_ERR(ns->psp.dev)) - psp_dev_unregister(ns->psp.dev); + struct psp_dev *psd; + + psd = rcu_dereference_protected(ns->psp.dev, + teardown || + lockdep_is_held(&ns->psp.rereg_lock)); + if (psd) { + rcu_assign_pointer(ns->psp.dev, NULL); + synchronize_rcu(); + psp_dev_unregister(psd); + } WARN_ON(ns->psp.assoc_cnt); } @@ -220,7 +229,7 @@ void nsim_psp_uninit(struct netdevsim *ns) { debugfs_remove(ns->psp.rereg); mutex_destroy(&ns->psp.rereg_lock); - __nsim_psp_uninit(ns); + __nsim_psp_uninit(ns, true); } static ssize_t @@ -228,16 +237,23 @@ nsim_psp_rereg_write(struct file *file, const char __user *data, size_t count, loff_t *ppos) { struct netdevsim *ns = file->private_data; - int err; + struct psp_dev *psd; + ssize_t ret; mutex_lock(&ns->psp.rereg_lock); - __nsim_psp_uninit(ns); + __nsim_psp_uninit(ns, false); + + psd = psp_dev_create(ns->netdev, &nsim_psp_ops, &nsim_psp_caps, ns); + if (IS_ERR(psd)) { + ret = PTR_ERR(psd); + goto out; + } - ns->psp.dev = psp_dev_create(ns->netdev, &nsim_psp_ops, - &nsim_psp_caps, ns); - err = PTR_ERR_OR_ZERO(ns->psp.dev); + rcu_assign_pointer(ns->psp.dev, psd); + ret = count; +out: mutex_unlock(&ns->psp.rereg_lock); - return err ?: count; + return ret; } static const struct file_operations nsim_psp_rereg_fops = { @@ -250,13 +266,13 @@ static const struct file_operations nsim_psp_rereg_fops = { int nsim_psp_init(struct netdevsim *ns) { struct dentry *ddir = ns->nsim_dev_port->ddir; - int err; + struct psp_dev *psd; + + psd = psp_dev_create(ns->netdev, &nsim_psp_ops, &nsim_psp_caps, ns); + if (IS_ERR(psd)) + return PTR_ERR(psd); - ns->psp.dev = psp_dev_create(ns->netdev, &nsim_psp_ops, - &nsim_psp_caps, ns); - err = PTR_ERR_OR_ZERO(ns->psp.dev); - if (err) - return err; + rcu_assign_pointer(ns->psp.dev, psd); mutex_init(&ns->psp.rereg_lock); ns->psp.rereg = debugfs_create_file("psp_rereg", 0200, ddir, ns, -- cgit v1.2.3 From 701ea57feaabdea403cf299ee5cd0445083bc0ac Mon Sep 17 00:00:00 2001 From: Shitalkumar Gandhi Date: Tue, 5 May 2026 18:02:36 +0530 Subject: net: rtsn: fix mdio_node leak in rtsn_mdio_alloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit of_get_child_by_name() takes a reference. The rtsn_reset() and rtsn_change_mode() failure paths jump to out_free_bus and leak mdio_node. Add out_put_node to drop it before falling through. Fixes: b0d3969d2b4d ("net: ethernet: rtsn: Add support for Renesas Ethernet-TSN") Signed-off-by: Shitalkumar Gandhi Reviewed-by: Geert Uytterhoeven Reviewed-by: Andrew Lunn Reviewed-by: Niklas Söderlund Link: https://patch.msgid.link/20260505123236.406000-1-shitalkumar.gandhi@cambiumnetworks.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rtsn.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c index 03a2669f0518..ee8381b60b8d 100644 --- a/drivers/net/ethernet/renesas/rtsn.c +++ b/drivers/net/ethernet/renesas/rtsn.c @@ -797,11 +797,11 @@ static int rtsn_mdio_alloc(struct rtsn_private *priv) /* Enter config mode before registering the MDIO bus */ ret = rtsn_reset(priv); if (ret) - goto out_free_bus; + goto out_put_node; ret = rtsn_change_mode(priv, OCR_OPC_CONFIG); if (ret) - goto out_free_bus; + goto out_put_node; rtsn_modify(priv, MPIC, MPIC_PSMCS_MASK | MPIC_PSMHT_MASK, MPIC_PSMCS_DEFAULT | MPIC_PSMHT_DEFAULT); @@ -824,6 +824,8 @@ static int rtsn_mdio_alloc(struct rtsn_private *priv) return 0; +out_put_node: + of_node_put(mdio_node); out_free_bus: mdiobus_free(mii); return ret; -- cgit v1.2.3 From 67ef49047d312be692c8c439145f4514174e517f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 May 2026 13:32:33 +0000 Subject: inetpeer: add a missing read_seqretry() in inet_getpeer() When performing a lockless lookup over the inet_peer rbtree, if a matching node is found, inet_getpeer() returns it immediately without validating the seqlock sequence. This missing check introduces a race condition: Trigger Path: When a host receives an incoming fragmented IPv4 packet, ip4_frag_init() (in net/ipv4/ip_fragment.c) calls inet_getpeer_v4() to track the peer. The Race: If the packet is from a new source IP, CPU A acquires the write_seqlock, allocates a new inet_peer node (p), sets its IP address (daddr), and links it to the rbtree (rb_link_node). Uninitialized Access: Due to the lack of memory barriers between rb_link_node and the initialization of the rest of the struct (like refcount_set(&p->refcnt, 1)), CPU A can make the node visible to readers before its refcnt is initialized. This is especially true on weakly-ordered architectures like ARM64 where the CPU can reorder the memory stores. Lockless Reader: Concurrently, CPU B processes a second fragmented packet from the same source IP. CPU B does a lockless lookup, finds the newly inserted node, and returns it immediately. Use-After-Free (UAF): CPU B reads p->refcnt as uninitialized garbage (left over from previous kmalloc-128/192 allocations). If the garbage is > 0, refcount_inc_not_zero(&p->refcnt) succeeds. CPU A then executes refcount_set(&p->refcnt, 1), overwriting CPU B's increment. When CPU B finishes with the fragment queue, it calls inet_putpeer(), which drops the refcount to 0 and frees the node via RCU. The node is now freed but remains linked in the rbtree, resulting in a Use-After-Free in the rbtree. Fixes: b145425f269a ("inetpeer: remove AVL implementation in favor of RB tree") Reported-by: Damiano Melotti Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260505133233.3039575-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/inetpeer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d8083b9033c2..5b957a831e7c 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -179,7 +179,8 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, seq = read_seqbegin(&base->lock); p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp); - if (p) + /* Make sure tree was not modified during our lookup. */ + if (p && !read_seqretry(&base->lock, seq)) return p; /* retry an exact lookup, taking the lock before. -- cgit v1.2.3 From 770b136ff9bf3e319d19875da59c4f7f4853da3a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 May 2026 09:11:33 +0000 Subject: net/sched: sch_sfq: annotate data-races from sfq_dump_class_stats() sfq_dump_class_stats() runs locklessly, add needed READ_ONCE() and WRITE_ONCE() annotations. Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260505091133.2452510-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_sfq.c | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index c3f3181dba54..f39822babf88 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -225,7 +225,8 @@ static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) sfq_unlink(q, x, n, p); - d = q->slots[x].qlen--; + d = q->slots[x].qlen; + WRITE_ONCE(q->slots[x].qlen, d - 1); if (n == p && q->cur_depth == d) q->cur_depth--; sfq_link(q, x); @@ -238,7 +239,8 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x) sfq_unlink(q, x, n, p); - d = ++q->slots[x].qlen; + d = q->slots[x].qlen + 1; + WRITE_ONCE(q->slots[x].qlen, d); if (q->cur_depth < d) q->cur_depth = d; sfq_link(q, x); @@ -298,7 +300,7 @@ static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free) drop: skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot); len = qdisc_pkt_len(skb); - slot->backlog -= len; + WRITE_ONCE(slot->backlog, slot->backlog - len); sfq_dec(q, x); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); @@ -314,7 +316,7 @@ drop: q->tail = NULL; /* no more active slots */ else q->tail->next = slot->next; - q->ht[slot->hash] = SFQ_EMPTY_SLOT; + WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT); goto drop; } @@ -364,10 +366,10 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) x = q->dep[0].next; /* get a free slot */ if (x >= SFQ_MAX_FLOWS) return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_MAXFLOWS); - q->ht[hash] = x; + WRITE_ONCE(q->ht[hash], x); slot = &q->slots[x]; slot->hash = hash; - slot->backlog = 0; /* should already be 0 anyway... */ + WRITE_ONCE(slot->backlog, 0); /* should already be 0 anyway... */ red_set_vars(&slot->vars); goto enqueue; } @@ -426,7 +428,7 @@ congestion_drop: head = slot_dequeue_head(slot); delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb); sch->qstats.backlog -= delta; - slot->backlog -= delta; + WRITE_ONCE(slot->backlog, slot->backlog - delta); qdisc_drop_reason(head, sch, to_free, QDISC_DROP_FLOW_LIMIT); slot_queue_add(slot, skb); @@ -436,7 +438,7 @@ congestion_drop: enqueue: qdisc_qstats_backlog_inc(sch, skb); - slot->backlog += qdisc_pkt_len(skb); + WRITE_ONCE(slot->backlog, slot->backlog + qdisc_pkt_len(skb)); slot_queue_add(slot, skb); sfq_inc(q, x); if (slot->qlen == 1) { /* The flow is new */ @@ -452,7 +454,7 @@ enqueue: */ q->tail = slot; /* We could use a bigger initial quantum for new flows */ - slot->allot = q->quantum; + WRITE_ONCE(slot->allot, q->quantum); } if (++sch->q.qlen <= q->limit) return NET_XMIT_SUCCESS; @@ -489,7 +491,7 @@ next_slot: slot = &q->slots[a]; if (slot->allot <= 0) { q->tail = slot; - slot->allot += q->quantum; + WRITE_ONCE(slot->allot, slot->allot + q->quantum); goto next_slot; } skb = slot_dequeue_head(slot); @@ -497,10 +499,10 @@ next_slot: qdisc_bstats_update(sch, skb); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); - slot->backlog -= qdisc_pkt_len(skb); + WRITE_ONCE(slot->backlog, slot->backlog - qdisc_pkt_len(skb)); /* Is the slot empty? */ if (slot->qlen == 0) { - q->ht[slot->hash] = SFQ_EMPTY_SLOT; + WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT); next_a = slot->next; if (a == next_a) { q->tail = NULL; /* no more active slots */ @@ -508,7 +510,7 @@ next_slot: } q->tail->next = next_a; } else { - slot->allot -= qdisc_pkt_len(skb); + WRITE_ONCE(slot->allot, slot->allot - qdisc_pkt_len(skb)); } return skb; } @@ -549,9 +551,9 @@ static void sfq_rehash(struct Qdisc *sch) sfq_dec(q, i); __skb_queue_tail(&list, skb); } - slot->backlog = 0; + WRITE_ONCE(slot->backlog, 0); red_set_vars(&slot->vars); - q->ht[slot->hash] = SFQ_EMPTY_SLOT; + WRITE_ONCE(q->ht[slot->hash], SFQ_EMPTY_SLOT); } q->tail = NULL; @@ -570,7 +572,7 @@ drop: dropped++; continue; } - q->ht[hash] = x; + WRITE_ONCE(q->ht[hash], x); slot = &q->slots[x]; slot->hash = hash; } @@ -581,7 +583,7 @@ drop: slot->vars.qavg = red_calc_qavg(q->red_parms, &slot->vars, slot->backlog); - slot->backlog += qdisc_pkt_len(skb); + WRITE_ONCE(slot->backlog, slot->backlog + qdisc_pkt_len(skb)); sfq_inc(q, x); if (slot->qlen == 1) { /* The flow is new */ if (q->tail == NULL) { /* It is the first flow */ @@ -591,7 +593,7 @@ drop: q->tail->next = x; } q->tail = slot; - slot->allot = q->quantum; + WRITE_ONCE(slot->allot, q->quantum); } } sch->q.qlen -= dropped; @@ -905,16 +907,16 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct gnet_dump *d) { struct sfq_sched_data *q = qdisc_priv(sch); - sfq_index idx = q->ht[cl - 1]; + sfq_index idx = READ_ONCE(q->ht[cl - 1]); struct gnet_stats_queue qs = { 0 }; struct tc_sfq_xstats xstats = { 0 }; if (idx != SFQ_EMPTY_SLOT) { const struct sfq_slot *slot = &q->slots[idx]; - xstats.allot = slot->allot; - qs.qlen = slot->qlen; - qs.backlog = slot->backlog; + xstats.allot = READ_ONCE(slot->allot); + qs.qlen = READ_ONCE(slot->qlen); + qs.backlog = READ_ONCE(slot->backlog); } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; @@ -930,7 +932,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < q->divisor; i++) { - if (q->ht[i] == SFQ_EMPTY_SLOT) { + if (READ_ONCE(q->ht[i]) == SFQ_EMPTY_SLOT) { arg->count++; continue; } -- cgit v1.2.3 From c8f7244c8cccaaed4e6c9fe4b8a07e101d0423e5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 May 2026 15:39:27 +0000 Subject: tcp: tcp_child_process() related UAF tcp_child_process( .. child ...) currently calls sock_put(child). Unfortunately @child (named @nsk in callers) can be used after this point to send a RST packet. To fix this UAF, I remove the sock_put() from tcp_child_process() and let the callers handle this after it is safe. Remove @rsk variable in tcp_v4_do_rcv() and change tcp_v6_do_rcv() so that both functions look the same. Fixes: cfb6eeb4c860 ("[TCP]: MD5 Signature Option (RFC2385) support.") Reported-by: Damiano Melotti Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260505153927.3435532-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ipv4.c | 14 ++++++-------- net/ipv4/tcp_minisocks.c | 2 +- net/ipv6/tcp_ipv6.c | 13 ++++++++----- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8fc24c3743c5..c0526cc03980 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1827,7 +1827,6 @@ INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason reason; - struct sock *rsk; reason = psp_sk_rx_policy_check(sk, skb); if (reason) @@ -1863,24 +1862,21 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; if (nsk != sk) { reason = tcp_child_process(sk, nsk, skb); - if (reason) { - rsk = nsk; + sock_put(nsk); + if (reason) goto reset; - } return 0; } } else sock_rps_save_rxhash(sk, skb); reason = tcp_rcv_state_process(sk, skb); - if (reason) { - rsk = sk; + if (reason) goto reset; - } return 0; reset: - tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason)); + tcp_v4_send_reset(sk, skb, sk_rst_convert_drop_reason(reason)); discard: sk_skb_reason_drop(sk, skb, reason); /* Be careful here. If this function gets more complicated and @@ -2193,8 +2189,10 @@ lookup: rst_reason = sk_rst_convert_drop_reason(drop_reason); tcp_v4_send_reset(nsk, skb, rst_reason); + sock_put(nsk); goto discard_and_relse; } + sock_put(nsk); sock_put(sk); return 0; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 199f0b579e89..e6092c3ac840 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -1012,6 +1012,6 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, } bh_unlock_sock(child); - sock_put(child); + return reason; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2c3f7a739709..51583aef0643 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1617,12 +1617,13 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_LISTEN) { struct sock *nsk = tcp_v6_cookie_check(sk, skb); + if (!nsk) + return 0; if (nsk != sk) { - if (nsk) { - reason = tcp_child_process(sk, nsk, skb); - if (reason) - goto reset; - } + reason = tcp_child_process(sk, nsk, skb); + sock_put(nsk); + if (reason) + goto reset; return 0; } } else @@ -1827,8 +1828,10 @@ lookup: rst_reason = sk_rst_convert_drop_reason(drop_reason); tcp_v6_send_reset(nsk, skb, rst_reason); + sock_put(nsk); goto discard_and_relse; } + sock_put(nsk); sock_put(sk); return 0; } -- cgit v1.2.3 From b12014d2d36eaed4e4bec5f1ac7e91110eeb100d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:49 +0200 Subject: mptcp: pm: kernel: correctly retransmit ADD_ADDR ID 0 When adding the ADD_ADDR to the list, the address including the IP, port and ID are copied. On the other hand, when the endpoint corresponds to the one from the initial subflow, the ID is set to 0, as specified by the MPTCP protocol. The issue is that the ID was reset after having copied the ID in the ADD_ADDR entry. So the retransmission was done, but using a different ID than the initial one. Fixes: 8b8ed1b429f8 ("mptcp: pm: reuse ID 0 after delete and re-add") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-1-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_kernel.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index c9f1e5af3cd3..fc818b63752e 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -347,6 +347,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check first for announce */ if (msk->pm.add_addr_signaled < endp_signal_max) { + u8 endp_id; + /* due to racing events on both ends we can reach here while * previous add address is still running: if we invoke now * mptcp_pm_announce_addr(), that will fail and the @@ -360,19 +362,20 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (!select_signal_address(pernet, msk, &local)) goto subflow; + /* Special case for ID0: set the correct ID */ + endp_id = local.addr.id; + if (endp_id == msk->mpc_endpoint_id) + local.addr.id = 0; + /* If the alloc fails, we are on memory pressure, not worth * continuing, and trying to create subflows. */ if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) return; - __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + __clear_bit(endp_id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; - /* Special case for ID0: set the correct ID */ - if (local.addr.id == msk->mpc_endpoint_id) - local.addr.id = 0; - mptcp_pm_announce_addr(msk, &local.addr, false); mptcp_pm_addr_send_ack(msk); -- cgit v1.2.3 From 03f324f3f1f7619a47b9c91282cb12775ab0a2f1 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:50 +0200 Subject: mptcp: pm: ADD_ADDR rtx: allow ID 0 ADD_ADDR can be sent for the ID 0, which corresponds to the local address and port linked to the initial subflow. Indeed, this address could be removed, and re-added later on, e.g. what is done in the "delete re-add signal" MPTCP Join selftests. So no reason to ignore it. Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-2-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 57a456690406..5056eb8db24e 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -337,9 +337,6 @@ static void mptcp_pm_add_timer(struct timer_list *timer) if (inet_sk_state_load(sk) == TCP_CLOSE) return; - if (!entry->addr.id) - return; - if (mptcp_pm_should_add_signal_addr(msk)) { sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); goto out; -- cgit v1.2.3 From 5cd6e0ad79d2615264f63929f8b457ad97ae550d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:51 +0200 Subject: mptcp: pm: ADD_ADDR rtx: fix potential data-race This mptcp_pm_add_timer() helper is executed as a timer callback in softirq context. To avoid any data races, the socket lock needs to be held with bh_lock_sock(). If the socket is in use, retry again soon after, similar to what is done with the keepalive timer. Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-3-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 5056eb8db24e..3912128d9b86 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -337,6 +337,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer) if (inet_sk_state_load(sk) == TCP_CLOSE) return; + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + sk_reset_timer(sk, timer, jiffies + HZ / 20); + goto out; + } + if (mptcp_pm_should_add_signal_addr(msk)) { sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); goto out; @@ -365,6 +372,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) mptcp_pm_subflow_established(msk); out: + bh_unlock_sock(sk); __sock_put(sk); } -- cgit v1.2.3 From 9634cb35af17019baec21ca648516ce376fa10e6 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:52 +0200 Subject: mptcp: pm: ADD_ADDR rtx: always decrease sk refcount When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer(). It should then be released in all cases at the end. Some (unlikely) checks were returning directly instead of calling sock_put() to decrease the refcount. Jump to a new 'exit' label to call __sock_put() (which will become sock_put() in the next commit) to fix this potential leak. While at it, drop the '!msk' check which cannot happen because it is never reset, and explicitly mark the remaining one as "unlikely". Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-4-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 3912128d9b86..2a01bf1b5bfd 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -331,11 +331,8 @@ static void mptcp_pm_add_timer(struct timer_list *timer) pr_debug("msk=%p\n", msk); - if (!msk) - return; - - if (inet_sk_state_load(sk) == TCP_CLOSE) - return; + if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE)) + goto exit; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -373,6 +370,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) out: bh_unlock_sock(sk); +exit: __sock_put(sk); } -- cgit v1.2.3 From b7b9a461569734d33d3259d58d2507adfac107ed Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:53 +0200 Subject: mptcp: pm: ADD_ADDR rtx: free sk if last When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer(), and released at the end. If at that moment, it was the last reference being held, the sk would not be freed. sock_put() should then be called instead of __sock_put(). But that's not enough: if it is the last reference, sock_put() will call sk_free(), which will end up calling sk_stop_timer_sync() on the same timer, and waiting indefinitely to finish. So it is needed to mark that the timer is done at the end of the timer handler when it has not been rescheduled, not to call sk_stop_timer_sync() on "itself". Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-5-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 2a01bf1b5bfd..8899327e59a1 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -16,6 +16,7 @@ struct mptcp_pm_add_entry { struct list_head list; struct mptcp_addr_info addr; u8 retrans_times; + bool timer_done; struct timer_list add_timer; struct mptcp_sock *sock; struct rcu_head rcu; @@ -327,22 +328,22 @@ static void mptcp_pm_add_timer(struct timer_list *timer) add_timer); struct mptcp_sock *msk = entry->sock; struct sock *sk = (struct sock *)msk; - unsigned int timeout; + unsigned int timeout = 0; pr_debug("msk=%p\n", msk); + bh_lock_sock(sk); if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE)) - goto exit; + goto out; - bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. */ - sk_reset_timer(sk, timer, jiffies + HZ / 20); + timeout = HZ / 20; goto out; } if (mptcp_pm_should_add_signal_addr(msk)) { - sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); + timeout = TCP_RTO_MAX / 8; goto out; } @@ -360,8 +361,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer) } if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) - sk_reset_timer(sk, timer, - jiffies + (timeout << entry->retrans_times)); + timeout <<= entry->retrans_times; + else + timeout = 0; spin_unlock_bh(&msk->pm.lock); @@ -369,9 +371,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer) mptcp_pm_subflow_established(msk); out: + if (timeout) + sk_reset_timer(sk, timer, jiffies + timeout); + else + /* if sock_put calls sk_free: avoid waiting for this timer */ + entry->timer_done = true; bh_unlock_sock(sk); -exit: - __sock_put(sk); + sock_put(sk); } struct mptcp_pm_add_entry * @@ -434,6 +440,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); reset_timer: + add_entry->timer_done = false; timeout = mptcp_adjust_add_addr_timeout(msk); if (timeout) sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout); @@ -454,7 +461,8 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk) spin_unlock_bh(&msk->pm.lock); list_for_each_entry_safe(entry, tmp, &free_list, list) { - sk_stop_timer_sync(sk, &entry->add_timer); + if (!entry->timer_done) + sk_stop_timer_sync(sk, &entry->add_timer); kfree_rcu(entry, rcu); } } -- cgit v1.2.3 From 3cf12492891c4b5ff54dda404a2de4ec54c9e1b5 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:54 +0200 Subject: mptcp: pm: ADD_ADDR rtx: resched blocked ADD_ADDR quicker When an ADD_ADDR needs to be retransmitted and another one has already been prepared -- e.g. multiple ADD_ADDRs have been sent in a row and need to be retransmitted later -- this additional retransmission will need to wait. In this case, the timer was reset to TCP_RTO_MAX / 8, which is ~15 seconds. This delay is unnecessary long: it should just be rescheduled at the next opportunity, e.g. after the retransmission timeout. Without this modification, some issues can be seen from time to time in the selftests when multiple ADD_ADDRs are sent, and the host takes time to process them, e.g. the "signal addresses, ADD_ADDR timeout" MPTCP Join selftest, especially with a debug kernel config. Note that on older kernels, 'timeout' is not available. It should be enough to replace it by one second (HZ). Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-6-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 8899327e59a1..29d1bb6a69cf 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -342,13 +342,8 @@ static void mptcp_pm_add_timer(struct timer_list *timer) goto out; } - if (mptcp_pm_should_add_signal_addr(msk)) { - timeout = TCP_RTO_MAX / 8; - goto out; - } - timeout = mptcp_adjust_add_addr_timeout(msk); - if (!timeout) + if (!timeout || mptcp_pm_should_add_signal_addr(msk)) goto out; spin_lock_bh(&msk->pm.lock); -- cgit v1.2.3 From c6d395e2de1306b5fef0344a3c3835fbbfaa18be Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:55 +0200 Subject: mptcp: pm: ADD_ADDR rtx: skip inactive subflows When looking at the maximum RTO amongst the subflows, inactive subflows were taken into account: that includes stale ones, and the initial one if it has been already been closed. Unusable subflows are now simply skipped. Stale ones are used as an alternative: if there are only stale ones, to take their maximum RTO and avoid to eventually fallback to net.mptcp.add_addr_timeout, which is set to 2 minutes by default. Fixes: 30549eebc4d8 ("mptcp: make ADD_ADDR retransmission timeout adaptive") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-7-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 29d1bb6a69cf..8a5dba7fe66e 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -306,18 +306,28 @@ static unsigned int mptcp_adjust_add_addr_timeout(struct mptcp_sock *msk) const struct net *net = sock_net((struct sock *)msk); unsigned int rto = mptcp_get_add_addr_timeout(net); struct mptcp_subflow_context *subflow; - unsigned int max = 0; + unsigned int max = 0, max_stale = 0; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct inet_connection_sock *icsk = inet_csk(ssk); - if (icsk->icsk_rto > max) + if (!__mptcp_subflow_active(subflow)) + continue; + + if (unlikely(subflow->stale)) { + if (icsk->icsk_rto > max_stale) + max_stale = icsk->icsk_rto; + } else if (icsk->icsk_rto > max) { max = icsk->icsk_rto; + } } - if (max && max < rto) - rto = max; + if (max) + return min(max, rto); + + if (max_stale) + return min(max_stale, rto); return rto; } -- cgit v1.2.3 From 62a9b19dce77e72426f049fb99b9d1d032b9a8ea Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:56 +0200 Subject: mptcp: pm: ADD_ADDR rtx: return early if no retrans No need to iterate over all subflows if there is no retransmission needed. Exit early in this case then. Fixes: 30549eebc4d8 ("mptcp: make ADD_ADDR retransmission timeout adaptive") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-8-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 8a5dba7fe66e..4a6e5ab30d80 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -308,6 +308,9 @@ static unsigned int mptcp_adjust_add_addr_timeout(struct mptcp_sock *msk) struct mptcp_subflow_context *subflow; unsigned int max = 0, max_stale = 0; + if (!rto) + return 0; + mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct inet_connection_sock *icsk = inet_csk(ssk); -- cgit v1.2.3 From 166b78344031bf7ac9f55cb5282776cfd85f220e Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:57 +0200 Subject: mptcp: pm: prio: skip closed subflows When sending an MP_PRIO, closed subflows need to be skipped. This fixes the case where the initial subflow got closed, re-opened later, then an MP_PRIO is needed for the same local address. Note that explicit MP_PRIO cannot be sent during the 3WHS, so it is fine to use __mptcp_subflow_active(). Fixes: 067065422fcd ("mptcp: add the outgoing MP_PRIO support") Cc: stable@vger.kernel.org Fixes: b29fcfb54cd7 ("mptcp: full disconnect implementation") Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-9-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 4a6e5ab30d80..3c152bf66cd5 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -284,6 +284,9 @@ int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct mptcp_addr_info local, remote; + if (!__mptcp_subflow_active(subflow)) + continue; + mptcp_local_address((struct sock_common *)ssk, &local); if (!mptcp_addresses_equal(&local, addr, addr->port)) continue; -- cgit v1.2.3 From 65db7b27b90e2ea8d4966935aa9a50b6a60c31ac Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:58 +0200 Subject: selftests: mptcp: check output: catch cmd errors Using '${?}' inside the if-statement to check the returned value from the command that was evaluated as part of the if-statement is not correct: here, '${?}' will be linked to the previous instruction, not the one that is expected here (${cmd}). Instead, simply mark the error, except if an error is expected. If that's the case, 1 can be passed as the 4th argument of this helper. Three checks from pm_netlink.sh expect an error. While at it, improve the error message when the command unexpectedly fails or succeeds. Note that we could expect a specific returned value, but the checks currently expecting an error can be used with 'ip mptcp' or 'pm_nl_ctl', and these two tools don't return the same error code. Fixes: 2d0c1d27ea4e ("selftests: mptcp: add mptcp_lib_check_output helper") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-10-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 16 ++++++++++------ tools/testing/selftests/net/mptcp/pm_netlink.sh | 10 ++++++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 5fea7e7df628..989a5975dcea 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -474,20 +474,24 @@ mptcp_lib_wait_local_port_listen() { wait_local_port_listen "${@}" "tcp" } +# $1: error file, $2: cmd, $3: expected msg, [$4: expected error] mptcp_lib_check_output() { local err="${1}" local cmd="${2}" local expected="${3}" + local exp_error="${4:-0}" local cmd_ret=0 local out - if ! out=$(${cmd} 2>"${err}"); then - cmd_ret=${?} - fi + out=$(${cmd} 2>"${err}") || cmd_ret=1 - if [ ${cmd_ret} -ne 0 ]; then - mptcp_lib_pr_fail "command execution '${cmd}' stderr" - cat "${err}" + if [ "${cmd_ret}" != "${exp_error}" ]; then + mptcp_lib_pr_fail "unexpected returned code for '${cmd}', info:" + if [ "${exp_error}" = 0 ]; then + cat "${err}" + else + echo "${out}" + fi return 2 elif [ "${out}" = "${expected}" ]; then return 0 diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 123d9d7a0278..b69f30fcb91e 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -122,10 +122,12 @@ check() local cmd="$1" local expected="$2" local msg="$3" + local exp_error="$4" local rc=0 mptcp_lib_print_title "$msg" - mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} + mptcp_lib_check_output "${err}" "${cmd}" "${expected}" "${exp_error}" || + rc=${?} if [ ${rc} -eq 2 ]; then mptcp_lib_result_fail "${msg} # error ${rc}" ret=${KSFT_FAIL} @@ -158,13 +160,13 @@ check "show_endpoints" \ "3,10.0.1.3,signal backup")" "dump addrs" del_endpoint 2 -check "get_endpoint 2" "" "simple del addr" +check "get_endpoint 2" "" "simple del addr" 1 check "show_endpoints" \ "$(format_endpoints "1,10.0.1.1" \ "3,10.0.1.3,signal backup")" "dump addrs after del" add_endpoint 10.0.1.3 2>/dev/null -check "get_endpoint 4" "" "duplicate addr" +check "get_endpoint 4" "" "duplicate addr" 1 add_endpoint 10.0.1.4 flags signal check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment" @@ -173,7 +175,7 @@ for i in $(seq 5 9); do add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1 done check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit" -check "get_endpoint 10" "" "above hard addr limit" +check "get_endpoint 10" "" "above hard addr limit" 1 del_endpoint 9 for i in $(seq 10 255); do -- cgit v1.2.3 From 53705ddfa18408f8e1f064331b6387509fa19f7f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 5 May 2026 17:00:59 +0200 Subject: selftests: mptcp: pm: restrict 'unknown' check to pm_nl_ctl When pm_netlink.sh is executed with '-i', 'ip mptcp' is used instead of 'pm_nl_ctl'. IPRoute2 doesn't support the 'unknown' flag, which has only been added to 'pm_nl_ctl' for this specific check: to ensure that the kernel ignores such unsupported flag. No reason to add this flag to 'ip mptcp'. Then, this check should be skipped when 'ip mptcp' is used. Fixes: 0cef6fcac24d ("selftests: mptcp: ip_mptcp option for more scripts") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260505-net-mptcp-pm-fixes-7-1-rc3-v1-11-fca8091060a4@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index b69f30fcb91e..04594dfc22b1 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -194,9 +194,13 @@ check "show_endpoints" \ flush_endpoint check "show_endpoints" "" "flush addrs" -add_endpoint 10.0.1.1 flags unknown -check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags" -flush_endpoint +# "unknown" flag is only supported by pm_nl_ctl +if ! mptcp_lib_is_ip_mptcp; then + add_endpoint 10.0.1.1 flags unknown + check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" \ + "ignore unknown flags" + flush_endpoint +fi set_limits 9 1 2>/dev/null check "get_limits" "${default_limits}" "rcv addrs above hard limit" -- cgit v1.2.3 From b266bacba796ff5c4dcd2ae2fc08aacf7ab39153 Mon Sep 17 00:00:00 2001 From: Andreas Haarmann-Thiemann Date: Tue, 5 May 2026 23:52:17 +0200 Subject: net: ethernet: cortina: Drop half-assembled SKB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In gmac_rx() (drivers/net/ethernet/cortina/gemini.c), when gmac_get_queue_page() returns NULL for the second page of a multi-page fragment, the driver logs an error and continues — but does not free the partially assembled skb that was being assembled via napi_build_skb() / napi_get_frags(). Free the in-progress partially assembled skb via napi_free_frags() and increase the number of dropped frames appropriately and assign the skb pointer NULL to make sure it is not lingering around, matching the pattern already used elsewhere in the driver. Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet") Signed-off-by: Andreas Haarmann-Thiemann Signed-off-by: Linus Walleij Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20260505-gemini-ethernet-fix-v2-1-997c31d06079@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cortina/gemini.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 4824232f4890..065cbbf52686 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1491,6 +1491,11 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) gpage = gmac_get_queue_page(geth, port, mapping + PAGE_SIZE); if (!gpage) { dev_err(geth->dev, "could not find mapping\n"); + if (skb) { + napi_free_frags(&port->napi); + port->stats.rx_dropped++; + skb = NULL; + } continue; } page = gpage->page; -- cgit v1.2.3 From 7e2a4f7ca0952820731ef7bdadfc9a9e9d3571b4 Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Mon, 4 May 2026 22:27:36 +0800 Subject: xfrm: route MIGRATE notifications to caller's netns xfrm_send_migrate() in net/xfrm/xfrm_user.c and pfkey_send_migrate() in net/key/af_key.c both hardcode &init_net for the multicast that announces a successful XFRM_MSG_MIGRATE / SADB_X_MIGRATE. XFRM_MSG_MIGRATE arrives on a per-netns NETLINK_XFRM socket, and the rest of the xfrm/af_key netlink path was made netns-aware in 2008. The other 14 multicast paths in xfrm_user.c route their event using xs_net(x), xp_net(xp) or sock_net(skb->sk); only the migrate path was missed. Two consequences of the init_net hardcoding: 1. The notification (selector, old/new endpoint addresses, and the km_address) is delivered to listeners on init_net's XFRMNLGRP_MIGRATE / pfkey BROADCAST_ALL groups rather than on the issuing netns. An IKE daemon running in init_net therefore receives migration notifications originating from any other netns on the host. 2. An IKE daemon running inside a non-init netns and subscribed to its own XFRMNLGRP_MIGRATE / pfkey groups never receives the notification of its own migration. IKEv2 MOBIKE / address-update handling inside a netns is silently broken. Thread struct net through km_migrate() and the xfrm_mgr.migrate function pointer, drop the &init_net override in xfrm_send_migrate() and pfkey_send_migrate(), and pass the caller's net (already in scope in xfrm_migrate() via sock_net(skb->sk)) all the way down. struct xfrm_mgr is in-tree only and not exported as a stable API, so the function-pointer signature change is internal. pfkey_broadcast() is already netns-aware via net_generic(net, pfkey_net_id) since the pernet conversion. The five other pfkey_broadcast() callers in af_key.c already pass xs_net(x), sock_net(sk) or a per-netns net, so this only removes the &init_net outlier. Fixes: 5c79de6e79cd ("[XFRM]: User interface for handling XFRM_MSG_MIGRATE") Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Maoyi Xie Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 3 ++- net/key/af_key.c | 6 +++--- net/xfrm/xfrm_policy.c | 2 +- net/xfrm/xfrm_state.c | 4 ++-- net/xfrm/xfrm_user.c | 5 ++--- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 10d3edde6b2f..874409127e29 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -715,6 +715,7 @@ struct xfrm_mgr { const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, + struct net *net, const struct xfrm_encap_tmpl *encap); bool (*is_alive)(const struct km_event *c); }; @@ -1891,7 +1892,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); #ifdef CONFIG_XFRM_MIGRATE int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, - const struct xfrm_kmaddress *k, + const struct xfrm_kmaddress *k, struct net *net, const struct xfrm_encap_tmpl *encap); struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net, u32 if_id); diff --git a/net/key/af_key.c b/net/key/af_key.c index a166a88d8788..9cffeef18cd9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3564,7 +3564,7 @@ static int set_ipsecrequest(struct sk_buff *skb, #ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, - const struct xfrm_kmaddress *k, + const struct xfrm_kmaddress *k, struct net *net, const struct xfrm_encap_tmpl *encap) { int i; @@ -3669,7 +3669,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* broadcast migrate message to sockets */ - pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, net); return 0; @@ -3680,7 +3680,7 @@ err: #else static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, - const struct xfrm_kmaddress *k, + const struct xfrm_kmaddress *k, struct net *net, const struct xfrm_encap_tmpl *encap) { return -ENOPROTOOPT; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c944327ce66c..59968dcbafe1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4703,7 +4703,7 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 5 - announce */ - km_migrate(sel, dir, type, m, num_migrate, k, encap); + km_migrate(sel, dir, type, m, num_migrate, k, net, encap); xfrm_pol_put(pol); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 686014d39429..395d82411a87 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2837,7 +2837,7 @@ EXPORT_SYMBOL(km_policy_expired); #ifdef CONFIG_XFRM_MIGRATE int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_migrate, - const struct xfrm_kmaddress *k, + const struct xfrm_kmaddress *k, struct net *net, const struct xfrm_encap_tmpl *encap) { int err = -EINVAL; @@ -2848,7 +2848,7 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->migrate) { ret = km->migrate(sel, dir, type, m, num_migrate, k, - encap); + net, encap); if (!ret) err = ret; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 38a90e5ee3d9..71a4b7278eba 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -3271,10 +3271,9 @@ out_cancel: static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_migrate, - const struct xfrm_kmaddress *k, + const struct xfrm_kmaddress *k, struct net *net, const struct xfrm_encap_tmpl *encap) { - struct net *net = &init_net; struct sk_buff *skb; int err; @@ -3292,7 +3291,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, #else static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_migrate, - const struct xfrm_kmaddress *k, + const struct xfrm_kmaddress *k, struct net *net, const struct xfrm_encap_tmpl *encap) { return -ENOPROTOOPT; -- cgit v1.2.3 From 5772f6535227ebd104065d80afa8ed3478d34c5c Mon Sep 17 00:00:00 2001 From: David Gow Date: Thu, 16 Apr 2026 14:57:43 +0800 Subject: x86/boot/e820: Re-enable BIOS fallback if e820 table is empty In commit: 157266edcc56 ("x86/boot/e820: Simplify append_e820_table() and remove restriction on single-entry tables") the check on the number of entries in the e820 table was removed. The intention was to support single-entry maps, but by removing the check entirely, we also skip the fallback (to, e.g., the BIOS 88h function). This means that if no E820 map is passed in from the bootloader (which is the case on some bootloaders, like linld), we end up with an empty memory map, and the kernel fails to boot (either by deadlocking on OOM, or by failing to allocate the real mode trampoline, or similar). Re-instate the check in append_e820_table(), but only check that nr_entries is non-zero. This allows e820__memory_setup_default() to fall back to other memory size sources, and doesn't affect e820__memory_setup_extended(), as the latter ignores the return value from append_e820_table(). In doing so, we also update the return values to be proper error codes, with -ENOENT for this case (there are no entries), and -EINVAL for the case where an entry appears invalid. Given none of the callers check the actual value -- just whether it's nonzero -- this is largely aesthetic in practice. Tested against linld, and the kernel boots again fine. [ mingo: Readability edits to the comment and the changelog. ] Fixes: 157266edcc56 ("x86/boot/e820: Simplify append_e820_table() and remove restriction on single-entry tables") Signed-off-by: David Gow Signed-off-by: Ingo Molnar Reviewed-by: Andy Shevchenko Cc: stable@vger.kernel.org Cc: Arnd Bergmann Cc: "H. Peter Anvin" Link: https://patch.msgid.link/20260416065746.1896647-1-david@davidgow.net --- arch/x86/kernel/e820.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 2a9992758933..eb72537bc0b1 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -450,6 +450,10 @@ __init static int append_e820_table(struct boot_e820_entry *entries, u32 nr_entr { struct boot_e820_entry *entry = entries; + /* If there aren't any entries, we'll want to fall back to another source: */ + if (!nr_entries) + return -ENOENT; + while (nr_entries) { u64 start = entry->addr; u64 size = entry->size; @@ -458,7 +462,7 @@ __init static int append_e820_table(struct boot_e820_entry *entries, u32 nr_entr /* Ignore the remaining entries on 64-bit overflow: */ if (start > end && likely(size)) - return -1; + return -EINVAL; e820__range_add(start, size, type); -- cgit v1.2.3 From b15838b03cd0c6cf35651cfde62d17f14bb1d566 Mon Sep 17 00:00:00 2001 From: Myeonghun Pak Date: Fri, 24 Apr 2026 21:34:28 +0900 Subject: drm/bochs: Drop manual put on probe error path bochs_pci_probe() allocates the DRM device with devm_drm_dev_alloc(), which registers a devres action to drop the initial DRM device reference on driver detach or probe failure. The error path currently calls drm_dev_put() manually. If probe then returns an error, devres will run the registered release action and put the same device again, after the first put may already have released it. Return the probe error directly and let devres own the final put. Signed-off-by: Myeonghun Pak Fixes: 04826f588682 ("drm/bochs: Allocate DRM device in struct bochs_device") Signed-off-by: Thomas Zimmermann Reviewed-by: Thomas Zimmermann Link: https://patch.msgid.link/20260424123506.32275-1-mhun512@gmail.com --- drivers/gpu/drm/tiny/bochs.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index 222e4ae1abbd..5d8dc5efec77 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c @@ -761,25 +761,21 @@ static int bochs_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent ret = pcim_enable_device(pdev); if (ret) - goto err_free_dev; + return ret; pci_set_drvdata(pdev, dev); ret = bochs_load(bochs); if (ret) - goto err_free_dev; + return ret; ret = drm_dev_register(dev, 0); if (ret) - goto err_free_dev; + return ret; drm_client_setup(dev, NULL); return ret; - -err_free_dev: - drm_dev_put(dev); - return ret; } static void bochs_pci_remove(struct pci_dev *pdev) -- cgit v1.2.3 From 3051cd060fa496df42954291fa2306ed2eab4ecc Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Wed, 6 May 2026 01:55:11 +0800 Subject: i2c: smbus: reject oversized block transfers in the common path The SMBus block transfer length data->block[0] is validated in i2c_smbus_xfer_emulated() but that check runs too late for tracepoints and is skipped entirely when the adapter provides a native smbus_xfer implementation. This allows user-controlled oversized block lengths to reach tracepoint memcpy calls and driver callbacks unchecked. Add an early validation in __i2c_smbus_xfer() that rejects block transfers whose caller-supplied length is zero or exceeds I2C_SMBUS_BLOCK_MAX before any tracepoint fires or driver callback runs. data->block[0] is filled in by the device on SMBus block reads, so the check is scoped to operations where the length is actually supplied by the caller. This is consistent with the existing -EINVAL convention in the emulated path and protects all downstream consumers at once: the smbus_write tracepoint, all native smbus_xfer driver implementations, and the emulated path. Two distinct bugs are fixed by this change: Bug 1: smbus_write tracepoint OOB (include/trace/events/smbus.h) trace_smbus_write() fires before any validation and copies data->block[0]+1 bytes into a 34-byte event buffer. With block[0]=0xfe the tracepoint copies 255 bytes, overflowing by 221. BUG: KASAN: stack-out-of-bounds in trace_event_raw_event_smbus_write+0x27c/0x530 Read of size 255 at addr ffff88800d98fcf8 by task poc_smbus/91 Call Trace: __asan_memcpy+0x23/0x80 trace_event_raw_event_smbus_write+0x27c/0x530 __i2c_smbus_xfer+0x43a/0xa40 i2c_smbus_xfer+0x19e/0x340 i2cdev_ioctl_smbus+0x38f/0x7f0 i2cdev_ioctl+0x35e/0x680 __x64_sys_ioctl+0x147/0x1e0 do_syscall_64+0xcf/0x15a0 entry_SYSCALL_64_after_hwframe+0x76/0x7e Bug 2: i2c-stub I2C_SMBUS_I2C_BLOCK_DATA OOB (drivers/i2c/i2c-stub.c) stub_xfer() implements .smbus_xfer directly and only clamps block[0] against 256-command, not I2C_SMBUS_BLOCK_MAX. With block[0]=0xff and command=0 the loop accesses block[1+i] for i up to 254, far past the 34-byte union. UBSAN: array-index-out-of-bounds in drivers/i2c/i2c-stub.c:223:44 index 34 is out of range for type '__u8 [34]' Call Trace: __ubsan_handle_out_of_bounds+0xd7/0x120 stub_xfer+0x1971/0x198f [i2c_stub] __i2c_smbus_xfer+0x306/0xa40 i2c_smbus_xfer+0x19e/0x340 i2cdev_ioctl_smbus+0x38f/0x7f0 i2cdev_ioctl+0x35e/0x680 __x64_sys_ioctl+0x147/0x1e0 do_syscall_64+0xcf/0x15a0 entry_SYSCALL_64_after_hwframe+0x76/0x7e Both traces reproduced on v7.0-rc6+i2c/for-current with KASAN+UBSAN. Fixes: 8a325997d95d ("i2c: Add message transfer tracepoints for SMBUS [ver #2]") Fixes: 4710317891e4 ("i2c-stub: Implement I2C block support") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-smbus.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c index 71eb1ef56f0c..ad6acb5ebadc 100644 --- a/drivers/i2c/i2c-core-smbus.c +++ b/drivers/i2c/i2c-core-smbus.c @@ -566,6 +566,18 @@ s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, if (res) return res; + /* Reject invalid caller-supplied block lengths before any + * tracepoint or native smbus_xfer callback runs. + */ + if (data && + (protocol == I2C_SMBUS_I2C_BLOCK_DATA || + protocol == I2C_SMBUS_BLOCK_PROC_CALL || + (protocol == I2C_SMBUS_BLOCK_DATA && + read_write == I2C_SMBUS_WRITE)) && + (data->block[0] == 0 || + data->block[0] > I2C_SMBUS_BLOCK_MAX)) + return -EINVAL; + /* If enabled, the following two tracepoints are conditional on * read_write and protocol. */ -- cgit v1.2.3 From 593dfd40a94ca0ab20297ea4629d94268deed0ed Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Mon, 4 May 2026 18:42:11 -0700 Subject: eth: fbnic: fix double-free of PCS on phylink creation failure fbnic_phylink_create() stores the newly allocated PCS in fbn->pcs and then calls phylink_create(). When phylink_create() fails, the error path correctly destroys the PCS via xpcs_destroy_pcs(), but the caller, fbnic_netdev_alloc(), responds by invoking fbnic_netdev_free() which calls fbnic_phylink_destroy(). That function finds fbn->pcs non-NULL and calls xpcs_destroy_pcs() a second time on the already-freed object, triggering a refcount underflow use-after-free: [ 1.934973] fbnic 0000:01:00.0: Failed to create Phylink interface, err: -22 [ 1.935103] ------------[ cut here ]------------ [ 1.935179] refcount_t: underflow; use-after-free. [ 1.935252] WARNING: lib/refcount.c:28 at refcount_warn_saturate+0x59/0x90, CPU#0: swapper/0/1 [ 1.935389] Modules linked in: [ 1.935484] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 7.0.0-virtme-04244-g1f5ffc672165-dirty #1 PREEMPT(lazy) [ 1.935661] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [ 1.935826] RIP: 0010:refcount_warn_saturate+0x59/0x90 [ 1.935931] Code: 44 48 8d 3d 49 f9 a7 01 67 48 0f b9 3a e9 bf 1e 96 00 48 8d 3d 48 f9 a7 01 67 48 0f b9 3a c3 cc cc cc cc 48 8d 3d 47 f9 a7 01 <67> 48 0f b9 3a c3 cc cc cc cc 48 8d 3d 46 f9 a7 01 67 48 0f b9 3a [ 1.936274] RSP: 0000:ffffd0d440013c58 EFLAGS: 00010246 [ 1.936376] RAX: 0000000000000000 RBX: ffff8f39c188c278 RCX: 000000000000002b [ 1.936524] RDX: ffff8f39c004f000 RSI: 0000000000000003 RDI: ffffffff96abab00 [ 1.936692] RBP: ffff8f39c188c240 R08: ffffffff96988e88 R09: 00000000ffffdfff [ 1.936835] R10: ffffffff96878ea0 R11: 0000000000000187 R12: 0000000000000000 [ 1.936970] R13: ffff8f39c0cef0c8 R14: ffff8f39c1ac01c0 R15: 0000000000000000 [ 1.937114] FS: 0000000000000000(0000) GS:ffff8f3ba08b4000(0000) knlGS:0000000000000000 [ 1.937273] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1.937382] CR2: ffff8f3b3ffff000 CR3: 0000000172642001 CR4: 0000000000372ef0 [ 1.937540] Call Trace: [ 1.937619] [ 1.937698] xpcs_destroy_pcs+0x25/0x40 [ 1.937783] fbnic_netdev_alloc+0x1e5/0x200 [ 1.937859] fbnic_probe+0x230/0x370 [ 1.937939] local_pci_probe+0x3e/0x90 [ 1.938013] pci_device_probe+0xbb/0x1e0 [ 1.938091] ? sysfs_do_create_link_sd+0x6d/0xe0 [ 1.938188] really_probe+0xc1/0x2b0 [ 1.938282] __driver_probe_device+0x73/0x120 [ 1.938371] driver_probe_device+0x1e/0xe0 [ 1.938466] __driver_attach+0x8d/0x190 [ 1.938560] ? __pfx___driver_attach+0x10/0x10 [ 1.938663] bus_for_each_dev+0x7b/0xd0 [ 1.938758] bus_add_driver+0xe8/0x210 [ 1.938854] driver_register+0x60/0x120 [ 1.938929] ? __pfx_fbnic_init_module+0x10/0x10 [ 1.939026] fbnic_init_module+0x25/0x60 [ 1.939109] do_one_initcall+0x49/0x220 [ 1.939202] ? rdinit_setup+0x20/0x40 [ 1.939304] kernel_init_freeable+0x1b0/0x310 [ 1.939449] ? __pfx_kernel_init+0x10/0x10 [ 1.939560] kernel_init+0x1a/0x1c0 [ 1.939640] ret_from_fork+0x1ed/0x240 [ 1.939730] ? __pfx_kernel_init+0x10/0x10 [ 1.939805] ret_from_fork_asm+0x1a/0x30 [ 1.939886] [ 1.939927] ---[ end trace 0000000000000000 ]--- [ 1.940184] fbnic 0000:01:00.0: Netdev allocation failed Instead of calling fbnic_phylink_destroy(), the prior initialization of netdev should just be unrolled with free_netdev() and clearing fbd->netdev. Clearing fbd->netdev to NULL avoids UAF in init_failure_mode where callers guard by checking !fbd->netdev, such as fbnic_mdio_read_pmd(). These callers remain active even after a failed probe, so fdb->netdev still needs to be cleared. Fixes: d0fe7104c795 ("fbnic: Replace use of internal PCS w/ Designware XPCS") Signed-off-by: Bobby Eshleman Link: https://patch.msgid.link/20260504-fbnic-pcs-fix-v2-1-de45192821d9@meta.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_netdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index c406a3b56b37..4dea2bb58d2f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -826,7 +826,8 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd) netif_tx_stop_all_queues(netdev); if (fbnic_phylink_create(netdev)) { - fbnic_netdev_free(fbd); + free_netdev(netdev); + fbd->netdev = NULL; return NULL; } -- cgit v1.2.3 From f040e590c035bfd9553fe79ee9585caf1b14d67b Mon Sep 17 00:00:00 2001 From: Ashutosh Desai Date: Tue, 5 May 2026 17:07:12 +0000 Subject: nfc: hci: fix out-of-bounds read in HCP header parsing Both nfc_hci_recv_from_llc() and nci_hci_data_received_cb() read packet->header from skb->data at function entry without first checking that the buffer holds at least one byte. A malicious NFC peer can send a 0-byte HCP frame that passes through the SHDLC layer and reaches these functions, causing an out-of-bounds heap read of packet->header. The same 0-byte frame, if queued as a non-final fragment, also causes the reassembly loop to underflow msg_len to UINT_MAX, triggering skb_over_panic() when the reassembled skb is written. Fix this by adding a pskb_may_pull() check at the entry of each function before packet->header is first accessed. The existing pskb_may_pull() checks before the reassembled hcp_skb is cast to struct hcp_packet remain in place to guard the 2-byte HCP message header. Fixes: 8b8d2e08bf0d ("NFC: HCI support") Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Cc: stable@vger.kernel.org Reviewed-by: Simon Horman Signed-off-by: Ashutosh Desai Link: https://patch.msgid.link/20260505170712.96560-1-ashutoshdesai993@gmail.com Signed-off-by: David Heidelberg --- net/nfc/hci/core.c | 10 ++++++++++ net/nfc/nci/hci.c | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 0d33c81a15fe..ba6f0310ffd7 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -861,6 +861,11 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) struct sk_buff *frag_skb; int msg_len; + if (!pskb_may_pull(skb, NFC_HCI_HCP_PACKET_HEADER_LEN)) { + kfree_skb(skb); + return; + } + packet = (struct hcp_packet *)skb->data; if ((packet->header & ~NFC_HCI_FRAGMENT) == 0) { skb_queue_tail(&hdev->rx_hcp_frags, skb); @@ -904,6 +909,11 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) * unblock waiting cmd context. Otherwise, enqueue to dispatch * in separate context where handler can also execute command. */ + if (!pskb_may_pull(hcp_skb, NFC_HCI_HCP_HEADER_LEN)) { + kfree_skb(hcp_skb); + return; + } + packet = (struct hcp_packet *)hcp_skb->data; type = HCP_MSG_GET_TYPE(packet->message.header); if (type == NFC_HCI_HCP_RESPONSE) { diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 40ae8e5a7ec7..c03e8a0bd3bd 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -439,6 +439,11 @@ void nci_hci_data_received_cb(void *context, return; } + if (!pskb_may_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN)) { + kfree_skb(skb); + return; + } + packet = (struct nci_hcp_packet *)skb->data; if ((packet->header & ~NCI_HCI_FRAGMENT) == 0) { skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); @@ -482,6 +487,11 @@ void nci_hci_data_received_cb(void *context, * unblock waiting cmd context. Otherwise, enqueue to dispatch * in separate context where handler can also execute command. */ + if (!pskb_may_pull(hcp_skb, NCI_HCI_HCP_HEADER_LEN)) { + kfree_skb(hcp_skb); + return; + } + packet = (struct nci_hcp_packet *)hcp_skb->data; type = NCI_HCP_MSG_GET_TYPE(packet->message.header); if (type == NCI_HCI_HCP_RESPONSE) { -- cgit v1.2.3 From 91892231ae5e638326e7eaa0174de86fac9aa5fd Mon Sep 17 00:00:00 2001 From: Rámon van Raaij Date: Wed, 6 May 2026 20:31:18 +0200 Subject: ALSA: hda/realtek: Add codec SSID quirk for Lenovo Yoga Pro 9 16IMH9 (17aa:38d5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some Lenovo Yoga Pro 9 16IMH9 units carry codec SSID 17aa:38d5 instead of 17aa:38d6, which was added in commit 56722cfbb78d ("ALSA: hda/realtek: Add codec SSID quirk for Lenovo Yoga Pro 9 16IMH9"). The corresponding firmware blob TAS2XXX38D5.bin already ships in linux-firmware, and the hardware is otherwise identical: same PCI subsystem ID 17aa:3811 shared with the Legion S7 15IMH05, same TI TAS2781 amplifiers behind ACPI HID TIAS2781, same ALC287_FIXUP_TAS2781_I2C requirement. Add a second HDA_CODEC_QUIRK entry directly above the existing 17aa:38d6 entry so both variants resolve to the correct fixup. Reported and verified on hardware by GitHub user 0xEthamin. Link: https://github.com/ramonvanraaij/yoga9-tas2781-hda/issues/1 Signed-off-by: Rámon van Raaij Link: https://patch.msgid.link/20260506183118.patch1-ramon@vanraaij.eu Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 11d0ea8ed859..55bb98e2e55a 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7678,6 +7678,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { /* Yoga Pro 9 16IMH9 shares PCI SSID 17aa:3811 with Legion S7 15IMH05; * use codec SSID to distinguish them */ + HDA_CODEC_QUIRK(0x17aa, 0x38d5, "Lenovo Yoga Pro 9 16IMH9", ALC287_FIXUP_TAS2781_I2C), HDA_CODEC_QUIRK(0x17aa, 0x38d6, "Lenovo Yoga Pro 9 16IMH9", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3811, "Legion S7 15IMH05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), -- cgit v1.2.3 From d6854daa67be623860f4e1873fd3d3c275aba4ed Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Thu, 7 May 2026 00:40:51 -0300 Subject: ALSA: usb-audio: Bound MIDI endpoint descriptor scans MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit snd_usbmidi_get_ms_info() validates the internal MIDIStreaming endpoint descriptor size before using baAssocJackID[], but the descriptor walker can still return a class-specific endpoint descriptor whose bLength exceeds the remaining bytes in the endpoint-extra scan. That leaves later flexible-array reads bounded by bLength, but not by the remaining bytes in the endpoint-extra scan. Stop walking when bLength is zero or extends past the remaining endpoint-extra scan. Fixes: 5c6cd7021a05 ("ALSA: usb-audio: Fix case when USB MIDI interface has more than one extra endpoint descriptor") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260507-usb-midi-endpoint-scan-bounds-v1-1-329d7348160e@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/midi.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 0a5b8941ebda..d87e3f357cf7 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1951,15 +1951,17 @@ static struct usb_ms_endpoint_descriptor *find_usb_ms_endpoint_descriptor( while (extralen > 3) { struct usb_ms_endpoint_descriptor *ms_ep = (struct usb_ms_endpoint_descriptor *)extra; + int length = ms_ep->bLength; - if (ms_ep->bLength > 3 && + if (!length || length > extralen) + break; + + if (length > 3 && ms_ep->bDescriptorType == USB_DT_CS_ENDPOINT && ms_ep->bDescriptorSubtype == UAC_MS_GENERAL) return ms_ep; - if (!extra[0]) - break; - extralen -= extra[0]; - extra += extra[0]; + extralen -= length; + extra += length; } return NULL; } -- cgit v1.2.3 From 918be519c7876329e1b6e2ea1c59f0b75e792dca Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Thu, 7 May 2026 00:40:52 -0300 Subject: ALSA: usb-audio: Bound MIDI 2.0 endpoint descriptor scans MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The USB MIDI 2.0 endpoint parser has the same descriptor walking pattern as the legacy MIDI parser. It validates bLength against bNumGrpTrmBlock before reading baAssoGrpTrmBlkID[], but not against the remaining bytes in the endpoint-extra scan. A malformed device can therefore make later baAssoGrpTrmBlkID[] reads consume bytes past the walked descriptor. Reject zero-length and overlong descriptors while walking endpoint extras. Fixes: ff49d1df79ae ("ALSA: usb-audio: USB MIDI 2.0 UMP support") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260507-usb-midi-endpoint-scan-bounds-v1-2-329d7348160e@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/midi2.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sound/usb/midi2.c b/sound/usb/midi2.c index 2785600d2312..04aeb9052f13 100644 --- a/sound/usb/midi2.c +++ b/sound/usb/midi2.c @@ -496,15 +496,17 @@ static void *find_usb_ms_endpoint_descriptor(struct usb_host_endpoint *hostep, while (extralen > 3) { struct usb_ms_endpoint_descriptor *ms_ep = (struct usb_ms_endpoint_descriptor *)extra; + int length = ms_ep->bLength; - if (ms_ep->bLength > 3 && + if (!length || length > extralen) + break; + + if (length > 3 && ms_ep->bDescriptorType == USB_DT_CS_ENDPOINT && ms_ep->bDescriptorSubtype == subtype) return ms_ep; - if (!extra[0]) - break; - extralen -= extra[0]; - extra += extra[0]; + extralen -= length; + extra += length; } return NULL; } -- cgit v1.2.3 From 72d52bac023b376b73277804b24315ea2a49ad1e Mon Sep 17 00:00:00 2001 From: Ayaan Mirza Baig Date: Sat, 18 Apr 2026 00:46:13 +0000 Subject: platform/x86: samsung-galaxybook: Refactor camera lens cover input device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the camera_lens_cover_switch input device to a generic input device which can be used for multiple input events. Move input device allocation and registration into a dedicated galaxybook_input_init() helper which is called early in probe so that the device is available to all features. No functional change. Signed-off-by: Ayaan Mirza Baig Link: https://patch.msgid.link/20260418004613.93981-2-ayaanmirzabaig85@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/samsung-galaxybook.c | 46 +++++++++++++++++-------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/drivers/platform/x86/samsung-galaxybook.c b/drivers/platform/x86/samsung-galaxybook.c index 755cb82bdb60..a51ad6b03164 100644 --- a/drivers/platform/x86/samsung-galaxybook.c +++ b/drivers/platform/x86/samsung-galaxybook.c @@ -53,7 +53,7 @@ struct samsung_galaxybook { void *i8042_filter_ptr; struct work_struct block_recording_hotkey_work; - struct input_dev *camera_lens_cover_switch; + struct input_dev *input; struct acpi_battery_hook battery_hook; @@ -859,13 +859,28 @@ static int block_recording_acpi_set(struct samsung_galaxybook *galaxybook, const if (err) return err; - input_report_switch(galaxybook->camera_lens_cover_switch, + input_report_switch(galaxybook->input, SW_CAMERA_LENS_COVER, value ? 1 : 0); - input_sync(galaxybook->camera_lens_cover_switch); + input_sync(galaxybook->input); return 0; } +static int galaxybook_input_init(struct samsung_galaxybook *galaxybook) +{ + galaxybook->input = devm_input_allocate_device(&galaxybook->platform->dev); + if (!galaxybook->input) + return -ENOMEM; + + galaxybook->input->name = "Samsung Galaxy Book Camera Lens Cover"; + galaxybook->input->phys = DRIVER_NAME "/input0"; + galaxybook->input->id.bustype = BUS_HOST; + + input_set_capability(galaxybook->input, EV_SW, SW_CAMERA_LENS_COVER); + + return input_register_device(galaxybook->input); +} + static int galaxybook_block_recording_init(struct samsung_galaxybook *galaxybook) { bool value; @@ -887,24 +902,8 @@ static int galaxybook_block_recording_init(struct samsung_galaxybook *galaxybook return GB_NOT_SUPPORTED; } - galaxybook->camera_lens_cover_switch = - devm_input_allocate_device(&galaxybook->platform->dev); - if (!galaxybook->camera_lens_cover_switch) - return -ENOMEM; - - galaxybook->camera_lens_cover_switch->name = "Samsung Galaxy Book Camera Lens Cover"; - galaxybook->camera_lens_cover_switch->phys = DRIVER_NAME "/input0"; - galaxybook->camera_lens_cover_switch->id.bustype = BUS_HOST; - - input_set_capability(galaxybook->camera_lens_cover_switch, EV_SW, SW_CAMERA_LENS_COVER); - - err = input_register_device(galaxybook->camera_lens_cover_switch); - if (err) - return err; - - input_report_switch(galaxybook->camera_lens_cover_switch, - SW_CAMERA_LENS_COVER, value ? 1 : 0); - input_sync(galaxybook->camera_lens_cover_switch); + input_report_switch(galaxybook->input, SW_CAMERA_LENS_COVER, value ? 1 : 0); + input_sync(galaxybook->input); return 0; } @@ -1392,6 +1391,11 @@ static int galaxybook_probe(struct platform_device *pdev) return dev_err_probe(&galaxybook->platform->dev, err, "failed to initialize kbd_backlight\n"); + err = galaxybook_input_init(galaxybook); + if (err) + return dev_err_probe(&galaxybook->platform->dev, err, + "failed to initialize input device\n"); + err = galaxybook_fw_attrs_init(galaxybook); if (err) return dev_err_probe(&galaxybook->platform->dev, err, -- cgit v1.2.3 From 90dc96c61be35a1f81b56dfc5dcb80d50debbf89 Mon Sep 17 00:00:00 2001 From: Ayaan Mirza Baig Date: Sat, 18 Apr 2026 00:46:14 +0000 Subject: platform/x86: samsung-galaxybook: Handle ACPI hotkey notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Samsung Galaxy Book 5 (SAM0430), the keyboard backlight, microphone mute, and camera block hotkeys do not generate i8042 scancodes. Instead they arrive as ACPI notifications 0x7d, 0x6e, and 0x6f respectively, all of which previously fell through to the default "unknown" warning in galaxybook_acpi_notify(). Add handling for these three events: - 0x7d (Fn+F9, keyboard backlight): schedule the existing kbd_backlight_hotkey_work which cycles brightness. - 0x6e (Fn+F10, microphone mute): emit KEY_MICMUTE via the driver's input device. - 0x6f (Fn+F11, camera block): if block_recording is active use the existing block_recording_hotkey_work; otherwise emit a toggle of SW_CAMERA_LENS_COVER via the driver's input device on models where the block_recording ACPI feature is not supported. Tested on Samsung Galaxy Book 5 (SAM0430) and Samsung Galaxy Book2 Pro (SAM0429). Signed-off-by: Ayaan Mirza Baig Co-developed-by: Joshua Grisham Signed-off-by: Joshua Grisham Link: https://patch.msgid.link/20260418004613.93981-3-ayaanmirzabaig85@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/samsung-galaxybook.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/platform/x86/samsung-galaxybook.c b/drivers/platform/x86/samsung-galaxybook.c index a51ad6b03164..6382af0b106c 100644 --- a/drivers/platform/x86/samsung-galaxybook.c +++ b/drivers/platform/x86/samsung-galaxybook.c @@ -197,6 +197,9 @@ static const guid_t performance_mode_guid = #define GB_ACPI_NOTIFY_DEVICE_ON_TABLE 0x6c #define GB_ACPI_NOTIFY_DEVICE_OFF_TABLE 0x6d #define GB_ACPI_NOTIFY_HOTKEY_PERFORMANCE_MODE 0x70 +#define GB_ACPI_NOTIFY_HOTKEY_KBD_BACKLIGHT 0x7d +#define GB_ACPI_NOTIFY_HOTKEY_MICMUTE 0x6e +#define GB_ACPI_NOTIFY_HOTKEY_CAMERA 0x6f #define GB_KEY_KBD_BACKLIGHT_KEYDOWN 0x2c #define GB_KEY_KBD_BACKLIGHT_KEYUP 0xac @@ -876,6 +879,7 @@ static int galaxybook_input_init(struct samsung_galaxybook *galaxybook) galaxybook->input->phys = DRIVER_NAME "/input0"; galaxybook->input->id.bustype = BUS_HOST; + input_set_capability(galaxybook->input, EV_KEY, KEY_MICMUTE); input_set_capability(galaxybook->input, EV_SW, SW_CAMERA_LENS_COVER); return input_register_device(galaxybook->input); @@ -1259,6 +1263,25 @@ static void galaxybook_acpi_notify(acpi_handle handle, u32 event, void *data) if (galaxybook->has_performance_mode) platform_profile_cycle(); break; + case GB_ACPI_NOTIFY_HOTKEY_KBD_BACKLIGHT: + if (galaxybook->has_kbd_backlight) + schedule_work(&galaxybook->kbd_backlight_hotkey_work); + break; + case GB_ACPI_NOTIFY_HOTKEY_MICMUTE: + input_report_key(galaxybook->input, KEY_MICMUTE, 1); + input_sync(galaxybook->input); + input_report_key(galaxybook->input, KEY_MICMUTE, 0); + input_sync(galaxybook->input); + break; + case GB_ACPI_NOTIFY_HOTKEY_CAMERA: + if (galaxybook->has_block_recording) { + schedule_work(&galaxybook->block_recording_hotkey_work); + } else { + input_report_switch(galaxybook->input, SW_CAMERA_LENS_COVER, + !test_bit(SW_CAMERA_LENS_COVER, galaxybook->input->sw)); + input_sync(galaxybook->input); + } + break; default: dev_warn(&galaxybook->platform->dev, "unknown ACPI notification event: 0x%x\n", event); -- cgit v1.2.3 From 90d77b30a666049ad24df463f52e5d529c44e8cd Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 5 May 2026 21:15:37 +0200 Subject: ARM: integrator: Fix early initialization Starting with commit bdb249fce9ad4 ("ARM: integrator: read counter using syscon/regmap"), intcp_init_early calls syscon_regmap_lookup_by_compatible which in turn calls of_syscon_register. This function allocates memory. Since the memory management code has not been initialized at that time, the call always fails. It either returns -ENOMEM or crashes as follows. Unable to handle kernel NULL pointer dereference at virtual address 0000000c when read [0000000c] *pgd=00000000 Internal error: Oops: 5 [#1] ARM Modules linked in: CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.15.0-rc5-00026-g5fcc9bf84ee5 #1 PREEMPT Hardware name: ARM Integrator/CP (Device Tree) PC is at __kmalloc_cache_noprof+0xec/0x39c LR is at __kmalloc_cache_noprof+0x34/0x39c ... Call trace: __kmalloc_cache_noprof from of_syscon_register+0x7c/0x310 of_syscon_register from device_node_get_regmap+0xa4/0xb0 device_node_get_regmap from intcp_init_early+0xc/0x40 intcp_init_early from start_kernel+0x60/0x688 start_kernel from 0x0 The crash is seen due to a dereferenced pointer which is not supposed to be NULL but is NULL if the memory management subsystem has not been initialized. The crash is not seen with all versions of gcc. Some versions such as gcc 9.x apparently do not dereference the pointer, presumably if tracing is disabled. The problem has been reproduced with gcc 10.x, 11.x, and 13.x. Either case, if the crash is not seen, the call to syscon_regmap_lookup_by_compatible returns -ENOMEM, and sched_clock_register is never called. Fix the problem by moving the early initialization code into the standard machine initialization code. Fixes: bdb249fce9ad4 ("ARM: integrator: read counter using syscon/regmap") Cc: Linus Walleij Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/20250518164118.3859567-1-linux@roeck-us.net Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20260505-integrator-fixes-v1-1-56ab9aac59db@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm/mach-versatile/integrator_cp.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/arm/mach-versatile/integrator_cp.c b/arch/arm/mach-versatile/integrator_cp.c index 2ed4ded56b3f..03dfb5f720b7 100644 --- a/arch/arm/mach-versatile/integrator_cp.c +++ b/arch/arm/mach-versatile/integrator_cp.c @@ -86,14 +86,6 @@ static u64 notrace intcp_read_sched_clock(void) return val; } -static void __init intcp_init_early(void) -{ - cm_map = syscon_regmap_lookup_by_compatible("arm,core-module-integrator"); - if (IS_ERR(cm_map)) - return; - sched_clock_register(intcp_read_sched_clock, 32, 24000000); -} - static void __init intcp_init_irq_of(void) { cm_init(); @@ -119,6 +111,10 @@ static void __init intcp_init_of(void) { struct device_node *cpcon; + cm_map = syscon_regmap_lookup_by_compatible("arm,core-module-integrator"); + if (!IS_ERR(cm_map)) + sched_clock_register(intcp_read_sched_clock, 32, 24000000); + cpcon = of_find_matching_node(NULL, intcp_syscon_match); if (!cpcon) return; @@ -138,7 +134,6 @@ static const char * intcp_dt_board_compat[] = { DT_MACHINE_START(INTEGRATOR_CP_DT, "ARM Integrator/CP (Device Tree)") .reserve = integrator_reserve, .map_io = intcp_map_io, - .init_early = intcp_init_early, .init_irq = intcp_init_irq_of, .init_machine = intcp_init_of, .dt_compat = intcp_dt_board_compat, -- cgit v1.2.3 From 856540ac9b441a8c0e39f1f1787277edc4097c9b Mon Sep 17 00:00:00 2001 From: Yu-Chun Lin Date: Tue, 5 May 2026 18:39:53 +0800 Subject: MAINTAINERS: Add maintainers for ARM/REALTEK ARCHITECTURE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add James Tai and Yu-Chun Lin as co-maintainers for the ARM/REALTEK ARCHITECTURE to continue supporting Realtek SoCs. Additionally, based on the discussion, move Andreas Färber to a reviewer role and update his email address accordingly. Link: https://lore.kernel.org/lkml/bbabf0f1-99fa-4822-85c8-df76ce89da01@suse.com/ Reviewed-by: Krzysztof Kozlowski Acked-by: James Tai Signed-off-by: Yu-Chun Lin Link: https://lore.kernel.org/r/20260505103955.1010130-2-eleanor.lin@realtek.com Signed-off-by: Arnd Bergmann --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 882214b0e7db..18c9b6dce479 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3361,7 +3361,9 @@ F: drivers/irqchip/irq-rda-intc.c F: drivers/tty/serial/rda-uart.c ARM/REALTEK ARCHITECTURE -M: Andreas Färber +M: James Tai +M: Yu-Chun Lin +R: Andreas Färber L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-realtek-soc@lists.infradead.org (moderated for non-subscribers) S: Maintained -- cgit v1.2.3 From 79524bed532bc7acd7d5209a6cdd0a17dbb8e65b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 5 May 2026 18:58:37 +0800 Subject: ARM: realtek: MAINTAINERS: Include pin controller drivers No dedicated maintainers are shown for Realtek SoC pin controllers, except pinctrl subsystem maintainer, which means reduced review and impression of abandoned drivers. Pin controller drivers are essential part of an SoC, so in case of lack of dedicated entry at least cover it by the SoC platform maintainers. Acked-by: Yu-Chun Lin Signed-off-by: Krzysztof Kozlowski Signed-off-by: Yu-Chun Lin Link: https://lore.kernel.org/r/20260505105838.1014771-2-eleanor.lin@realtek.com Signed-off-by: Arnd Bergmann --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 18c9b6dce479..984cb4252f38 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3371,6 +3371,7 @@ F: Documentation/devicetree/bindings/arm/realtek.yaml F: arch/arm/boot/dts/realtek/ F: arch/arm/mach-realtek/ F: arch/arm64/boot/dts/realtek/ +F: drivers/pinctrl/realtek/ ARM/RISC-V/RENESAS ARCHITECTURE M: Geert Uytterhoeven -- cgit v1.2.3 From 7602c0ec0bbfd3985d49f4f0cad281c1414008c9 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 31 Dec 2025 21:51:26 +0530 Subject: firmware: psci: Set pm_set_resume/suspend_via_firmware() for SYSTEM_SUSPEND PSCI specification defines the SYSTEM_SUSPEND feature which enables the firmware to implement the suspend to RAM (S2RAM) functionality by transitioning the system to a deeper low power state. When the system enters such state, the power to the peripheral devices might be removed. So the respective device drivers must prepare for the possible removal of the power by performing actions such as shutting down or resetting the device in their system suspend callbacks. The Linux PM framework allows the platform drivers to convey this info to device drivers by calling the pm_set_suspend_via_firmware() and pm_set_resume_via_firmware() APIs. Hence, if the PSCI firmware supports SYSTEM_SUSPEND feature, call the above mentioned APIs in the psci_system_suspend_begin() and psci_system_suspend_enter() callbacks. Signed-off-by: Konrad Dybcio Reviewed-by: Sudeep Holla [mani: reworded the description to be more elaborative] Signed-off-by: Manivannan Sadhasivam Tested-by: Jon Hunter Acked-by: Jon Hunter Acked-by: Lorenzo Pieralisi Signed-off-by: Arnd Bergmann --- drivers/firmware/psci/psci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index 38ca190d4a22..e73bae6cb23a 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -539,12 +539,22 @@ static int psci_system_suspend(unsigned long unused) static int psci_system_suspend_enter(suspend_state_t state) { + pm_set_resume_via_firmware(); + return cpu_suspend(0, psci_system_suspend); } +static int psci_system_suspend_begin(suspend_state_t state) +{ + pm_set_suspend_via_firmware(); + + return 0; +} + static const struct platform_suspend_ops psci_suspend_ops = { .valid = suspend_valid_only_mem, .enter = psci_system_suspend_enter, + .begin = psci_system_suspend_begin, }; static void __init psci_init_system_reset2(void) -- cgit v1.2.3 From ad3bff944c0f4f2e913298a9664391af32f87491 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 30 Apr 2026 08:11:01 -0700 Subject: platform/x86: intel: Move debugfs register before creating devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is possible that the driver handling device is enumerated before registering debugfs. If the driver wants to access debugfs by calling tpmi_get_debugfs_dir(), this will return error in this case. Hence register debugfs before creating devices. Fixes: 811f67c51636 ("platform/x86/intel/tpmi: Add new auxiliary driver for performance limits") Signed-off-by: Srinivas Pandruvada Cc: Stable@vger.kernel.org Link: https://patch.msgid.link/20260430151103.1549733-2-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/vsec_tpmi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/intel/vsec_tpmi.c b/drivers/platform/x86/intel/vsec_tpmi.c index 7fc6ff8d1040..a38014e81e85 100644 --- a/drivers/platform/x86/intel/vsec_tpmi.c +++ b/drivers/platform/x86/intel/vsec_tpmi.c @@ -817,10 +817,6 @@ static int intel_vsec_tpmi_init(struct auxiliary_device *auxdev) auxiliary_set_drvdata(auxdev, tpmi_info); - ret = tpmi_create_devices(tpmi_info); - if (ret) - return ret; - /* * Allow debugfs when security policy allows. Everything this debugfs * interface provides, can also be done via /dev/mem access. If @@ -830,6 +826,12 @@ static int intel_vsec_tpmi_init(struct auxiliary_device *auxdev) if (!security_locked_down(LOCKDOWN_DEV_MEM) && capable(CAP_SYS_RAWIO)) tpmi_dbgfs_register(tpmi_info); + ret = tpmi_create_devices(tpmi_info); + if (ret) { + debugfs_remove_recursive(tpmi_info->dbgfs_dir); + return ret; + } + return 0; } -- cgit v1.2.3 From 57c347a2e2473bfb5c1f1132a3209c55efbe640b Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 30 Apr 2026 08:11:02 -0700 Subject: platform/x86: intel: Add notifiers support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some cases a driver using services of vsec_tpmi driver requires some processing before vsec_tpmi exits. For example a children using debugfs can't use debugfs as this will be deleted by the vsec_tpmi driver. This is the case when unbind using PCI driver interface. In this case the remove callback of vsec_tpmi driver is called first, then remove callback of its children. Add support of blocking chain notifiers support. Notify on successful probe and before clean up in the remove callback. Fixes: 811f67c51636 ("platform/x86/intel/tpmi: Add new auxiliary driver for performance limits") Signed-off-by: Srinivas Pandruvada Cc: Stable@vger.kernel.org Link: https://patch.msgid.link/20260430151103.1549733-3-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/vsec_tpmi.c | 19 +++++++++++++++++++ include/linux/intel_tpmi.h | 6 ++++++ 2 files changed, 25 insertions(+) diff --git a/drivers/platform/x86/intel/vsec_tpmi.c b/drivers/platform/x86/intel/vsec_tpmi.c index a38014e81e85..16fd7aa41f20 100644 --- a/drivers/platform/x86/intel/vsec_tpmi.c +++ b/drivers/platform/x86/intel/vsec_tpmi.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -188,6 +189,20 @@ struct tpmi_feature_state { /* Used during auxbus device creation */ static DEFINE_IDA(intel_vsec_tpmi_ida); +static BLOCKING_NOTIFIER_HEAD(tpmi_notify_list); + +int tpmi_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&tpmi_notify_list, nb); +} +EXPORT_SYMBOL_NS_GPL(tpmi_register_notifier, "INTEL_TPMI"); + +int tpmi_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&tpmi_notify_list, nb); +} +EXPORT_SYMBOL_NS_GPL(tpmi_unregister_notifier, "INTEL_TPMI"); + struct oobmsm_plat_info *tpmi_get_platform_data(struct auxiliary_device *auxdev) { struct intel_vsec_device *vsec_dev = auxdev_to_ivdev(auxdev); @@ -832,6 +847,8 @@ static int intel_vsec_tpmi_init(struct auxiliary_device *auxdev) return ret; } + blocking_notifier_call_chain(&tpmi_notify_list, TPMI_CORE_INIT, auxdev); + return 0; } @@ -845,6 +862,8 @@ static void tpmi_remove(struct auxiliary_device *auxdev) { struct intel_tpmi_info *tpmi_info = auxiliary_get_drvdata(auxdev); + blocking_notifier_call_chain(&tpmi_notify_list, TPMI_CORE_EXIT, auxdev); + debugfs_remove_recursive(tpmi_info->dbgfs_dir); } diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h index 94c06bf214fb..15f02422e9ca 100644 --- a/include/linux/intel_tpmi.h +++ b/include/linux/intel_tpmi.h @@ -28,6 +28,12 @@ enum intel_tpmi_id { TPMI_INFO_ID = 0x81, /* Special ID for PCI BDF and Package ID information */ }; +#define TPMI_CORE_INIT 0 +#define TPMI_CORE_EXIT 1 + +int tpmi_register_notifier(struct notifier_block *nb); +int tpmi_unregister_notifier(struct notifier_block *nb); + struct oobmsm_plat_info *tpmi_get_platform_data(struct auxiliary_device *auxdev); struct resource *tpmi_get_resource_at_index(struct auxiliary_device *auxdev, int index); int tpmi_get_resource_count(struct auxiliary_device *auxdev); -- cgit v1.2.3 From 14473e8c4e97d51eff9b2f384ae696f7a32f182b Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 30 Apr 2026 08:11:03 -0700 Subject: platform/x86/intel/tpmi/plr: Prevent fault during unbind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver faults when intel vsec driver is unbound from PCI driver interface. For example: echo 0000:00:03.1 > /sys/bus/pci/drivers/intel_vsec/unbind This is caused by accessing plr->dbgfs_dir after vsec_tpmi driver is removed. Here vsec_tpmi driver is the parent. On unbind, the parent device remove callback is called first which here will remove debugfs interface. Hence plr->dbgfs_dir is no longer valid. Register notifier for TPMI_CORE_EXIT and make this pointer to NULL, so that debugfs_remove_recursive() is not called with bad plr->dbgfs_dir pointer. After notifier is returned the vsec_tpmi driver will call remove debugfs by calling debugfs_remove_recursive(). Fixes: 811f67c51636 ("platform/x86/intel/tpmi: Add new auxiliary driver for performance limits") Signed-off-by: Srinivas Pandruvada Cc: Stable@vger.kernel.org Link: https://patch.msgid.link/20260430151103.1549733-4-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/plr_tpmi.c | 45 +++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/plr_tpmi.c b/drivers/platform/x86/intel/plr_tpmi.c index 05727169f49c..8faecc311038 100644 --- a/drivers/platform/x86/intel/plr_tpmi.c +++ b/drivers/platform/x86/intel/plr_tpmi.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,8 @@ struct tpmi_plr { struct tpmi_plr_die *die_info; int num_dies; struct auxiliary_device *auxdev; + struct notifier_block nb; + struct mutex lock; /* Protect access to dbgfs_dir */ }; static const char * const plr_coarse_reasons[] = { @@ -255,6 +258,30 @@ static ssize_t plr_status_write(struct file *filp, const char __user *ubuf, } DEFINE_SHOW_STORE_ATTRIBUTE(plr_status); +static int intel_plr_notify(struct notifier_block *self, unsigned long action, void *data) +{ + struct tpmi_plr *plr = container_of(self, struct tpmi_plr, nb); + + if (action == TPMI_CORE_EXIT) { + guard(mutex)(&plr->lock); + plr->dbgfs_dir = NULL; + } + + return NOTIFY_DONE; +} + +static int intel_plr_register_notifier(struct notifier_block *nb) +{ + nb->notifier_call = intel_plr_notify; + nb->priority = 0; + return tpmi_register_notifier(nb); +} + +static void intel_plr_unregister_notifier(struct notifier_block *nb) +{ + tpmi_unregister_notifier(nb); +} + static int intel_plr_probe(struct auxiliary_device *auxdev, const struct auxiliary_device_id *id) { struct oobmsm_plat_info *plat_info; @@ -282,10 +309,18 @@ static int intel_plr_probe(struct auxiliary_device *auxdev, const struct auxilia if (!plr) return -ENOMEM; + err = devm_mutex_init(&auxdev->dev, &plr->lock); + if (err) + return err; + + intel_plr_register_notifier(&plr->nb); + plr->die_info = devm_kcalloc(&auxdev->dev, num_resources, sizeof(*plr->die_info), GFP_KERNEL); - if (!plr->die_info) - return -ENOMEM; + if (!plr->die_info) { + err = -ENOMEM; + goto err_notify; + } plr->num_dies = num_resources; plr->dbgfs_dir = debugfs_create_dir("plr", dentry); @@ -326,6 +361,9 @@ static int intel_plr_probe(struct auxiliary_device *auxdev, const struct auxilia err: debugfs_remove_recursive(plr->dbgfs_dir); +err_notify: + intel_plr_unregister_notifier(&plr->nb); + return err; } @@ -333,6 +371,9 @@ static void intel_plr_remove(struct auxiliary_device *auxdev) { struct tpmi_plr *plr = auxiliary_get_drvdata(auxdev); + intel_plr_unregister_notifier(&plr->nb); + + guard(mutex)(&plr->lock); debugfs_remove_recursive(plr->dbgfs_dir); } -- cgit v1.2.3 From d7396a72eae795d7f968fb451237b6ac1616d712 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 1 May 2026 12:21:44 +0100 Subject: KVM: arm64: Make EL2 exception entry and exit context-synchronization events SCTLR_EL2.EIS and SCTLR_EL2.EOS control whether exception entry and exit at EL2 are Context Synchronisation Events (CSEs). Per ARM DDI 0487 M.b D24.2.175 (p. D24-9754): - !FEAT_ExS: the bit is RES1, so the entry/exit is unconditionally a CSE. - FEAT_ExS: the reset value is architecturally UNKNOWN; software must set the bit to make the entry/exit a CSE. INIT_SCTLR_EL2_MMU_ON in arch/arm64/include/asm/sysreg.h sets neither bit. KVM/arm64 hot paths rely on ERET from EL2 being a CSE, and on synchronous EL1->EL2 entry being a CSE, to elide explicit ISBs after MSRs to context-switching system registers (HCR_EL2, ZCR_EL2, ptrauth keys, etc.). On FEAT_ExS hardware those reliances are not architecturally backed unless EOS=1 (and, for entry, EIS=1). Until commit 0a35bd285f43 ("arm64: Convert SCTLR_EL2 to sysreg infrastructure"), SCTLR_EL2_RES1 was a hand-rolled mask that included BIT(11) (EOS) and BIT(22) (EIS), so INIT_SCTLR_EL2_MMU_ON was setting both unconditionally. The conversion made SCTLR_EL2_RES1 auto-generated; because the sysreg tooling only models unconditionally-RES1 fields and EIS/EOS are RES1 only when FEAT_ExS is absent, the auto-generated mask is UL(0). The seven other bits dropped from the old mask (positions 4, 5, 16, 18, 23, 28, 29) are unconditionally RES1 in the E2H=0 SCTLR_EL2 layout per DDI 0487 M.b D24.2.175, so dropping them is harmless. EIS and EOS are the only bits whose semantics changed for FEAT_ExS hardware and where the kernel relies on the value being 1. Make the guarantee explicit: include SCTLR_ELx_EIS | SCTLR_ELx_EOS in INIT_SCTLR_EL2_MMU_ON so that EL2 exception entry and exit are unconditionally CSEs regardless of whether FEAT_ExS is implemented. This matches the pairing in arch/arm64/kvm/config.c which treats EIS and EOS together as RES1 under !FEAT_ExS. Fixes: 0a35bd285f43 ("arm64: Convert SCTLR_EL2 to sysreg infrastructure") Reviewed-by: Yuan Yao Assisted-by: Gemini:gemini-3.1-pro review-prompts Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260501112149.2824881-2-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/sysreg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 736561480f36..7aa08d59d494 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -844,7 +844,7 @@ #define INIT_SCTLR_EL2_MMU_ON \ (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \ SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \ - SCTLR_ELx_ITFSB | SCTLR_EL2_RES1) + SCTLR_ELx_ITFSB | SCTLR_ELx_EIS | SCTLR_ELx_EOS | SCTLR_EL2_RES1) #define INIT_SCTLR_EL2_MMU_OFF \ (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) -- cgit v1.2.3 From 300fac4cc266b7782d88602b6b6a7faf31ce6405 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 1 May 2026 12:21:45 +0100 Subject: KVM: arm64: Guard against NULL vcpu on VHE hyp panic path On VHE, __hyp_call_panic() unconditionally calls __deactivate_traps(vcpu) on the vcpu pointer read from host_ctxt->__hyp_running_vcpu. That pointer is cleared after every guest exit (and is never set when no guest is running), so an unexpected EL2 exception landing in _guest_exit_panic, e.g. via the el2t*_invalid / el2h_irq_invalid vectors - reaches this function with vcpu == NULL. __deactivate_traps() then dereferences vcpu via ___deactivate_traps() -> vserror_state_is_nested() -> vcpu_has_nv() -> vcpu->arch.features, faulting inside the panic handler and obscuring the original failure. The nVHE counterpart (hyp_panic() in arch/arm64/kvm/hyp/nvhe/switch.c) already guards its vcpu-using cleanup with "if (vcpu)"; mirror that here. sysreg_restore_host_state_vhe() does not depend on vcpu and continues to run unconditionally, preserving panic forensics. The trailing panic("...VCPU:%p", vcpu) prints "(null)" safely via printk's %p handling. Fixes: 6a0259ed29bb ("KVM: arm64: Remove hyp_panic arguments") Assisted-by: Gemini:gemini-3.1-pro review-prompts Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260501112149.2824881-3-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/vhe/switch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 9db3f11a4754..1e8995add14f 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -663,7 +663,8 @@ static void __noreturn __hyp_call_panic(u64 spsr, u64 elr, u64 par) host_ctxt = host_data_ptr(host_ctxt); vcpu = host_ctxt->__hyp_running_vcpu; - __deactivate_traps(vcpu); + if (vcpu) + __deactivate_traps(vcpu); sysreg_restore_host_state_vhe(host_ctxt); panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n", -- cgit v1.2.3 From d4d215e5b81ba5acb17752cab12c514a8062bada Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 1 May 2026 12:21:46 +0100 Subject: KVM: arm64: Fix __deactivate_fgt macro parameter typo __deactivate_fgt() declares its first parameter as "htcxt" but the body references "hctxt". The parameter is unused; the macro silently captures "hctxt" from the enclosing scope. Both existing callers (__deactivate_traps_hfgxtr() and __deactivate_traps_ich_hfgxtr()) happen to define a local "struct kvm_cpu_context *hctxt", so the macro works by coincidence. A future caller without an "hctxt" local in scope, or naming it differently, would compile but bind to the wrong context. Align the parameter name with the sibling __activate_fgt() macro. The "vcpu" parameter remains unused in the body, kept for API symmetry with __activate_fgt() (which uses it). Fixes: f5a5a406b4b8 ("KVM: arm64: Propagate and handle Fine-Grained UNDEF bits") Assisted-by: Gemini:gemini-3.1-pro review-prompts Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260501112149.2824881-4-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 98b2976837b1..bf0eb5e43427 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -245,7 +245,7 @@ static inline void __activate_traps_ich_hfgxtr(struct kvm_vcpu *vcpu) __activate_fgt(hctxt, vcpu, ICH_HFGITR_EL2); } -#define __deactivate_fgt(htcxt, vcpu, reg) \ +#define __deactivate_fgt(hctxt, vcpu, reg) \ do { \ write_sysreg_s(ctxt_sys_reg(hctxt, reg), \ SYS_ ## reg); \ -- cgit v1.2.3 From 5130d450d1488e62e1b5310f41910a3c7320e827 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 1 May 2026 12:21:47 +0100 Subject: KVM: arm64: Seed pkvm_ownership_selftest vcpu memcache The hypercall handlers call pkvm_refill_memcache() to top up the hyp_vcpu memcache before invoking __pkvm_host_{share,donate}_guest(). pkvm_ownership_selftest invokes those functions directly with a static selftest_vcpu that has an empty memcache. Seed selftest_vcpu's memcache from the prepopulated selftest pages, leaving the remainder for selftest_vm.pool. Required by the memcache-sufficiency pre-check added in the following patches. Assisted-by: Gemini:gemini-3.1-pro review-prompts Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260501112149.2824881-5-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index e7496eb85628..eb1c10120f9f 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -752,16 +752,30 @@ static struct pkvm_hyp_vcpu selftest_vcpu = { struct pkvm_hyp_vcpu *init_selftest_vm(void *virt) { struct hyp_page *p = hyp_virt_to_page(virt); + unsigned long min_pages, seeded = 0; int i; selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr; WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt)); + /* + * Mirror pkvm_refill_memcache() for the share/donate pre-checks; + * the selftest invokes those functions directly and would + * otherwise see an empty memcache. + */ + min_pages = kvm_mmu_cache_min_pages(&selftest_vm.kvm.arch.mmu); + for (i = 0; i < pkvm_selftest_pages(); i++) { if (p[i].refcount) continue; p[i].refcount = 1; - hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i])); + if (seeded < min_pages) { + push_hyp_memcache(&selftest_vcpu.vcpu.arch.pkvm_memcache, + hyp_page_to_virt(&p[i]), hyp_virt_to_phys); + seeded++; + } else { + hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i])); + } } selftest_vm.kvm.arch.pkvm.handle = __pkvm_reserve_vm(); -- cgit v1.2.3 From 8234409ffb656970e2f5b29e416f041419980bef Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 1 May 2026 12:21:48 +0100 Subject: KVM: arm64: Pre-check vcpu memcache for host->guest share __pkvm_host_share_guest() ends with kvm_pgtable_stage2_map() to install the guest stage-2 mapping, after a forward pass that mutates the host vmemmap (sets PKVM_PAGE_SHARED_OWNED and increments host_share_guest_count) for every page in the range. The map's return value is wrapped in WARN_ON() and otherwise discarded, asserting that the call cannot fail. WARN_ON() at nVHE EL2 panics, so this assertion is only correct if the call genuinely cannot fail. kvm_pgtable_stage2_map() can fail with -ENOMEM when the stage-2 walker exhausts the caller's memcache, and the host controls the vcpu memcache via the topup interface, so an under-provisioned share request would otherwise turn a recoverable -ENOMEM into a fatal hyp panic. Bound the worst-case walker allocation in the existing pre-check pass so that kvm_pgtable_stage2_map() cannot fail at the call site, using kvm_mmu_cache_min_pages() -- the same bound host EL1 uses for its own stage-2 maps. If the vcpu memcache holds fewer pages, return -ENOMEM before any state mutation. Fixes: d0bd3e6570ae ("KVM: arm64: Introduce __pkvm_host_share_guest()") Assisted-by: Gemini:gemini-3.1-pro review-prompts Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260501112149.2824881-6-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index a3050e2b65b1..ffb123fc1145 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -1390,6 +1390,22 @@ unlock: return ret && ret != -EHWPOISON ? ret : 0; } +/* + * share/donate install at most one stage-2 leaf (PAGE_SIZE, or one + * KVM_PGTABLE_LAST_LEVEL - 1 block for share). kvm_mmu_cache_min_pages() + * bounds the worst-case allocation: exact for the PAGE_SIZE leaf, + * conservative by one for the block. + */ +static int __guest_check_pgtable_memcache(struct pkvm_hyp_vcpu *vcpu) +{ + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); + + if (vcpu->vcpu.arch.pkvm_memcache.nr_pages < kvm_mmu_cache_min_pages(vm->pgt.mmu)) + return -ENOMEM; + + return 0; +} + int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu) { struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); @@ -1474,6 +1490,10 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu } } + ret = __guest_check_pgtable_memcache(vcpu); + if (ret) + goto unlock; + for_each_hyp_page(page, phys, size) { set_host_state(page, PKVM_PAGE_SHARED_OWNED); page->host_share_guest_count++; -- cgit v1.2.3 From effc0a39b8e0f30670fe24f51e44329d4324e566 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 1 May 2026 12:21:49 +0100 Subject: KVM: arm64: Pre-check vcpu memcache for host->guest donate __pkvm_host_donate_guest() flips the host stage-2 PTE for the donated page to a non-valid annotation via host_stage2_set_owner_metadata_locked() and then calls kvm_pgtable_stage2_map() to install the matching guest stage-2 mapping. The map's return value is wrapped in WARN_ON() and otherwise discarded, asserting that the call cannot fail. WARN_ON() at nVHE EL2 panics, so this assertion is only correct if the call genuinely cannot fail. kvm_pgtable_stage2_map() can fail with -ENOMEM even at PAGE_SIZE granularity: the donate path verifies PKVM_NOPAGE for the guest IPA before the map, so the walker must allocate fresh page-table pages from the vcpu memcache, and the host controls the vcpu memcache via the topup interface. An under-provisioned donation request would otherwise turn a recoverable -ENOMEM into a fatal hyp panic. Bound the worst-case walker allocation alongside the existing __host_check_page_state_range() / __guest_check_page_state_range() pre-checks, using the helper introduced for host->guest share. If the vcpu memcache holds fewer pages than kvm_mmu_cache_min_pages(), return -ENOMEM before any state mutation. Fixes: 1e579adca177 ("KVM: arm64: Introduce __pkvm_host_donate_guest()") Assisted-by: Gemini:gemini-3.1-pro review-prompts Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260501112149.2824881-7-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index ffb123fc1145..25f04629014e 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -1425,6 +1425,10 @@ int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu) if (ret) goto unlock; + ret = __guest_check_pgtable_memcache(vcpu); + if (ret) + goto unlock; + meta = host_stage2_encode_gfn_meta(vm, gfn); WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE, PKVM_ID_GUEST, meta)); -- cgit v1.2.3 From 74570e12b4705ea11dcdfbfbd0a0b0fdaeff3059 Mon Sep 17 00:00:00 2001 From: Gyeyoung Baek Date: Sun, 19 Apr 2026 16:17:15 +0900 Subject: accel/rocket: Fix prep_bo ioctl leaking positive return from dma_resv_wait_timeout() dma_resv_wait_timeout() returns a positive 'remaining jiffies' value on success, 0 on timeout, and -errno on failure. rocket_ioctl_prep_bo() returns this 'long' result from an int-typed ioctl handler, so positive values reach userspace as bogus errors. Explicitly set ret to 0 on the success path. Fixes: 525ad89dd904 ("accel/rocket: Add IOCTLs for synchronizing memory accesses") Cc: stable@vger.kernel.org Signed-off-by: Gyeyoung Baek Reviewed-by: Tomeu Vizoso Link: https://patch.msgid.link/c0ebf83b345721701b22d8f5bc41c52c0ecf5e16.1776581974.git.gye976@gmail.com Signed-off-by: Steven Price --- drivers/accel/rocket/rocket_gem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/accel/rocket/rocket_gem.c b/drivers/accel/rocket/rocket_gem.c index b6a385d2edfc..c8084719208a 100644 --- a/drivers/accel/rocket/rocket_gem.c +++ b/drivers/accel/rocket/rocket_gem.c @@ -145,6 +145,8 @@ int rocket_ioctl_prep_bo(struct drm_device *dev, void *data, struct drm_file *fi ret = dma_resv_wait_timeout(gem_obj->resv, DMA_RESV_USAGE_WRITE, true, timeout); if (!ret) ret = timeout ? -ETIMEDOUT : -EBUSY; + else if (ret > 0) + ret = 0; shmem_obj = &to_rocket_bo(gem_obj)->base; -- cgit v1.2.3 From 459d75523b71c0ec254d153d8850d0b7008af396 Mon Sep 17 00:00:00 2001 From: Gyeyoung Baek Date: Sun, 19 Apr 2026 16:17:16 +0900 Subject: drm/panfrost: Fix wait_bo ioctl leaking positive return from dma_resv_wait_timeout() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dma_resv_wait_timeout() returns a positive 'remaining jiffies' value on success, 0 on timeout, and -errno on failure. panfrost_ioctl_wait_bo() returns this 'long' result from an int-typed ioctl handler, so positive values reach userspace as bogus errors. Explicitly set ret to 0 on the success path. Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver") Cc: stable@vger.kernel.org Signed-off-by: Gyeyoung Baek Reviewed-by: Adrián Larumbe Reviewed-by: Boris Brezillon Reviewed-by: Steven Price Link: https://patch.msgid.link/fe33f82fded7be1c18e2e0eb2db451d5a738cf39.1776581974.git.gye976@gmail.com Signed-off-by: Steven Price --- drivers/gpu/drm/panfrost/panfrost_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 711f5101aa04..074c0995ddc2 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -390,6 +390,8 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, true, timeout); if (!ret) ret = timeout ? -ETIMEDOUT : -EBUSY; + else if (ret > 0) + ret = 0; drm_gem_object_put(gem_obj); -- cgit v1.2.3 From a59e45221df82e8a6246c617615c1ccc12e3545d Mon Sep 17 00:00:00 2001 From: Haichen Feng <2806891994@qq.com> Date: Thu, 7 May 2026 22:02:42 +0800 Subject: platform/x86: hp-wmi: Add support for Victus 16-r0xxx (8BC2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HP Victus 16-r0xxx (board ID: 8BC2) has the same WMI as other Victus S boards, but requires quirks for correctly switching thermal profile. Add the DMI board name to victus_s_thermal_profile_boards[] table and map it to omen_v1_thermal_params. Testing on board 8BC2 confirmed that platform profile is registered successfully and fan RPMs are readable and controllable. Signed-off-by: Haichen Feng <2806891994@qq.com> Link: https://patch.msgid.link/tencent_8E29805D8DC7B6005244C3433C62DD9DF606@qq.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-wmi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index 24c151289dd3..6950bec2a9d8 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -205,6 +205,10 @@ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BBE") }, .driver_data = (void *)&victus_s_thermal_params, }, + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BC2") }, + .driver_data = (void *)&omen_v1_thermal_params, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BCA") }, .driver_data = (void *)&omen_v1_thermal_params, -- cgit v1.2.3 From 08f566e8f83bb70f04ad5aba5be352c490a01c8a Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 5 May 2026 15:21:53 +0200 Subject: veth: fix OOB txq access in veth_poll() with asymmetric queue counts XDP redirect into a veth device (via bpf_redirect()) calls veth_xdp_xmit(), which enqueues frames into the peer's ptr_ring using smp_processor_id() % peer->real_num_rx_queues as the ring index. With an asymmetric veth pair where the peer has fewer TX queues than RX queues, that index can exceed peer->real_num_tx_queues. veth_poll() then resolves peer_txq for the ring via: peer_txq = peer_dev ? netdev_get_tx_queue(peer_dev, queue_idx) : NULL; where queue_idx = rq->xdp_rxq.queue_index. When queue_idx exceeds peer_dev->real_num_tx_queues this is an out-of-bounds (OOB) access into the peer's netdev_queue array, triggering DEBUG_NET_WARN_ON_ONCE in netdev_get_tx_queue(). The normal ndo_start_xmit path is not affected: the stack clamps skb->queue_mapping via netdev_cap_txqueue() before invoking ndo_start_xmit, so rxq in veth_xmit() never exceeds real_num_tx_queues. Fix veth_poll() by clamping: only dereference peer_txq when queue_idx is within bounds, otherwise set it to NULL. The out-of-range rings are fed exclusively via XDP redirect (veth_xdp_xmit), never via ndo_start_xmit (veth_xmit), so the peer txq was never stopped and there is nothing to wake; NULL is the correct fallback. Reported-by: Sashiko Closes: https://lore.kernel.org/all/20260502071828.616C3C19425@smtp.kernel.org/ Fixes: dc82a33297fc ("veth: apply qdisc backpressure on full ptr_ring to reduce TX drops") Signed-off-by: Jesper Dangaard Brouer Link: https://patch.msgid.link/20260505132159.241305-2-hawk@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/veth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index e35df717e65e..0cfb19b760dd 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -972,7 +972,8 @@ static int veth_poll(struct napi_struct *napi, int budget) /* NAPI functions as RCU section */ peer_dev = rcu_dereference_check(priv->peer, rcu_read_lock_bh_held()); - peer_txq = peer_dev ? netdev_get_tx_queue(peer_dev, queue_idx) : NULL; + peer_txq = (peer_dev && queue_idx < peer_dev->real_num_tx_queues) ? + netdev_get_tx_queue(peer_dev, queue_idx) : NULL; xdp_set_return_frame_no_direct(); done = veth_xdp_rcv(rq, budget, &bq, &stats); -- cgit v1.2.3 From aa2fbece1b07954ef26488c800d126a36a8ab93e Mon Sep 17 00:00:00 2001 From: Shuhao Fu Date: Tue, 28 Apr 2026 16:01:39 +0800 Subject: ALSA: hda: cs35l56: Put ACPI device after setting companion acpi_dev_get_first_match_dev() returns a refcounted ACPI device and callers are expected to balance it with acpi_dev_put(). When no companion is already attached, cs35l56_hda_read_acpi() looks up an ACPI device and sets it with ACPI_COMPANION_SET(), but leaves the lookup reference held. ACPI_COMPANION_SET() does not take ownership of that reference, so drop it with acpi_dev_put() after attaching the companion. Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Signed-off-by: Shuhao Fu Tested-by: Simon Trimmer Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260428080139.GA1649104@chcpu16 --- sound/hda/codecs/side-codecs/cs35l56_hda.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/side-codecs/cs35l56_hda.c b/sound/hda/codecs/side-codecs/cs35l56_hda.c index 4c8d01799931..cdbc576569ef 100644 --- a/sound/hda/codecs/side-codecs/cs35l56_hda.c +++ b/sound/hda/codecs/side-codecs/cs35l56_hda.c @@ -1041,6 +1041,7 @@ static int cs35l56_hda_read_acpi(struct cs35l56_hda *cs35l56, int hid, int id) return -ENODEV; } ACPI_COMPANION_SET(cs35l56->base.dev, adev); + acpi_dev_put(adev); } /* Initialize things that could be overwritten by a fixup */ -- cgit v1.2.3 From fca7401fe37f7abc6e54147ea560f37279231137 Mon Sep 17 00:00:00 2001 From: Shuhao Fu Date: Tue, 28 Apr 2026 16:12:38 +0800 Subject: ALSA: hda: cs35l41: Put ACPI device on missing physical node acpi_dev_get_first_match_dev() returns a refcounted ACPI device and callers must balance it with acpi_dev_put(). cs35l41_hda_read_acpi() stores the returned ACPI device in cs35l41->dacpi. That reference is normally released by the later probe cleanup or the remove path, but the NULL-check on physdev exits before either of those paths can run. Drop the lookup reference before returning -ENODEV. Fixes: c34b04cc6178 ("ALSA: hda: cs35l41: Fix NULL pointer dereference in cs35l41_hda_read_acpi()") Signed-off-by: Shuhao Fu Tested-by: Simon Trimmer Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260428081238.GA1659932@chcpu16 --- sound/hda/codecs/side-codecs/cs35l41_hda.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/hda/codecs/side-codecs/cs35l41_hda.c b/sound/hda/codecs/side-codecs/cs35l41_hda.c index b64890006bb7..acfccc848f82 100644 --- a/sound/hda/codecs/side-codecs/cs35l41_hda.c +++ b/sound/hda/codecs/side-codecs/cs35l41_hda.c @@ -1896,8 +1896,10 @@ static int cs35l41_hda_read_acpi(struct cs35l41_hda *cs35l41, const char *hid, i cs35l41->dacpi = adev; physdev = get_device(acpi_get_first_physical_node(adev)); - if (!physdev) + if (!physdev) { + acpi_dev_put(adev); return -ENODEV; + } sub = acpi_get_subsystem_id(ACPI_HANDLE(physdev)); if (IS_ERR(sub)) -- cgit v1.2.3 From d119775f2bad827edc28071c061fdd4a91f889a5 Mon Sep 17 00:00:00 2001 From: Jiexun Wang Date: Wed, 6 May 2026 22:08:23 +0800 Subject: af_unix: Reject SIOCATMARK on non-stream sockets SIOCATMARK reports whether the receive queue is at the urgent mark for MSG_OOB. In AF_UNIX, MSG_OOB is supported only for SOCK_STREAM sockets. SOCK_DGRAM and SOCK_SEQPACKET reject MSG_OOB in sendmsg() and recvmsg(), so they should not support SIOCATMARK either. Return -EOPNOTSUPP for non-stream sockets before checking the receive queue. Fixes: 314001f0bf92 ("af_unix: Add OOB support") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Suggested-by: Kuniyuki Iwashima Signed-off-by: Jiexun Wang Signed-off-by: Ren Wei Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260506140825.2987635-1-n05ec@lzu.edu.cn Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e2d787ca3e74..1cbf36ea043b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3323,6 +3323,9 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct sk_buff *skb; int answ = 0; + if (sk->sk_type != SOCK_STREAM) + return -EOPNOTSUPP; + mutex_lock(&u->iolock); skb = skb_peek(&sk->sk_receive_queue); -- cgit v1.2.3 From 9032f7676935a13fd402608223d326c5f62da9c0 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Wed, 6 May 2026 09:41:05 +0800 Subject: net/smc: fix missing sk_err when TCP handshake fails In smc_connect_work(), when the underlying TCP handshake fails, the error code (rc) must be propagated to sk_err to ensure userspace can correctly retrieve the error status via SO_ERROR. Currently, the code only handles a restricted set of error codes (e.g., EPIPE, ECONNREFUSED). If other errors occurs, such as EHOSTUNREACH, sk_err remains unset (zero). This affects applications that rely on SO_ERROR to determine connect outcome. For example, higher versions of Go's netpoller treats SO_ERROR == 0 combined with a failed getpeername() as a spurious wakeup and re-enters epoll_wait(). Under ET mode, no further edge will be generated since the socket is already in a terminal state, causing the connect to hang indefinitely or until a user-specified timeout, if one is set. Fixes: 50717a37db03 ("net/smc: nonblocking connect rework") Signed-off-by: D. Wythe Reviewed-by: Dust Li Link: https://patch.msgid.link/20260506014105.27093-1-alibuda@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 1a565095376a..185dbed7de5d 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1628,12 +1628,8 @@ static void smc_connect_work(struct work_struct *work) lock_sock(&smc->sk); if (rc != 0 || smc->sk.sk_err) { smc->sk.sk_state = SMC_CLOSED; - if (rc == -EPIPE || rc == -EAGAIN) - smc->sk.sk_err = EPIPE; - else if (rc == -ECONNREFUSED) - smc->sk.sk_err = ECONNREFUSED; - else if (signal_pending(current)) - smc->sk.sk_err = -sock_intr_errno(timeo); + if (!smc->sk.sk_err) + smc->sk.sk_err = (rc == -EAGAIN) ? EPIPE : -rc; sock_put(&smc->sk); /* passive closing */ goto out; } -- cgit v1.2.3 From 32cd651d14fc72a93703ea2384cb5cd8998523a8 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Tue, 5 May 2026 10:39:26 -0700 Subject: net: phy: broadcom: Save PHY counters during suspend The PHY counters can be lost if the PHY is reset during suspend. We need to save the values into the shadow counters or the accounting will be incorrect over multiple suspend and resume cycles. Fixes: 820ee17b8d3b ("net: phy: broadcom: Add support code for reading PHY counters") Signed-off-by: Justin Chen Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20260505173926.2870069-1-justin.chen@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/bcm-phy-lib.c | 9 +++++++++ drivers/net/phy/bcm-phy-lib.h | 1 + drivers/net/phy/bcm7xxx.c | 14 ++++++++++++++ drivers/net/phy/broadcom.c | 5 +++++ 4 files changed, 29 insertions(+) diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c index 5198d66dbbc0..b64beade8dd9 100644 --- a/drivers/net/phy/bcm-phy-lib.c +++ b/drivers/net/phy/bcm-phy-lib.c @@ -563,6 +563,15 @@ void bcm_phy_get_stats(struct phy_device *phydev, u64 *shadow, } EXPORT_SYMBOL_GPL(bcm_phy_get_stats); +void bcm_phy_update_stats_shadow(struct phy_device *phydev, u64 *shadow) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(bcm_phy_hw_stats); i++) + bcm_phy_get_stat(phydev, shadow, i); +} +EXPORT_SYMBOL_GPL(bcm_phy_update_stats_shadow); + void bcm_phy_r_rc_cal_reset(struct phy_device *phydev) { /* Reset R_CAL/RC_CAL Engine */ diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h index bceddbc860eb..bba94ce96195 100644 --- a/drivers/net/phy/bcm-phy-lib.h +++ b/drivers/net/phy/bcm-phy-lib.h @@ -85,6 +85,7 @@ int bcm_phy_get_sset_count(struct phy_device *phydev); void bcm_phy_get_strings(struct phy_device *phydev, u8 *data); void bcm_phy_get_stats(struct phy_device *phydev, u64 *shadow, struct ethtool_stats *stats, u64 *data); +void bcm_phy_update_stats_shadow(struct phy_device *phydev, u64 *shadow); void bcm_phy_r_rc_cal_reset(struct phy_device *phydev); int bcm_phy_28nm_a0b0_afe_config_init(struct phy_device *phydev); int bcm_phy_enable_jumbo(struct phy_device *phydev); diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 00e8fa14aa77..71a163f62c0e 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -807,6 +807,17 @@ static void bcm7xxx_28nm_get_phy_stats(struct phy_device *phydev, bcm_phy_get_stats(phydev, priv->stats, stats, data); } +static int bcm7xxx_28nm_suspend(struct phy_device *phydev) +{ + struct bcm7xxx_phy_priv *priv = phydev->priv; + + mutex_lock(&phydev->lock); + bcm_phy_update_stats_shadow(phydev, priv->stats); + mutex_unlock(&phydev->lock); + + return genphy_suspend(phydev); +} + static int bcm7xxx_28nm_probe(struct phy_device *phydev) { struct bcm7xxx_phy_priv *priv; @@ -849,6 +860,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .flags = PHY_IS_INTERNAL, \ .config_init = bcm7xxx_28nm_config_init, \ .resume = bcm7xxx_28nm_resume, \ + .suspend = bcm7xxx_28nm_suspend, \ .get_tunable = bcm7xxx_28nm_get_tunable, \ .set_tunable = bcm7xxx_28nm_set_tunable, \ .get_sset_count = bcm_phy_get_sset_count, \ @@ -866,6 +878,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .flags = PHY_IS_INTERNAL, \ .config_init = bcm7xxx_28nm_ephy_config_init, \ .resume = bcm7xxx_28nm_ephy_resume, \ + .suspend = bcm7xxx_28nm_suspend, \ .get_sset_count = bcm_phy_get_sset_count, \ .get_strings = bcm_phy_get_strings, \ .get_stats = bcm7xxx_28nm_get_phy_stats, \ @@ -902,6 +915,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .config_aneg = genphy_config_aneg, \ .read_status = genphy_read_status, \ .resume = bcm7xxx_16nm_ephy_resume, \ + .suspend = bcm7xxx_28nm_suspend, \ } static struct phy_driver bcm7xxx_driver[] = { diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index bf0c6a04481e..d1a4edb34ad2 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -592,8 +592,13 @@ static int bcm54xx_set_wakeup_irq(struct phy_device *phydev, bool state) static int bcm54xx_suspend(struct phy_device *phydev) { + struct bcm54xx_phy_priv *priv = phydev->priv; int ret = 0; + mutex_lock(&phydev->lock); + bcm_phy_update_stats_shadow(phydev, priv->stats); + mutex_unlock(&phydev->lock); + bcm54xx_ptp_stop(phydev); /* Acknowledge any Wake-on-LAN interrupt prior to suspend */ -- cgit v1.2.3 From 019c892e46544af0ae94ec833f79aa903c837666 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 May 2026 06:59:53 +0000 Subject: ipmr: Call ipmr_fib_lookup() under RCU. Yi Lai reported RCU splat in reg_vif_xmit() below. [0] When CONFIG_IP_MROUTE_MULTIPLE_TABLES=n, ipmr_fib_lookup() uses rcu_dereference() without explicit rcu_read_lock(). Although rcu_read_lock_bh() is already held by the caller __dev_queue_xmit(), lockdep requires explicit rcu_read_lock() for rcu_dereference(). Let's move up rcu_read_lock() in reg_vif_xmit() to cover ipmr_fib_lookup(). [0]: WARNING: suspicious RCU usage 7.1.0-rc2-next-20260504-9d0d467c3572 #1 Not tainted ----------------------------- net/ipv4/ipmr.c:329 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by syz.2.17/1779: #0: ffffffff87896440 (rcu_read_lock_bh){....}-{1:3}, at: local_bh_disable include/linux/bottom_half.h:20 [inline] #0: ffffffff87896440 (rcu_read_lock_bh){....}-{1:3}, at: rcu_read_lock_bh include/linux/rcupdate.h:891 [inline] #0: ffffffff87896440 (rcu_read_lock_bh){....}-{1:3}, at: __dev_queue_xmit+0x239/0x4140 net/core/dev.c:4792 #1: ffff88801a199d18 (_xmit_PIMREG#2){+...}-{3:3}, at: spin_lock include/linux/spinlock.h:342 [inline] #1: ffff88801a199d18 (_xmit_PIMREG#2){+...}-{3:3}, at: __netif_tx_lock include/linux/netdevice.h:4795 [inline] #1: ffff88801a199d18 (_xmit_PIMREG#2){+...}-{3:3}, at: __dev_queue_xmit+0x1d5d/0x4140 net/core/dev.c:4865 stack backtrace: CPU: 1 UID: 0 PID: 1779 Comm: syz.2.17 Not tainted 7.1.0-rc2-next-20260504-9d0d467c3572 #1 PREEMPT(lazy) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x121/0x150 lib/dump_stack.c:120 dump_stack+0x19/0x20 lib/dump_stack.c:129 lockdep_rcu_suspicious+0x15b/0x1f0 kernel/locking/lockdep.c:6878 ipmr_fib_lookup net/ipv4/ipmr.c:329 [inline] reg_vif_xmit+0x2ee/0x3c0 net/ipv4/ipmr.c:540 __netdev_start_xmit include/linux/netdevice.h:5382 [inline] netdev_start_xmit include/linux/netdevice.h:5391 [inline] xmit_one net/core/dev.c:3889 [inline] dev_hard_start_xmit+0x170/0x700 net/core/dev.c:3905 __dev_queue_xmit+0x1df1/0x4140 net/core/dev.c:4871 dev_queue_xmit include/linux/netdevice.h:3423 [inline] packet_xmit+0x252/0x370 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3082 [inline] packet_sendmsg+0x39ad/0x5650 net/packet/af_packet.c:3114 sock_sendmsg_nosec net/socket.c:797 [inline] __sock_sendmsg net/socket.c:812 [inline] ____sys_sendmsg+0xa21/0xba0 net/socket.c:2716 ___sys_sendmsg+0x121/0x1c0 net/socket.c:2770 __sys_sendmsg+0x177/0x220 net/socket.c:2802 __do_sys_sendmsg net/socket.c:2807 [inline] __se_sys_sendmsg net/socket.c:2805 [inline] __x64_sys_sendmsg+0x80/0xc0 net/socket.c:2805 x64_sys_call+0x1d9c/0x21c0 arch/x86/include/generated/asm/syscalls_64.h:47 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xc1/0x1020 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f37e563ee5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 93 af 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007ffe5caa7fa8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000005c5fa0 RCX: 00007f37e563ee5d RDX: 0000000000000000 RSI: 00002000000012c0 RDI: 0000000000000004 RBP: 00000000005c5fa0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00000000005c5fac R15: 00000000005c5fa0 Fixes: b3b6babf4751 ("ipmr: Free mr_table after RCU grace period.") Reported-by: syzkaller Reported-by: Yi Lai Closes: https://lore.kernel.org/netdev/afrY34dLXNUboevf@ly-workstation/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260506065955.1695753-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 05fb6eefe0be..2628cd3a93a6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -537,15 +537,16 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) }; int err; + rcu_read_lock(); err = ipmr_fib_lookup(net, &fl4, &mrt); if (err < 0) { + rcu_read_unlock(); kfree_skb(skb); return err; } DEV_STATS_ADD(dev, tx_bytes, skb->len); DEV_STATS_INC(dev, tx_packets); - rcu_read_lock(); /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), -- cgit v1.2.3 From ecddc523cfdb85b3e132f13e293224ebfdfab564 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 May 2026 07:04:42 +0000 Subject: tcp: Fix dst leak in tcp_v6_connect(). If a socket is bound to a wildcard address, tcp_v[46]_connect() updates it with a non-wildcard address based on the route lookup. After bhash2 was introduced in the cited commit, we must call inet_bhash2_update_saddr() to update the bhash2 entry as well. If inet_bhash2_update_saddr() fails, we must release the refcount for dst by ip_route_connect() or ip6_dst_lookup_flow(). While tcp_v4_connect() calls ip_rt_put() in the error path, tcp_v6_connect() does not call dst_release(). Let's call dst_release() when inet_bhash2_update_saddr() fails in tcp_v6_connect(). Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Reported-by: Damiano Melotti Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260506070443.1699879-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/tcp_ipv6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 51583aef0643..d13d49bfef19 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -288,8 +288,10 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, saddr = &fl6->saddr; err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); - if (err) + if (err) { + dst_release(dst); goto failure; + } } /* set the source address */ -- cgit v1.2.3 From dedf6c90386d99b878763c183a08b61d3ce4824e Mon Sep 17 00:00:00 2001 From: Joey Lu Date: Wed, 6 May 2026 16:46:13 +0800 Subject: net: stmmac: dwmac-nuvoton: fix NULL pointer dereference in nvt_set_phy_intf_sel() priv->dev was never initialized after devm_kzalloc() allocates the private data structure. When nvt_set_phy_intf_sel() is later invoked via the phylink interface_select callback, it calls nvt_gmac_get_delay(priv->dev, ...) which dereferences the NULL pointer. Fix this by assigning priv->dev = dev immediately after allocation. Fixes: 4d7c557f58ef ("net: stmmac: dwmac-nuvoton: Add dwmac glue for Nuvoton MA35 family") Signed-off-by: Joey Lu Link: https://patch.msgid.link/20260506084614.192894-2-a0987203069@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c index e2240b68ad98..2ab6ecac6422 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c @@ -100,6 +100,8 @@ static int nvt_gmac_probe(struct platform_device *pdev) if (!priv) return dev_err_probe(dev, -ENOMEM, "Failed to allocate private data\n"); + priv->dev = dev; + priv->regmap = syscon_regmap_lookup_by_phandle_args(dev->of_node, "nuvoton,sys", 1, &priv->macid); if (IS_ERR(priv->regmap)) -- cgit v1.2.3 From b131dc93f7bf1b1461f5bde0c06c4c2384aa5b58 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Wed, 6 May 2026 09:25:38 +0200 Subject: net: sparx5: fix wrong chip ids for TSN SKUs The TSN SKUs in enum spx5_target_chiptype have incorrect IDs: SPX5_TARGET_CT_7546TSN = 0x47546, SPX5_TARGET_CT_7549TSN = 0x47549, SPX5_TARGET_CT_7552TSN = 0x47552, SPX5_TARGET_CT_7556TSN = 0x47556, SPX5_TARGET_CT_7558TSN = 0x47558, The value read back from the chip is GCB_CHIP_ID_PART_ID, which is a GENMASK(27, 12) field, i.e. at most 16 bits wide. It can never match these IDs, so probing a TSN part fails with a "Target not supported" error. Fix the enum to use the actual 16-bit part IDs returned by the hardware: 0x0546, 0x0549, 0x0552, 0x0556 and 0x0558. Reported-by: Andrew Lunn Fixes: 3cfa11bac9bb ("net: sparx5: add the basic sparx5 driver") Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20260506-misc-fixes-sparx5-lan969x-v2-3-fb236aa96908@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_main.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 6a745bb71b5c..eb57b86fbe22 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -31,11 +31,11 @@ enum spx5_target_chiptype { SPX5_TARGET_CT_7552 = 0x7552, /* SparX-5-128 Enterprise */ SPX5_TARGET_CT_7556 = 0x7556, /* SparX-5-160 Enterprise */ SPX5_TARGET_CT_7558 = 0x7558, /* SparX-5-200 Enterprise */ - SPX5_TARGET_CT_7546TSN = 0x47546, /* SparX-5-64i Industrial */ - SPX5_TARGET_CT_7549TSN = 0x47549, /* SparX-5-90i Industrial */ - SPX5_TARGET_CT_7552TSN = 0x47552, /* SparX-5-128i Industrial */ - SPX5_TARGET_CT_7556TSN = 0x47556, /* SparX-5-160i Industrial */ - SPX5_TARGET_CT_7558TSN = 0x47558, /* SparX-5-200i Industrial */ + SPX5_TARGET_CT_7546TSN = 0x0546, /* SparX-5-64i Industrial */ + SPX5_TARGET_CT_7549TSN = 0x0549, /* SparX-5-90i Industrial */ + SPX5_TARGET_CT_7552TSN = 0x0552, /* SparX-5-128i Industrial */ + SPX5_TARGET_CT_7556TSN = 0x0556, /* SparX-5-160i Industrial */ + SPX5_TARGET_CT_7558TSN = 0x0558, /* SparX-5-200i Industrial */ SPX5_TARGET_CT_LAN9694 = 0x9694, /* lan969x-40 */ SPX5_TARGET_CT_LAN9691VAO = 0x9691, /* lan969x-40-VAO */ SPX5_TARGET_CT_LAN9694TSN = 0x9695, /* lan969x-40-TSN */ -- cgit v1.2.3 From 41ae14071cd7f6a7770e2fe1f8a0859d4c2c6ba4 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Wed, 6 May 2026 09:25:39 +0200 Subject: net: sparx5: configure serdes for 1000BASE-X in sparx5_port_init() sparx5_port_init() only invokes sparx5_serdes_set() and the associated shadow-device enable and low-speed device switch for SGMII and QSGMII. On any port with a high-speed primary device (DEV5G/DEV10G/DEV25G) configured for 1000BASE-X the serdes is therefore left uninitialized, the DEV2G5 shadow is never enabled, and the port stays pointed at its high-speed device rather than the DEV2G5. The PCS1G block looks healthy in isolation, but no frames reach the link partner. Add 1000BASE-X to the check so the same three steps run. Note: the same issue might apply to 2500BASE-X, but that will, eventually, be addressed in a separate commit. Reported-by: Andrew Lunn Fixes: 946e7fd5053a ("net: sparx5: add port module support") Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20260506-misc-fixes-sparx5-lan969x-v2-4-fb236aa96908@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_port.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c index 04bc8fffaf96..62c49893de3c 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c @@ -1128,7 +1128,8 @@ int sparx5_port_init(struct sparx5 *sparx5, DEV2G5_PCS1G_SD_CFG(port->portno)); if (conf->portmode == PHY_INTERFACE_MODE_QSGMII || - conf->portmode == PHY_INTERFACE_MODE_SGMII) { + conf->portmode == PHY_INTERFACE_MODE_SGMII || + conf->portmode == PHY_INTERFACE_MODE_1000BASEX) { err = sparx5_serdes_set(sparx5, port, conf); if (err) return err; -- cgit v1.2.3 From 74d695fd6f9d70df849c555f358ddfd26e2d85bf Mon Sep 17 00:00:00 2001 From: "Uwe Kleine-König (The Capable Hub)" Date: Thu, 7 May 2026 18:00:51 +0200 Subject: Input: fm801-gp - simplify initialisation of pci_device_id array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of assigning the pci_device_id members using a list (which is hard to read as you need to look at the order of the members in that struct in parallel) use the PCI_VDEVICE() convenience macro to compact the initialisation while improving readability. Also drop trailing zeros that the compiler will care about then. The change doesn't introduce binary changes to the compiled driver, verified on both ARCH=x86 and ARCH=arm64. Signed-off-by: Uwe Kleine-König (The Capable Hub) Link: https://patch.msgid.link/20260507160051.3315630-2-u.kleine-koenig@baylibre.com Signed-off-by: Dmitry Torokhov --- drivers/input/gameport/fm801-gp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/gameport/fm801-gp.c b/drivers/input/gameport/fm801-gp.c index 423cccdea34f..1e8c6c044844 100644 --- a/drivers/input/gameport/fm801-gp.c +++ b/drivers/input/gameport/fm801-gp.c @@ -125,8 +125,8 @@ static void fm801_gp_remove(struct pci_dev *pci) } static const struct pci_device_id fm801_gp_id_table[] = { - { PCI_VENDOR_ID_FORTEMEDIA, PCI_DEVICE_ID_FM801_GP, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, - { 0 } + { PCI_VDEVICE(FORTEMEDIA, PCI_DEVICE_ID_FM801_GP) }, + { } }; MODULE_DEVICE_TABLE(pci, fm801_gp_id_table); -- cgit v1.2.3 From baa0210fb6a9dc3882509a9411b6d284d88fe30e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 4 May 2026 11:54:45 -0700 Subject: Input: atmel_mxt_ts - fix boundary check in mxt_prepare_cfg_mem When a configuration file provides an object size that is larger than the driver's known mxt_obj_size(object), the driver intends to discard the extra bytes. The loop iterates using for (i = 0; i < size; i++). Inside the loop, the condition to skip processing extra bytes is: if (i > mxt_obj_size(object)) continue; Since i is a 0-based index, the valid indices for the object are 0 through mxt_obj_size(object) - 1. When i == mxt_obj_size(object), the condition evaluates to false, and the code processes the byte instead of discarding it. This causes the code to calculate byte_offset = reg + i - cfg->start_ofs and writes the byte there, overwriting exactly one byte of the adjacent instance or object. Update the boundary check to skip extra bytes correctly by using >=. Fixes: 50a77c658b80 ("Input: atmel_mxt_ts - download device config using firmware loader") Cc: stable@vger.kernel.org Assisted-by: Gemini:gemini-3.1-pro Reviewed-by: Ricardo Ribalda Link: https://patch.msgid.link/20260504185448.4055973-1-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 87c6a10381f2..fad1b3f4138b 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -1473,7 +1473,7 @@ static int mxt_prepare_cfg_mem(struct mxt_data *data, struct mxt_cfg *cfg) } cfg->raw_pos += offset; - if (i > mxt_obj_size(object)) + if (i >= mxt_obj_size(object)) continue; byte_offset = reg + i - cfg->start_ofs; -- cgit v1.2.3 From a5fd88a5d63f812422e69682f3cb663d9d7f3e9c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 4 May 2026 11:54:46 -0700 Subject: Input: atmel_mxt_ts - check mem_size before calculating config memory size In mxt_update_cfg(), the driver calculates the memory size needed to store the configuration as data->mem_size - cfg.start_ofs. If data->mem_size is less than or equal to cfg.start_ofs, this calculation will underflow or result in a zero-size buffer, neither of which is valid for a configuration update. Add a check to return -EINVAL if data->mem_size is too small. While at it, change the types of start_ofs and mem_size in struct mxt_cfg to u16 to match the device address space. Assisted-by: Gemini:gemini-3.1-pro Link: https://patch.msgid.link/20260504185448.4055973-2-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index fad1b3f4138b..f21bf2844112 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -275,8 +275,8 @@ struct mxt_cfg { off_t raw_pos; u8 *mem; - size_t mem_size; - int start_ofs; + u16 mem_size; + u16 start_ofs; struct mxt_info info; }; @@ -1627,6 +1627,13 @@ static int mxt_update_cfg(struct mxt_data *data, const struct firmware *fw) cfg.start_ofs = MXT_OBJECT_START + data->info->object_num * sizeof(struct mxt_object) + MXT_INFO_CHECKSUM_SIZE; + + if (data->mem_size <= cfg.start_ofs) { + dev_err(dev, "Memory size too small: %u < %u\n", + data->mem_size, cfg.start_ofs); + return -EINVAL; + } + cfg.mem_size = data->mem_size - cfg.start_ofs; u8 *mem_buf __free(kfree) = cfg.mem = kzalloc(cfg.mem_size, GFP_KERNEL); -- cgit v1.2.3 From 5be7a0cef3229fb3b63a07c0d289daf752545424 Mon Sep 17 00:00:00 2001 From: Piyush Sachdeva Date: Thu, 7 May 2026 22:22:13 +0530 Subject: smb: client: Use FullSessionKey for AES-256 encryption key derivation When Kerberos authentication is used with AES-256 encryption (AES-256-CCM or AES-256-GCM), the SMB3 encryption and decryption keys must be derived using the full session key (Session.FullSessionKey) rather than just the first 16 bytes (Session.SessionKey). Per MS-SMB2 section 3.2.5.3.1, when Connection.Dialect is "3.1.1" and Connection.CipherId is AES-256-CCM or AES-256-GCM, Session.FullSessionKey must be set to the full cryptographic key from the GSS authentication context. The encryption and decryption key derivation (SMBC2SCipherKey, SMBS2CCipherKey) must use this FullSessionKey as the KDF input. The signing key derivation continues to use Session.SessionKey (first 16 bytes) in all cases. Previously, generate_key() hardcoded SMB2_NTLMV2_SESSKEY_SIZE (16) as the HMAC-SHA256 key input length for all derivations. When Kerberos with AES-256 provides a 32-byte session key, the KDF for encryption/decryption was using only the first 16 bytes, producing keys that did not match the server's, causing mount failures with sec=krb5 and require_gcm_256=1. Add a full_key_size parameter to generate_key() and pass the appropriate size from generate_smb3signingkey(): - Signing: always SMB2_NTLMV2_SESSKEY_SIZE (16 bytes) - Encryption/Decryption: ses->auth_key.len when AES-256, otherwise 16 Also fix cifs_dump_full_key() to report the actual session key length for AES-256 instead of hardcoded CIFS_SESS_KEY_SIZE, so that userspace tools like Wireshark receive the correct key for decryption. Cc: Reviewed-by: Bharath SM Signed-off-by: Piyush Sachdeva Signed-off-by: Piyush Sachdeva Signed-off-by: Steve French --- fs/smb/client/ioctl.c | 2 +- fs/smb/client/smb2transport.c | 35 ++++++++++++++++++++++++++--------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 9afab3237e54..17408bb8ab65 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -296,7 +296,7 @@ search_end: break; case SMB2_ENCRYPTION_AES256_CCM: case SMB2_ENCRYPTION_AES256_GCM: - out.session_key_length = CIFS_SESS_KEY_SIZE; + out.session_key_length = ses->auth_key.len; out.server_in_key_length = out.server_out_key_length = SMB3_GCM256_CRYPTKEY_SIZE; break; default: diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 41009039b4cb..e8eeff9e50d6 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -251,7 +251,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) } static void generate_key(struct cifs_ses *ses, struct kvec label, - struct kvec context, __u8 *key, unsigned int key_size) + struct kvec context, __u8 *key, unsigned int key_size, + unsigned int full_key_size) { unsigned char zero = 0x0; __u8 i[4] = {0, 0, 0, 1}; @@ -265,7 +266,7 @@ static void generate_key(struct cifs_ses *ses, struct kvec label, memset(key, 0x0, key_size); hmac_sha256_init_usingrawkey(&hmac_ctx, ses->auth_key.response, - SMB2_NTLMV2_SESSKEY_SIZE); + full_key_size); hmac_sha256_update(&hmac_ctx, i, 4); hmac_sha256_update(&hmac_ctx, label.iov_base, label.iov_len); hmac_sha256_update(&hmac_ctx, &zero, 1); @@ -298,6 +299,7 @@ generate_smb3signingkey(struct cifs_ses *ses, struct TCP_Server_Info *server, const struct derivation_triplet *ptriplet) { + unsigned int full_key_size = SMB2_NTLMV2_SESSKEY_SIZE; bool is_binding = false; int chan_index = 0; @@ -330,12 +332,24 @@ generate_smb3signingkey(struct cifs_ses *ses, if (is_binding) { generate_key(ses, ptriplet->signing.label, ptriplet->signing.context, - ses->chans[chan_index].signkey, - SMB3_SIGN_KEY_SIZE); + ses->chans[chan_index].signkey, SMB3_SIGN_KEY_SIZE, + SMB2_NTLMV2_SESSKEY_SIZE); } else { generate_key(ses, ptriplet->signing.label, - ptriplet->signing.context, - ses->smb3signingkey, SMB3_SIGN_KEY_SIZE); + ptriplet->signing.context, ses->smb3signingkey, + SMB3_SIGN_KEY_SIZE, SMB2_NTLMV2_SESSKEY_SIZE); + + /* + * Per MS-SMB2 3.2.5.3.1, signing key always uses Session.SessionKey + * (first 16 bytes). Encryption/decryption keys use + * Session.FullSessionKey when dialect is 3.1.1 and cipher is + * AES-256-CCM or AES-256-GCM, otherwise Session.SessionKey. + */ + + if (server->dialect == SMB311_PROT_ID && + (server->cipher_type == SMB2_ENCRYPTION_AES256_CCM || + server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) + full_key_size = ses->auth_key.len; /* safe to access primary channel, since it will never go away */ spin_lock(&ses->chan_lock); @@ -345,10 +359,13 @@ generate_smb3signingkey(struct cifs_ses *ses, generate_key(ses, ptriplet->encryption.label, ptriplet->encryption.context, - ses->smb3encryptionkey, SMB3_ENC_DEC_KEY_SIZE); + ses->smb3encryptionkey, SMB3_ENC_DEC_KEY_SIZE, + full_key_size); + generate_key(ses, ptriplet->decryption.label, ptriplet->decryption.context, - ses->smb3decryptionkey, SMB3_ENC_DEC_KEY_SIZE); + ses->smb3decryptionkey, SMB3_ENC_DEC_KEY_SIZE, + full_key_size); } #ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS @@ -361,7 +378,7 @@ generate_smb3signingkey(struct cifs_ses *ses, &ses->Suid); cifs_dbg(VFS, "Cipher type %d\n", server->cipher_type); cifs_dbg(VFS, "Session Key %*ph\n", - SMB2_NTLMV2_SESSKEY_SIZE, ses->auth_key.response); + (int)ses->auth_key.len, ses->auth_key.response); cifs_dbg(VFS, "Signing Key %*ph\n", SMB3_SIGN_KEY_SIZE, ses->smb3signingkey); if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) || -- cgit v1.2.3 From 8cb6fc3231500233ddaf63cb7fd5435008d9ed5f Mon Sep 17 00:00:00 2001 From: Piyush Sachdeva Date: Thu, 7 May 2026 22:22:14 +0530 Subject: smb: client: Zero-pad short GSS session keys per MS-SMB2 Per MS-SMB2 section 3.2.5.3, Session.SessionKey is the first 16 bytes of the GSS cryptographic key, right-padded with zero bytes if the key is shorter than 16 bytes. SMB2_auth_kerberos() copies the GSS session key from the cifs.upcall response using kmemdup(msg->data, msg->sesskey_len, ...) and stores the GSS-reported length verbatim in ses->auth_key.len. generate_key() reads SMB2_NTLMV2_SESSKEY_SIZE bytes from this buffer when feeding the HMAC-SHA256 KDF for signing key derivation. If a GSS mechanism returns a session key shorter than 16 bytes (e.g. a deprecated single-DES Kerberos enctype with an 8-byte session key), the KDF call performs an out-of-bounds slab read and derives keys that do not match the server, which pads per the spec. Modern KDCs disable short-key enctypes by default, so this is latent rather than reachable in production, but it is still a kernel heap over-read. Allocate auth_key.response with kzalloc() at a length of max(msg->sesskey_len, SMB2_NTLMV2_SESSKEY_SIZE), copy the GSS key in, and rely on kzalloc()'s zero initialization for the spec-mandated padding. Set ses->auth_key.len to the padded length. Larger GSS keys (e.g. the 32-byte aes256-cts-hmac-sha1-96 session key) continue to be stored at their natural length, preserving the FullSessionKey path. Emit a cifs_dbg(VFS, ...) message when a short key is encountered to surface deprecated-enctype usage. NTLMv2 and NTLMSSP code paths produce a 16-byte session key by construction and are unaffected. Signed-off-by: Piyush Sachdeva Signed-off-by: Piyush Sachdeva Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index cb61051f9af3..995fcdd30681 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -1713,17 +1713,30 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) is_binding = (ses->ses_status == SES_GOOD); spin_unlock(&ses->ses_lock); + /* + * Per MS-SMB2 3.2.5.3, Session.SessionKey is the first 16 bytes of the + * GSS cryptographic key, right-padded with zero bytes if shorter. + * Allocate at least SMB2_NTLMV2_SESSKEY_SIZE bytes (zeroed) so the KDF + * input buffer is always valid for HMAC-SHA256 even with deprecated + * Kerberos enctypes that return a short session key. + */ + if (unlikely(msg->sesskey_len < SMB2_NTLMV2_SESSKEY_SIZE)) + cifs_dbg(VFS, + "short GSS session key (%u bytes); zero-padding per MS-SMB2 3.2.5.3\n", + msg->sesskey_len); + kfree_sensitive(ses->auth_key.response); - ses->auth_key.response = kmemdup(msg->data, - msg->sesskey_len, - GFP_KERNEL); + ses->auth_key.len = max_t(unsigned int, msg->sesskey_len, + SMB2_NTLMV2_SESSKEY_SIZE); + ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL); if (!ses->auth_key.response) { cifs_dbg(VFS, "%s: can't allocate (%u bytes) memory\n", - __func__, msg->sesskey_len); + __func__, ses->auth_key.len); + ses->auth_key.len = 0; rc = -ENOMEM; goto out_put_spnego_key; } - ses->auth_key.len = msg->sesskey_len; + memcpy(ses->auth_key.response, msg->data, msg->sesskey_len); sess_data->iov[1].iov_base = msg->data + msg->sesskey_len; sess_data->iov[1].iov_len = msg->secblob_len; -- cgit v1.2.3 From d62b8d236fab503c6fec1d3e9a38bea71feaca20 Mon Sep 17 00:00:00 2001 From: Zisen Ye Date: Sat, 2 May 2026 18:48:36 +0800 Subject: smb/client: fix out-of-bounds read in symlink_data() Since smb2_check_message() returns success without length validation for the symlink error response, in symlink_data() it is possible for iov->iov_len to be smaller than sizeof(struct smb2_err_rsp). If the buffer only contains the base SMB2 header (64 bytes), accessing err->ErrorContextCount (at offset 66) or err->ByteCount later in symlink_data() will cause an out-of-bounds read. Link: https://lore.kernel.org/linux-cifs/297d8d9b-adf7-42fd-a1c2-5b1f230032bc@chenxiaosong.com/ Fixes: 76894f3e2f71 ("cifs: improve symlink handling for smb2+") Cc: Stable@vger.kernel.org Signed-off-by: Zisen Ye Reviewed-by: ChenXiaoSong Signed-off-by: Steve French --- fs/smb/client/smb2misc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 973fce3c959c..2a7355ce1a07 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -241,7 +241,8 @@ smb2_check_message(char *buf, unsigned int pdu_len, unsigned int len, if (len != calc_len) { /* create failed on symlink */ if (command == SMB2_CREATE_HE && - shdr->Status == STATUS_STOPPED_ON_SYMLINK) + shdr->Status == STATUS_STOPPED_ON_SYMLINK && + len > calc_len) return 0; /* Windows 7 server returns 24 bytes more */ if (calc_len + 24 == len && command == SMB2_OPLOCK_BREAK_HE) -- cgit v1.2.3 From 8d09328dfda089675e4c049f3f256064a1d1996b Mon Sep 17 00:00:00 2001 From: Zisen Ye Date: Wed, 6 May 2026 11:49:08 +0800 Subject: smb/client: fix out-of-bounds read in smb2_compound_op() If a server sends a truncated response but a large OutputBufferLength, and terminates the EA list early, check_wsl_eas() returns success without validating that the entire OutputBufferLength fits within iov_len. Then smb2_compound_op() does: memcpy(idata->wsl.eas, data[0], size[0]); Where size[0] is OutputBufferLength. If iov_len is smaller than size[0], memcpy can read beyond the end of the rsp_iov allocation and leak adjacent kernel heap memory. Link: https://lore.kernel.org/linux-cifs/d998240c-aca9-420d-9dbd-f5ba24af19e0@chenxiaosong.com/ Fixes: ea41367b2a60 ("smb: client: introduce SMB2_OP_QUERY_WSL_EA") Cc: stable@vger.kernel.org Signed-off-by: Zisen Ye Reviewed-by: ChenXiaoSong Signed-off-by: Steve French --- fs/smb/client/smb2inode.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 286912616c73..6c9c229b91f6 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -111,7 +111,7 @@ static int check_wsl_eas(struct kvec *rsp_iov) u32 outlen, next; u16 vlen; u8 nlen; - u8 *end; + u8 *ea_end, *iov_end; outlen = le32_to_cpu(rsp->OutputBufferLength); if (outlen < SMB2_WSL_MIN_QUERY_EA_RESP_SIZE || @@ -120,15 +120,19 @@ static int check_wsl_eas(struct kvec *rsp_iov) ea = (void *)((u8 *)rsp_iov->iov_base + le16_to_cpu(rsp->OutputBufferOffset)); - end = (u8 *)rsp_iov->iov_base + rsp_iov->iov_len; + ea_end = (u8 *)ea + outlen; + iov_end = (u8 *)rsp_iov->iov_base + rsp_iov->iov_len; + if (ea_end > iov_end) + return -EINVAL; + for (;;) { - if ((u8 *)ea > end - sizeof(*ea)) + if ((u8 *)ea > ea_end - sizeof(*ea)) return -EINVAL; nlen = ea->ea_name_length; vlen = le16_to_cpu(ea->ea_value_length); if (nlen != SMB2_WSL_XATTR_NAME_LEN || - (u8 *)ea->ea_data + nlen + 1 + vlen > end) + (u8 *)ea->ea_data + nlen + 1 + vlen > ea_end) return -EINVAL; switch (vlen) { -- cgit v1.2.3 From f98b48151cc502ada59d9778f0112d21f2586ca3 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Mon, 20 Apr 2026 10:47:47 -0400 Subject: smb: client: validate dacloffset before building DACL pointers parse_sec_desc(), build_sec_desc(), and the chown path in id_mode_to_cifs_acl() all add the server-supplied dacloffset to pntsd before proving a DACL header fits inside the returned security descriptor. On 32-bit builds a malicious server can return dacloffset near U32_MAX, wrap the derived DACL pointer below end_of_acl, and then slip past the later pointer-based bounds checks. build_sec_desc() and id_mode_to_cifs_acl() can then dereference DACL fields from the wrapped pointer in the chmod/chown rewrite paths. Validate dacloffset numerically before building any DACL pointer and reuse the same helper at the three DACL entry points. Fixes: bc3e9dd9d104 ("cifs: Change SIDs in ACEs while transferring file ownership.") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-6 Signed-off-by: Michael Bommarito Signed-off-by: Steve French --- fs/smb/client/cifsacl.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index a2750f1e3d90..786dbbc43c5b 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -1264,6 +1264,17 @@ static int parse_sid(struct smb_sid *psid, char *end_of_acl) return 0; } +static bool dacl_offset_valid(unsigned int acl_len, __u32 dacloffset) +{ + if (acl_len < sizeof(struct smb_acl)) + return false; + + if (dacloffset < sizeof(struct smb_ntsd)) + return false; + + return dacloffset <= acl_len - sizeof(struct smb_acl); +} + /* Convert CIFS ACL to POSIX form */ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, @@ -1284,7 +1295,6 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, group_sid_ptr = (struct smb_sid *)((char *)pntsd + le32_to_cpu(pntsd->gsidoffset)); dacloffset = le32_to_cpu(pntsd->dacloffset); - dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); cifs_dbg(NOISY, "revision %d type 0x%x ooffset 0x%x goffset 0x%x sacloffset 0x%x dacloffset 0x%x\n", pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset), le32_to_cpu(pntsd->gsidoffset), @@ -1315,11 +1325,18 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, return rc; } - if (dacloffset) + if (dacloffset) { + if (!dacl_offset_valid(acl_len, dacloffset)) { + cifs_dbg(VFS, "Server returned illegal DACL offset\n"); + return -EINVAL; + } + + dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, group_sid_ptr, fattr, get_mode_from_special_sid); - else + } else { cifs_dbg(FYI, "no ACL\n"); /* BB grant all or default perms? */ + } return rc; } @@ -1342,6 +1359,11 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd, dacloffset = le32_to_cpu(pntsd->dacloffset); if (dacloffset) { + if (!dacl_offset_valid(secdesclen, dacloffset)) { + cifs_dbg(VFS, "Server returned illegal DACL offset\n"); + return -EINVAL; + } + dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); rc = validate_dacl(dacl_ptr, end_of_acl); if (rc) @@ -1710,6 +1732,12 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, nsecdesclen = sizeof(struct smb_ntsd) + (sizeof(struct smb_sid) * 2); dacloffset = le32_to_cpu(pntsd->dacloffset); if (dacloffset) { + if (!dacl_offset_valid(secdesclen, dacloffset)) { + cifs_dbg(VFS, "Server returned illegal DACL offset\n"); + rc = -EINVAL; + goto id_mode_to_cifs_acl_exit; + } + dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset); rc = validate_dacl(dacl_ptr, (char *)pntsd + secdesclen); if (rc) { @@ -1752,6 +1780,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode, rc = ops->set_acl(pnntsd, nsecdesclen, inode, path, aclflag); cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc); } +id_mode_to_cifs_acl_exit: cifs_put_tlink(tlink); kfree(pnntsd); -- cgit v1.2.3 From 4616a9c36be7e2e051ef53b0e8fd729da0277abf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2026 11:05:31 -1000 Subject: sched_ext: Move scx_error() out of scx_link_sched()'s lock region scx_link_sched() holds scx_sched_lock. The scx_error() calls inside take the same lock through scx_claim_exit() and deadlock. Move them out of the guard. Fixes: 6b4576b09714 ("sched_ext: Reject sub-sched attachment to a disabled parent") Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 3f0d8aeaed81..7d367c140a36 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5585,10 +5585,12 @@ static void refresh_watchdog(void) static s32 scx_link_sched(struct scx_sched *sch) { + const char *err_msg; + s32 ret = 0; + scoped_guard(raw_spinlock_irq, &scx_sched_lock) { #ifdef CONFIG_EXT_SUB_SCHED struct scx_sched *parent = scx_parent(sch); - s32 ret; if (parent) { /* @@ -5598,15 +5600,16 @@ static s32 scx_link_sched(struct scx_sched *sch) * parent can shoot us down. */ if (atomic_read(&parent->exit_kind) != SCX_EXIT_NONE) { - scx_error(sch, "parent disabled"); - return -ENOENT; + err_msg = "parent disabled"; + ret = -ENOENT; + break; } ret = rhashtable_lookup_insert_fast(&scx_sched_hash, &sch->hash_node, scx_sched_hash_params); if (ret) { - scx_error(sch, "failed to insert into scx_sched_hash (%d)", ret); - return ret; + err_msg = "failed to insert into scx_sched_hash"; + break; } list_add_tail(&sch->sibling, &parent->children); @@ -5616,6 +5619,15 @@ static s32 scx_link_sched(struct scx_sched *sch) list_add_tail_rcu(&sch->all, &scx_sched_all); } + /* + * scx_error() takes scx_sched_lock via scx_claim_exit(), so it must run after + * the guard above is released. + */ + if (ret) { + scx_error(sch, "%s (%d)", err_msg, ret); + return ret; + } + refresh_watchdog(); return 0; } -- cgit v1.2.3 From dde2f938d02f2c740d49bb5113dea941f941026a Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Thu, 7 May 2026 18:54:34 +0800 Subject: cgroup/cpuset: move PF_EXITING check before __GFP_HARDWALL in cpuset_current_node_allowed() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since prepare_alloc_pages() unconditionally adds __GFP_HARDWALL for the fast path when cpusets are enabled, the __GFP_HARDWALL check in cpuset_current_node_allowed() causes the PF_EXITING escape path to be skipped on the first allocation attempt. This makes it unreachable in the common case, so dying tasks can get stuck in direct reclaim or even trigger OOM while trying to exit, despite being allowed to allocate from any node. Move the PF_EXITING check before __GFP_HARDWALL so that dying tasks can allocate memory from any node to exit quickly, even when cpusets are enabled. Also update the function comment to reflect the actual behavior of prepare_alloc_pages() and the corrected check ordering. Signed-off-by: Chen Wandun Acked-by: Michal Koutný Acked-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index e3a081a07c6d..a48901a0416a 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -4176,11 +4176,11 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) * current's mems_allowed, yes. If it's not a __GFP_HARDWALL request and this * node is set in the nearest hardwalled cpuset ancestor to current's cpuset, * yes. If current has access to memory reserves as an oom victim, yes. - * Otherwise, no. + * If the current task is PF_EXITING, yes. Otherwise, no. * * GFP_USER allocations are marked with the __GFP_HARDWALL bit, * and do not allow allocations outside the current tasks cpuset - * unless the task has been OOM killed. + * unless the task has been OOM killed or is exiting. * GFP_KERNEL allocations are not so marked, so can escape to the * nearest enclosing hardwalled ancestor cpuset. * @@ -4194,7 +4194,9 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) * The first call here from mm/page_alloc:get_page_from_freelist() * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, * so no allocation on a node outside the cpuset is allowed (unless - * in interrupt, of course). + * in interrupt, of course). The PF_EXITING check must therefore + * come before the __GFP_HARDWALL check, otherwise a dying task + * would be blocked on the fast path. * * The second pass through get_page_from_freelist() doesn't even call * here for GFP_ATOMIC calls. For those calls, the __alloc_pages() @@ -4204,6 +4206,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) * in_interrupt - any node ok (current task context irrelevant) * GFP_ATOMIC - any node ok * tsk_is_oom_victim - any node ok + * PF_EXITING - any node ok (let dying task exit quickly) * GFP_KERNEL - any node in enclosing hardwalled cpuset ok * GFP_USER - only nodes in current tasks mems allowed ok. */ @@ -4223,11 +4226,10 @@ bool cpuset_current_node_allowed(int node, gfp_t gfp_mask) */ if (unlikely(tsk_is_oom_victim(current))) return true; - if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ - return false; - if (current->flags & PF_EXITING) /* Let dying task have memory */ return true; + if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ + return false; /* Not hardwall and node outside mems_allowed: scan up cpusets */ spin_lock_irqsave(&callback_lock, flags); -- cgit v1.2.3 From 363a53749cc483409498e8f6e1525fe081f1d9d2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2026 12:09:21 -1000 Subject: sched_ext: Drop unused scx_find_sub_sched() stub scx_find_sub_sched()'s only caller, scx_bpf_sub_dispatch(), is gated on CONFIG_EXT_SUB_SCHED. When CONFIG_EXT_SUB_SCHED=n the caller compiles out and the stub becomes dead code, tripping -Wunused-function on randconfigs. Drop the stub. Fixes: 25037af712eb ("sched_ext: Add rhashtable lookup for sub-schedulers") Reported-by: kernel test robot Closes: https://lore.kernel.org/all/202605080556.42PXw8U9-lkp@intel.com/ Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 7d367c140a36..48b4834c7027 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -297,7 +297,6 @@ static void scx_set_task_sched(struct task_struct *p, struct scx_sched *sch) #else /* CONFIG_EXT_SUB_SCHED */ static struct scx_sched *scx_parent(struct scx_sched *sch) { return NULL; } static struct scx_sched *scx_next_descendant_pre(struct scx_sched *pos, struct scx_sched *root) { return pos ? NULL : root; } -static struct scx_sched *scx_find_sub_sched(u64 cgroup_id) { return NULL; } static void scx_set_task_sched(struct task_struct *p, struct scx_sched *sch) {} #endif /* CONFIG_EXT_SUB_SCHED */ -- cgit v1.2.3 From fc51cba3ebae67f967120e27162e94cfb8594479 Mon Sep 17 00:00:00 2001 From: ZhengYuan Huang Date: Wed, 25 Mar 2026 08:43:39 +0800 Subject: btrfs: fix check_chunk_block_group_mappings() to iterate all chunk maps [BUG] A corrupted image with a chunk present in the chunk tree but whose corresponding block group item is missing from the extent tree can be mounted successfully, even though check_chunk_block_group_mappings() is supposed to catch exactly this corruption at mount time. Once mounted, running btrfs balance with a usage filter (-dusage=N or -dusage=min..max) triggers a null-ptr-deref: KASAN: null-ptr-deref in range [0x0000000000000070-0x0000000000000077] RIP: 0010:chunk_usage_filter fs/btrfs/volumes.c:3874 [inline] RIP: 0010:should_balance_chunk fs/btrfs/volumes.c:4018 [inline] RIP: 0010:__btrfs_balance fs/btrfs/volumes.c:4172 [inline] RIP: 0010:btrfs_balance+0x2024/0x42b0 fs/btrfs/volumes.c:4604 [CAUSE] The crash occurs because __btrfs_balance() iterates the on-disk chunk tree, finds the orphaned chunk, calls chunk_usage_filter() (or chunk_usage_range_filter()), which queries the in-memory block group cache via btrfs_lookup_block_group(). Since no block group was ever inserted for this chunk, the lookup returns NULL, and the subsequent dereference of cache->used crashes. check_chunk_block_group_mappings() uses btrfs_find_chunk_map() to iterate the in-memory chunk map (fs_info->mapping_tree): map = btrfs_find_chunk_map(fs_info, start, 1); With @start = 0 and @length = 1, btrfs_find_chunk_map() looks for a chunk map that *contains* the logical address 0. If no chunk contains logical address 0, btrfs_find_chunk_map(fs_info, 0, 1) returns NULL immediately and the loop breaks after the very first iteration, having checked zero chunks. The entire verification function is therefore a no-op, and the corrupted image passes the mount-time check undetected. [FIX] Replace the btrfs_find_chunk_map() based loop with a direct in-order walk of fs_info->mapping_tree using rb_first_cached() + rb_next(). This guarantees that every chunk map in the tree is visited regardless of the logical addresses involved. No lock is taken around the traversal. This function is called during mount from btrfs_read_block_groups(), which is invoked from open_ctree() before any background threads (cleaner, transaction kthread, etc.) are started. There are therefore no concurrent writers that could modify mapping_tree at this point. An analogous lockless direct traversal of mapping_tree already exists in fill_dummy_bgs() in the same file. Since we walk the rb-tree directly via rb_entry() without going through btrfs_find_chunk_map(), no reference is taken on each map entry, so the btrfs_free_chunk_map() calls are also removed. Signed-off-by: ZhengYuan Huang Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index e6f5a17a13e3..b611c64119db 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2412,29 +2412,25 @@ static struct btrfs_block_group *btrfs_create_block_group( */ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) { - u64 start = 0; + struct rb_node *node; int ret = 0; - while (1) { + /* + * This is called during mount from btrfs_read_block_groups(), before + * any background threads are started, so no concurrent writers can + * modify the mapping_tree. No lock is needed here. + */ + for (node = rb_first_cached(&fs_info->mapping_tree); node; node = rb_next(node)) { struct btrfs_chunk_map *map; struct btrfs_block_group *bg; - /* - * btrfs_find_chunk_map() will return the first chunk map - * intersecting the range, so setting @length to 1 is enough to - * get the first chunk. - */ - map = btrfs_find_chunk_map(fs_info, start, 1); - if (!map) - break; - + map = rb_entry(node, struct btrfs_chunk_map, rb_node); bg = btrfs_lookup_block_group(fs_info, map->start); if (unlikely(!bg)) { btrfs_err(fs_info, "chunk start=%llu len=%llu doesn't have corresponding block group", map->start, map->chunk_len); ret = -EUCLEAN; - btrfs_free_chunk_map(map); break; } if (unlikely(bg->start != map->start || bg->length != map->chunk_len || @@ -2447,12 +2443,9 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) bg->start, bg->length, bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK); ret = -EUCLEAN; - btrfs_free_chunk_map(map); btrfs_put_block_group(bg); break; } - start = map->start + map->chunk_len; - btrfs_free_chunk_map(map); btrfs_put_block_group(bg); } return ret; -- cgit v1.2.3 From 4822703b150fc25f7bdb8cf266a482619881a97e Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Wed, 29 Apr 2026 00:10:25 -0700 Subject: btrfs: always pass __GFP_NOWARN from add_ra_bio_pages() A build workload newly prints order-0 allocation failures on 7.1-rc1: sh: page allocation failure: order:0 mode:0x14084a(__GFP_HIGHMEM|__GFP_MOVABLE|__GFP_IO|__GFP_KSWAPD_RECLAIM| __GFP_COMP|__GFP_HARDWALL) CPU: 27 UID: 1000 PID: 855540 Comm: sh Not tainted 7.1.0-rc1-llvm-00058-gdca922e019dd #1 PREEMPTLAZY Call Trace: dump_stack_lvl+0x50/0x70 warn_alloc+0xeb/0x100 __alloc_pages_slowpath+0x567/0x5a0 ? filemap_get_entry+0x11a/0x140 __alloc_frozen_pages_noprof+0x249/0x2d0 alloc_pages_mpol+0xe4/0x180 folio_alloc_noprof+0x80/0xa0 add_ra_bio_pages+0x13c/0x4b0 btrfs_submit_compressed_read+0x229/0x300 submit_one_bio+0x9e/0xe0 btrfs_readahead+0x185/0x1a0 [...] (lldb) source list -a add_ra_bio_pages+0x13c .../vmlinux.unstripped add_ra_bio_pages + 316 at .../fs/btrfs/compression.c:454:8 451 452 folio = filemap_alloc_folio(mapping_gfp_constraint(mapping, constraint_gfp), 453 0, NULL); -> 454 if (!folio) 455 break; I can reproduce this consistently by running a memory hog concurrently with a buffered writer on a machine with a very large amount of swap. Commit 7ae37b2c94ed ("btrfs: prevent direct reclaim during compressed readahead") clearly intended to suppress these warnings. But because the mask set in the address_space with mapping_set_gfp_mask() doesn't include __GFP_NOWARN, mapping_gfp_constraint() removes it from constraint_gfp before it is passed to filemap_alloc_folio(). Fix by refactoring the code to add __GFP_NOWARN after the call to mapping_gfp_constraint(). Fixes: 7ae37b2c94ed ("btrfs: prevent direct reclaim during compressed readahead") Signed-off-by: Calvin Owens Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index c5783ac1b646..e2ef01a59d04 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -407,22 +407,18 @@ static noinline int add_ra_bio_pages(struct inode *inode, end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; - /* - * Avoid direct reclaim when the caller does not allow it. Since - * add_ra_bio_pages() is always speculative, suppress allocation warnings - * in either case. - */ + /* Avoid direct reclaim when the caller does not allow it. */ + constraint_gfp = ~__GFP_FS; + cache_gfp = GFP_NOFS | __GFP_NOWARN; if (!direct_reclaim) { - constraint_gfp = ~(__GFP_FS | __GFP_DIRECT_RECLAIM) | __GFP_NOWARN; - cache_gfp = (GFP_NOFS & ~__GFP_DIRECT_RECLAIM) | __GFP_NOWARN; - } else { - constraint_gfp = (~__GFP_FS) | __GFP_NOWARN; - cache_gfp = GFP_NOFS | __GFP_NOWARN; + constraint_gfp &= ~__GFP_DIRECT_RECLAIM; + cache_gfp &= ~__GFP_DIRECT_RECLAIM; } while (cur < compressed_end) { pgoff_t page_end; pgoff_t pg_index = cur >> PAGE_SHIFT; + gfp_t masked_constraint_gfp; u32 add_size; if (pg_index > end_index) @@ -449,8 +445,14 @@ static noinline int add_ra_bio_pages(struct inode *inode, continue; } - folio = filemap_alloc_folio(mapping_gfp_constraint(mapping, constraint_gfp), - 0, NULL); + /* + * Since add_ra_bio_pages() is always speculative, suppress + * allocation warnings. + */ + masked_constraint_gfp = mapping_gfp_constraint(mapping, constraint_gfp); + masked_constraint_gfp |= __GFP_NOWARN; + + folio = filemap_alloc_folio(masked_constraint_gfp, 0, NULL); if (!folio) break; -- cgit v1.2.3 From c73370c677646e86fc4b1780fb07027bdf847375 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 28 Apr 2026 16:58:56 +0100 Subject: btrfs: tracepoints: fix sleep while in atomic context in btrfs_sync_file() The trace event btrfs_sync_file() is called in an atomic context (all trace events are) and its call to dput(), which is needed due to the call to dget_parent(), can sleep, triggering a kernel splat. This can be reproduced by enabling the trace event and running btrfs/056 from fstests for example. The splat shown in dmesg is the following: [53.919] BUG: sleeping function called from invalid context at fs/dcache.c:970 [53.947] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 32773, name: xfs_io [53.988] preempt_count: 2, expected: 0 [53.967] RCU nest depth: 0, expected: 0 [53.943] Preemption disabled at: [53.944] [<0000000000000000>] 0x0 [54.078] CPU: 0 UID: 0 PID: 32773 Comm: xfs_io Tainted: G W 7.1.0-rc1-btrfs-next-232+ #1 PREEMPT(full) [54.070] Tainted: [W]=WARN [54.071] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014 [54.072] Call Trace: [54.074] [54.076] dump_stack_lvl+0x56/0x80 [54.079] __might_resched.cold+0xd6/0x10f [54.072] dput.part.0+0x24/0x110 [54.078] trace_event_raw_event_btrfs_sync_file+0x75/0x140 [btrfs] [54.089] btrfs_sync_file+0x1ed/0x530 [btrfs] [54.087] ? __handle_mm_fault+0x8ae/0xed0 [54.089] btrfs_do_write_iter+0x172/0x210 [btrfs] [54.091] vfs_write+0x21f/0x450 [54.094] __x64_sys_pwrite64+0x8d/0xc0 [54.096] ? do_user_addr_fault+0x20c/0x670 [54.099] do_syscall_64+0x60/0xf20 [54.092] ? clear_bhb_loop+0x60/0xb0 [54.094] entry_SYSCALL_64_after_hwframe+0x76/0x7e So stop using dget_parent() and dput() and access the parent dentry directly as dentry->d_parent. This is also what ext4 is doing in its equivalent trace event ext4_sync_file_enter(). Fixes: a85b46db143f ("btrfs: tracepoints: get correct superblock from dentry in event btrfs_sync_file()") Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 8ad7a2d76c1d..ec1df8b94517 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -771,10 +771,8 @@ TRACE_EVENT(btrfs_sync_file, TP_fast_assign( struct dentry *dentry = file_dentry(file); struct inode *inode = file_inode(file); - struct dentry *parent = dget_parent(dentry); - struct inode *parent_inode = d_inode(parent); + struct inode *parent_inode = d_inode(dentry->d_parent); - dput(parent); TP_fast_assign_fsid(btrfs_sb(inode->i_sb)); __entry->ino = btrfs_ino(BTRFS_I(inode)); __entry->parent = btrfs_ino(BTRFS_I(parent_inode)); -- cgit v1.2.3 From 4066c55e109475a06d18a1f127c939d551211956 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 30 Apr 2026 10:37:22 +0930 Subject: btrfs: only release the dirty pages io tree after successful writes [WARNING] With extra warning on dirty extent buffers at umount (aka, the next patch in the series), test case generic/388 can trigger the following warning about dirty extent buffers at unmount time: BTRFS critical (device dm-2 state E): emergency shutdown BTRFS error (device dm-2 state E): error while writing out transaction: -30 BTRFS warning (device dm-2 state E): Skipping commit of aborted transaction. BTRFS error (device dm-2 state EA): Transaction 9 aborted (error -30) BTRFS: error (device dm-2 state EA) in cleanup_transaction:2068: errno=-30 Readonly filesystem BTRFS info (device dm-2 state EA): forced readonly BTRFS info (device dm-2 state EA): last unmount of filesystem 4fbf2e15-f941-49a0-bc7c-716315d2777c ------------[ cut here ]------------ WARNING: disk-io.c:3311 at invalidate_and_check_btree_folios+0xfd/0x1ca [btrfs], CPU#8: umount/914368 CPU: 8 UID: 0 PID: 914368 Comm: umount Tainted: G OE 7.1.0-rc1-custom+ #372 PREEMPT(full) 2de38db8d1deae71fde295430a0ff3ab98ccf596 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown 02/02/2022 RIP: 0010:invalidate_and_check_btree_folios+0xfd/0x1ca [btrfs] Call Trace: close_ctree+0x52e/0x574 [btrfs d2f0b1cd330d1287e7a9919d112eadfc0e914efd] generic_shutdown_super+0x89/0x1a0 kill_anon_super+0x16/0x40 btrfs_kill_super+0x16/0x20 [btrfs d2f0b1cd330d1287e7a9919d112eadfc0e914efd] deactivate_locked_super+0x2d/0xb0 cleanup_mnt+0xdc/0x140 task_work_run+0x5a/0xa0 exit_to_user_mode_loop+0x123/0x4b0 do_syscall_64+0x243/0x7c0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 ---[ end trace 0000000000000000 ]--- BTRFS warning (device dm-2 state EA): unable to release extent buffer 30539776 owner 9 gen 9 refs 2 flags 0x7 BTRFS warning (device dm-2 state EA): unable to release extent buffer 30621696 owner 257 gen 9 refs 2 flags 0x7 BTRFS warning (device dm-2 state EA): unable to release extent buffer 30638080 owner 258 gen 9 refs 2 flags 0x7 BTRFS warning (device dm-2 state EA): unable to release extent buffer 30654464 owner 7 gen 9 refs 2 flags 0x7 BTRFS warning (device dm-2 state EA): unable to release extent buffer 30703616 owner 2 gen 9 refs 2 flags 0x7 BTRFS warning (device dm-2 state EA): unable to release extent buffer 30720000 owner 10 gen 9 refs 2 flags 0x7 BTRFS warning (device dm-2 state EA): unable to release extent buffer 30736384 owner 4 gen 9 refs 2 flags 0x7 BTRFS warning (device dm-2 state EA): unable to release extent buffer 30752768 owner 11 gen 9 refs 2 flags 0x7 I'm using a stripped down version, which seems to trigger the warning more reliably: _fsstress_pid="" workload() { dmesg -C mkfs.btrfs -f -K $dev > /dev/null echo 1 > /sys/kernel/debug/clear_warn_once mount $dev $mnt $fsstress -w -n 1024 -p 4 -d $mnt & _fsstress_pid=$! sleep 0 $godown $mnt pkill --echo -PIPE fsstress > /dev/null wait $_fsstress_pid unset _fsstress_pid umount $mnt if dmesg | grep -q "WARNING"; then fail fi } for (( i = 0; i < $runtime; i++ )); do echo "=== $i/$runtime ===" workload done [CAUSE] Inside btrfs_write_and_wait_transaction(), we first try to write all dirty ebs, then wait for them to finish. After that we call btrfs_extent_io_tree_release() to free all extent states from dirty_pages io tree. However if we hit an error from btrfs_write_marked_extent(), then we still call btrfs_extent_io_tree_release() to clear that dirty_pages io tree, which may contain dirty records that we haven't yet submitted. Furthermore, the later transaction cleanup path will utilize that dirty_pages io tree to properly cleanup those dirty ebs, but since it's already empty, no dirty ebs are properly cleaned up, thus will later trigger the warnings inside invalidate_btree_folios(). [FIX] Normally such dirty ebs won't cause problems, as when the iput() is called on the btree inode, the dirty ebs will be forcibly written back, and since the fs is already in an error status, such writeback will not reach disk and finish immediately. But it's still better to get rid of such dirty ebs, if we ended up with dirty ebs but the fs is not in an error status, then such writeback at iput() time will be too late, as all workers are already stopped but writeback will utilize workers, which will lead to NULL pointer dereferences. Instead of unconditionally calling btrfs_extent_io_tree_release(), only call it if btrfs_write_and_wait_transaction() finished successfully, so that @dirty_pages extent io tree is kept untouched for transaction cleanup. CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 1 + fs/btrfs/transaction.c | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8a11be02eeb9..c0a30bb213d7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4686,6 +4686,7 @@ static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, free_extent_buffer_stale(eb); } } + btrfs_extent_io_tree_release(dirty_pages); } static void btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 248adb785051..194f581b36f3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1293,14 +1293,13 @@ static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans) blk_finish_plug(&plug); ret2 = btrfs_wait_extents(fs_info, dirty_pages); - btrfs_extent_io_tree_release(&trans->transaction->dirty_pages); - if (ret) return ret; - else if (ret2) + if (ret2) return ret2; - else - return 0; + + btrfs_extent_io_tree_release(&trans->transaction->dirty_pages); + return 0; } /* -- cgit v1.2.3 From c562ba61fc5e11798720acc1b172862158f1fa0b Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Fri, 1 May 2026 10:41:56 +0800 Subject: btrfs: fix incorrect i_size after remount caused by KEEP_SIZE prealloc gap When fallocate() with FALLOC_FL_KEEP_SIZE preallocates an extent past the current i_size, the file_extent_tree of the inode is updated to cover that range. However, on the next mount, btrfs_read_locked_inode() only re-populates file_extent_tree with [0, round_up(i_size, sectorsize)), losing the marks that belonged to the KEEP_SIZE prealloc extent beyond i_size. Later, when a non-KEEP_SIZE fallocate() extends i_size into / past that old prealloc extent, the reservation loop in btrfs_fallocate() skips already-prealloc segments and does not call into the path that marks the file_extent_tree, so a gap remains inside the file_extent_tree across [old_aligned_i_size, start_of_new_alloc). Then __btrfs_prealloc_file_range() calls btrfs_inode_safe_disk_i_size_write(), which uses find_contiguous_extent_bit() starting at offset 0 to derive disk_i_size. The walk stops at the gap, so disk_i_size ends up smaller than i_size and gets persisted. After the next mount, the file shows the wrong (smaller) size. The following reproducer triggers the problem: $ cat test.sh MNT=/mnt/sdi DEV=/dev/sdi mkdir -p $MNT mkfs.btrfs -f -O ^no-holes $DEV mount $DEV $MNT touch $MNT/file1 # KEEP_SIZE prealloc beyond i_size (i_size stays 0) fallocate -n -o 4M -l 4M $MNT/file1 umount $MNT mount $DEV $MNT # non-KEEP_SIZE fallocate that overlaps the previous prealloc tail # and extends past it fallocate -o 7M -l 2M $MNT/file1 ls -lh $MNT/file1 umount $MNT mount $DEV $MNT ls -lh $MNT/file1 umount $MNT Running the reproducer gives the following result: $ ./test.sh (...) -rw-rw-r-- 1 root root 9.0M May 4 16:35 /mnt/sdi/file1 -rw-rw-r-- 1 root root 7.0M May 4 16:35 /mnt/sdi/file1 The size before the second mount is correct (9M), but after the remount it drops to 7M, i.e. the start of the gap inside file_extent_tree. Fix this in __btrfs_prealloc_file_range() by marking the entire range [round_down(old_i_size, sectorsize), round_up(new_i_size, sectorsize)) in file_extent_tree before updating i_size and calling btrfs_inode_safe_disk_i_size_write(). This ensures the contiguous bit search starting from 0 is not truncated by a stale gap left behind by a previous KEEP_SIZE prealloc that was not restored on inode load. The fix has no effect when the NO_HOLES feature is enabled because btrfs_inode_safe_disk_i_size_write() and btrfs_inode_set_file_extent_range() both take the fast path that directly tracks disk_i_size without consulting file_extent_tree. Fixes: 9ddc959e802b ("btrfs: use the file extent tree infrastructure") Reviewed-by: Filipe Manana Signed-off-by: Robbie Ko [ Minor updates to the change log ] Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 906d5c21ebc4..75136a172710 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9299,10 +9299,38 @@ next: if (!(mode & FALLOC_FL_KEEP_SIZE) && (actual_len > inode->i_size) && (cur_offset > inode->i_size)) { + u64 range_start; + u64 range_end; + if (cur_offset > actual_len) i_size = actual_len; else i_size = cur_offset; + + /* + * Make sure the file_extent_tree covers the entire + * range [old_i_size, new_i_size) before we update + * disk_i_size. Without this, a previous KEEP_SIZE + * prealloc that extended past i_size (and was lost + * across umount/mount because file_extent_tree is + * only populated up to round_up(i_size) on inode + * load) can leave a gap inside this range. That gap + * would cause btrfs_inode_safe_disk_i_size_write() + * (via find_contiguous_extent_bit() starting at 0) + * to truncate disk_i_size to the start of the gap, + * making the persisted size smaller than i_size. + */ + range_start = round_down(inode->i_size, fs_info->sectorsize); + range_end = round_up(i_size, fs_info->sectorsize); + ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), + range_start, range_end - range_start); + if (ret) { + btrfs_abort_transaction(trans, ret); + if (own_trans) + btrfs_end_transaction(trans); + break; + } + i_size_write(inode, i_size); btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0); } -- cgit v1.2.3 From 3607422cdeebd1f0c259964bf33aaa9792e21930 Mon Sep 17 00:00:00 2001 From: Markus Stockhausen Date: Sat, 2 May 2026 19:32:06 +0200 Subject: hwmon: (lm75) Fix AS6200 and TMP112 setup and alarm handling The initialization of the AS6200 has two shortcomings - The device-add-commit states "Conversion mode: continuous" but the the lm75_params structure uses set_mask = 0x94c0. This activates single shot mode (bit 15). According to the datasheet "The device features a single shot measurement mode if the device is in sleep mode (SM=1)". This is quite contradictionary. - It is the only device that activates polarity active-high (bit 10) All this is paired with a undefined clear mask bug in function lm75_write_config() that was introduced with a later refactoring commit. [as6200] = { .config_reg_16bits = true, .set_mask = 0x94C0, -> .clr_mask not defined here .default_resolution = 12, ... static inline int lm75_write_config(struct lm75_data *data, u16 set_mask, u16 clr_mask) { return regmap_update_bits(data->regmap, LM75_REG_CONF, clr_mask | LM75_SHUTDOWN, set_mask); } regmap_update_bits() requires clr_mask to be a superset of set_mask. So basically all sensors with "wrong" masks like the AS6200 are not initialized as intended. Fix that by - Change the set_mask to 0xc010 to reflect the current active-low setup properly and to drive the sensor in continous mode. This takes into account that the config register is little endian and the first byte sent to the chip is the LSB. - Adapt the alarm handling so it can report the alarm correctly even if it is high active. This is done by comparing config register bit 5 and 10 (translated to 2 and 13). This commit does not introduce any ABI breakage as the mutliple bugs effectly drive the AS6200 in standard active-low mode. Fixes: 4b6358e1fe46 ("hwmon: (lm75) Add AMS AS6200 temperature sensor") Suggested-by: Guenter Roeck Signed-off-by: Markus Stockhausen Link: https://lore.kernel.org/r/20260502173207.3567876-2-markus.stockhausen@gmx.de [groeck: Update set_mask for as6200 further: As modeled, the upper bits contain the conversion rate, so the config register needs to be set to 0xc010 instead of 0x10c0 to reflect 8 samples/s and 4 consecutive faults. Fix the same problem for TMP112.] Signed-off-by: Guenter Roeck --- drivers/hwmon/lm75.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index f1a1e5b888f6..a1bf4e9813ed 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -137,7 +137,7 @@ static const struct lm75_params device_params[] = { }, [as6200] = { .config_reg_16bits = true, - .set_mask = 0x94C0, /* 8 sample/s, 4 CF, positive polarity */ + .set_mask = 0xC010, /* 8 sample/s, 4 CF */ .default_resolution = 12, .default_sample_time = 125, .num_sample_times = 4, @@ -286,8 +286,8 @@ static const struct lm75_params device_params[] = { }, [tmp112] = { .config_reg_16bits = true, - .set_mask = 0x60C0, /* 12-bit mode, 8 samples / second */ - .clr_mask = 1 << 15, /* no one-shot mode*/ + .set_mask = 0xC060, /* 12-bit mode, 8 samples / second */ + .clr_mask = 1 << 7, /* no one-shot mode*/ .default_resolution = 12, .default_sample_time = 125, .num_sample_times = 4, @@ -416,7 +416,7 @@ static int lm75_read(struct device *dev, enum hwmon_sensor_types type, switch (data->kind) { case as6200: case tmp112: - *val = (regval >> 13) & 0x1; + *val = !!(regval & BIT(13)) == !!(regval & BIT(2)); break; default: return -EINVAL; -- cgit v1.2.3 From 05aaac8746c5786eaa779b163fae4ddcd5172707 Mon Sep 17 00:00:00 2001 From: Markus Stockhausen Date: Sat, 2 May 2026 19:32:07 +0200 Subject: hwmon: (lm75) Fix configuration register writes. Sensors configurations are defined by set and clear masks. These do not follow a consistent "clear mask is a superset of set mask" rule. This relaxed definition breaks lm75_write_config() static inline int lm75_write_config(struct lm75_data *data, u16 set_mask, u16 clr_mask) { return regmap_update_bits(data->regmap, LM75_REG_CONF, clr_mask | LM75_SHUTDOWN, set_mask); } Basically all bits from set_mask that are not defined in clr_mask are dropped. Fix that by enhancing the helper to always combine clr_mask and set_mask into the mask bits of regmap_update_bits(). Fixes: 6da24a25f766 ("hwmon: (lm75) Hide register size differences in regmap access functions") Suggested-by: Guenter Roeck Signed-off-by: Markus Stockhausen Link: https://lore.kernel.org/r/20260502173207.3567876-3-markus.stockhausen@gmx.de Signed-off-by: Guenter Roeck --- drivers/hwmon/lm75.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index a1bf4e9813ed..c283443e363b 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -353,7 +353,7 @@ static inline int lm75_write_config(struct lm75_data *data, u16 set_mask, u16 clr_mask) { return regmap_update_bits(data->regmap, LM75_REG_CONF, - clr_mask | LM75_SHUTDOWN, set_mask); + clr_mask | set_mask | LM75_SHUTDOWN, set_mask); } static irqreturn_t lm75_alarm_handler(int irq, void *private) -- cgit v1.2.3 From 99076a17a112ac43cbd37f6883898ae649166303 Mon Sep 17 00:00:00 2001 From: Tabrez Ahmed Date: Sat, 2 May 2026 07:38:42 +0530 Subject: hwmon: (ads7871) Fix endianness bug in 16-bit register reads The ads7871_read_reg16() function relies on spi_w8r16() to read the 16-bit sensor output. The ADS7871 device transmits the Least Significant Byte (LSB) first. On Little-Endian architectures, spi_w8r16() correctly reconstructs the 16-bit value. However, on Big-Endian architectures, the byte swapping causes the first received byte (LSB) to be placed in the most significant byte of the u16, resulting in corrupted voltage readings. To fix this, cast the integer result of spi_w8r16() to a restricted __le16 type and convert it to the host CPU's native byte order using le16_to_cpu(). Negative error codes returned by the SPI core are caught and returned prior to the conversion to avoid mangling the error status. Reported-by: Sashiko Closes: https://sashiko.dev/#/patchset/20260418034601.90226-1-tabreztalks@gmail.com Fixes: e0c70b8078629 ("hwmon: add TI ads7871 a/d converter driver") Suggested-by: David Laight Signed-off-by: Tabrez Ahmed Link: https://lore.kernel.org/r/20260502020844.110038-2-tabreztalks@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ads7871.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/ads7871.c b/drivers/hwmon/ads7871.c index 9bfdf9e6bcd7..9ee3ce01f130 100644 --- a/drivers/hwmon/ads7871.c +++ b/drivers/hwmon/ads7871.c @@ -77,9 +77,13 @@ static int ads7871_read_reg8(struct spi_device *spi, int reg) static int ads7871_read_reg16(struct spi_device *spi, int reg) { int ret; + reg = reg | INST_READ_BM | INST_16BIT_BM; ret = spi_w8r16(spi, reg); - return ret; + if (ret < 0) + return ret; + + return le16_to_cpu((__force __le16)ret); } static int ads7871_write_reg8(struct spi_device *spi, int reg, u8 val) -- cgit v1.2.3 From 8e72510db9fa2d41f2b06d5c01fe9020e076fee4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 May 2026 12:07:13 +0200 Subject: netfilter: x_tables: allow initial table replace without emitting audit log message At the moment we emit the audit log a bit too early, which makes it necessary to also emit an unregister log in case we have to unwind errors after possible hook register failure. Followup patch will be slightly simpler if we can delay the register message until after the hooks have been wired up. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 2c67c2e6b132..bb0cb3959551 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1472,11 +1472,9 @@ struct xt_counters *xt_counters_alloc(unsigned int counters) } EXPORT_SYMBOL(xt_counters_alloc); -struct xt_table_info * -xt_replace_table(struct xt_table *table, - unsigned int num_counters, - struct xt_table_info *newinfo, - int *error) +static struct xt_table_info * +do_replace_table(struct xt_table *table, unsigned int num_counters, + struct xt_table_info *newinfo, int *error) { struct xt_table_info *private; unsigned int cpu; @@ -1531,10 +1529,23 @@ xt_replace_table(struct xt_table *table, } } - audit_log_nfcfg(table->name, table->af, private->number, - !private->number ? AUDIT_XT_OP_REGISTER : - AUDIT_XT_OP_REPLACE, - GFP_KERNEL); + return private; +} + +struct xt_table_info * +xt_replace_table(struct xt_table *table, unsigned int num_counters, + struct xt_table_info *newinfo, + int *error) +{ + struct xt_table_info *private; + + private = do_replace_table(table, num_counters, newinfo, error); + if (private) + audit_log_nfcfg(table->name, table->af, private->number, + !private->number ? AUDIT_XT_OP_REGISTER : + AUDIT_XT_OP_REPLACE, + GFP_KERNEL); + return private; } EXPORT_SYMBOL_GPL(xt_replace_table); -- cgit v1.2.3 From b62eb8dcf2c47d4d676a434efbd57c4f776f7829 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 May 2026 12:07:14 +0200 Subject: netfilter: x_tables: allocate hook ops while under mutex arp/ip(6)t_register_table() add the table to the per-netns list via xt_register_table() before allocating the per-netns hook ops copy via kmemdup_array(). This leaves a window where the table is visible in the list with ops=NULL. If the pernet exit happens runs concurrently the pre_exit callback finds the table via xt_find_table() and passes the NULL ops pointer to nf_unregister_net_hooks(), causing a NULL dereference: general protection fault in nf_unregister_net_hooks+0xbc/0x150 RIP: nf_unregister_net_hooks (net/netfilter/core.c:613) Call Trace: ipt_unregister_table_pre_exit iptable_mangle_net_pre_exit ops_pre_exit_list cleanup_net Fix by moving the ops allocation into the xtables core so the table is never in the list without valid ops. Also ensure the table is no longer processing packets before its torn down on error unwind. nf_register_net_hooks might have published at least one hook; call synchronize_rcu() if there was an error. audit log register message gets deferred until all operations have passed, this avoids need to emit another ureg message in case of error unwinding. Based on earlier patch by Tristan Madani. Fixes: f9006acc8dfe5 ("netfilter: arp_tables: pass table pointer via nf_hook_ops") Fixes: ee177a54413a ("netfilter: ip6_tables: pass table pointer via nf_hook_ops") Fixes: ae689334225f ("netfilter: ip_tables: pass table pointer via nf_hook_ops") Link: https://lore.kernel.org/netfilter-devel/20260429175613.1459342-1-tristmd@gmail.com/ Signed-off-by: Tristan Madani Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 1 + net/ipv4/netfilter/arp_tables.c | 35 +++----------------------- net/ipv4/netfilter/ip_tables.c | 41 +++---------------------------- net/ipv6/netfilter/ip6_tables.c | 38 +++-------------------------- net/netfilter/x_tables.c | 50 ++++++++++++++++++++++++++++++++------ 5 files changed, 55 insertions(+), 110 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index a81b46af5118..cb4b694dd9e4 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -305,6 +305,7 @@ struct xt_counters *xt_counters_alloc(unsigned int counters); struct xt_table *xt_register_table(struct net *net, const struct xt_table *table, + const struct nf_hook_ops *template_ops, struct xt_table_info *bootstrap, struct xt_table_info *newinfo); void *xt_unregister_table(struct xt_table *table); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 97ead883e4a1..c02e46a0271a 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1522,13 +1522,11 @@ int arpt_register_table(struct net *net, const struct arpt_replace *repl, const struct nf_hook_ops *template_ops) { - struct nf_hook_ops *ops; - unsigned int num_ops; - int ret, i; - struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; - void *loc_cpu_entry; + struct xt_table_info *newinfo; struct xt_table *new_table; + void *loc_cpu_entry; + int ret; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) @@ -1543,7 +1541,7 @@ int arpt_register_table(struct net *net, return ret; } - new_table = xt_register_table(net, table, &bootstrap, newinfo); + new_table = xt_register_table(net, table, template_ops, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct arpt_entry *iter; @@ -1553,31 +1551,6 @@ int arpt_register_table(struct net *net, return PTR_ERR(new_table); } - num_ops = hweight32(table->valid_hooks); - if (num_ops == 0) { - ret = -EINVAL; - goto out_free; - } - - ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); - if (!ops) { - ret = -ENOMEM; - goto out_free; - } - - for (i = 0; i < num_ops; i++) - ops[i].priv = new_table; - - new_table->ops = ops; - - ret = nf_register_net_hooks(net, ops, num_ops); - if (ret != 0) - goto out_free; - - return ret; - -out_free: - __arpt_unregister_table(net, new_table); return ret; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 23c8deff8095..488c5945ebb2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1724,13 +1724,11 @@ int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *template_ops) { - struct nf_hook_ops *ops; - unsigned int num_ops; - int ret, i; - struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; - void *loc_cpu_entry; + struct xt_table_info *newinfo; struct xt_table *new_table; + void *loc_cpu_entry; + int ret; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) @@ -1745,7 +1743,7 @@ int ipt_register_table(struct net *net, const struct xt_table *table, return ret; } - new_table = xt_register_table(net, table, &bootstrap, newinfo); + new_table = xt_register_table(net, table, template_ops, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct ipt_entry *iter; @@ -1755,37 +1753,6 @@ int ipt_register_table(struct net *net, const struct xt_table *table, return PTR_ERR(new_table); } - /* No template? No need to do anything. This is used by 'nat' table, it registers - * with the nat core instead of the netfilter core. - */ - if (!template_ops) - return 0; - - num_ops = hweight32(table->valid_hooks); - if (num_ops == 0) { - ret = -EINVAL; - goto out_free; - } - - ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); - if (!ops) { - ret = -ENOMEM; - goto out_free; - } - - for (i = 0; i < num_ops; i++) - ops[i].priv = new_table; - - new_table->ops = ops; - - ret = nf_register_net_hooks(net, ops, num_ops); - if (ret != 0) - goto out_free; - - return ret; - -out_free: - __ipt_unregister_table(net, new_table); return ret; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d585ac3c1113..dbe7c7acd702 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1733,13 +1733,11 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl, const struct nf_hook_ops *template_ops) { - struct nf_hook_ops *ops; - unsigned int num_ops; - int ret, i; - struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; - void *loc_cpu_entry; + struct xt_table_info *newinfo; struct xt_table *new_table; + void *loc_cpu_entry; + int ret; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) @@ -1754,7 +1752,7 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, return ret; } - new_table = xt_register_table(net, table, &bootstrap, newinfo); + new_table = xt_register_table(net, table, template_ops, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct ip6t_entry *iter; @@ -1764,34 +1762,6 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, return PTR_ERR(new_table); } - if (!template_ops) - return 0; - - num_ops = hweight32(table->valid_hooks); - if (num_ops == 0) { - ret = -EINVAL; - goto out_free; - } - - ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); - if (!ops) { - ret = -ENOMEM; - goto out_free; - } - - for (i = 0; i < num_ops; i++) - ops[i].priv = new_table; - - new_table->ops = ops; - - ret = nf_register_net_hooks(net, ops, num_ops); - if (ret != 0) - goto out_free; - - return ret; - -out_free: - __ip6t_unregister_table(net, new_table); return ret; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index bb0cb3959551..06f27bea9eed 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1542,7 +1542,6 @@ xt_replace_table(struct xt_table *table, unsigned int num_counters, private = do_replace_table(table, num_counters, newinfo, error); if (private) audit_log_nfcfg(table->name, table->af, private->number, - !private->number ? AUDIT_XT_OP_REGISTER : AUDIT_XT_OP_REPLACE, GFP_KERNEL); @@ -1552,20 +1551,32 @@ EXPORT_SYMBOL_GPL(xt_replace_table); struct xt_table *xt_register_table(struct net *net, const struct xt_table *input_table, + const struct nf_hook_ops *template_ops, struct xt_table_info *bootstrap, struct xt_table_info *newinfo) { struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); + struct xt_table *t, *table = NULL; + struct nf_hook_ops *ops = NULL; struct xt_table_info *private; - struct xt_table *t, *table; - int ret; + unsigned int num_ops; + int ret = -EINVAL; + + num_ops = hweight32(input_table->valid_hooks); + if (num_ops == 0) + goto out; + + ret = -ENOMEM; + if (template_ops) { + ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); + if (!ops) + goto out; + } /* Don't add one object to multiple lists. */ table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); - if (!table) { - ret = -ENOMEM; + if (!table) goto out; - } mutex_lock(&xt[table->af].mutex); /* Don't autoload: we'd eat our tail... */ @@ -1579,7 +1590,7 @@ struct xt_table *xt_register_table(struct net *net, /* Simplifies replace_table code. */ table->private = bootstrap; - if (!xt_replace_table(table, 0, newinfo, &ret)) + if (!do_replace_table(table, 0, newinfo, &ret)) goto unlock; private = table->private; @@ -1588,14 +1599,37 @@ struct xt_table *xt_register_table(struct net *net, /* save number of initial entries */ private->initial_entries = private->number; + if (ops) { + int i; + + for (i = 0; i < num_ops; i++) + ops[i].priv = table; + + ret = nf_register_net_hooks(net, ops, num_ops); + if (ret != 0) { + mutex_unlock(&xt[table->af].mutex); + /* nf_register_net_hooks() might have published a + * base chain before internal error unwind. + */ + synchronize_rcu(); + goto out; + } + + table->ops = ops; + } + + audit_log_nfcfg(table->name, table->af, private->number, + AUDIT_XT_OP_REGISTER, GFP_KERNEL); + list_add(&table->list, &xt_net->tables[table->af]); mutex_unlock(&xt[table->af].mutex); return table; unlock: mutex_unlock(&xt[table->af].mutex); - kfree(table); out: + kfree(table); + kfree(ops); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(xt_register_table); -- cgit v1.2.3 From 527d6931473b75d90e38942aae6537d1a527f1fd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 May 2026 12:07:15 +0200 Subject: netfilter: x_tables: add and use xt_unregister_table_pre_exit Remove the copypasted variants of _pre_exit and add one single function in the xtables core. ebtables is not compatible with x_tables and therefore unchanged. This is a preparation patch to reduce noise in the followup bug fixes. Reviewed-by: Tristan Madani Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 1 + include/linux/netfilter_arp/arp_tables.h | 1 - include/linux/netfilter_ipv4/ip_tables.h | 1 - include/linux/netfilter_ipv6/ip6_tables.h | 1 - net/ipv4/netfilter/arp_tables.c | 9 --------- net/ipv4/netfilter/arptable_filter.c | 2 +- net/ipv4/netfilter/ip_tables.c | 9 --------- net/ipv4/netfilter/iptable_filter.c | 2 +- net/ipv4/netfilter/iptable_mangle.c | 2 +- net/ipv4/netfilter/iptable_nat.c | 1 + net/ipv4/netfilter/iptable_raw.c | 2 +- net/ipv4/netfilter/iptable_security.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 9 --------- net/ipv6/netfilter/ip6table_filter.c | 2 +- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/ipv6/netfilter/ip6table_nat.c | 1 + net/ipv6/netfilter/ip6table_raw.c | 2 +- net/ipv6/netfilter/ip6table_security.c | 2 +- net/netfilter/x_tables.c | 29 +++++++++++++++++++++++++++++ 19 files changed, 41 insertions(+), 39 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index cb4b694dd9e4..74486714ae20 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -309,6 +309,7 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table_info *bootstrap, struct xt_table_info *newinfo); void *xt_unregister_table(struct xt_table *table); +void xt_unregister_table_pre_exit(struct net *net, u8 af, const char *name); struct xt_table_info *xt_replace_table(struct xt_table *table, unsigned int num_counters, diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index a40aaf645fa4..05631a25e622 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -53,7 +53,6 @@ int arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl, const struct nf_hook_ops *ops); void arpt_unregister_table(struct net *net, const char *name); -void arpt_unregister_table_pre_exit(struct net *net, const char *name); extern unsigned int arpt_do_table(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 132b0e4a6d4d..13593391d605 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -26,7 +26,6 @@ int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *ops); -void ipt_unregister_table_pre_exit(struct net *net, const char *name); void ipt_unregister_table_exit(struct net *net, const char *name); /* Standard entry. */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 8b8885a73c76..c6d5b927830d 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -27,7 +27,6 @@ extern void *ip6t_alloc_initial_table(const struct xt_table *); int ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl, const struct nf_hook_ops *ops); -void ip6t_unregister_table_pre_exit(struct net *net, const char *name); void ip6t_unregister_table_exit(struct net *net, const char *name); extern unsigned int ip6t_do_table(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c02e46a0271a..bd348b7bad2c 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1554,15 +1554,6 @@ int arpt_register_table(struct net *net, return ret; } -void arpt_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); -} -EXPORT_SYMBOL(arpt_unregister_table_pre_exit); - void arpt_unregister_table(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name); diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 78cd5ee24448..393d9a8c7739 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -43,7 +43,7 @@ static int arptable_filter_table_init(struct net *net) static void __net_exit arptable_filter_net_pre_exit(struct net *net) { - arpt_unregister_table_pre_exit(net, "filter"); + xt_unregister_table_pre_exit(net, NFPROTO_ARP, "filter"); } static void __net_exit arptable_filter_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 488c5945ebb2..864489928fb5 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1756,14 +1756,6 @@ int ipt_register_table(struct net *net, const struct xt_table *table, return ret; } -void ipt_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); -} - void ipt_unregister_table_exit(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); @@ -1854,7 +1846,6 @@ static void __exit ip_tables_fini(void) } EXPORT_SYMBOL(ipt_register_table); -EXPORT_SYMBOL(ipt_unregister_table_pre_exit); EXPORT_SYMBOL(ipt_unregister_table_exit); EXPORT_SYMBOL(ipt_do_table); module_init(ip_tables_init); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 3ab908b74795..b2fbd9651d61 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -61,7 +61,7 @@ static int __net_init iptable_filter_net_init(struct net *net) static void __net_exit iptable_filter_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "filter"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "filter"); } static void __net_exit iptable_filter_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 385d945d8ebe..a99e61996197 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -96,7 +96,7 @@ static int iptable_mangle_table_init(struct net *net) static void __net_exit iptable_mangle_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "mangle"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "mangle"); } static void __net_exit iptable_mangle_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 625a1ca13b1b..8fc4912e790d 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -129,6 +129,7 @@ static int iptable_nat_table_init(struct net *net) static void __net_exit iptable_nat_net_pre_exit(struct net *net) { ipt_nat_unregister_lookups(net); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "nat"); } static void __net_exit iptable_nat_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 0e7f53964d0a..42511721e538 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -53,7 +53,7 @@ static int iptable_raw_table_init(struct net *net) static void __net_exit iptable_raw_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "raw"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "raw"); } static void __net_exit iptable_raw_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index d885443cb267..4646bf6d7d2b 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -50,7 +50,7 @@ static int iptable_security_table_init(struct net *net) static void __net_exit iptable_security_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "security"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "security"); } static void __net_exit iptable_security_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index dbe7c7acd702..edf50bc7787e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1765,14 +1765,6 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, return ret; } -void ip6t_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); -} - void ip6t_unregister_table_exit(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); @@ -1864,7 +1856,6 @@ static void __exit ip6_tables_fini(void) } EXPORT_SYMBOL(ip6t_register_table); -EXPORT_SYMBOL(ip6t_unregister_table_pre_exit); EXPORT_SYMBOL(ip6t_unregister_table_exit); EXPORT_SYMBOL(ip6t_do_table); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index e8992693e14a..f05a9e4b2c67 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -60,7 +60,7 @@ static int __net_init ip6table_filter_net_init(struct net *net) static void __net_exit ip6table_filter_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "filter"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "filter"); } static void __net_exit ip6table_filter_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 8dd4cd0c47bd..afa4a5703e43 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -89,7 +89,7 @@ static int ip6table_mangle_table_init(struct net *net) static void __net_exit ip6table_mangle_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "mangle"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "mangle"); } static void __net_exit ip6table_mangle_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 5be723232df8..bb8aa3fc42b4 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -131,6 +131,7 @@ static int ip6table_nat_table_init(struct net *net) static void __net_exit ip6table_nat_net_pre_exit(struct net *net) { ip6t_nat_unregister_lookups(net); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "nat"); } static void __net_exit ip6table_nat_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index fc9f6754028f..32d2da81c52a 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -52,7 +52,7 @@ static int ip6table_raw_table_init(struct net *net) static void __net_exit ip6table_raw_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "raw"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "raw"); } static void __net_exit ip6table_raw_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 4df14a9bae78..3dfd8d6ea4b9 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -49,7 +49,7 @@ static int ip6table_security_table_init(struct net *net) static void __net_exit ip6table_security_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "security"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "security"); } static void __net_exit ip6table_security_net_exit(struct net *net) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 06f27bea9eed..9c1e896c7b03 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1650,6 +1650,35 @@ void *xt_unregister_table(struct xt_table *table) return private; } EXPORT_SYMBOL_GPL(xt_unregister_table); + +/** + * xt_unregister_table_pre_exit - pre-shutdown unregister of a table + * @net: network namespace + * @af: address family (e.g., NFPROTO_IPV4, NFPROTO_IPV6) + * @name: name of the table to unregister + * + * Unregisters the specified netfilter table from the given network namespace + * and also unregisters the hooks from netfilter core: no new packets will be + * processed. + */ +void xt_unregister_table_pre_exit(struct net *net, u8 af, const char *name) +{ + struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); + struct xt_table *t; + + mutex_lock(&xt[af].mutex); + list_for_each_entry(t, &xt_net->tables[af], list) { + if (strcmp(t->name, name) == 0) { + mutex_unlock(&xt[af].mutex); + + if (t->ops) /* nat table registers with nat core, t->ops is NULL. */ + nf_unregister_net_hooks(net, t->ops, hweight32(t->valid_hooks)); + return; + } + } + mutex_unlock(&xt[af].mutex); +} +EXPORT_SYMBOL(xt_unregister_table_pre_exit); #endif #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From d338693d778579b676a61346849bebd892427158 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 May 2026 12:07:16 +0200 Subject: netfilter: x_tables: unregister the templates first When the module is going away we need to zap the template first. Else there is a small race window where userspace could instantiate a new table after the pernet exit function has removed the current table. Fixes: fdacd57c79b7 ("netfilter: x_tables: never register tables by default") Reported-by: Tristan Madani Reviewed-by: Tristan Madani Closes: https://lore.kernel.org/netfilter-devel/20260429175613.1459342-1-tristmd@gmail.com/ Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arptable_filter.c | 2 +- net/ipv4/netfilter/iptable_filter.c | 2 +- net/ipv4/netfilter/iptable_mangle.c | 2 +- net/ipv4/netfilter/iptable_raw.c | 2 +- net/ipv4/netfilter/iptable_security.c | 2 +- net/ipv6/netfilter/ip6table_filter.c | 2 +- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/ipv6/netfilter/ip6table_raw.c | 2 +- net/ipv6/netfilter/ip6table_security.c | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 393d9a8c7739..382345567a60 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -82,8 +82,8 @@ static int __init arptable_filter_init(void) static void __exit arptable_filter_fini(void) { - unregister_pernet_subsys(&arptable_filter_net_ops); xt_unregister_template(&packet_filter); + unregister_pernet_subsys(&arptable_filter_net_ops); kfree(arpfilter_ops); } diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index b2fbd9651d61..0dea754a9120 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -101,8 +101,8 @@ static int __init iptable_filter_init(void) static void __exit iptable_filter_fini(void) { - unregister_pernet_subsys(&iptable_filter_net_ops); xt_unregister_template(&packet_filter); + unregister_pernet_subsys(&iptable_filter_net_ops); kfree(filter_ops); } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index a99e61996197..4d3b12492308 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -135,8 +135,8 @@ static int __init iptable_mangle_init(void) static void __exit iptable_mangle_fini(void) { - unregister_pernet_subsys(&iptable_mangle_net_ops); xt_unregister_template(&packet_mangler); + unregister_pernet_subsys(&iptable_mangle_net_ops); kfree(mangle_ops); } diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 42511721e538..6f7afec7954b 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -100,9 +100,9 @@ static int __init iptable_raw_init(void) static void __exit iptable_raw_fini(void) { + xt_unregister_template(&packet_raw); unregister_pernet_subsys(&iptable_raw_net_ops); kfree(rawtable_ops); - xt_unregister_template(&packet_raw); } module_init(iptable_raw_init); diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 4646bf6d7d2b..81175c20ccbe 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -89,9 +89,9 @@ static int __init iptable_security_init(void) static void __exit iptable_security_fini(void) { + xt_unregister_template(&security_table); unregister_pernet_subsys(&iptable_security_net_ops); kfree(sectbl_ops); - xt_unregister_template(&security_table); } module_init(iptable_security_init); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index f05a9e4b2c67..cf561919bde8 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -100,8 +100,8 @@ static int __init ip6table_filter_init(void) static void __exit ip6table_filter_fini(void) { - unregister_pernet_subsys(&ip6table_filter_net_ops); xt_unregister_template(&packet_filter); + unregister_pernet_subsys(&ip6table_filter_net_ops); kfree(filter_ops); } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index afa4a5703e43..1a758f2bc537 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -128,8 +128,8 @@ static int __init ip6table_mangle_init(void) static void __exit ip6table_mangle_fini(void) { - unregister_pernet_subsys(&ip6table_mangle_net_ops); xt_unregister_template(&packet_mangler); + unregister_pernet_subsys(&ip6table_mangle_net_ops); kfree(mangle_ops); } diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 32d2da81c52a..923455921c1d 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -98,8 +98,8 @@ static int __init ip6table_raw_init(void) static void __exit ip6table_raw_fini(void) { - unregister_pernet_subsys(&ip6table_raw_net_ops); xt_unregister_template(&packet_raw); + unregister_pernet_subsys(&ip6table_raw_net_ops); kfree(rawtable_ops); } diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 3dfd8d6ea4b9..c44834d93fc7 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -88,8 +88,8 @@ static int __init ip6table_security_init(void) static void __exit ip6table_security_fini(void) { - unregister_pernet_subsys(&ip6table_security_net_ops); xt_unregister_template(&security_table); + unregister_pernet_subsys(&ip6table_security_net_ops); kfree(sectbl_ops); } -- cgit v1.2.3 From b4597d5fd7d2f8cebfffd40dffb5e003cc78964c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 May 2026 12:07:17 +0200 Subject: netfilter: x_tables: add and use xtables_unregister_table_exit Previous change added xtables_unregister_table_pre_exit to detach the table from the packetpath and to unlink it from the active table list. In case of rmmod, userspace that is doing set/getsockopt for this table will not be able to re-instantiate the table: 1. The larval table has been removed already 2. existing instantiated table is no longer on the xt pernet table list. This adds the second stage helper: unlink the table from the dying list, free the hook ops (if any) and do the audit notification. It replaces xt_unregister_table(). Fixes: fdacd57c79b7 ("netfilter: x_tables: never register tables by default") Reported-by: Tristan Madani Reviewed-by: Tristan Madani Closes: https://lore.kernel.org/netfilter-devel/20260429175613.1459342-1-tristmd@gmail.com/ Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 2 +- net/ipv4/netfilter/arp_tables.c | 9 ++--- net/ipv4/netfilter/ip_tables.c | 9 ++--- net/ipv4/netfilter/iptable_nat.c | 5 ++- net/ipv6/netfilter/ip6_tables.c | 9 ++--- net/ipv6/netfilter/ip6table_nat.c | 5 ++- net/netfilter/x_tables.c | 81 +++++++++++++++++++++++++++++--------- 7 files changed, 83 insertions(+), 37 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 74486714ae20..5a1c5c336fa4 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -308,8 +308,8 @@ struct xt_table *xt_register_table(struct net *net, const struct nf_hook_ops *template_ops, struct xt_table_info *bootstrap, struct xt_table_info *newinfo); -void *xt_unregister_table(struct xt_table *table); void xt_unregister_table_pre_exit(struct net *net, u8 af, const char *name); +struct xt_table *xt_unregister_table_exit(struct net *net, u8 af, const char *name); struct xt_table_info *xt_replace_table(struct xt_table *table, unsigned int num_counters, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index bd348b7bad2c..ad2259678c78 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1501,13 +1501,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len static void __arpt_unregister_table(struct net *net, struct xt_table *table) { - struct xt_table_info *private; - void *loc_cpu_entry; + struct xt_table_info *private = table->private; struct module *table_owner = table->me; + void *loc_cpu_entry; struct arpt_entry *iter; - private = xt_unregister_table(table); - /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) @@ -1515,6 +1513,7 @@ static void __arpt_unregister_table(struct net *net, struct xt_table *table) if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); + kfree(table); } int arpt_register_table(struct net *net, @@ -1556,7 +1555,7 @@ int arpt_register_table(struct net *net, void arpt_unregister_table(struct net *net, const char *name) { - struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name); + struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_ARP, name); if (table) __arpt_unregister_table(net, table); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 864489928fb5..5cbdb0815857 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1704,12 +1704,10 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) static void __ipt_unregister_table(struct net *net, struct xt_table *table) { - struct xt_table_info *private; - void *loc_cpu_entry; + struct xt_table_info *private = table->private; struct module *table_owner = table->me; struct ipt_entry *iter; - - private = xt_unregister_table(table); + void *loc_cpu_entry; /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; @@ -1718,6 +1716,7 @@ static void __ipt_unregister_table(struct net *net, struct xt_table *table) if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); + kfree(table); } int ipt_register_table(struct net *net, const struct xt_table *table, @@ -1758,7 +1757,7 @@ int ipt_register_table(struct net *net, const struct xt_table *table, void ipt_unregister_table_exit(struct net *net, const char *name) { - struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); + struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_IPV4, name); if (table) __ipt_unregister_table(net, table); diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 8fc4912e790d..a0df72554025 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -119,8 +119,11 @@ static int iptable_nat_table_init(struct net *net) } ret = ipt_nat_register_lookups(net); - if (ret < 0) + if (ret < 0) { + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "nat"); + synchronize_rcu(); ipt_unregister_table_exit(net, "nat"); + } kfree(repl); return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index edf50bc7787e..9d9c3763f2f5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1713,12 +1713,10 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) static void __ip6t_unregister_table(struct net *net, struct xt_table *table) { - struct xt_table_info *private; - void *loc_cpu_entry; + struct xt_table_info *private = table->private; struct module *table_owner = table->me; struct ip6t_entry *iter; - - private = xt_unregister_table(table); + void *loc_cpu_entry; /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; @@ -1727,6 +1725,7 @@ static void __ip6t_unregister_table(struct net *net, struct xt_table *table) if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); + kfree(table); } int ip6t_register_table(struct net *net, const struct xt_table *table, @@ -1767,7 +1766,7 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, void ip6t_unregister_table_exit(struct net *net, const char *name) { - struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); + struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_IPV6, name); if (table) __ip6t_unregister_table(net, table); diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index bb8aa3fc42b4..c2394e2c94b5 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -121,8 +121,11 @@ static int ip6table_nat_table_init(struct net *net) } ret = ip6t_nat_register_lookups(net); - if (ret < 0) + if (ret < 0) { + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "nat"); + synchronize_rcu(); ip6t_unregister_table_exit(net, "nat"); + } kfree(repl); return ret; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 9c1e896c7b03..4e6708c23922 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -55,6 +55,9 @@ static struct list_head xt_templates[NFPROTO_NUMPROTO]; struct xt_pernet { struct list_head tables[NFPROTO_NUMPROTO]; + + /* stash area used during netns exit */ + struct list_head dead_tables[NFPROTO_NUMPROTO]; }; struct compat_delta { @@ -1634,23 +1637,6 @@ out: } EXPORT_SYMBOL_GPL(xt_register_table); -void *xt_unregister_table(struct xt_table *table) -{ - struct xt_table_info *private; - - mutex_lock(&xt[table->af].mutex); - private = table->private; - list_del(&table->list); - mutex_unlock(&xt[table->af].mutex); - audit_log_nfcfg(table->name, table->af, private->number, - AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); - kfree(table->ops); - kfree(table); - - return private; -} -EXPORT_SYMBOL_GPL(xt_unregister_table); - /** * xt_unregister_table_pre_exit - pre-shutdown unregister of a table * @net: network namespace @@ -1660,6 +1646,14 @@ EXPORT_SYMBOL_GPL(xt_unregister_table); * Unregisters the specified netfilter table from the given network namespace * and also unregisters the hooks from netfilter core: no new packets will be * processed. + * + * This must be called prior to xt_unregister_table_exit() from the pernet + * .pre_exit callback. After this call, the table is no longer visible to + * the get/setsockopt path. In case of rmmod, module exit path must have + * called xt_unregister_template() prior to unregistering pernet ops to + * prevent re-instantiation of the table. + * + * See also: xt_unregister_table_exit() */ void xt_unregister_table_pre_exit(struct net *net, u8 af, const char *name) { @@ -1669,6 +1663,7 @@ void xt_unregister_table_pre_exit(struct net *net, u8 af, const char *name) mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt_net->tables[af], list) { if (strcmp(t->name, name) == 0) { + list_move(&t->list, &xt_net->dead_tables[af]); mutex_unlock(&xt[af].mutex); if (t->ops) /* nat table registers with nat core, t->ops is NULL. */ @@ -1679,6 +1674,50 @@ void xt_unregister_table_pre_exit(struct net *net, u8 af, const char *name) mutex_unlock(&xt[af].mutex); } EXPORT_SYMBOL(xt_unregister_table_pre_exit); + +/** + * xt_unregister_table_exit - remove a table during namespace teardown + * @net: the network namespace from which to unregister the table + * @af: address family (e.g., NFPROTO_IPV4, NFPROTO_IPV6) + * @name: name of the table to unregister + * + * Completes the unregister process for a table. This must be called from + * the pernet ops .exit callback. This is the second stage after + * xt_unregister_table_pre_exit(). + * + * pair with xt_unregister_table_pre_exit() during namespace shutdown. + * + * Return: the unregistered table or NULL if the table was never + * instantiated. The caller needs to kfree() the table after it + * has removed the family specific matches/targets. + */ +struct xt_table *xt_unregister_table_exit(struct net *net, u8 af, const char *name) +{ + struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); + struct xt_table *table; + + mutex_lock(&xt[af].mutex); + list_for_each_entry(table, &xt_net->dead_tables[af], list) { + struct nf_hook_ops *ops = NULL; + + if (strcmp(table->name, name) != 0) + continue; + + list_del(&table->list); + + audit_log_nfcfg(table->name, table->af, table->private->number, + AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); + swap(table->ops, ops); + mutex_unlock(&xt[af].mutex); + + kfree(ops); + return table; + } + mutex_unlock(&xt[af].mutex); + + return NULL; +} +EXPORT_SYMBOL_GPL(xt_unregister_table_exit); #endif #ifdef CONFIG_PROC_FS @@ -2125,8 +2164,10 @@ static int __net_init xt_net_init(struct net *net) struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); int i; - for (i = 0; i < NFPROTO_NUMPROTO; i++) + for (i = 0; i < NFPROTO_NUMPROTO; i++) { INIT_LIST_HEAD(&xt_net->tables[i]); + INIT_LIST_HEAD(&xt_net->dead_tables[i]); + } return 0; } @@ -2135,8 +2176,10 @@ static void __net_exit xt_net_exit(struct net *net) struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); int i; - for (i = 0; i < NFPROTO_NUMPROTO; i++) + for (i = 0; i < NFPROTO_NUMPROTO; i++) { WARN_ON_ONCE(!list_empty(&xt_net->tables[i])); + WARN_ON_ONCE(!list_empty(&xt_net->dead_tables[i])); + } } static struct pernet_operations xt_net_ops = { -- cgit v1.2.3 From b7f0544d86d439cb946515d2ef6a0a75e8626710 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 May 2026 12:07:18 +0200 Subject: netfilter: ebtables: move to two-stage removal scheme Like previous patches for x_tables, follow same pattern in ebtables. We can't reuse xt helpers: ebt_table struct layout is incompatible. table->ops assignment is now done while still holding the ebt mutex to make sure we never expose partially-filled table struct. Fixes: 87663c39f898 ("netfilter: ebtables: do not hook tables by default") Reviewed-by: Tristan Madani Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtable_broute.c | 2 +- net/bridge/netfilter/ebtable_filter.c | 2 +- net/bridge/netfilter/ebtable_nat.c | 2 +- net/bridge/netfilter/ebtables.c | 60 +++++++++++++++++++++-------------- 4 files changed, 40 insertions(+), 26 deletions(-) diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 741360219552..e6f9e343b41f 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -128,8 +128,8 @@ static int __init ebtable_broute_init(void) static void __exit ebtable_broute_fini(void) { - unregister_pernet_subsys(&broute_net_ops); ebt_unregister_template(&broute_table); + unregister_pernet_subsys(&broute_net_ops); } module_init(ebtable_broute_init); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index dacd81b12e62..02b6501c15a5 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -109,8 +109,8 @@ static int __init ebtable_filter_init(void) static void __exit ebtable_filter_fini(void) { - unregister_pernet_subsys(&frame_filter_net_ops); ebt_unregister_template(&frame_filter); + unregister_pernet_subsys(&frame_filter_net_ops); } module_init(ebtable_filter_init); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 0f2a8c6118d4..9985a82555c4 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -109,8 +109,8 @@ static int __init ebtable_nat_init(void) static void __exit ebtable_nat_fini(void) { - unregister_pernet_subsys(&frame_nat_net_ops); ebt_unregister_template(&frame_nat); + unregister_pernet_subsys(&frame_nat_net_ops); } module_init(ebtable_nat_init); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index aea3e19875c6..3578ffbc14ae 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -42,6 +42,7 @@ struct ebt_pernet { struct list_head tables; + struct list_head dead_tables; }; struct ebt_template { @@ -1162,11 +1163,6 @@ free_newinfo: static void __ebt_unregister_table(struct net *net, struct ebt_table *table) { - mutex_lock(&ebt_mutex); - list_del(&table->list); - mutex_unlock(&ebt_mutex); - audit_log_nfcfg(table->name, AF_BRIDGE, table->private->nentries, - AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, ebt_cleanup_entry, net, NULL); if (table->private->nentries) @@ -1267,13 +1263,15 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, for (i = 0; i < num_ops; i++) ops[i].priv = table; - list_add(&table->list, &ebt_net->tables); - mutex_unlock(&ebt_mutex); - table->ops = ops; ret = nf_register_net_hooks(net, ops, num_ops); - if (ret) + if (ret) { + synchronize_rcu(); __ebt_unregister_table(net, table); + } else { + list_add(&table->list, &ebt_net->tables); + } + mutex_unlock(&ebt_mutex); audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries, AUDIT_XT_OP_REGISTER, GFP_KERNEL); @@ -1339,7 +1337,7 @@ void ebt_unregister_template(const struct ebt_table *t) } EXPORT_SYMBOL(ebt_unregister_template); -static struct ebt_table *__ebt_find_table(struct net *net, const char *name) +void ebt_unregister_table_pre_exit(struct net *net, const char *name) { struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); struct ebt_table *t; @@ -1348,30 +1346,36 @@ static struct ebt_table *__ebt_find_table(struct net *net, const char *name) list_for_each_entry(t, &ebt_net->tables, list) { if (strcmp(t->name, name) == 0) { + list_move(&t->list, &ebt_net->dead_tables); mutex_unlock(&ebt_mutex); - return t; + nf_unregister_net_hooks(net, t->ops, hweight32(t->valid_hooks)); + return; } } mutex_unlock(&ebt_mutex); - return NULL; -} - -void ebt_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct ebt_table *table = __ebt_find_table(net, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } EXPORT_SYMBOL(ebt_unregister_table_pre_exit); void ebt_unregister_table(struct net *net, const char *name) { - struct ebt_table *table = __ebt_find_table(net, name); + struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); + struct ebt_table *t; - if (table) - __ebt_unregister_table(net, table); + mutex_lock(&ebt_mutex); + + list_for_each_entry(t, &ebt_net->dead_tables, list) { + if (strcmp(t->name, name) == 0) { + list_del(&t->list); + audit_log_nfcfg(t->name, AF_BRIDGE, t->private->nentries, + AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); + __ebt_unregister_table(net, t); + mutex_unlock(&ebt_mutex); + return; + } + } + + mutex_unlock(&ebt_mutex); } /* userspace just supplied us with counters */ @@ -2556,11 +2560,21 @@ static int __net_init ebt_pernet_init(struct net *net) struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); INIT_LIST_HEAD(&ebt_net->tables); + INIT_LIST_HEAD(&ebt_net->dead_tables); return 0; } +static void __net_exit ebt_pernet_exit(struct net *net) +{ + struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); + + WARN_ON_ONCE(!list_empty(&ebt_net->tables)); + WARN_ON_ONCE(!list_empty(&ebt_net->dead_tables)); +} + static struct pernet_operations ebt_net_ops = { .init = ebt_pernet_init, + .exit = ebt_pernet_exit, .id = &ebt_pernet_id, .size = sizeof(struct ebt_pernet), }; -- cgit v1.2.3 From 92c603fa07bc0d6a17345de3ad7954730b8de44b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 May 2026 12:07:19 +0200 Subject: netfilter: ebtables: close dangling table module init race sashiko reported for a related patch: In modules like iptable_raw.c, [..], if register_pernet_subsys() fails, the rollback might call kfree(rawtable_ops) before [..] During this window, could a concurrent userspace process find the globally visible template, trigger table_init(), [..] The table init functions must always register the template last. Otherwise, set/getsockopt can instantiate a table in a namespace while the required pernet ops (contain the destructor) isn't available. This change is also required in x_tables, handled in followup change. Fixes: 87663c39f898 ("netfilter: ebtables: do not hook tables by default") Reviewed-by: Tristan Madani Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtable_broute.c | 12 +++++------- net/bridge/netfilter/ebtable_filter.c | 12 +++++------- net/bridge/netfilter/ebtable_nat.c | 10 ++++------ 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index e6f9e343b41f..f05c79f215ea 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -112,18 +112,16 @@ static struct pernet_operations broute_net_ops = { static int __init ebtable_broute_init(void) { - int ret = ebt_register_template(&broute_table, broute_table_init); + int ret = register_pernet_subsys(&broute_net_ops); if (ret) return ret; - ret = register_pernet_subsys(&broute_net_ops); - if (ret) { - ebt_unregister_template(&broute_table); - return ret; - } + ret = ebt_register_template(&broute_table, broute_table_init); + if (ret) + unregister_pernet_subsys(&broute_net_ops); - return 0; + return ret; } static void __exit ebtable_broute_fini(void) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 02b6501c15a5..0fc03b07e62a 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -93,18 +93,16 @@ static struct pernet_operations frame_filter_net_ops = { static int __init ebtable_filter_init(void) { - int ret = ebt_register_template(&frame_filter, frame_filter_table_init); + int ret = register_pernet_subsys(&frame_filter_net_ops); if (ret) return ret; - ret = register_pernet_subsys(&frame_filter_net_ops); - if (ret) { - ebt_unregister_template(&frame_filter); - return ret; - } + ret = ebt_register_template(&frame_filter, frame_filter_table_init); + if (ret) + unregister_pernet_subsys(&frame_filter_net_ops); - return 0; + return ret; } static void __exit ebtable_filter_fini(void) diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 9985a82555c4..8a10375d8909 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -93,16 +93,14 @@ static struct pernet_operations frame_nat_net_ops = { static int __init ebtable_nat_init(void) { - int ret = ebt_register_template(&frame_nat, frame_nat_table_init); + int ret = register_pernet_subsys(&frame_nat_net_ops); if (ret) return ret; - ret = register_pernet_subsys(&frame_nat_net_ops); - if (ret) { - ebt_unregister_template(&frame_nat); - return ret; - } + ret = ebt_register_template(&frame_nat, frame_nat_table_init); + if (ret) + unregister_pernet_subsys(&frame_nat_net_ops); return ret; } -- cgit v1.2.3 From 16bc4b6686b2c112c10e67d6b493adc3607256d3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 May 2026 12:07:20 +0200 Subject: netfilter: x_tables: close dangling table module init race Similar to the previous ebtables patch: template add exposes the table to userspace, we must do this last to rnsure the pernet ops are set up (contain the destructors). Fixes: fdacd57c79b7 ("netfilter: x_tables: never register tables by default") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arptable_filter.c | 23 ++++++++++++----------- net/ipv4/netfilter/iptable_filter.c | 23 ++++++++++++----------- net/ipv4/netfilter/iptable_mangle.c | 25 +++++++++++++------------ net/ipv4/netfilter/iptable_raw.c | 22 +++++++++++----------- net/ipv4/netfilter/iptable_security.c | 23 ++++++++++++----------- net/ipv6/netfilter/ip6table_filter.c | 22 +++++++++++----------- net/ipv6/netfilter/ip6table_mangle.c | 23 ++++++++++++----------- net/ipv6/netfilter/ip6table_raw.c | 20 ++++++++++---------- net/ipv6/netfilter/ip6table_security.c | 23 ++++++++++++----------- 9 files changed, 105 insertions(+), 99 deletions(-) diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 382345567a60..370b635e3523 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -58,25 +58,26 @@ static struct pernet_operations arptable_filter_net_ops = { static int __init arptable_filter_init(void) { - int ret = xt_register_template(&packet_filter, - arptable_filter_table_init); - - if (ret < 0) - return ret; + int ret; arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table); - if (IS_ERR(arpfilter_ops)) { - xt_unregister_template(&packet_filter); + if (IS_ERR(arpfilter_ops)) return PTR_ERR(arpfilter_ops); - } ret = register_pernet_subsys(&arptable_filter_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_filter, + arptable_filter_table_init); if (ret < 0) { - xt_unregister_template(&packet_filter); - kfree(arpfilter_ops); - return ret; + unregister_pernet_subsys(&arptable_filter_net_ops); + goto err_free; } + return 0; +err_free: + kfree(arpfilter_ops); return ret; } diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 0dea754a9120..672d7da1071d 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -77,26 +77,27 @@ static struct pernet_operations iptable_filter_net_ops = { static int __init iptable_filter_init(void) { - int ret = xt_register_template(&packet_filter, - iptable_filter_table_init); - - if (ret < 0) - return ret; + int ret; filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table); - if (IS_ERR(filter_ops)) { - xt_unregister_template(&packet_filter); + if (IS_ERR(filter_ops)) return PTR_ERR(filter_ops); - } ret = register_pernet_subsys(&iptable_filter_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_filter, + iptable_filter_table_init); if (ret < 0) { - xt_unregister_template(&packet_filter); - kfree(filter_ops); - return ret; + unregister_pernet_subsys(&iptable_filter_net_ops); + goto err_free; } return 0; +err_free: + kfree(filter_ops); + return ret; } static void __exit iptable_filter_fini(void) diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 4d3b12492308..13d25d9a4610 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -111,25 +111,26 @@ static struct pernet_operations iptable_mangle_net_ops = { static int __init iptable_mangle_init(void) { - int ret = xt_register_template(&packet_mangler, - iptable_mangle_table_init); - if (ret < 0) - return ret; + int ret; mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook); - if (IS_ERR(mangle_ops)) { - xt_unregister_template(&packet_mangler); - ret = PTR_ERR(mangle_ops); - return ret; - } + if (IS_ERR(mangle_ops)) + return PTR_ERR(mangle_ops); ret = register_pernet_subsys(&iptable_mangle_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_mangler, + iptable_mangle_table_init); if (ret < 0) { - xt_unregister_template(&packet_mangler); - kfree(mangle_ops); - return ret; + unregister_pernet_subsys(&iptable_mangle_net_ops); + goto err_free; } + return 0; +err_free: + kfree(mangle_ops); return ret; } diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 6f7afec7954b..2745c22f4034 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -77,24 +77,24 @@ static int __init iptable_raw_init(void) pr_info("Enabling raw table before defrag\n"); } - ret = xt_register_template(table, - iptable_raw_table_init); - if (ret < 0) - return ret; - rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table); - if (IS_ERR(rawtable_ops)) { - xt_unregister_template(table); + if (IS_ERR(rawtable_ops)) return PTR_ERR(rawtable_ops); - } ret = register_pernet_subsys(&iptable_raw_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(table, + iptable_raw_table_init); if (ret < 0) { - xt_unregister_template(table); - kfree(rawtable_ops); - return ret; + unregister_pernet_subsys(&iptable_raw_net_ops); + goto err_free; } + return 0; +err_free: + kfree(rawtable_ops); return ret; } diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 81175c20ccbe..491894511c54 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -65,25 +65,26 @@ static struct pernet_operations iptable_security_net_ops = { static int __init iptable_security_init(void) { - int ret = xt_register_template(&security_table, - iptable_security_table_init); - - if (ret < 0) - return ret; + int ret; sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table); - if (IS_ERR(sectbl_ops)) { - xt_unregister_template(&security_table); + if (IS_ERR(sectbl_ops)) return PTR_ERR(sectbl_ops); - } ret = register_pernet_subsys(&iptable_security_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&security_table, + iptable_security_table_init); if (ret < 0) { - xt_unregister_template(&security_table); - kfree(sectbl_ops); - return ret; + unregister_pernet_subsys(&iptable_security_net_ops); + goto err_free; } + return 0; +err_free: + kfree(sectbl_ops); return ret; } diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index cf561919bde8..b074fc477676 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -76,25 +76,25 @@ static struct pernet_operations ip6table_filter_net_ops = { static int __init ip6table_filter_init(void) { - int ret = xt_register_template(&packet_filter, - ip6table_filter_table_init); - - if (ret < 0) - return ret; + int ret; filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table); - if (IS_ERR(filter_ops)) { - xt_unregister_template(&packet_filter); + if (IS_ERR(filter_ops)) return PTR_ERR(filter_ops); - } ret = register_pernet_subsys(&ip6table_filter_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_filter, ip6table_filter_table_init); if (ret < 0) { - xt_unregister_template(&packet_filter); - kfree(filter_ops); - return ret; + unregister_pernet_subsys(&ip6table_filter_net_ops); + goto err_free; } + return 0; +err_free: + kfree(filter_ops); return ret; } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 1a758f2bc537..e6ee036a9b2c 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -104,25 +104,26 @@ static struct pernet_operations ip6table_mangle_net_ops = { static int __init ip6table_mangle_init(void) { - int ret = xt_register_template(&packet_mangler, - ip6table_mangle_table_init); - - if (ret < 0) - return ret; + int ret; mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook); - if (IS_ERR(mangle_ops)) { - xt_unregister_template(&packet_mangler); + if (IS_ERR(mangle_ops)) return PTR_ERR(mangle_ops); - } ret = register_pernet_subsys(&ip6table_mangle_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_mangler, + ip6table_mangle_table_init); if (ret < 0) { - xt_unregister_template(&packet_mangler); - kfree(mangle_ops); - return ret; + unregister_pernet_subsys(&ip6table_mangle_net_ops); + goto err_free; } + return 0; +err_free: + kfree(mangle_ops); return ret; } diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 923455921c1d..3b161ee875bc 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -75,24 +75,24 @@ static int __init ip6table_raw_init(void) pr_info("Enabling raw table before defrag\n"); } - ret = xt_register_template(table, ip6table_raw_table_init); - if (ret < 0) - return ret; - /* Register hooks */ rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table); - if (IS_ERR(rawtable_ops)) { - xt_unregister_template(table); + if (IS_ERR(rawtable_ops)) return PTR_ERR(rawtable_ops); - } ret = register_pernet_subsys(&ip6table_raw_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(table, ip6table_raw_table_init); if (ret < 0) { - kfree(rawtable_ops); - xt_unregister_template(table); - return ret; + unregister_pernet_subsys(&ip6table_raw_net_ops); + goto err_free; } + return 0; +err_free: + kfree(rawtable_ops); return ret; } diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index c44834d93fc7..4bd5d97b8ab6 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -64,25 +64,26 @@ static struct pernet_operations ip6table_security_net_ops = { static int __init ip6table_security_init(void) { - int ret = xt_register_template(&security_table, - ip6table_security_table_init); - - if (ret < 0) - return ret; + int ret; sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table); - if (IS_ERR(sectbl_ops)) { - xt_unregister_template(&security_table); + if (IS_ERR(sectbl_ops)) return PTR_ERR(sectbl_ops); - } ret = register_pernet_subsys(&ip6table_security_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&security_table, + ip6table_security_table_init); if (ret < 0) { - kfree(sectbl_ops); - xt_unregister_template(&security_table); - return ret; + unregister_pernet_subsys(&ip6table_security_net_ops); + goto err_free; } + return 0; +err_free: + kfree(sectbl_ops); return ret; } -- cgit v1.2.3 From 27414ff1b287ea9a2a11675149ec28e05539f3cc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 7 May 2026 11:19:22 +0200 Subject: netfilter: bridge: eb_tables: close module init race sashiko reports for unrelated patch: Does the core ebtables initialization in ebtables.c suffer from a similar race? Once nf_register_sockopt() completes, the sockopts are exposed globally. sockopt has to be registered last, just like in ip/ip6/arptables. Fixes: 5b53951cfc85 ("netfilter: ebtables: use net_generic infra") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 3578ffbc14ae..b9f4daac09af 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2583,19 +2583,20 @@ static int __init ebtables_init(void) { int ret; - ret = xt_register_target(&ebt_standard_target); + ret = register_pernet_subsys(&ebt_net_ops); if (ret < 0) return ret; - ret = nf_register_sockopt(&ebt_sockopts); + + ret = xt_register_target(&ebt_standard_target); if (ret < 0) { - xt_unregister_target(&ebt_standard_target); + unregister_pernet_subsys(&ebt_net_ops); return ret; } - ret = register_pernet_subsys(&ebt_net_ops); + ret = nf_register_sockopt(&ebt_sockopts); if (ret < 0) { - nf_unregister_sockopt(&ebt_sockopts); xt_unregister_target(&ebt_standard_target); + unregister_pernet_subsys(&ebt_net_ops); return ret; } -- cgit v1.2.3 From dcb0f9aefdd604d36710fda53c25bd7cf4a3e37a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 May 2026 13:00:28 +0200 Subject: netfilter: nf_conntrack_expect: restore helper propagation via expectation A recent series to fix expectations broke helper propagation via expectation, this mechanism is used by the sip and h323 helper. This also propagates the conntrack helper to expected connections. I changed semantics of exp->helper which now tells us the actual helper that created the expectation. Add an explicit assign_helper field to expectations for this purpose and update helpers to use it. Restore this feature for userspace conntrack helper via ctnetlink nfqueue integration so it is again possible to attach a helper to an expectation, where it makes sense. This is not restored via ctnetlink expectation creation as there is no client for such feature. Use the expectation layer 4 protocol number for the helper lookup for consistency. Make sure the expectation using this helper propagation mechanism also go away when the helper is unregistered. Fixes: 9c42bc9db90a ("netfilter: nf_conntrack_expect: honor expectation helper field") Fixes: 917b61fa2042 ("netfilter: ctnetlink: ignore explicit helper on new expectations") Reported-by: Ilya Maximets Tested-by: Ilya Maximets Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_expect.h | 5 ++++- net/netfilter/nf_conntrack_broadcast.c | 1 + net/netfilter/nf_conntrack_core.c | 7 +++++-- net/netfilter/nf_conntrack_expect.c | 1 + net/netfilter/nf_conntrack_h323_main.c | 12 ++++++------ net/netfilter/nf_conntrack_helper.c | 5 +++++ net/netfilter/nf_conntrack_netlink.c | 18 ++++++++++++++++-- net/netfilter/nf_conntrack_sip.c | 2 +- 8 files changed, 39 insertions(+), 12 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index e9a8350e7ccf..80f50fd0f7ad 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -45,9 +45,12 @@ struct nf_conntrack_expect { void (*expectfn)(struct nf_conn *new, struct nf_conntrack_expect *this); - /* Helper to assign to new connection */ + /* Helper that created this expectation */ struct nf_conntrack_helper __rcu *helper; + /* Helper to assign to new connection */ + struct nf_conntrack_helper __rcu *assign_helper; + /* The conntrack of the master connection */ struct nf_conn *master; diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index 4f39bf7c843f..75e53fde6b29 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -72,6 +72,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, exp->flags = NF_CT_EXPECT_PERMANENT; exp->class = NF_CT_EXPECT_CLASS_DEFAULT; rcu_assign_pointer(exp->helper, helper); + rcu_assign_pointer(exp->assign_helper, NULL); write_pnet(&exp->net, net); #ifdef CONFIG_NF_CONNTRACK_ZONES exp->zone = ct->zone; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b08189226320..8ba5b22a1eef 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1811,14 +1811,17 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, spin_lock_bh(&nf_conntrack_expect_lock); exp = nf_ct_find_expectation(net, zone, tuple, !tmpl || nf_ct_is_confirmed(tmpl)); if (exp) { + struct nf_conntrack_helper *assign_helper; + /* Welcome, Mr. Bond. We've been expecting you... */ __set_bit(IPS_EXPECTED_BIT, &ct->status); /* exp->master safe, refcnt bumped in nf_ct_find_expectation */ ct->master = exp->master; - if (exp->helper) { + assign_helper = rcu_dereference(exp->assign_helper); + if (assign_helper) { help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help) - rcu_assign_pointer(help->helper, exp->helper); + rcu_assign_pointer(help->helper, assign_helper); } #ifdef CONFIG_NF_CONNTRACK_MARK diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 24d0576d84b7..8e943efbdf0a 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -344,6 +344,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, helper = rcu_dereference(help->helper); rcu_assign_pointer(exp->helper, helper); + rcu_assign_pointer(exp->assign_helper, NULL); write_pnet(&exp->net, net); #ifdef CONFIG_NF_CONNTRACK_ZONES exp->zone = ct->zone; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 3f5c50455b71..b2fe6554b9cf 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -643,7 +643,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - rcu_assign_pointer(exp->helper, &nf_conntrack_helper_h245); + rcu_assign_pointer(exp->assign_helper, &nf_conntrack_helper_h245); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -767,7 +767,7 @@ static int expect_callforwarding(struct sk_buff *skb, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); - rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -1234,7 +1234,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3 : NULL, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ nathook = rcu_dereference(nfct_h323_nat_hook); @@ -1306,7 +1306,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_UDP, NULL, &port); - rcu_assign_pointer(exp->helper, nf_conntrack_helper_ras); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_ras); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect RAS "); @@ -1523,7 +1523,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1577,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index a715304a53d8..b594cd244fe1 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -400,6 +400,11 @@ static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data) this = rcu_dereference_protected(exp->helper, lockdep_is_held(&nf_conntrack_expect_lock)); + if (this == me) + return true; + + this = rcu_dereference_protected(exp->assign_helper, + lockdep_is_held(&nf_conntrack_expect_lock)); return this == me; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index eda5fe4a75c8..d7209d124111 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2634,6 +2634,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct, + const struct nf_conntrack_helper *assign_helper, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask); @@ -2860,6 +2861,7 @@ static int ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, u32 portid, u32 report) { + struct nf_conntrack_helper *assign_helper = NULL; struct nlattr *cda[CTA_EXPECT_MAX+1]; struct nf_conntrack_tuple tuple, mask; struct nf_conntrack_expect *exp; @@ -2875,8 +2877,18 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, if (err < 0) return err; + if (cda[CTA_EXPECT_HELP_NAME]) { + const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); + + assign_helper = __nf_conntrack_helper_find(helpname, + nf_ct_l3num(ct), + tuple.dst.protonum); + if (!assign_helper) + return -EOPNOTSUPP; + } + exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct, - &tuple, &mask); + assign_helper, &tuple, &mask); if (IS_ERR(exp)) return PTR_ERR(exp); @@ -3515,6 +3527,7 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, + const struct nf_conntrack_helper *assign_helper, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask) { @@ -3568,6 +3581,7 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, exp->zone = ct->zone; #endif rcu_assign_pointer(exp->helper, helper); + rcu_assign_pointer(exp->assign_helper, assign_helper); exp->tuple = *tuple; exp->mask.src.u3 = mask->src.u3; exp->mask.src.u.all = mask->src.u.all; @@ -3623,7 +3637,7 @@ ctnetlink_create_expect(struct net *net, ct = nf_ct_tuplehash_to_ctrack(h); rcu_read_lock(); - exp = ctnetlink_alloc_expect(cda, ct, &tuple, &mask); + exp = ctnetlink_alloc_expect(cda, ct, NULL, &tuple, &mask); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto err_rcu; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 1eb55907d470..d24bfa9e8234 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1383,7 +1383,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct), saddr, &daddr, proto, NULL, &port); exp->timeout.expires = sip_timeout * HZ; - rcu_assign_pointer(exp->helper, helper); + rcu_assign_pointer(exp->assign_helper, helper); exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; hooks = rcu_dereference(nf_nat_sip_hooks); -- cgit v1.2.3 From d8ef54c83ad70b81735b506431affadd2f720aa1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 May 2026 23:57:55 +0200 Subject: netfilter: ctnetlink: check tuple and mask in expectations created via nfqueue Ensure the expectation tuple and mask attributes are present in netlink message, otherwise null-ptr-deref is possible. Fixes: bd0779370588 ("netfilter: nfnetlink_queue: allow to attach expectations to conntracks") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d7209d124111..befa7e83ee49 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2872,6 +2872,9 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, if (err < 0) return err; + if (!cda[CTA_EXPECT_TUPLE] || !cda[CTA_EXPECT_MASK]) + return -EINVAL; + err = ctnetlink_glue_exp_parse((const struct nlattr * const *)cda, ct, &tuple, &mask); if (err < 0) -- cgit v1.2.3 From eb6317739b1ea3ab28791e1f91b24781905fa815 Mon Sep 17 00:00:00 2001 From: Li Xiasong Date: Thu, 7 May 2026 22:04:22 +0800 Subject: netfilter: nf_conntrack_sip: get helper before allocating expectation process_register_request() allocates an expectation and then checks whether a conntrack helper is available. If helper lookup fails, the function returns early and the allocated expectation is left behind. Reorder the code to fetch and validate helper before calling nf_ct_expect_alloc(). This keeps the logic simpler and removes the leak path while preserving existing behavior. Fixes: e14575fa7529 ("netfilter: nf_conntrack: use rcu accessors where needed") Cc: stable@vger.kernel.org Signed-off-by: Li Xiasong Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index d24bfa9e8234..e69941f1a101 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1366,6 +1366,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, goto store_cseq; } + helper = rcu_dereference(nfct_help(ct)->helper); + if (!helper) + return NF_DROP; + exp = nf_ct_expect_alloc(ct); if (!exp) { nf_ct_helper_log(skb, ct, "cannot alloc expectation"); @@ -1376,10 +1380,6 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, if (sip_direct_signalling) saddr = &ct->tuplehash[!dir].tuple.src.u3; - helper = rcu_dereference(nfct_help(ct)->helper); - if (!helper) - return NF_DROP; - nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct), saddr, &daddr, proto, NULL, &port); exp->timeout.expires = sip_timeout * HZ; -- cgit v1.2.3 From 19f94b6fee75b3ef7fbc06f3745b9a771a8a19a4 Mon Sep 17 00:00:00 2001 From: Li Xiasong Date: Thu, 7 May 2026 22:04:23 +0800 Subject: netfilter: nft_ct: fix missing expect put in obj eval nft_ct_expect_obj_eval() allocates an expectation and may call nf_ct_expect_related(), but never drops its local reference. Add nf_ct_expect_put(exp) before return to balance allocation. Fixes: 857b46027d6f ("netfilter: nft_ct: add ct expectations support") Cc: stable@vger.kernel.org Signed-off-by: Li Xiasong Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 60ee8d932fcb..fa2cc556331c 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1334,6 +1334,8 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj, if (nf_ct_expect_related(exp, 0) != 0) regs->verdict.code = NF_DROP; + + nf_ct_expect_put(exp); } static const struct nla_policy nft_ct_expect_policy[NFTA_CT_EXPECT_MAX + 1] = { -- cgit v1.2.3 From 1f91d0d5827512816789f74f4d72d16269bde1ec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 May 2026 14:16:59 -1000 Subject: sched_ext: Fix !CONFIG_EXT_SUB_SCHED build warnings W=1 with CONFIG_EXT_SUB_SCHED=n flags 'err_msg' uninitialized and 'err_free_lb_resched' unused. Initialize err_msg and gate the label. Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 48b4834c7027..f4e2db8e56be 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5584,7 +5584,7 @@ static void refresh_watchdog(void) static s32 scx_link_sched(struct scx_sched *sch) { - const char *err_msg; + const char *err_msg = ""; s32 ret = 0; scoped_guard(raw_spinlock_irq, &scx_sched_lock) { @@ -6652,8 +6652,10 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops, #endif /* CONFIG_EXT_SUB_SCHED */ return sch; +#ifdef CONFIG_EXT_SUB_SCHED err_free_lb_resched: free_cpumask_var(sch->bypass_lb_resched_cpumask); +#endif err_free_lb_cpumask: free_cpumask_var(sch->bypass_lb_donee_cpumask); err_stop_helper: -- cgit v1.2.3 From 307abfac04a254c09c5705d816b33354acee97a0 Mon Sep 17 00:00:00 2001 From: Jianpeng Chang Date: Fri, 8 May 2026 09:56:36 +0900 Subject: kprobes: skip non-symbol addresses in kprobe_add_ksym_blacklist() When kprobe_add_area_blacklist() iterates through a section like .kprobes.text, the start address may not correspond to a named symbol. On ARM64 with CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS=y (introduced by commit baaf553d3bc3 ("arm64: Implement HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS")), the compiler flag -fpatchable-function-entry=4,2 inserts 2 NOPs before each function entry point for ftrace call_ops. These pre-function NOPs sit at the section base address, before the first named function symbol. The compiler emits a $x mapping symbol at offset 0x00 to mark the start of code, but find_kallsyms_symbol() ignores mapping symbols. Without CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS (e.g. defconfig), no pre-function NOPs are inserted, the first function starts at offset 0x00, and the bug does not trigger. This only affects modules that have a .kprobes.text section (i.e. those using the __kprobes annotation). Modules using NOKPROBE_SYMBOL() instead (like kretprobe_example.ko) blacklist exact function addresses via the _kprobe_blacklist section and are not affected. For kprobe_example.ko on ARM64 with -fpatchable-function-entry=4,2, the .kprobes.text section layout is: offset 0x00: $x + 2 NOPs (mapping symbol + ftrace preamble) offset 0x08: handler_post (64 bytes) offset 0x50: handler_pre (68 bytes) kprobe_add_area_blacklist() starts iterating from the section base address (offset 0x00), which only has the $x mapping symbol. kprobe_add_ksym_blacklist() then calls kallsyms_lookup_size_offset() for this address, which goes through: kallsyms_lookup_size_offset() -> module_address_lookup() -> find_kallsyms_symbol() find_kallsyms_symbol() scans all module symbols to find the closest preceding symbol. Since no named text symbol exists at offset 0x00, find_kallsyms_symbol() picks __UNIQUE_ID_vermagic (a .modinfo symbol whose address is in the temporary image) as the "best" match. The computed "size" = next_text_symbol - modinfo_symbol spans across these two unrelated memory regions, creating a blacklist entry with a bogus range of tens of terabytes. Whether this causes a visible failure depends on address randomization, here is what happens on Raspberry Pi 4/5: - On RPi5, the bogus size was ~35 TB. start + size stayed within 64-bit range, so the blacklist entry covered the entire kernel text. register_kprobe() in the module's own init function failed with -EINVAL. - On RPi4, the bogus size was ~75 TB. start + size overflowed 64 bits and wrapped to a small address near zero. The range check (addr >= start && addr < end) then failed because end wrapped around, so the bogus entry was accidentally harmless and kprobes worked by luck. The same bug exists on both machines, but randomization determines whether the integer overflow masks it or not. Fix this by adding notrace to the __kprobes macro. Functions in .kprobes.text are kprobe infrastructure handlers that should never be traced by ftrace. With notrace, the compiler stops inserting them and the non-symbol gap at the section start disappears entirely. Link: https://lore.kernel.org/all/20260506012706.2785785-1-jianpeng.chang.cn@windriver.com/ Fixes: baaf553d3bc3 ("arm64: Implement HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS") Signed-off-by: Jianpeng Chang Signed-off-by: Masami Hiramatsu (Google) --- include/asm-generic/kprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/kprobes.h b/include/asm-generic/kprobes.h index 060eab094e5a..5290a2b2e15a 100644 --- a/include/asm-generic/kprobes.h +++ b/include/asm-generic/kprobes.h @@ -14,7 +14,7 @@ static unsigned long __used \ _kbl_addr_##fname = (unsigned long)fname; # define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname) /* Use this to forbid a kprobes attach on very low level functions */ -# define __kprobes __section(".kprobes.text") +# define __kprobes notrace __section(".kprobes.text") # define nokprobe_inline __always_inline #else # define NOKPROBE_SYMBOL(fname) -- cgit v1.2.3 From ef5581bb30efb939cc2bf093475c6cc85258e5cd Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Fri, 8 May 2026 09:56:36 +0900 Subject: test_kprobes: clear kprobes between test runs Running the kprobes sanity tests twice makes all tests fail and eventually crashes the kernel. [root@martin-riscv-1 ~]# echo 1 > /sys/kernel/debug/kunit/kprobes_test/run ... # Totals: pass:5 fail:0 skip:0 total:5 ok 1 kprobes_test [root@martin-riscv-1 ~]# echo 1 > /sys/kernel/debug/kunit/kprobes_test/run ... # test_kprobe: EXPECTATION FAILED at lib/tests/test_kprobes.c:64 Expected 0 == register_kprobe(&kp), but register_kprobe(&kp) == -22 (0xffffffffffffffea) ... Unable to handle kernel paging request ... The testsuite defines several kprobes and kretprobes as static variables that are preserved across test runs. After register_kprobe and unregister_kprobe, a kprobe contains some leftover data that must be cleared before the kprobe can be registered again. The tests are setting symbol_name to define the probe location. Address and flags must be cleared. The existing code clears some of the probes between subsequent tests, but not between two test runs. The leftover data from a previous test run makes the registrations fail in the next run. Move the cleanups for all kprobes into kprobes_test_init, this function is called before each single test (including the first test of a test run). Link: https://lore.kernel.org/all/20260507134615.1010905-1-martin@kaiser.cx/ Fixes: e44e81c5b90f ("kprobes: convert tests to kunit") Signed-off-by: Martin Kaiser Signed-off-by: Masami Hiramatsu (Google) --- lib/tests/test_kprobes.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/lib/tests/test_kprobes.c b/lib/tests/test_kprobes.c index b7582010125c..06e729e4de05 100644 --- a/lib/tests/test_kprobes.c +++ b/lib/tests/test_kprobes.c @@ -12,6 +12,12 @@ #define div_factor 3 +#define KP_CLEAR(_kp) \ +do { \ + (_kp).addr = NULL; \ + (_kp).flags = 0; \ +} while (0) + static u32 rand1, preh_val, posth_val; static u32 (*target)(u32 value); static u32 (*recursed_target)(u32 value); @@ -125,10 +131,6 @@ static void test_kprobes(struct kunit *test) current_test = test; - /* addr and flags should be cleard for reusing kprobe. */ - kp.addr = NULL; - kp.flags = 0; - KUNIT_EXPECT_EQ(test, 0, register_kprobes(kps, 2)); preh_val = 0; posth_val = 0; @@ -226,9 +228,6 @@ static void test_kretprobes(struct kunit *test) struct kretprobe *rps[2] = {&rp, &rp2}; current_test = test; - /* addr and flags should be cleard for reusing kprobe. */ - rp.kp.addr = NULL; - rp.kp.flags = 0; KUNIT_EXPECT_EQ(test, 0, register_kretprobes(rps, 2)); krph_val = 0; @@ -290,8 +289,6 @@ static void test_stacktrace_on_kretprobe(struct kunit *test) unsigned long myretaddr = (unsigned long)__builtin_return_address(0); current_test = test; - rp3.kp.addr = NULL; - rp3.kp.flags = 0; /* * Run the stacktrace_driver() to record correct return address in @@ -352,8 +349,6 @@ static void test_stacktrace_on_nested_kretprobe(struct kunit *test) struct kretprobe *rps[2] = {&rp3, &rp4}; current_test = test; - rp3.kp.addr = NULL; - rp3.kp.flags = 0; //KUNIT_ASSERT_NE(test, myretaddr, stacktrace_driver()); @@ -367,6 +362,18 @@ static void test_stacktrace_on_nested_kretprobe(struct kunit *test) static int kprobes_test_init(struct kunit *test) { + KP_CLEAR(kp); + KP_CLEAR(kp2); + KP_CLEAR(kp_missed); +#ifdef CONFIG_KRETPROBES + KP_CLEAR(rp.kp); + KP_CLEAR(rp2.kp); +#ifdef CONFIG_ARCH_CORRECT_STACKTRACE_ON_KRETPROBE + KP_CLEAR(rp3.kp); + KP_CLEAR(rp4.kp); +#endif +#endif + target = kprobe_target; target2 = kprobe_target2; recursed_target = kprobe_recursed_target; -- cgit v1.2.3 From 41337097f2823e99478d7cbe68d4893582ed0b18 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 6 May 2026 21:21:52 +0800 Subject: riscv: cpufeature: Use pre-defined ISA ext macros to index isa2hwcap We have pre-defined ISA extension macros, here use those macros to replace a magic number for isa2hwcap definition and some array indexing for isa2hwcap access. This doesn't change the original functionality, just improve the code maintainability and readability. Signed-off-by: Hui Wang Link: https://patch.msgid.link/20260506132152.53239-1-hui.wang@canonical.com Signed-off-by: Paul Walmsley --- arch/riscv/kernel/cpufeature.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 3dc4c0d31550..f46aa5602d74 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -1102,16 +1102,16 @@ early_param("riscv_isa_fallback", riscv_isa_fallback_setup); void __init riscv_fill_hwcap(void) { char print_str[NUM_ALPHA_EXTS + 1]; - unsigned long isa2hwcap[26] = {0}; + unsigned long isa2hwcap[RISCV_ISA_EXT_BASE] = {0}; int i, j; - isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I; - isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M; - isa2hwcap['a' - 'a'] = COMPAT_HWCAP_ISA_A; - isa2hwcap['f' - 'a'] = COMPAT_HWCAP_ISA_F; - isa2hwcap['d' - 'a'] = COMPAT_HWCAP_ISA_D; - isa2hwcap['c' - 'a'] = COMPAT_HWCAP_ISA_C; - isa2hwcap['v' - 'a'] = COMPAT_HWCAP_ISA_V; + isa2hwcap[RISCV_ISA_EXT_i] = COMPAT_HWCAP_ISA_I; + isa2hwcap[RISCV_ISA_EXT_m] = COMPAT_HWCAP_ISA_M; + isa2hwcap[RISCV_ISA_EXT_a] = COMPAT_HWCAP_ISA_A; + isa2hwcap[RISCV_ISA_EXT_f] = COMPAT_HWCAP_ISA_F; + isa2hwcap[RISCV_ISA_EXT_d] = COMPAT_HWCAP_ISA_D; + isa2hwcap[RISCV_ISA_EXT_c] = COMPAT_HWCAP_ISA_C; + isa2hwcap[RISCV_ISA_EXT_v] = COMPAT_HWCAP_ISA_V; if (!acpi_disabled) { riscv_fill_hwcap_from_isa_string(isa2hwcap); -- cgit v1.2.3 From 9228169d2ae055ed09a163887fc59a710a5eb73b Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Fri, 8 May 2026 05:17:43 +0000 Subject: cpufreq/amd-pstate: Grab "amd_pstate_driver_lock" when toggling dynamic_epp Concurrently changing driver mode and dynamic_epp with: echo passive > /sys/devices/system/cpu/amd_pstate/status& echo disable > /sys/devices/system/cpu/amd_pstate/dynamic_epp& hits the WARN_ON_ONCE() in static_key_disable_cpuslocked() and hangs the system since both sysfs writes are trying to do amd_pstate_change_driver_mode() without any synchronization. Grab the "amd_pstate_driver_lock" mutex when modifying "dynamic_epp" to prevent the two paths from racing with each other. Add a lockdep assertion for "amd_pstate_driver_lock" in amd_pstate_change_driver_mode() to formalize the dependency. Since "cppc_mode" is stable under "amd_pstate_driver_lock", only reload the driver when in "AMD_PSTATE_ACTIVE" mode and reject all writes when in passive or guided mode, or if the driver is not loaded, since only active mode operates on EPP. Fixes: e30ca6dd5345 ("cpufreq/amd-pstate: Add dynamic energy performance preference") Reviewed-by: Mario Limonciello Signed-off-by: K Prateek Nayak Link: https://lore.kernel.org/r/20260508051748.10484-2-kprateek.nayak@amd.com Signed-off-by: Mario Limonciello (AMD) --- drivers/cpufreq/amd-pstate.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 453084c67327..5e2d28c73294 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1707,6 +1707,8 @@ static int amd_pstate_change_driver_mode(int mode) { int ret; + lockdep_assert_held(&amd_pstate_driver_lock); + ret = amd_pstate_unregister_driver(0); if (ret) return ret; @@ -1821,6 +1823,13 @@ static ssize_t dynamic_epp_store(struct device *a, struct device_attribute *b, if (ret) return ret; + guard(mutex)(&amd_pstate_driver_lock); + + if (cppc_state != AMD_PSTATE_ACTIVE) { + pr_debug("dynamic_epp can only be toggled in active mode\n"); + return -EINVAL; + } + if (dynamic_epp == enabled) return -EINVAL; -- cgit v1.2.3 From 87d2a8dec0f02b200eb3527da0ab11ba4d4e7deb Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Fri, 8 May 2026 05:17:44 +0000 Subject: cpufreq/amd-pstate: Return -ENOMEM on failure to allocate profile_name Failure to allocate profile name will return -EINVAL from platform_profile_register() while in fact, it is a failure to allocate memory for the profile_name string. Return -ENOMEM when kasprintf() fails to allocate profile_name string. Fixes: e30ca6dd5345 ("cpufreq/amd-pstate: Add dynamic energy performance preference") Reviewed-by: Mario Limonciello Signed-off-by: K Prateek Nayak Link: https://lore.kernel.org/r/20260508051748.10484-3-kprateek.nayak@amd.com Signed-off-by: Mario Limonciello (AMD) --- drivers/cpufreq/amd-pstate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 5e2d28c73294..72514be2f30f 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1291,6 +1291,8 @@ static int amd_pstate_set_dynamic_epp(struct cpufreq_policy *policy) return ret; cpudata->profile_name = kasprintf(GFP_KERNEL, "amd-pstate-epp-cpu%d", cpudata->cpu); + if (!cpudata->profile_name) + return -ENOMEM; cpudata->ppdev = platform_profile_register(get_cpu_device(policy->cpu), cpudata->profile_name, -- cgit v1.2.3 From c5eed6ddc757e477f52b3d99bfde9e59975c72ca Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Fri, 8 May 2026 05:17:45 +0000 Subject: cpufreq/amd-pstate: Allow writes to dynamic_epp when state isn't modified Writing the current "dynamic_epp" state to sysfs fails with -EINVAL even though the desired result was achieved. Allow writes to "dynamic_epp" that does not modify the state. Fixes: e30ca6dd5345 ("cpufreq/amd-pstate: Add dynamic energy performance preference") Reviewed-by: Mario Limonciello Signed-off-by: K Prateek Nayak Link: https://lore.kernel.org/r/20260508051748.10484-4-kprateek.nayak@amd.com Signed-off-by: Mario Limonciello (AMD) --- drivers/cpufreq/amd-pstate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 72514be2f30f..462ddad7bc79 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1832,8 +1832,9 @@ static ssize_t dynamic_epp_store(struct device *a, struct device_attribute *b, return -EINVAL; } + /* Nothing to do */ if (dynamic_epp == enabled) - return -EINVAL; + return count; /* reinitialize with desired dynamic EPP value */ dynamic_epp = enabled; -- cgit v1.2.3 From f3acf7ff113007557538b278ccb0e4ab7ae513ea Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Fri, 8 May 2026 05:17:46 +0000 Subject: cpufreq/amd-pstate: Reorder notifier unregistration and floor perf reset An active power supply notifier can race with amd_pstate_epp_cpu_exit() trying to reset the floor perf and can overwrite the floor perf set in MSR_AMD_CPPC_REQ. Unregister the notifier before setting the floor perf to prevent the rare race. Fixes: e30ca6dd5345 ("cpufreq/amd-pstate: Add dynamic energy performance preference") Reviewed-by: Mario Limonciello Signed-off-by: K Prateek Nayak Link: https://lore.kernel.org/r/20260508051748.10484-5-kprateek.nayak@amd.com Signed-off-by: Mario Limonciello (AMD) --- drivers/cpufreq/amd-pstate.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 462ddad7bc79..175925762a93 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1982,12 +1982,13 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) if (cpudata) { union perf_cached perf = READ_ONCE(cpudata->perf); + if (cpudata->dynamic_epp) + amd_pstate_clear_dynamic_epp(policy); + /* Reset CPPC_REQ MSR to the BIOS value */ amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); amd_pstate_set_floor_perf(policy, cpudata->bios_floor_perf); - if (cpudata->dynamic_epp) - amd_pstate_clear_dynamic_epp(policy); kfree(cpudata); policy->driver_data = NULL; } -- cgit v1.2.3 From caa822d312be54e3fe1a3b52c887e0888e149c12 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Fri, 8 May 2026 05:17:47 +0000 Subject: cpufreq/amd-pstate: Use "epp_default_dc" as default when dynamic_epp is disabled If "dynamic_epp" is disabled, the driver initialization and the default EPP selection from sysfs currently sets the EPP based on the power supply state of the system at that time but there is no power supply callbacks registered to toggle it when the power supply state changes. This can lead to faster battery drain on platforms that start off while being plugged to the wall but later move to battery power since the EPP stays at AMD_CPPC_EPP_PERFORMANCE. Use "epp_default_dc" as the default EPP selection when dynamic_epp is disabled, restoring older behavior. On servers, this defaults to AMD_CPPC_EPP_PERFORMANCE and on other platforms, it defaults to AMD_CPPC_EPP_BALANCE_PERFORMANCE. Fixes: e30ca6dd5345 ("cpufreq/amd-pstate: Add dynamic energy performance preference") Reviewed-by: Mario Limonciello Signed-off-by: K Prateek Nayak Link: https://lore.kernel.org/r/20260508051748.10484-6-kprateek.nayak@amd.com Signed-off-by: Mario Limonciello (AMD) --- drivers/cpufreq/amd-pstate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 175925762a93..9eb9c3f4e809 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1429,7 +1429,7 @@ ssize_t store_energy_performance_preference(struct cpufreq_policy *policy, if (ret) epp = epp_values[ret]; else - epp = amd_pstate_get_balanced_epp(policy); + epp = cpudata->epp_default_dc; } if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { @@ -1954,7 +1954,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) if (dynamic_epp) ret = amd_pstate_set_dynamic_epp(policy); else - ret = amd_pstate_set_epp(policy, amd_pstate_get_balanced_epp(policy)); + ret = amd_pstate_set_epp(policy, cpudata->epp_default_dc); if (ret) goto free_cpudata1; -- cgit v1.2.3 From f9f16835d4dc46113c0a72625ffbf61f1aa95e5c Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Fri, 8 May 2026 05:17:48 +0000 Subject: cpufreq/amd-pstate-ut: Drop policy reference before driver switch Recent changes to the EPP unit test tries to perform a driver switch with a cpufreq_policy reference held when the driver is loaded into anything but the active mode which leads to a circular dependency and the unit test hanging indefinitely. Drop the reference before driver switch and grab it back once the driver mode is stabilized for the test. The EPP writes are only possible with CPUFREQ_POLICY_POWERSAVE policy. Temporarily switch the cpudata->policy (while holding the write end of the policy->rwsem) to CPUFREQ_POLICY_POWERSAVE and restore the original policy once tests are done. To ensure the final EPP is correct in case the driver started with CPUFREQ_POLICY_PERFORMANCE, EPP performance is tested last. The __free() based cleanup for cpufreq_policy is lost in the process. Reported-by: Kalpana Shetty Fixes: 7e173bc310d2b ("cpufreq/amd-pstate-ut: Add a unit test for raw EPP") Reviewed-by: Mario Limonciello Signed-off-by: K Prateek Nayak Link: https://lore.kernel.org/r/20260508051748.10484-7-kprateek.nayak@amd.com Signed-off-by: Mario Limonciello (AMD) --- drivers/cpufreq/amd-pstate-ut.c | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index aa8a464fab47..13a23dac477d 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -274,20 +274,21 @@ static int amd_pstate_set_mode(enum amd_pstate_mode mode) static int amd_pstate_ut_epp(u32 index) { - struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL; - char *buf __free(cleanup_page) = NULL; static const char * const epp_strings[] = { - "performance", - "balance_performance", - "balance_power", "power", + "balance_power", + "balance_performance", + "performance", }; - struct amd_cpudata *cpudata; + char *buf __free(cleanup_page) = NULL; + struct cpufreq_policy *policy = NULL; enum amd_pstate_mode orig_mode; + struct amd_cpudata *cpudata; + unsigned long orig_policy; bool orig_dynamic_epp; int ret, cpu = 0; - int i; u16 epp; + int i; policy = cpufreq_cpu_get(cpu); if (!policy) @@ -297,6 +298,10 @@ static int amd_pstate_ut_epp(u32 index) orig_mode = amd_pstate_get_status(); orig_dynamic_epp = cpudata->dynamic_epp; + /* Drop reference before potential driver change. */ + cpufreq_cpu_put(policy); + policy = NULL; + /* disable dynamic EPP before running test */ if (cpudata->dynamic_epp) { pr_debug("Dynamic EPP is enabled, disabling it\n"); @@ -311,6 +316,17 @@ static int amd_pstate_ut_epp(u32 index) if (ret) goto out; + policy = cpufreq_cpu_get(cpu); + if (!policy) { + ret = -ENODEV; + goto out; + } + + down_write(&policy->rwsem); + cpudata = policy->driver_data; + orig_policy = cpudata->policy; + cpudata->policy = CPUFREQ_POLICY_POWERSAVE; + for (epp = 0; epp <= U8_MAX; epp++) { u8 val; @@ -358,6 +374,12 @@ static int amd_pstate_ut_epp(u32 index) ret = 0; out: + if (policy) { + cpudata->policy = orig_policy; + up_write(&policy->rwsem); + cpufreq_cpu_put(policy); + } + if (orig_dynamic_epp) { int ret2; -- cgit v1.2.3 From 7666dbb1bacc4ba522b96740cba7283d243d16e1 Mon Sep 17 00:00:00 2001 From: John Walker Date: Thu, 7 May 2026 17:07:20 -0600 Subject: wifi: cfg80211: advance loop vars in cfg80211_merge_profile() cfg80211_merge_profile() reassembles a Multi-BSSID non-transmitted BSS profile that has been split across multiple consecutive MBSSID elements. Its while-loop calls cfg80211_get_profile_continuation(ie, ielen, mbssid_elem, sub_elem) but never advances mbssid_elem or sub_elem inside the body. Each iteration therefore searches for a continuation that follows the same fixed pair; the helper returns the same next_mbssid; and the same next_sub bytes are memcpy()'d into merged_ie at a growing offset until the buffer fills. Advance both mbssid_elem and sub_elem to the just-consumed continuation so the next call to cfg80211_get_profile_continuation() searches for a further continuation beyond it (or returns NULL when none exists). A specially-crafted malicious beacon can take advantage of this bug to cause the kernel to spend an excessive amount of time in cfg80211_merge_profile (up to as much as 2ms per beacon received), which could theoretically be abused in some way. Cc: stable@vger.kernel.org Fixes: fe806e4992c9 ("cfg80211: support profile split between elements") Signed-off-by: John Walker Link: https://patch.msgid.link/20260507230720.64783-1-johnwalker0@gmail.com Signed-off-by: Johannes Berg --- net/wireless/scan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 328af43ef832..358cbc9e43d8 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2462,6 +2462,9 @@ size_t cfg80211_merge_profile(const u8 *ie, size_t ielen, memcpy(merged_ie + copied_len, next_sub->data, next_sub->datalen); copied_len += next_sub->datalen; + + mbssid_elem = next_mbssid; + sub_elem = next_sub; } return copied_len; -- cgit v1.2.3 From 5e28b7b94408897e41c63477aabc9e1db439bc8c Mon Sep 17 00:00:00 2001 From: "Francis, David" Date: Tue, 28 Apr 2026 19:25:50 +0000 Subject: drm: Set old handle to NULL before prime swap in change_handle There was a potential race condition in change_handle. The ioctl briefly had a single object with two idr entries; a concurrent gem_close could delete the object and remove one of the handles while leaving the other one dangling, which could subsequently be dereferenced for a use-after-free. To fix this, do the same dance that gem_close itself does. (f6cd7daecff5 drm: Release driver references to handle before making it available again) First idr_replace the old handle to NULL. Later, if the prime operations are successful, actually close it. create_tail required a similar dance to avoid a similar problem. (bd46cece51a3 drm/gem: Fix race in drm_gem_handle_create_tail()) It idr_allocs the new handle with NULL, then swaps in the correct object later to avoid races. We don't need to do that here, since the only operations that could race are drm_prime, and change_handle holds the prime lock for the entire duration. v2: cleanups of error paths Signed-off-by: David Francis Co-authored-by: Dave Airlie Reported-by: Puttimet Thammasaeng Tested-by: Vitaly Prosyak Cc: Simona Vetter Cc: stable@vger.kernel.org Cc: Christian Koenig Fixes: 53096728b8910 ("drm: Add DRM prime interface to reassign GEM handle") Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index d6424267260b..51a887cc7fd7 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1019,7 +1019,7 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_gem_change_handle *args = data; - struct drm_gem_object *obj; + struct drm_gem_object *obj, *idrobj; int handle, ret; if (!drm_core_check_feature(dev, DRIVER_GEM)) @@ -1042,8 +1042,29 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, mutex_lock(&file_priv->prime.lock); spin_lock(&file_priv->table_lock); + + /* When create_tail allocs an obj idr, it needs to first alloc as NULL, + * then later replace with the correct object. This is not necessary + * here, because the only operations that could race are drm_prime + * bookkeeping, and we hold the prime lock. + */ ret = idr_alloc(&file_priv->object_idr, obj, handle, handle + 1, GFP_NOWAIT); + + if (ret < 0) { + spin_unlock(&file_priv->table_lock); + goto out_unlock; + } + + idrobj = idr_replace(&file_priv->object_idr, NULL, handle); + if (idrobj != obj) { + idr_replace(&file_priv->object_idr, idrobj, handle); + idr_remove(&file_priv->object_idr, args->new_handle); + spin_unlock(&file_priv->table_lock); + ret = -ENOENT; + goto out_unlock; + } + spin_unlock(&file_priv->table_lock); if (ret < 0) @@ -1055,6 +1076,8 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, if (ret < 0) { spin_lock(&file_priv->table_lock); idr_remove(&file_priv->object_idr, handle); + idrobj = idr_replace(&file_priv->object_idr, obj, handle); + WARN_ON(idrobj != NULL); spin_unlock(&file_priv->table_lock); goto out_unlock; } -- cgit v1.2.3 From f03e8583532941b07761c5429de7d50766fa3110 Mon Sep 17 00:00:00 2001 From: Jiexun Wang Date: Sun, 3 May 2026 12:28:58 +0800 Subject: batman-adv: stop caching unowned originator pointers in BAT IV BAT IV keeps the last-hop neighbor address in each neigh_node, but some paths also cache an originator pointer derived from a temporary lookup. That pointer is not owned by the neigh_node and may no longer refer to a live originator entry after purge handling runs. Stop storing the auxiliary originator pointer in the BAT IV neighbor state. When BAT IV needs the neighbor originator data, resolve it from the stored neighbor address and drop the reference again after use. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Jiexun Wang Signed-off-by: Ren Wei [sven: avoid bonding logic for outgoing OGM] Signed-off-by: Sven Eckelmann --- net/batman-adv/bat_iv_ogm.c | 83 ++++++++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 24 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 618d1889c04e..74ef7dc2b2f9 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -173,19 +173,12 @@ free_orig_node_hash: static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, const u8 *neigh_addr, - struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh) + struct batadv_orig_node *orig_node) { struct batadv_neigh_node *neigh_node; neigh_node = batadv_neigh_node_get_or_create(orig_node, hard_iface, neigh_addr); - if (!neigh_node) - goto out; - - neigh_node->orig_node = orig_neigh; - -out: return neigh_node; } @@ -906,6 +899,31 @@ static u8 batadv_iv_orig_ifinfo_sum(struct batadv_orig_node *orig_node, return sum; } +/** + * batadv_iv_ogm_neigh_ifinfo_sum() - Get bcast_own sum for a last-hop neighbor + * @bat_priv: the bat priv with all the mesh interface information + * @neigh_node: last-hop neighbor of an originator + * + * Return: Number of replied (rebroadcasted) OGMs for the originator currently + * announced by the neighbor. Returns 0 if the neighbor's originator entry is + * not available anymore. + */ +static u8 batadv_iv_ogm_neigh_ifinfo_sum(struct batadv_priv *bat_priv, + const struct batadv_neigh_node *neigh_node) +{ + struct batadv_orig_node *orig_neigh; + u8 sum; + + orig_neigh = batadv_orig_hash_find(bat_priv, neigh_node->addr); + if (!orig_neigh) + return 0; + + sum = batadv_iv_orig_ifinfo_sum(orig_neigh, neigh_node->if_incoming); + batadv_orig_node_put(orig_neigh); + + return sum; +} + /** * batadv_iv_ogm_orig_update() - use OGM to update corresponding data in an * originator @@ -975,17 +993,9 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, } if (!neigh_node) { - struct batadv_orig_node *orig_tmp; - - orig_tmp = batadv_iv_ogm_orig_get(bat_priv, ethhdr->h_source); - if (!orig_tmp) - goto unlock; - neigh_node = batadv_iv_ogm_neigh_new(if_incoming, ethhdr->h_source, - orig_node, orig_tmp); - - batadv_orig_node_put(orig_tmp); + orig_node); if (!neigh_node) goto unlock; } else { @@ -1037,10 +1047,9 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, */ if (router_ifinfo && neigh_ifinfo->bat_iv.tq_avg == router_ifinfo->bat_iv.tq_avg) { - sum_orig = batadv_iv_orig_ifinfo_sum(router->orig_node, - router->if_incoming); - sum_neigh = batadv_iv_orig_ifinfo_sum(neigh_node->orig_node, - neigh_node->if_incoming); + sum_orig = batadv_iv_ogm_neigh_ifinfo_sum(bat_priv, router); + sum_neigh = batadv_iv_ogm_neigh_ifinfo_sum(bat_priv, + neigh_node); if (sum_orig >= sum_neigh) goto out; } @@ -1106,7 +1115,6 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, if (!neigh_node) neigh_node = batadv_iv_ogm_neigh_new(if_incoming, orig_neigh_node->orig, - orig_neigh_node, orig_neigh_node); if (!neigh_node) @@ -1302,6 +1310,32 @@ out: return ret; } +/** + * batadv_orig_to_direct_router() - get direct next hop neighbor to an orig address + * @bat_priv: the bat priv with all the mesh interface information + * @orig_addr: the originator MAC address to search the best next hop router for + * @if_outgoing: the interface where the OGM should be sent to + * + * Return: A neighbor node which is the best router towards the given originator + * address. Bonding candidates are ignored. + */ +static struct batadv_neigh_node * +batadv_orig_to_direct_router(struct batadv_priv *bat_priv, u8 *orig_addr, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_neigh_node *neigh_node; + struct batadv_orig_node *orig_node; + + orig_node = batadv_orig_hash_find(bat_priv, orig_addr); + if (!orig_node) + return NULL; + + neigh_node = batadv_orig_router_get(orig_node, if_outgoing); + batadv_orig_node_put(orig_node); + + return neigh_node; +} + /** * batadv_iv_ogm_process_per_outif() - process a batman iv OGM for an outgoing * interface @@ -1372,8 +1406,9 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, router = batadv_orig_router_get(orig_node, if_outgoing); if (router) { - router_router = batadv_orig_router_get(router->orig_node, - if_outgoing); + router_router = batadv_orig_to_direct_router(bat_priv, + router->addr, + if_outgoing); router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing); } -- cgit v1.2.3 From ce425dd05d0fe7594930a0fb103634f35ac47bb6 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 6 May 2026 22:20:49 +0200 Subject: batman-adv: tp_meter: fix tp_num leak on kmalloc failure When batadv_tp_start() or batadv_tp_init_recv() fail to allocate a new tp_vars object, the previously incremented bat_priv->tp_num counter is never decremented. This causes tp_num to drift upward on each allocation failure. Since only BATADV_TP_MAX_NUM sessions can be started and the count is never reduced for these failed allocations, it causes to an exhaustion of throughput meter sessions. In worst case, no new throughput meter session can be started until the mesh interface is removed. The error handling must decrement tp_num releasing the lock and aborting the creation of an throughput meter session Cc: stable@kernel.org Fixes: 33a3bb4a3345 ("batman-adv: throughput meter implementation") Signed-off-by: Sven Eckelmann --- net/batman-adv/tp_meter.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 58ca59a2799e..066c76113fc4 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -994,6 +994,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, tp_vars = kmalloc_obj(*tp_vars, GFP_ATOMIC); if (!tp_vars) { + atomic_dec(&bat_priv->tp_num); spin_unlock_bh(&bat_priv->tp_list_lock); batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Meter: %s cannot allocate list elements\n", @@ -1366,8 +1367,10 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv, } tp_vars = kmalloc_obj(*tp_vars, GFP_ATOMIC); - if (!tp_vars) + if (!tp_vars) { + atomic_dec(&bat_priv->tp_num); goto out_unlock; + } ether_addr_copy(tp_vars->other_end, icmp->orig); tp_vars->role = BATADV_TP_RECEIVER; -- cgit v1.2.3 From 4ae1709a314060a196981b344610d023ea841e57 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 6 May 2026 22:20:50 +0200 Subject: batman-adv: bla: prevent use-after-free when deleting claims When batadv_bla_del_backbone_claims() removes all claims for a backbone, it does this by dropping the link entry in the hash list. This list entry itself was one of the references which need to be dropped at the same time via batadv_claim_put(). But the batadv_claim_put() must not be done before the last access to the claim object in this function. Otherwise the claim might be freed already by the batadv_claim_release() function before the list entry was dropped. Cc: stable@kernel.org Fixes: 23721387c409 ("batman-adv: add basic bridge loop avoidance code") Signed-off-by: Sven Eckelmann --- net/batman-adv/bridge_loop_avoidance.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 51fe028b9088..8b77dd2ecfa4 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -318,8 +318,8 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) if (claim->backbone_gw != backbone_gw) continue; - batadv_claim_put(claim); hlist_del_rcu(&claim->hash_entry); + batadv_claim_put(claim); } spin_unlock_bh(list_lock); } -- cgit v1.2.3 From cf6b604011591865ae39ac82de8978c1120d17af Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 6 May 2026 22:20:51 +0200 Subject: batman-adv: bla: only purge non-released claims When batadv_bla_purge_claims() goes through the list of claims, it is only traversing the hash list with an rcu_read_lock(). Due to a potential parallel batadv_claim_put(), it can happen that it encounters a claim which was actually in the process of being released+freed by batadv_claim_release(). In this case, backbone_gw is set to NULL before the delayed RCU kfree is started. Calling batadv_bla_claim_get_backbone_gw() is then no longer allowed because it would cause a NULL-ptr derefence. To avoid this, only claims with a valid reference counter must be purged. All others are already taken care of. Cc: stable@kernel.org Fixes: 23721387c409 ("batman-adv: add basic bridge loop avoidance code") Signed-off-by: Sven Eckelmann --- net/batman-adv/bridge_loop_avoidance.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 8b77dd2ecfa4..879ab043d57a 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1288,6 +1288,13 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, rcu_read_lock(); hlist_for_each_entry_rcu(claim, head, hash_entry) { + /* only purge claims not currently in the process of being released. + * Such claims could otherwise have a NULL-ptr backbone_gw set because + * they already went through batadv_claim_release() + */ + if (!kref_get_unless_zero(&claim->refcount)) + continue; + backbone_gw = batadv_bla_claim_get_backbone_gw(claim); if (now) goto purge_now; @@ -1313,6 +1320,7 @@ purge_now: claim->addr, claim->vid); skip: batadv_backbone_gw_put(backbone_gw); + batadv_claim_put(claim); } rcu_read_unlock(); } -- cgit v1.2.3 From ba9d20ee9076dac32c371116bacbe72480eb356c Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 6 May 2026 22:20:52 +0200 Subject: batman-adv: bla: put backbone reference on failed claim hash insert When batadv_bla_add_claim() fails to insert a new claim into the hash, it leaked a reference to the backbone_gw for which the claim was intended. Call batadv_backbone_gw_put() on the error path to release the reference and avoid leaking the backbone_gw object. Cc: stable@kernel.org Fixes: 3db0decf1185 ("batman-adv: Fix non-atomic bla_claim::backbone_gw access") Signed-off-by: Sven Eckelmann --- net/batman-adv/bridge_loop_avoidance.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 879ab043d57a..cec11f1251d6 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -723,6 +723,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, if (unlikely(hash_added != 0)) { /* only local changes happened. */ + batadv_backbone_gw_put(backbone_gw); kfree(claim); return; } -- cgit v1.2.3 From f7700a4415afb3ac1767a556094e4ef8bd440e41 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 8 May 2026 20:37:46 +0800 Subject: ublk: fix use-after-free in ublk_cancel_cmd() When ublk_reset_ch_dev() clears io->cmd via ublk_queue_reinit() concurrently with ublk_cancel_cmd(), ublk_cancel_cmd() can read a stale pointer and pass it to io_uring_cmd_done(), causing a use-after-free. Fix by synchronizing the two paths with ubq->cancel_lock: - ublk_cancel_cmd(): read and clear io->cmd under cancel_lock, then call io_uring_cmd_done() on the saved local copy outside the lock. - ublk_reset_ch_dev(): hold cancel_lock across ublk_queue_reinit() so that io->cmd and io->flags are cleared atomically with respect to ublk_cancel_cmd(). Fixes: 216c8f5ef0f2 ("ublk: replace monitor with cancelable uring_cmd") Signed-off-by: Ming Lei Link: https://patch.msgid.link/20260508123746.242018-1-tom.leiming@gmail.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 57ec900f0ce0..6d13f1481de0 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2411,8 +2411,14 @@ static void ublk_reset_ch_dev(struct ublk_device *ub) { int i; - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_queue_reinit(ub, ublk_get_queue(ub, i)); + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + struct ublk_queue *ubq = ublk_get_queue(ub, i); + + /* Sync with ublk_cancel_cmd() */ + spin_lock(&ubq->cancel_lock); + ublk_queue_reinit(ub, ubq); + spin_unlock(&ubq->cancel_lock); + } /* set to NULL, otherwise new tasks cannot mmap io_cmd_buf */ ub->mm = NULL; @@ -2753,6 +2759,7 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, { struct ublk_io *io = &ubq->ios[tag]; struct ublk_device *ub = ubq->dev; + struct io_uring_cmd *cmd = NULL; struct request *req; bool done; @@ -2775,12 +2782,15 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, spin_lock(&ubq->cancel_lock); done = !!(io->flags & UBLK_IO_FLAG_CANCELED); - if (!done) + if (!done) { io->flags |= UBLK_IO_FLAG_CANCELED; + cmd = io->cmd; + io->cmd = NULL; + } spin_unlock(&ubq->cancel_lock); - if (!done) - io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, issue_flags); + if (!done && cmd) + io_uring_cmd_done(cmd, UBLK_IO_RES_ABORT, issue_flags); } /* -- cgit v1.2.3 From 47773fa85e470e9896a22a99ccd5b5930d469680 Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Thu, 30 Apr 2026 20:54:47 +0900 Subject: ntfs: use base mft_no when looking up base inode for extent record When the mft record is an extent record, ntfs_may_write_mft_record() looks up its base inode in the icache. The hash key passed to find_inode_nowait() must be the base inode's mft number (na.mft_no, set just above to MREF_LE(m->base_mft_record)), but the code passes @mft_no, the extent record's own number. find_inode_nowait() uses its second argument as the hashval, so the lookup lands in the wrong bucket and almost always returns NULL. ntfs_may_write_mft_record() then returns false and the writeback path (ntfs_write_mft_block()) skips that extent record, leaving the on-disk copy permanently out of sync with the in-memory one. The original ilookup5_nowait() call this conversion replaced used na.mft_no. Restore that. Fixes: 115380f9a2f9 ("ntfs: update mft operations") Signed-off-by: DaeMyung Kang Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/mft.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 7d989267a82b..ef423303565d 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -833,7 +833,7 @@ static bool ntfs_may_write_mft_record(struct ntfs_volume *vol, const u64 mft_no, vi = igrab(mft_vi); WARN_ON(vi != mft_vi); } else { - vi = find_inode_nowait(sb, mft_no, ntfs_test_inode_wb, &na); + vi = find_inode_nowait(sb, na.mft_no, ntfs_test_inode_wb, &na); if (na.state == NI_BeingDeleted || na.state == NI_BeingCreated) return false; } -- cgit v1.2.3 From 49c12bee2bb2604e82a997521175b85ca5421685 Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Fri, 1 May 2026 02:20:54 +0900 Subject: ntfs: redirty folio when ntfs_write_mft_block() runs out of memory ntfs_write_mft_block() is called by writeback_iter() with the folio locked. When the per-call allocations for @locked_nis or @ref_inos fail, the function returns -ENOMEM directly without unlocking the folio. Any later task that needs the folio's lock then stalls, and the folio's dirty state is silently lost from the writeback iterator's point of view. Use folio_redirty_for_writepage() so the folio remains dirty for a subsequent writeback pass, unlock it, and only then return -ENOMEM so the caller can propagate the error to fsync()/sync_filesystem(). Fixes: f462fdf3d6a4 ("ntfs: reduce stack usage in ntfs_write_mft_block()") Signed-off-by: DaeMyung Kang Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/mft.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index ef423303565d..f5017f337068 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -2721,8 +2721,11 @@ static int ntfs_write_mft_block(struct folio *folio, struct writeback_control *w ntfs_debug("Entering for inode 0x%llx, attribute type 0x%x, folio index 0x%lx.", ni->mft_no, ni->type, folio->index); - if (!locked_nis || !ref_inos) + if (!locked_nis || !ref_inos) { + folio_redirty_for_writepage(wbc, folio); + folio_unlock(folio); return -ENOMEM; + } /* We have to zero every time due to mmap-at-end-of-file. */ if (folio->index >= (i_size >> folio_shift(folio))) -- cgit v1.2.3 From 618c991cdf031925b09cbb1117f613abdb068680 Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Fri, 1 May 2026 02:20:55 +0900 Subject: ntfs: capture mft mirror sync errors in ntfs_write_mft_block() After ntfs_sync_mft_mirror() became able to return real I/O errors, ntfs_write_mft_block() still discards its return value at the call site inside the per-record loop. A failed $MFTMirr write therefore leaves the volume looking clean from the writeback path even though the on-disk mirror is now stale. Capture the return value and feed it into the function's existing @err variable using the same "first error wins" pattern already used on other failure paths. The error is propagated to the caller and, via the existing tail of the function, sets NVolErrors so umount and chkdsk see the volume as inconsistent. Fixes: 115380f9a2f9 ("ntfs: update mft operations") Signed-off-by: DaeMyung Kang Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/mft.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index f5017f337068..f5186a19dffc 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -2843,9 +2843,13 @@ flush_bio: } prev_mft_ofs = mft_ofs; - if (mft_no < vol->mftmirr_size) - ntfs_sync_mft_mirror(vol, mft_no, + if (mft_no < vol->mftmirr_size) { + int sub_err = ntfs_sync_mft_mirror(vol, mft_no, (struct mft_record *)(kaddr + mft_ofs)); + + if (unlikely(sub_err) && !err) + err = sub_err; + } } else if (ref_inos[nr_ref_inos]) nr_ref_inos++; } -- cgit v1.2.3 From 563d0d4c2c1dc1f3f84104c78b388d0490c0086f Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Fri, 1 May 2026 02:20:53 +0900 Subject: ntfs: wait for sync mft writes to complete ntfs_sync_mft_mirror() and write_mft_record_nolock() with @sync set are both documented as synchronous, but neither actually waits for the bio they submit nor inspects bi_status. write_inode() can return success while dirty mft record bytes are still in flight, and bio errors are silently dropped: the volume is not marked with errors and the inode is not redirtied. This breaks fsync()/sync metadata durability. Switch ntfs_sync_mft_mirror() and the @sync path of write_mft_record_nolock() to submit_bio_wait() and propagate the returned error to the caller. Capture ntfs_sync_mft_mirror()'s return value at its call sites in write_mft_record_nolock() so a mirror write failure surfaces too. The @sync parameter only controls the main MFT bio. The !@sync main submission is therefore unchanged and still uses ntfs_bio_end_io() to drop the folio reference taken before submission. The mirror call has always been documented as performing synchronous I/O regardless of @sync, so making it actually block restores the originally intended contract for both @sync and !@sync callers. Note this only fixes the synchronous mirror/main paths reachable from write_mft_record_nolock(). The main MFT write submitted from ntfs_write_mft_block() (the .writepages path) still does not wait for completion or check bi_status; that requires a larger restructuring and is left to a follow-up patch. Fixes: 115380f9a2f9 ("ntfs: update mft operations") Signed-off-by: DaeMyung Kang Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/mft.c | 63 ++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index f5186a19dffc..68f6fc8b7b62 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -449,7 +449,7 @@ static void ntfs_bio_end_io(struct bio *bio) int ntfs_sync_mft_mirror(struct ntfs_volume *vol, const u64 mft_no, struct mft_record *m) { - u8 *kmirr = NULL; + u8 *kmirr; struct folio *folio; unsigned int folio_ofs, lcn_folio_off = 0; int err = 0; @@ -479,6 +479,7 @@ int ntfs_sync_mft_mirror(struct ntfs_volume *vol, const u64 mft_no, kmirr = kmap_local_folio(folio, 0) + folio_ofs; /* Copy the mst protected mft record to the mirror. */ memcpy(kmirr, m, vol->mft_record_size); + kunmap_local(kmirr); if (vol->cluster_size_bits > PAGE_SHIFT) { lcn_folio_off = folio->index << PAGE_SHIFT; @@ -490,20 +491,22 @@ int ntfs_sync_mft_mirror(struct ntfs_volume *vol, const u64 mft_no, NTFS_B_TO_SECTOR(vol, NTFS_CLU_TO_B(vol, vol->mftmirr_lcn) + lcn_folio_off + folio_ofs); - if (!bio_add_folio(bio, folio, vol->mft_record_size, folio_ofs)) { + if (bio_add_folio(bio, folio, vol->mft_record_size, folio_ofs)) + err = submit_bio_wait(bio); + else err = -EIO; - bio_put(bio); - goto unlock_folio; - } + bio_put(bio); - bio->bi_end_io = ntfs_bio_end_io; - submit_bio(bio); - /* Current state: all buffers are clean, unlocked, and uptodate. */ + /* + * The in-memory mirror is now valid because we just memcpy()'d the + * mst-protected mft record into it. Mark the folio uptodate even on + * write error so a subsequent read_mapping_folio() does not refetch + * the stale on-disk mirror and overwrite this copy. The error is + * propagated to the caller via @err. + */ folio_mark_uptodate(folio); -unlock_folio: folio_unlock(folio); - kunmap_local(kmirr); folio_put(folio); if (likely(!err)) { ntfs_debug("Done."); @@ -588,20 +591,36 @@ int write_mft_record_nolock(struct ntfs_inode *ni, struct mft_record *m, int syn } /* Synchronize the mft mirror now if not @sync. */ - if (!sync && ni->mft_no < vol->mftmirr_size) - ntfs_sync_mft_mirror(vol, ni->mft_no, fixup_m); + if (!sync && ni->mft_no < vol->mftmirr_size) { + int sub_err = ntfs_sync_mft_mirror(vol, ni->mft_no, + fixup_m); + if (unlikely(sub_err) && !err) + err = sub_err; + } - folio_get(folio); - bio->bi_private = folio; - bio->bi_end_io = ntfs_bio_end_io; - submit_bio(bio); + if (sync) { + int sub_err = submit_bio_wait(bio); + + bio_put(bio); + if (unlikely(sub_err) && !err) + err = sub_err; + } else { + folio_get(folio); + bio->bi_private = folio; + bio->bi_end_io = ntfs_bio_end_io; + submit_bio(bio); + } offset += vol->cluster_size; i++; } /* If @sync, now synchronize the mft mirror. */ - if (sync && ni->mft_no < vol->mftmirr_size) - ntfs_sync_mft_mirror(vol, ni->mft_no, fixup_m); + if (sync && ni->mft_no < vol->mftmirr_size) { + int sub_err = ntfs_sync_mft_mirror(vol, ni->mft_no, fixup_m); + + if (unlikely(sub_err) && !err) + err = sub_err; + } kunmap_local(kaddr); if (unlikely(err)) { /* I/O error during writing. This is really bad! */ @@ -617,10 +636,10 @@ put_bio_out: bio_put(bio); err_out: /* - * Current state: all buffers are clean, unlocked, and uptodate. - * The caller should mark the base inode as bad so that no more i/o - * happens. ->drop_inode() will still be invoked so all extent inodes - * and other allocated memory will be freed. + * The caller should mark the base inode as bad so no more I/O + * happens. ->drop_inode() will still be invoked so all extent inodes + * and other allocated memory will be freed. ENOMEM is retried by + * redirtying the mft record below. */ if (err == -ENOMEM) { ntfs_error(vol->sb, -- cgit v1.2.3 From f3c8cd8a63683f53a4e0247ef2b3cdc5132e97fa Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Sat, 2 May 2026 09:48:52 +0900 Subject: ntfs: fix copy length in ntfs_bdev_write() for non-page-aligned start This is not a normal data I/O hot path. The single in-tree caller is the $LogFile emptying path used during read-write mount/remount, and the bug only becomes visible on NTFS volumes whose cluster_size is strictly smaller than the kernel's PAGE_SIZE (typically 4 KiB on x86_64). Per Microsoft's format command documentation, NTFS supports allocation unit sizes starting at 512 bytes, so 512 B, 1 KiB and 2 KiB clusters are uncommon but valid on-disk configurations. When cluster_size >= PAGE_SIZE every "start" passed in is page-aligned and the buggy "from != 0" path is never taken. ntfs_bdev_write() splits the write across one or more block-device folios. Inside the loop, "to" is computed as the *end byte offset* within the current page (0..PAGE_SIZE), and "from" is the start byte offset within the page (reset to 0 from the second iteration onward). The copy length should therefore be "to - from", but the current code uses "to" directly: to = min_t(u32, end - offset, PAGE_SIZE); memcpy_to_folio(folio, from, buf + buf_off, to); buf_off += to; When "from != 0" (i.e. "start" is not page-aligned) memcpy_to_folio() copies "from" extra bytes: - it reads "from" bytes past the source buffer into kernel heap; - it writes "from" bytes past the requested range into the next part of the block-device page (or, if "from + to > PAGE_SIZE", past the folio boundary entirely, which trips the VM_BUG_ON inside memcpy_to_folio() on CONFIG_DEBUG_VM=y kernels). "buf_off" is then advanced by the wrong amount, so every subsequent iteration also reads the source buffer at the wrong offset and writes the wrong content to disk. ntfs_empty_logfile() calls ntfs_bdev_write(sb, empty_buf, NTFS_CLU_TO_B(vol, lcn), vol->cluster_size); with empty_buf sized to vol->cluster_size. On a sub-PAGE_SIZE-cluster volume, any $LogFile run whose LCN is not aligned to PAGE_SIZE / cluster_size reaches the non-page-aligned path. The over-copy can read beyond empty_buf and overwrite the sectors following the requested cluster in the block-device page with unrelated kernel heap contents while $LogFile is being emptied. A userspace reducer of the same arithmetic and copy loop confirms the bug under AddressSanitizer: ASan reports a heap-buffer-overflow read past the source buffer for the buggy length, and the fixed version is ASan-clean. Compute the copy length as "to - from" and advance buf_off by the same amount. Fixes: 5218cd102aec ("ntfs: update misc operations") Link: https://learn.microsoft.com/en-us/windows-server/administration/windows-commands/format Signed-off-by: DaeMyung Kang Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/bdev-io.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ntfs/bdev-io.c b/fs/ntfs/bdev-io.c index 67e65c88d681..27d7c2767a33 100644 --- a/fs/ntfs/bdev-io.c +++ b/fs/ntfs/bdev-io.c @@ -97,6 +97,8 @@ int ntfs_bdev_write(struct super_block *sb, void *buf, loff_t start, size_t size idx_end++; for (; idx < idx_end; idx++, from = 0) { + u32 len; + folio = read_mapping_folio(sb->s_bdev->bd_mapping, idx, NULL); if (IS_ERR(folio)) { ntfs_error(sb, "Unable to read %ld page", idx); @@ -105,9 +107,10 @@ int ntfs_bdev_write(struct super_block *sb, void *buf, loff_t start, size_t size offset = (loff_t)idx << PAGE_SHIFT; to = min_t(u32, end - offset, PAGE_SIZE); + len = to - from; - memcpy_to_folio(folio, from, buf + buf_off, to); - buf_off += to; + memcpy_to_folio(folio, from, buf + buf_off, len); + buf_off += len; folio_mark_uptodate(folio); folio_mark_dirty(folio); folio_put(folio); -- cgit v1.2.3 From 6c30af0b203e7d7f63f70df1f2c4694c1e5ed589 Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Sat, 2 May 2026 09:49:16 +0900 Subject: ntfs: avoid use-after-free of index inode in ntfs_inode_sync_filename() ntfs_inode_sync_filename() walks every FILE_NAME attribute and, for each one that points at a different parent, opens the parent index inode with ntfs_iget() and locks index_ni->mrec_lock. All three error branches (NInoBeingDeleted, ntfs_index_ctx_get failure, ntfs_index_lookup failure) drop the parent reference before unlocking: iput(index_vi); mutex_unlock(&index_ni->mrec_lock); continue; index_ni is NTFS_I(index_vi), so the ntfs_inode (and its mrec_lock) is embedded in the inode allocation. If the parent directory is not held outside the icache - no open dentry, recently evicted from dcache, no other concurrent lookup - ntfs_iget() returns with i_count == 1 and our iput() drops the last reference. evict_inode() then runs and destroy_inode() schedules the slab object for RCU free, while mutex_unlock() on the next line is still touching index_ni->mrec_lock. Swap the order so the mutex is dropped while index_vi is still alive, matching the success path at the bottom of the loop which already unlocks before iput(). Reproduced under KASAN with a debug build that forces ntfs_index_ctx_get() to fail when the parent index inode has been opened with i_count == 1. KASAN reports a slab-use-after-free read on the parent's mrec_lock from mutex_unlock() on the writeback worker: BUG: KASAN: slab-use-after-free in __mutex_unlock_slowpath+0xb5/0x970 Read of size 8 at addr ffff8880014b7598 by task kworker/u8:0/12 Workqueue: writeback wb_workfn (flush-253:0) Call Trace: mutex_unlock ntfs_inode_sync_filename __ntfs_write_inode ntfs_write_inode __writeback_single_inode Allocated by task 103: ntfs_alloc_big_inode ntfs_iget ntfs_lookup __x64_sys_mkdir Freed by task 12: ntfs_free_big_inode i_callback rcu_do_batch Last potentially related work creation: call_rcu destroy_inode evict dispose_list evict_inodes ntfs_inode_sync_filename __ntfs_write_inode The buggy address belongs to the object at ffff8880014b7440 which belongs to the cache ntfs_big_inode_cache of size 1800 The freed object is the parent directory inode itself: allocated by mkdir(2) via ntfs_iget(), then released through call_rcu(i_callback) that destroy_inode() scheduled when evict_inodes() ran from inside ntfs_inode_sync_filename(). Re-running the same workload with mutex_unlock() moved before iput() runs cleanly under KASAN. Fixes: af0db57d4293 ("ntfs: update inode operations") Signed-off-by: DaeMyung Kang Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 16890d411194..360bebd1ee3f 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2582,8 +2582,8 @@ int ntfs_inode_sync_filename(struct ntfs_inode *ni) mutex_lock_nested(&index_ni->mrec_lock, NTFS_INODE_MUTEX_PARENT); if (NInoBeingDeleted(ni)) { - iput(index_vi); mutex_unlock(&index_ni->mrec_lock); + iput(index_vi); continue; } @@ -2591,8 +2591,8 @@ int ntfs_inode_sync_filename(struct ntfs_inode *ni) if (!ictx) { ntfs_error(sb, "Failed to get index ctx, inode %llu", index_ni->mft_no); - iput(index_vi); mutex_unlock(&index_ni->mrec_lock); + iput(index_vi); continue; } @@ -2601,8 +2601,8 @@ int ntfs_inode_sync_filename(struct ntfs_inode *ni) ntfs_debug("Index lookup failed, inode %llu", index_ni->mft_no); ntfs_index_ctx_put(ictx); - iput(index_vi); mutex_unlock(&index_ni->mrec_lock); + iput(index_vi); continue; } /* Update flags and file size. */ -- cgit v1.2.3 From de08874bae7db49d77085a34b62ebb491ea68e2e Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Mon, 4 May 2026 20:03:14 +0900 Subject: ntfs: match ntfs_resident_attr_min_value_length with $AttrDef Update ntfs_resident_attr_min_value_length() to align with $AttrDef. The $VOLUME_NAME is allowed to have the size of 0. The Windows 11 $AttrDef values are as follows: Attribute Name (ID) Size (Min-Max) Flags $STANDARD_INFORMATION (16) 48-72 Resident $ATTRIBUTE_LIST (32) No Limit Non-resident $FILE_NAME (48) 68-578 Resident, Index $OBJECT_ID (64) 0-256 Resident $SECURITY_DESCRIPTOR (80) No Limit Non-resident $VOLUME_NAME (96) 2-256 Resident $VOLUME_INFORMATION (112) 12-12 Resident $DATA (128) No Limit (None) $INDEX_ROOT (144) No Limit Resident $INDEX_ALLOCATION (160) No Limit Non-resident $BITMAP (176) No Limit Non-resident $REPARSE_POINT (192) 0-16384 Non-resident $EA_INFORMATION (208) 8-8 Resident $EA (224) 0-65536 (None) $LOGGED_UTILITY_STREAM (256) 0-65536 Non-resident Reported-by: woot000 Signed-off-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/attrib.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 97b660eaa00c..7ab3571cc5f9 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -583,24 +583,13 @@ static u32 ntfs_resident_attr_min_value_length(const __le32 type) case AT_STANDARD_INFORMATION: return offsetof(struct standard_information, ver) + sizeof(((struct standard_information *)0)->ver.v1.reserved12); - case AT_ATTRIBUTE_LIST: - return offsetof(struct attr_list_entry, name); case AT_FILE_NAME: - return offsetof(struct file_name_attr, file_name); - case AT_OBJECT_ID: - return sizeof(struct guid); - case AT_SECURITY_DESCRIPTOR: - return sizeof(struct security_descriptor_relative); + return offsetof(struct file_name_attr, file_name) + + sizeof(__le16) * 1; case AT_VOLUME_INFORMATION: return sizeof(struct volume_information); - case AT_INDEX_ROOT: - return sizeof(struct index_root); - case AT_REPARSE_POINT: - return offsetof(struct reparse_point, reparse_data); case AT_EA_INFORMATION: return sizeof(struct ea_information); - case AT_EA: - return offsetof(struct ea_attr, ea_name) + 1; default: return 0; } -- cgit v1.2.3 From 11f7a6d9d722aeb889f6363e4d07e9f0c54f1be1 Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Tue, 5 May 2026 22:07:52 +0900 Subject: ntfs: fix default_upcase refcount underflow and UAF on fs_context teardown ntfs_init_fs_context() allocates a fresh ntfs_volume with vol->upcase left as NULL. ntfs_free_fs_context() unconditionally calls ntfs_volume_free() during fs_context teardown, even when ntfs_fill_super() never ran or already cleaned up. ntfs_volume_free() then executes: mutex_lock(&ntfs_lock); if (vol->upcase == default_upcase) { ntfs_nr_upcase_users--; vol->upcase = NULL; } When the global default_upcase is also NULL (very first mount attempt, or all prior mounts have released the table), the comparison is NULL == NULL, and ntfs_nr_upcase_users is decremented even though this volume never claimed a reference. ntfs_nr_upcase_users is unsigned long, so the decrement wraps to ULONG_MAX. A subsequent successful mount can then free the shared table while the mounted volume still points at it: 1. ntfs_fill_super() does the temporary ntfs_nr_upcase_users++ at the "Generate the global default upcase table if necessary" block. With the prior wraparound this brings the counter back to 0. 2. If the volume's $UpCase matches the default, the match path does ntfs_nr_upcase_users++ and sets vol->upcase = default_upcase. The counter is now 1. 3. On the success path, !--ntfs_nr_upcase_users evaluates true and default_upcase is kvfree()'d while vol->upcase still points at it. Subsequent upcase comparisons through that mount touch freed memory. This was reproduced with KASAN by closing a fresh fsopen("ntfs") context, then mounting an NTFS image whose $UpCase table matches generate_default_upcase(), and finally doing a case-insensitive lookup. KASAN reports the dangling vol->upcase access: BUG: KASAN: use-after-free in ntfs_collate_names+0x3b4/0x420 Read of size 2 at addr ffff888008d40048 by task init/1 ntfs_collate_names+0x3b4/0x420 ntfs_lookup_inode_by_name+0x1921/0x3130 ntfs_lookup+0x193/0xc40 vfs_statx+0xc7/0x190 vfs_fstatat+0x4b/0xa0 __do_sys_newfstatat+0x92/0xf0 The same QEMU reproducer was rerun after this change with KASAN enabled. It reached "reproducer finished", and the log contained no KASAN, use-after-free, Oops, or panic signatures. Guard each comparison with an explicit vol->upcase non-NULL check so a volume that never took a reference cannot decrement the global users counter. Apply the same guard to the other default_upcase release sites so all cleanup paths follow the same ownership rule: only volumes that actually hold a default_upcase reference may drop one. Fixes: 1e9ea7e04472 ("Revert "fs: Remove NTFS classic"") Signed-off-by: DaeMyung Kang Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 22dc7865eca7..e9de84fb8297 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1671,7 +1671,7 @@ iput_attrdef_err_out: iput_upcase_err_out: vol->upcase_len = 0; mutex_lock(&ntfs_lock); - if (vol->upcase == default_upcase) { + if (vol->upcase && vol->upcase == default_upcase) { ntfs_nr_upcase_users--; vol->upcase = NULL; } @@ -1701,7 +1701,7 @@ static void ntfs_volume_free(struct ntfs_volume *vol) * the number of upcase users if we are a user. */ mutex_lock(&ntfs_lock); - if (vol->upcase == default_upcase) { + if (vol->upcase && vol->upcase == default_upcase) { ntfs_nr_upcase_users--; vol->upcase = NULL; } @@ -2494,7 +2494,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) } vol->upcase_len = 0; mutex_lock(&ntfs_lock); - if (vol->upcase == default_upcase) { + if (vol->upcase && vol->upcase == default_upcase) { ntfs_nr_upcase_users--; vol->upcase = NULL; } -- cgit v1.2.3 From c37d9e68b6766f5e28057ee2ea3251b7ffe88e54 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 6 May 2026 20:36:37 +0900 Subject: ntfs: fix variable dereferenced before check ni in ntfs_attr_open() Smatch warnings: ntfs_attr_open() warn: variable dereferenced before check 'ni' Moves the ntfs_debug() call after the NULL pointer checks to ensure safe access to the structure members. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon --- fs/ntfs/attrib.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 7ab3571cc5f9..d60d0c686718 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -2913,12 +2913,12 @@ int ntfs_attr_open(struct ntfs_inode *ni, const __le32 type, struct ntfs_inode *base_ni; int err; - ntfs_debug("Entering for inode %lld, attr 0x%x.\n", - (unsigned long long)ni->mft_no, type); - if (!ni || !ni->vol) return -EINVAL; + ntfs_debug("Entering for inode %lld, attr 0x%x.\n", + ni->mft_no, type); + if (NInoAttr(ni)) base_ni = ni->ext.base_ntfs_ino; else -- cgit v1.2.3 From 11816f7131c876b911605a8dc8b0a8835ed0d715 Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Wed, 6 May 2026 18:24:48 +0900 Subject: ntfs: fix out-of-bounds write in ntfs_rl_collapse_range() merge path ntfs_rl_collapse_range() merges the run on the left of the collapsed region with the run on its right when they are contiguous. The contiguous check chooses a clamped index when @new_1st_cnt is 0: i = new_1st_cnt == 0 ? 1 : new_1st_cnt; if (ntfs_rle_lcn_contiguous(&new_rl[i - 1], &new_rl[i])) { but the merge itself uses the unclamped value: s_rl = &new_rl[new_1st_cnt - 1]; s_rl->length += s_rl[1].length; When @new_1st_cnt is 0 this computes &new_rl[-1] and writes 8 bytes before the kvcalloc() runlist buffer. The path is reachable through fallocate(FALLOC_FL_COLLAPSE_RANGE) starting at vcn 0 against an attribute whose first run after the collapsed region and the following run are holes. In that case ntfs_rle_lcn_contiguous() returns true because both checked entries are LCN_HOLE, so the merge path is entered with @new_1st_cnt still 0. Such consecutive holes do not occur on a well-formed runlist (NTFS keeps runlists coalesced in memory), so this OOB path is only reachable from a crafted volume. A normal runlist has no element to the left of vcn 0, so the left/right merge is not valid when @new_1st_cnt is 0. Require @new_1st_cnt to be positive before checking or performing the merge. This skips the merge entirely in that case instead of clamping the merge target. The out-of-bounds write can corrupt an adjacent slab object. On a non-KASAN kernel, it is reachable after a crafted NTFS volume has been mounted read-write with the legacy fs/ntfs driver, by a local user that has write access to the crafted file. Fixes: 11ccc9107dc4 ("ntfs: update runlist handling and cluster allocator") Suggested-by: Hyunchul Lee Signed-off-by: DaeMyung Kang Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/runlist.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c index da21dbeaaf66..e7de3d01257e 100644 --- a/fs/ntfs/runlist.c +++ b/fs/ntfs/runlist.c @@ -2056,10 +2056,11 @@ struct runlist_element *ntfs_rl_collapse_range(struct runlist_element *dst_rl, i * consists of holes. */ merge_cnt = 0; - i = new_1st_cnt == 0 ? 1 : new_1st_cnt; - if (ntfs_rle_lcn_contiguous(&new_rl[i - 1], &new_rl[i])) { - /* Merge right and left */ - s_rl = &new_rl[new_1st_cnt - 1]; + if (new_1st_cnt > 0 && + ntfs_rle_lcn_contiguous(&new_rl[new_1st_cnt - 1], + &new_rl[new_1st_cnt])) { + /* Merge right and left. */ + s_rl = &new_rl[new_1st_cnt - 1]; s_rl->length += s_rl[1].length; merge_cnt = 1; } -- cgit v1.2.3 From 79629b748ae2f7c19a562b83e8055499765dea89 Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Thu, 7 May 2026 11:18:31 +0900 Subject: ntfs: fix out-of-bounds write in ntfs_index_walk_down() ntfs_index_walk_down() used to update the index traversal depth directly before writing parent_pos[] and parent_vcn[]. A malformed directory index with too many child-node levels can therefore advance pindex past MAX_PARENT_VCN and write past the fixed arrays in struct ntfs_index_context, corrupting context state used by later index traversal. Use ntfs_icx_parent_inc() for walk-down transitions so the existing depth limit is enforced before the arrays are updated. Make the helper check the limit before incrementing pindex so failed callers do not leave the context at an out-of-range depth. This is reachable by iterating a crafted NTFS directory after the volume has been mounted, including read-only mounts. The reproducer uses getdents64() on an index root that points to an excessively deep chain of child index blocks. A crafted directory index with a chain of child-node entries reproduced UBSAN array-index-out-of-bounds reports in ntfs_index_walk_down() and subsequent KASAN reports in ntfs_index_walk_up(). With this change, the same image is rejected with "Index is over 32 level deep" and no KASAN or UBSAN report is emitted. Fixes: 0a8ac0c1fa0b ("ntfs: update directory operations") Suggested-by: Namjae Jeon Signed-off-by: DaeMyung Kang Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/index.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c index a547bdcfa456..146e011c1a41 100644 --- a/fs/ntfs/index.c +++ b/fs/ntfs/index.c @@ -677,11 +677,11 @@ static int ntfs_ib_read(struct ntfs_index_context *icx, s64 vcn, struct index_bl static int ntfs_icx_parent_inc(struct ntfs_index_context *icx) { - icx->pindex++; - if (icx->pindex >= MAX_PARENT_VCN) { + if (icx->pindex >= MAX_PARENT_VCN - 1) { ntfs_error(icx->idx_ni->vol->sb, "Index is over %d level deep", MAX_PARENT_VCN); return -EOPNOTSUPP; } + icx->pindex++; return 0; } @@ -1970,6 +1970,7 @@ struct index_entry *ntfs_index_walk_down(struct index_entry *ie, struct ntfs_ind { struct index_entry *entry; struct index_block *ib; + int err; s64 vcn; entry = ie; @@ -1979,14 +1980,20 @@ struct index_entry *ntfs_index_walk_down(struct index_entry *ie, struct ntfs_ind ib = kvzalloc(ictx->block_size, GFP_NOFS); if (!ib) return ERR_PTR(-ENOMEM); - /* down from level zero */ + /* + * Descending from root index (level 0) to the first + * child level. is_in_root == true implies pindex == 0, + * so advance to level 1. + */ + ictx->pindex = 1; ictx->ir = NULL; ictx->ib = ib; - ictx->pindex = 1; ictx->is_in_root = false; } else { /* down from non-zero level */ - ictx->pindex++; + err = ntfs_icx_parent_inc(ictx); + if (err) + return ERR_PTR(err); } ictx->parent_pos[ictx->pindex] = 0; -- cgit v1.2.3 From 3086c49a075f144536db0268ad307e63a8e1dbdb Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Fri, 8 May 2026 00:48:52 +0900 Subject: ntfs: avoid leaking uninitialised bytes in new security descriptors ntfs_sd_add_everyone() builds the on-disk security descriptor for a newly created file by kmalloc()'ing a buffer and then partially filling it in: sd = kmalloc(sd_len, GFP_NOFS); ... sd->revision = 1; sd->control = SE_DACL_PRESENT | SE_SELF_RELATIVE; ... The buffer is then handed to ntfs_attr_add() and persisted as the SECURITY_DESCRIPTOR attribute of the new MFT record. The descriptor covers a relative security descriptor header, two SIDs (owner and group), an ACL header, and a single ACE, but several fields inside those structures are never written before the buffer is committed to disk: - struct security_descriptor_relative @alignment (1 byte) @sacl (4 bytes; SE_SACL_PRESENT is not set but the offset still reaches disk) - struct ntfs_sid (3 instances: owner, group, ACE.sid) identifier_authority.value[0..4] (5 bytes per SID, 15 total - only value[5] is set) - struct ntfs_acl @alignment1 (1 byte) @alignment2 (2 bytes) That is 23 bytes of uninitialised slab memory persisted to disk for every new file or directory the legacy ntfs driver creates. The "+ 4" trailing accounting in sd_len holds ace->sid.sub_authority[0], which the existing code does explicitly write to zero, so it is not part of the leak. Anything later able to read the SECURITY_DESCRIPTOR attribute - the same NTFS volume mounted on Windows or by another NTFS reader, an offline forensics tool, an unprivileged user that ends up with read access to the volume - can recover those bytes. The leak persists for the lifetime of the file on disk, not just the lifetime of the kernel that wrote it. Switch the allocation to kzalloc() so every byte the on-disk descriptor covers is zero before the explicit initialisations run. While there, replace the bare "return -1" allocation-failure path with a proper -ENOMEM so the error reaches userspace as a meaningful errno instead of an unrelated -EPERM. Found by inspection while auditing fs/ntfs new-inode paths. Fixes: af0db57d4293 ("ntfs: update inode operations") Signed-off-by: DaeMyung Kang Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 96c450e62efc..c4f82846c58c 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -344,9 +344,9 @@ static int ntfs_sd_add_everyone(struct ntfs_inode *ni) sd_len = sizeof(struct security_descriptor_relative) + 2 * (sizeof(struct ntfs_sid) + 8) + sizeof(struct ntfs_acl) + sizeof(struct ntfs_ace) + 4; - sd = kmalloc(sd_len, GFP_NOFS); + sd = kzalloc(sd_len, GFP_NOFS); if (!sd) - return -1; + return -ENOMEM; sd->revision = 1; sd->control = SE_DACL_PRESENT | SE_SELF_RELATIVE; -- cgit v1.2.3 From 786a45757dcdf8f2beb9d4a6db605db16c18b2b4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 28 Apr 2026 21:59:52 +0100 Subject: x86/kexec: Push kjump return address even for non-kjump kexec The version of purgatory code shipped by kexec-tools attempts to look above the top of its stack to find a return address for a kjump, even in a non-kjump kexec. After the commit in Fixes: the word above the stack might not be there, leading to a fault (which is at least now caught by my exception-handling code in kexec). That commit fixed things for the actual kjump path, but no longer "gratuitously" pushes the unused return address to the stack in the non-kjump path. Put that *back* in the non-kjump path, to prevent purgatory from crashing when trying to access it. Fixes: 2cacf7f23a02 ("x86/kexec: Fix stack and handling of re-entry point for ::preserve_context") Reported-by: Rohan Kakulawaram Signed-off-by: David Woodhouse Signed-off-by: Borislav Petkov (AMD) Acked-by: Dave Hansen Tested-by: Rohan Kakulawaram Cc: Link: https://patch.msgid.link/32d627134143ffd957891cb697138e839c623211.camel@infradead.org --- arch/x86/kernel/relocate_kernel_64.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 4ffba68dc57b..eaeb77464c06 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -136,6 +136,14 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) * %r13 original CR4 when relocate_kernel() was invoked */ + /* + * Set return address to 0 if not preserving context. The purgatory + * shipped in kexec-tools will unconditionally look for the return + * address on the stack and set a kexec_jump_back_entry= command + * line option if it's non-zero. There's no other way that it can + * tell a preserve-context (kjump) kexec from a normal one. + */ + pushq $0 /* store the start address on the stack */ pushq %rdx -- cgit v1.2.3 From 411c1cf430392c905e39f12bc305dd994da0b426 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 8 May 2026 15:20:23 +0100 Subject: arm64/entry: Fix arm64-specific rseq brokenness Mathias Stearn reports that since v6.19, there are two big issues affecting rseq: (1) On arm64 specifically, rseq critical sections aren't aborted when they should be. (2) The 'cpu_id_start' field is no longer written by the kernel in all cases it used to be, including some cases where TCMalloc depends on the kernel clobbering the field. This patch fixes issue #1. This patch DOES NOT fix issue #2, which will need to be addressed by other patches. The arm64-specific brokenness is a result of commits: 2fc0e4b4126c ("rseq: Record interrupt from user space") 39a167560a61 ("rseq: Optimize event setting") The first commit failed to add a call to rseq_note_user_irq_entry() on arm64. Thus arm64 never sets rseq_event::user_irq to record that it may be necessary to abort an active rseq critical section upon return to userspace. On its own, this commit had no functional impact as the value of rseq_event::user_irq was not consumed. The second commit relied upon rseq_event::user_irq to determine whether or not to bother to perform rseq work when returning to userspace. As rseq_event::user_irq wasn't set on arm64, this work would be skipped, and consequently an active rseq critical section would not be aborted. Fix this by giving arm64 syscall-specific entry/exit paths, and performing the relevant logic in syscall and non-syscall paths, including calling rseq_note_user_irq_entry() for non-syscall entry. Currently arm64 cannot use syscall_enter_from_user_mode(), syscall_exit_to_user_mode(), and irqentry_exit_to_user_mode(), due to ordering constraints with exception masking, and risk of ABI breakage for syscall tracing/audit/etc. For the moment the entry/exit logic is left as arm64-specific, directly using enter_from_user_mode() and exit_to_user_mode(), but mirroring the generic code. I intend to follow up with refactoring/cleanup, as we did for kernel mode entry paths in commit: 041aa7a85390 ("entry: Split preemption from irqentry_exit_to_kernel_mode()") ... which will allow arm64 to use the GENERIC_IRQ_ENTRY functions directly. Fixes: 39a167560a61 ("rseq: Optimize event setting") Reported-by: Mathias Stearn Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Acked-by: Catalin Marinas Link: https://lore.kernel.org/regressions/CAHnCjA25b+nO2n5CeifknSKHssJpPrjnf+dtr7UgzRw4Zgu=oA@mail.gmail.com/ Link: https://patch.msgid.link/20260508142023.3268622-1-mark.rutland@arm.com --- arch/arm64/kernel/entry-common.c | 31 ++++++++++++++++++++++++------- include/linux/irq-entry-common.h | 8 -------- include/linux/rseq_entry.h | 19 ------------------- 3 files changed, 24 insertions(+), 34 deletions(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index cb54335465f6..c7a23f7c2212 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -62,6 +62,13 @@ static void noinstr arm64_exit_to_kernel_mode(struct pt_regs *regs, irqentry_exit_to_kernel_mode_after_preempt(regs, state); } +static __always_inline void arm64_syscall_enter_from_user_mode(struct pt_regs *regs) +{ + enter_from_user_mode(regs); + mte_disable_tco_entry(current); + sme_enter_from_user_mode(); +} + /* * Handle IRQ/context state management when entering from user mode. * Before this function is called it is not safe to call regular kernel code, @@ -70,20 +77,30 @@ static void noinstr arm64_exit_to_kernel_mode(struct pt_regs *regs, static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs) { enter_from_user_mode(regs); + rseq_note_user_irq_entry(); mte_disable_tco_entry(current); sme_enter_from_user_mode(); } +static __always_inline void arm64_syscall_exit_to_user_mode(struct pt_regs *regs) +{ + local_irq_disable(); + syscall_exit_to_user_mode_prepare(regs); + local_daif_mask(); + sme_exit_to_user_mode(); + mte_check_tfsr_exit(); + exit_to_user_mode(); +} + /* * Handle IRQ/context state management when exiting to user mode. * After this function returns it is not safe to call regular kernel code, * instrumentable code, or any code which may trigger an exception. */ - static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs) { local_irq_disable(); - exit_to_user_mode_prepare_legacy(regs); + irqentry_exit_to_user_mode_prepare(regs); local_daif_mask(); sme_exit_to_user_mode(); mte_check_tfsr_exit(); @@ -92,7 +109,7 @@ static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs) asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs) { - arm64_exit_to_user_mode(regs); + arm64_syscall_exit_to_user_mode(regs); } /* @@ -716,12 +733,12 @@ static void noinstr el0_brk64(struct pt_regs *regs, unsigned long esr) static void noinstr el0_svc(struct pt_regs *regs) { - arm64_enter_from_user_mode(regs); + arm64_syscall_enter_from_user_mode(regs); cortex_a76_erratum_1463225_svc_handler(); fpsimd_syscall_enter(); local_daif_restore(DAIF_PROCCTX); do_el0_svc(regs); - arm64_exit_to_user_mode(regs); + arm64_syscall_exit_to_user_mode(regs); fpsimd_syscall_exit(); } @@ -868,11 +885,11 @@ static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr) static void noinstr el0_svc_compat(struct pt_regs *regs) { - arm64_enter_from_user_mode(regs); + arm64_syscall_enter_from_user_mode(regs); cortex_a76_erratum_1463225_svc_handler(); local_daif_restore(DAIF_PROCCTX); do_el0_svc_compat(regs); - arm64_exit_to_user_mode(regs); + arm64_syscall_exit_to_user_mode(regs); } static void noinstr el0_bkpt32(struct pt_regs *regs, unsigned long esr) diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h index 167fba7dbf04..1fabf0f5ea8e 100644 --- a/include/linux/irq-entry-common.h +++ b/include/linux/irq-entry-common.h @@ -218,14 +218,6 @@ static __always_inline void __exit_to_user_mode_validate(void) lockdep_sys_exit(); } -/* Temporary workaround to keep ARM64 alive */ -static __always_inline void exit_to_user_mode_prepare_legacy(struct pt_regs *regs) -{ - __exit_to_user_mode_prepare(regs, EXIT_TO_USER_MODE_WORK); - rseq_exit_to_user_mode_legacy(); - __exit_to_user_mode_validate(); -} - /** * syscall_exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required * @regs: Pointer to pt_regs on entry stack diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h index 2d0295df5107..63bc72086e75 100644 --- a/include/linux/rseq_entry.h +++ b/include/linux/rseq_entry.h @@ -749,24 +749,6 @@ static __always_inline void rseq_irqentry_exit_to_user_mode(void) ev->events = 0; } -/* Required to keep ARM64 working */ -static __always_inline void rseq_exit_to_user_mode_legacy(void) -{ - struct rseq_event *ev = ¤t->rseq.event; - - rseq_stat_inc(rseq_stats.exit); - - if (static_branch_unlikely(&rseq_debug_enabled)) - WARN_ON_ONCE(ev->sched_switch); - - /* - * Ensure that event (especially user_irq) is cleared when the - * interrupt did not result in a schedule and therefore the - * rseq processing did not clear it. - */ - ev->events = 0; -} - void __rseq_debug_syscall_return(struct pt_regs *regs); static __always_inline void rseq_debug_syscall_return(struct pt_regs *regs) @@ -782,7 +764,6 @@ static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned } static inline void rseq_syscall_exit_to_user_mode(void) { } static inline void rseq_irqentry_exit_to_user_mode(void) { } -static inline void rseq_exit_to_user_mode_legacy(void) { } static inline void rseq_debug_syscall_return(struct pt_regs *regs) { } static inline bool rseq_grant_slice_extension(unsigned long ti_work, unsigned long mask) { return false; } #endif /* !CONFIG_RSEQ */ -- cgit v1.2.3 From ab28a0673daabe7f0fcbd7a5e36334f2f003f02f Mon Sep 17 00:00:00 2001 From: Zqiang Date: Fri, 8 May 2026 19:50:45 +0800 Subject: sched_ext: Use IRQ_WORK_INIT_HARD() to initialize sch->disable_irq_work For built with PREEMPT_RT kernels, the scx_disable_irq_workfn() is called from per-cpu irq_work kthreads context, this means that when call the scx_dump_state() in the scx_disable_irq_workfn() to output current->comm/pid, it always output current irq_work kthread's comm/pid. this commit therefore use the IRQ_WORK_INIT_HARD() to initialize sch->disable_irq_work to make scx_disable_irq_workfn() is called from hardirq context. Fixes: f4a6c506d118 ("sched_ext: Always bounce scx_disable() through irq_work") Signed-off-by: Zqiang Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index f4e2db8e56be..df305712a2d4 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6589,7 +6589,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops, sch->slice_dfl = SCX_SLICE_DFL; atomic_set(&sch->exit_kind, SCX_EXIT_NONE); - init_irq_work(&sch->disable_irq_work, scx_disable_irq_workfn); + sch->disable_irq_work = IRQ_WORK_INIT_HARD(scx_disable_irq_workfn); kthread_init_work(&sch->disable_work, scx_disable_workfn); timer_setup(&sch->bypass_lb_timer, scx_bypass_lb_timerfn, 0); -- cgit v1.2.3 From b2ed01e7ad3de80333e9b962a44024b094bc0b2b Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 28 Apr 2026 11:44:42 +0200 Subject: drm/ttm: Fix ttm_bo_swapout() infinite LRU walk on swapout failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When ttm_tt_swapout() fails, the current code calls ttm_resource_add_bulk_move() followed by ttm_resource_move_to_lru_tail() to restore the resource's bulk_move membership. However, ttm_resource_move_to_lru_tail() places the resource at the tail of the LRU list which, relative to the walk cursor's hitch node (placed immediately after the resource when it was yielded), puts the resource *in front of the* the hitch. The next list_for_each_entry_continue() from the hitch finds the same resource again, causing an infinite loop. Fix by deferring del_bulk_move to the success path only. On the success path, TTM_TT_FLAG_SWAPPED has just been set by ttm_tt_swapout() but the resource is still tracked in the bulk_move range, so ttm_resource_del_bulk_move()'s !ttm_resource_unevictable() guard would incorrectly skip the removal. Introduce ttm_resource_del_bulk_move_unevictable() which bypasses that guard. Reported-by: Jatin Kataria Fixes: fc5d96670eb2 ("drm/ttm: Move swapped objects off the manager's LRU list") Cc: Christian König Cc: Matthew Brost Cc: Cc: # v6.13+ Assisted-by: GitHub_Copilot:claude-sonnet-4.6 Signed-off-by: Thomas Hellström Reviewed-by: Christian König Tested-by: Boqun Feng Link: https://patch.msgid.link/20260428094442.16985-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_bo.c | 16 ++++++---------- drivers/gpu/drm/ttm/ttm_resource.c | 13 +++++++++++++ include/drm/ttm/ttm_resource.h | 2 ++ 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d85f0a37ac35..293401705542 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1177,17 +1177,13 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) bdev->funcs->swap_notify(bo); if (ttm_tt_is_populated(tt)) { - spin_lock(&bdev->lru_lock); - ttm_resource_del_bulk_move(bo->resource, bo); - spin_unlock(&bdev->lru_lock); - ret = ttm_tt_swapout(bdev, tt, swapout_walk->gfp_flags); - - spin_lock(&bdev->lru_lock); - if (ret) - ttm_resource_add_bulk_move(bo->resource, bo); - ttm_resource_move_to_lru_tail(bo->resource); - spin_unlock(&bdev->lru_lock); + if (!ret) { + spin_lock(&bdev->lru_lock); + ttm_resource_del_bulk_move_unevictable(bo->resource, bo); + ttm_resource_move_to_lru_tail(bo->resource); + spin_unlock(&bdev->lru_lock); + } } out: diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 9f36631d48b6..0e5f1582f13d 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -292,6 +292,19 @@ void ttm_resource_del_bulk_move(struct ttm_resource *res, ttm_lru_bulk_move_del(bo->bulk_move, res); } +/* + * Remove a resource from its bulk_move, bypassing the unevictable check. + * Use only when the resource is known to still be tracked in the range despite + * the BO having just become unevictable; asserts that this is the case. + */ +void ttm_resource_del_bulk_move_unevictable(struct ttm_resource *res, + struct ttm_buffer_object *bo) +{ + WARN_ON_ONCE(!ttm_resource_unevictable(res, bo)); + if (bo->bulk_move) + ttm_lru_bulk_move_del(bo->bulk_move, res); +} + /* Move a resource to the LRU or bulk tail */ void ttm_resource_move_to_lru_tail(struct ttm_resource *res) { diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 33e80f30b8b8..a5d386583fb6 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -448,6 +448,8 @@ void ttm_resource_add_bulk_move(struct ttm_resource *res, struct ttm_buffer_object *bo); void ttm_resource_del_bulk_move(struct ttm_resource *res, struct ttm_buffer_object *bo); +void ttm_resource_del_bulk_move_unevictable(struct ttm_resource *res, + struct ttm_buffer_object *bo); void ttm_resource_move_to_lru_tail(struct ttm_resource *res); void ttm_resource_init(struct ttm_buffer_object *bo, -- cgit v1.2.3 From 481c2265286ef302327c93403a8cf7b3fe4506d0 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 4 May 2026 21:04:48 +0000 Subject: bpf: tcp: Fix type confusion in bpf_tcp_sock(). bpf_tcp_sock() only checks if sk->sk_protocol is IPPROTO_TCP, but RAW socket can bypass it: socket(AF_INET, SOCK_RAW, IPPROTO_TCP) Calling bpf_setsockopt() in SOCKOPT prog triggers out-of-bounds access to another slab object. [0] Let's use sk_is_tcp(). [0]: BUG: KASAN: slab-out-of-bounds in sol_tcp_sockopt (net/core/filter.c:5519) Read of size 8 at addr ffff88801083d760 by task test_progs/1259 CPU: 1 UID: 0 PID: 1259 Comm: test_progs Tainted: G OE 7.0.0-11175-gb5c111f4967b #1 PREEMPT(full) Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-debian-1.17.0-1 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:94 lib/dump_stack.c:120) print_report (mm/kasan/report.c:378 mm/kasan/report.c:482) kasan_report (mm/kasan/report.c:595) sol_tcp_sockopt (net/core/filter.c:5519) __bpf_getsockopt (net/core/filter.c:5633) bpf_sk_getsockopt (net/core/filter.c:5654) bpf_prog_629ba00a1601e9f2__setsockopt+0x86/0x22c __cgroup_bpf_run_filter_setsockopt (./include/linux/bpf.h:1402 ./include/linux/filter.h:722 ./include/linux/filter.h:729 kernel/bpf/cgroup.c:81 kernel/bpf/cgroup.c:2026) do_sock_setsockopt (net/socket.c:2363) __x64_sys_setsockopt (net/socket.c:2406) do_syscall_64 (arch/x86/entry/syscall_64.c:63) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:121) RIP: 0033:0x7f85f82fe7de Code: 55 48 63 c9 48 63 ff 45 89 c9 48 89 e5 48 83 ec 08 6a 2c e8 34 69 f7 ff c9 c3 66 90 f3 0f 1e fa 49 89 ca b8 36 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 0a c3 66 0f 1f 84 00 00 00 00 00 48 8b 15 e1 RSP: 002b:00007ffe59dcecd8 EFLAGS: 00000202 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f85f82fe7de RDX: 000000000000001c RSI: 0000000000000006 RDI: 000000000000000d RBP: 00007ffe59dcef20 R08: 000000000000003c R09: 0000000000000000 R10: 00007ffe59dcef00 R11: 0000000000000202 R12: 00007ffe59dcf268 R13: 0000000000000003 R14: 00007f85f9da5000 R15: 000055b2f3201400 The buggy address belongs to the object at ffff88801083d280 which belongs to the cache RAW of size 1792 The buggy address is located 1248 bytes inside of allocated 1792-byte region [ffff88801083d280, ffff88801083d980) Fixes: 655a51e536c0 ("bpf: Add struct bpf_tcp_sock and BPF_FUNC_tcp_sock") Reported-by: Damiano Melotti Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Link: https://patch.msgid.link/20260504210610.180150-2-kuniyu@google.com --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index bc96c18df4e0..cd88633f8dc1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7475,7 +7475,7 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) { - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + if (sk_fullsock(sk) && sk_is_tcp(sk)) return (unsigned long)sk; return (unsigned long)NULL; -- cgit v1.2.3 From a7488f089bdfa87c4fef1744d4dca9f4f8b46f8b Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 7 May 2026 04:04:46 -0700 Subject: workqueue: Release PENDING in __queue_work() drain/destroy reject path The caller of __queue_work() owns WORK_STRUCT_PENDING, won via test_and_set_bit() in queue_work_on()/__queue_delayed_work(). The state machine documented above __queue_work() requires that owner to either hand the token to a pwq (insert_work() -> set_work_pwq()), hand it to a timer, or release it via set_work_pool_and_clear_pending(). try_to_grab_pending() relies on this: when it observes "PENDING && off-queue" it busy-loops, trusting the current owner to make progress. The (__WQ_DESTROYING | __WQ_DRAINING) early-return path violates that contract. It WARN_ONCE()s and bare-returns, leaving work->data with PENDING set, WORK_STRUCT_PWQ clear, and work->entry empty. The path is reachable without explicit API abuse: queue_delayed_work() arms a timer with PENDING set; if drain_workqueue() runs while the timer is still pending, delayed_work_timer_fn() -> __queue_work() in softirq context hits the WARN, current is not a wq worker so is_chained_work() is false, and the work is silently dropped with PENDING leaked. Mirror what clear_pending_if_disabled() already does on its analogous reject path: unpack the off-queue data and call set_work_pool_and_clear_pending() to release the token before returning. I was able to reproduce this by queueing several slow works on a max_active=1 wq, arm a delayed_work whose timer fires while drain_workqueue() is blocked, then call cancel_delayed_work_sync(). Without this patch the cancel livelocks at 100% CPU; with it the cancel returns immediately. Signed-off-by: Breno Leitao Signed-off-by: Tejun Heo --- kernel/workqueue.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3d2e3b2ec528..c0e58f877e08 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2296,6 +2296,18 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, if (unlikely(wq->flags & (__WQ_DESTROYING | __WQ_DRAINING) && WARN_ONCE(!is_chained_work(wq), "workqueue: cannot queue %ps on wq %s\n", work->func, wq->name))) { + struct work_offq_data offqd; + + /* + * State on entry: PENDING is set, work is off-queue (no + * insert_work() has run). + * + * Returning without clearing PENDING would leave the work + * in a weird state (PENDING=1, PWQ=0, entry empty) + */ + work_offqd_unpack(&offqd, *work_data_bits(work)); + set_work_pool_and_clear_pending(work, offqd.pool_id, + work_offqd_pack_flags(&offqd)); return; } rcu_read_lock(); -- cgit v1.2.3 From 0143033dc22cdff912cfc13419f5db92fea3b4cb Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 8 May 2026 09:22:03 -0700 Subject: workqueue: Fix wq->cpu_pwq leak in alloc_and_link_pwqs() WQ_UNBOUND path For WQ_UNBOUND workqueues, alloc_and_link_pwqs() allocates wq->cpu_pwq via alloc_percpu() and then calls apply_workqueue_attrs_locked(). On failure it returns the error directly, bypassing the enomem: label which holds the only free_percpu(wq->cpu_pwq) in this function. The caller's error path kfree()s wq without touching wq->cpu_pwq, leaking one percpu pointer table (nr_cpu_ids * sizeof(void *) bytes) per failed call. If kmemleak is enabled, we can see: unreferenced object (percpu) 0xc0fffa5b121048 (size 8): comm "insmod", pid 776, jiffies 4294682844 backtrace (crc 0): pcpu_alloc_noprof+0x665/0xac0 __alloc_workqueue+0x33f/0xa20 alloc_workqueue_noprof+0x60/0x100 Route the error through the existing enomem: cleanup and any error before this one. Cc: stable@kernel.org Fixes: 636b927eba5b ("workqueue: Make unbound workqueues to use per-cpu pool_workqueues") Signed-off-by: Breno Leitao Signed-off-by: Tejun Heo --- kernel/workqueue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c0e58f877e08..33b721a9af02 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5654,7 +5654,9 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) ret = apply_workqueue_attrs_locked(wq, unbound_std_wq_attrs[highpri]); } - return ret; + if (ret) + goto enomem; + return 0; enomem: if (wq->cpu_pwq) { -- cgit v1.2.3 From d73549b8bb7fa6147666c579d66f72bf076f719f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 4 May 2026 21:04:49 +0000 Subject: selftest: bpf: Add test for bpf_tcp_sock() and RAW socket. Let's extend sockopt_sk.c to cover bpf_tcp_sock() for the wrong socket type. Before: # ./test_progs -t sockopt_sk [ 151.948613] ================================================================== [ 151.951376] BUG: KASAN: slab-out-of-bounds in sol_tcp_sockopt+0xc7/0x8e0 [ 151.954159] Read of size 8 at addr ffff88801083d760 by task test_progs/1259 ... run_test:FAIL:getsetsockopt unexpected error: -1 (errno 0) #427 sockopt_sk:FAIL After: #427 sockopt_sk:OK While at it, missing free() is fixed up. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20260504210610.180150-3-kuniyu@google.com --- tools/testing/selftests/bpf/prog_tests/sockopt_sk.c | 17 ++++++++++++++++- tools/testing/selftests/bpf/progs/sockopt_sk.c | 16 ++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 53637431ec5d..3a41c517b918 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -190,7 +190,7 @@ static int getsetsockopt(void) fd = socket(AF_NETLINK, SOCK_RAW, 0); if (fd < 0) { log_err("Failed to create AF_NETLINK socket"); - return -1; + goto err; } buf.u32 = 1; @@ -211,6 +211,21 @@ static int getsetsockopt(void) } ASSERT_EQ(optlen, 8, "Unexpected NETLINK_LIST_MEMBERSHIPS value"); + /* Trick bpf_tcp_sock() with IPPROTO_TCP */ + close(fd); + fd = socket(AF_INET, SOCK_RAW, IPPROTO_TCP); + if (!ASSERT_OK_FD(fd, "socket")) + goto err; + + /* The BPF prog intercepts this before the kernel sees it, any + * optlen works. Go with 4 bytes for simplicity. + */ + buf.u32 = 1; + optlen = sizeof(buf.u32); + err = setsockopt(fd, SOL_TCP, TCP_SAVED_SYN, &buf, optlen); + if (!ASSERT_ERR(err, "setsockopt(TCP_SAVED_SYN)")) + goto err; + free(big_buf); close(fd); return 0; diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index cb990a7d3d45..5e0b27e7855c 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -149,6 +149,20 @@ int _setsockopt(struct bpf_sockopt *ctx) if (sk && sk->family == AF_NETLINK) goto out; + if (sk && sk->family == AF_INET && sk->type == SOCK_RAW) { + struct bpf_tcp_sock *tp = bpf_tcp_sock(sk); + + if (tp) { + char saved_syn[60]; + + bpf_getsockopt(sk, SOL_TCP, TCP_SAVED_SYN, + &saved_syn, sizeof(saved_syn)); + goto consumed; + } + + goto out; + } + /* Make sure bpf_get_netns_cookie is callable. */ if (bpf_get_netns_cookie(NULL) == 0) @@ -224,6 +238,8 @@ int _setsockopt(struct bpf_sockopt *ctx) return 0; /* couldn't get sk storage */ storage->val = optval[0]; + +consumed: ctx->optlen = -1; /* BPF has consumed this option, don't call kernel * setsockopt handler. */ -- cgit v1.2.3 From 7995b216a731db657f356f6ae37a42f445b9a0ec Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 4 May 2026 21:04:50 +0000 Subject: mptcp: bpf: Fix type confusion in bpf_mptcp_sock_from_subflow() bpf_mptcp_sock_from_subflow() only checks if sk->sk_protocol is IPPROTO_TCP, but RAW socket can bypass it: socket(AF_INET, SOCK_RAW, IPPROTO_TCP) In this case, it would NOT be valid to call sk_is_mptcp() which will assume sk is a pointer to a struct tcp_sock, and wrongly checks for: tcp_sk(sk)->is_mptcp. Fixes: 3bc253c2e652 ("bpf: Add bpf_skc_to_mptcp_sock_proto") Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260504210610.180150-4-kuniyu@google.com --- net/mptcp/bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index 8a16672b94e2..4cc16cbeb328 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -14,7 +14,7 @@ struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) { - if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) + if (sk && sk_fullsock(sk) && sk_is_tcp(sk) && sk_is_mptcp(sk)) return mptcp_sk(mptcp_subflow_ctx(sk)->conn); return NULL; -- cgit v1.2.3 From decb84b8383ab7acff94db208ef7ed19f9c55e1f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 4 May 2026 21:04:51 +0000 Subject: bpf: tcp: Fix type confusion in bpf_skc_to_tcp_sock(). bpf_skc_to_tcp_sock() only checks if sk->sk_protocol is IPPROTO_TCP, but RAW socket can bypass it: socket(AF_INET, SOCK_RAW, IPPROTO_TCP) Let's use sk_is_tcp(). Fixes: 478cfbdf5f13 ("bpf: Add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20260504210610.180150-5-kuniyu@google.com --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index cd88633f8dc1..7d945dc2cb92 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11963,7 +11963,7 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk) { - if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + if (sk && sk_fullsock(sk) && sk_is_tcp(sk)) return (unsigned long)sk; return (unsigned long)NULL; -- cgit v1.2.3 From 843064b0a77eed3d6d63ffc53aeaa359672b4e12 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 4 May 2026 21:04:52 +0000 Subject: bpf: tcp: Fix type confusion in bpf_skc_to_tcp6_sock(). bpf_skc_to_tcp6_sock() only checks if sk->sk_protocol is IPPROTO_TCP and sk->sk_family is AF_INET6, but RAW socket can bypass it: socket(AF_INET6, SOCK_RAW, IPPROTO_TCP) Let's check sk->sk_type too. Fixes: af7ec1383361 ("bpf: Add bpf_skc_to_tcp6_sock() helper") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20260504210610.180150-6-kuniyu@google.com --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 7d945dc2cb92..684922efd481 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11947,7 +11947,7 @@ BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) */ BTF_TYPE_EMIT(struct tcp6_sock); if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && - sk->sk_family == AF_INET6) + sk->sk_type == SOCK_STREAM && sk->sk_family == AF_INET6) return (unsigned long)sk; return (unsigned long)NULL; -- cgit v1.2.3 From 1c2958e4ab1ed4594db16425dbcab33c56ea8330 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 4 May 2026 21:04:53 +0000 Subject: bpf: tcp: Fix type confusion in sol_tcp_sockopt(). sol_tcp_sockopt() only checks if sk->sk_protocol is IPPROTO_TCP, but RAW socket can bypass it: socket(AF_INET, SOCK_RAW, IPPROTO_TCP) Let's use sk_is_tcp(). Note that initially sol_tcp_sockopt() checked sk->sk_prot->setsockopt. Fixes: 2ab42c7b871f ("bpf: Check the protocol of a sock to agree the calls to bpf_setsockopt().") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20260504210610.180150-7-kuniyu@google.com --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 684922efd481..ef0877eefaa7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5481,7 +5481,7 @@ static int sol_tcp_sockopt(struct sock *sk, int optname, char *optval, int *optlen, bool getopt) { - if (sk->sk_protocol != IPPROTO_TCP) + if (!sk_is_tcp(sk)) return -EINVAL; switch (optname) { -- cgit v1.2.3 From db5dadb562cabb6da49959b473ed0d9645b6f2da Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 4 May 2026 18:01:37 -0500 Subject: Revert "ACPI: CPPC: Adjust debug messages in amd_set_max_freq_ratio() to warn" Some older systems don't support CPPC in the firmware and this just makes noise for them when booting. Drop back to debug. This reverts commit 21fb59ab4b9767085f4fe1edbdbe3177fbb9ec97. Fixes: 21fb59ab4b976 ("ACPI: CPPC: Adjust debug messages in amd_set_max_freq_ratio() to warn") Suggested-by: Kim Phillips Signed-off-by: Mario Limonciello Tested-by: Kim Phillips Cc: All applicable Link: https://patch.msgid.link/20260504230141.484743-2-mario.limonciello@amd.com Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/acpi/cppc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index d7c8ef1e354d..be4c5e9e5ff6 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -88,19 +88,19 @@ static void amd_set_max_freq_ratio(void) rc = cppc_get_perf_caps(0, &perf_caps); if (rc) { - pr_warn("Could not retrieve perf counters (%d)\n", rc); + pr_debug("Could not retrieve perf counters (%d)\n", rc); return; } rc = amd_get_boost_ratio_numerator(0, &numerator); if (rc) { - pr_warn("Could not retrieve highest performance (%d)\n", rc); + pr_debug("Could not retrieve highest performance (%d)\n", rc); return; } nominal_perf = perf_caps.nominal_perf; if (!nominal_perf) { - pr_warn("Could not retrieve nominal performance\n"); + pr_debug("Could not retrieve nominal performance\n"); return; } -- cgit v1.2.3 From 18fc650ccd7fe3376eca89203668cfb8268f60df Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sun, 26 Apr 2026 01:26:43 +0000 Subject: bpf: Free reuseport cBPF prog after RCU grace period. Eulgyu Kim reported the splat below with a repro. [0] The repro sets up a UDP reuseport group with a cBPF prog and replaces it with a new one while another thread is sending a UDP packet to the group. The reuseport prog is freed by sk_reuseport_prog_free(). bpf_prog_put() is called for "e"BPF prog to destruct through multiple stages while cBPF prog is freed immediately by bpf_release_orig_filter() and bpf_prog_free(). If a reuseport prog is detached from the setsockopt() path (reuseport_attach_prog() or reuseport_detach_prog()), sk_reuseport_prog_free() is called without waiting for RCU readers to complete, resulting in various bugs. Let's defer freeing the reuseport cBPF prog after one RCU grace period. Note "e"BPF prog is safe as is unless the fast path starts to touch fields destroyed in bpf_prog_put_deferred() and __bpf_prog_put_noref(). [0]: BUG: KASAN: vmalloc-out-of-bounds in reuseport_select_sock+0xedc/0x1220 net/core/sock_reuseport.c:596 Read of size 4 at addr ffffc9000051e004 by task slowme/10208 CPU: 6 UID: 1000 PID: 10208 Comm: slowme Not tainted 7.0.0-geb7ac95ff75e #32 PREEMPT(full) Hardware name: QEMU Ubuntu 24.04 PC v2 (i440FX + PIIX, arch_caps fix, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 Call Trace: dump_stack_lvl+0xe8/0x150 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xca/0x240 mm/kasan/report.c:482 kasan_report+0x118/0x150 mm/kasan/report.c:595 reuseport_select_sock+0xedc/0x1220 net/core/sock_reuseport.c:596 udp4_lib_lookup2+0x3bc/0x950 net/ipv4/udp.c:495 __udp4_lib_lookup+0x768/0xe20 net/ipv4/udp.c:723 __udp4_lib_lookup_skb+0x297/0x390 net/ipv4/udp.c:752 __udp4_lib_rcv+0x1312/0x2620 net/ipv4/udp.c:2752 ip_protocol_deliver_rcu+0x282/0x440 net/ipv4/ip_input.c:207 ip_local_deliver_finish+0x3bb/0x6f0 net/ipv4/ip_input.c:241 NF_HOOK+0x30c/0x3a0 include/linux/netfilter.h:318 NF_HOOK+0x30c/0x3a0 include/linux/netfilter.h:318 __netif_receive_skb_one_core net/core/dev.c:6181 [inline] __netif_receive_skb net/core/dev.c:6294 [inline] process_backlog+0xaa4/0x1960 net/core/dev.c:6645 __napi_poll+0xae/0x340 net/core/dev.c:7709 napi_poll net/core/dev.c:7772 [inline] net_rx_action+0x5d7/0xf50 net/core/dev.c:7929 handle_softirqs+0x22b/0x870 kernel/softirq.c:622 do_softirq+0x76/0xd0 kernel/softirq.c:523 __local_bh_enable_ip+0xf8/0x130 kernel/softirq.c:450 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:924 [inline] __dev_queue_xmit+0x1dd7/0x3710 net/core/dev.c:4890 neigh_output include/net/neighbour.h:556 [inline] ip_finish_output2+0xca9/0x1070 net/ipv4/ip_output.c:237 NF_HOOK_COND include/linux/netfilter.h:307 [inline] ip_output+0x29f/0x450 net/ipv4/ip_output.c:438 ip_send_skb+0x45/0xc0 net/ipv4/ip_output.c:1508 udp_send_skb+0xb04/0x1510 net/ipv4/udp.c:1195 udp_sendmsg+0x1a71/0x2350 net/ipv4/udp.c:1485 sock_sendmsg_nosec net/socket.c:727 [inline] __sock_sendmsg net/socket.c:742 [inline] __sys_sendto+0x554/0x680 net/socket.c:2206 __do_sys_sendto net/socket.c:2213 [inline] __se_sys_sendto net/socket.c:2209 [inline] __x64_sys_sendto+0xde/0x100 net/socket.c:2209 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x160/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x415a2d Code: b3 66 2e 0f 1f 84 00 00 00 00 00 66 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f6bc31e41e8 EFLAGS: 00000212 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007f6bc31e4cdc RCX: 0000000000415a2d RDX: 0000000000000001 RSI: 00007f6bc31e421f RDI: 0000000000000003 RBP: 00007f6bc31e4240 R08: 00007f6bc31e4220 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000212 R12: 00007f6bc31e46c0 R13: ffffffffffffffb8 R14: 0000000000000000 R15: 00007ffc9b0d70b0 Fixes: 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF") Reported-by: Eulgyu Kim Reported-by: Taeyang Lee <0wn@theori.io> Signed-off-by: Kuniyuki Iwashima Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20260426012647.3233119-1-kuniyu@google.com --- net/core/filter.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index ef0877eefaa7..70eef14edb99 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1654,15 +1654,24 @@ err_prog_put: return err; } +static void sk_reuseport_prog_free_rcu(struct rcu_head *rcu) +{ + struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); + struct bpf_prog *prog = aux->prog; + + bpf_release_orig_filter(prog); + bpf_prog_free(prog); +} + void sk_reuseport_prog_free(struct bpf_prog *prog) { if (!prog) return; - if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) - bpf_prog_put(prog); + if (bpf_prog_was_classic(prog)) + call_rcu(&prog->aux->rcu, sk_reuseport_prog_free_rcu); else - bpf_prog_destroy(prog); + bpf_prog_put(prog); } static inline int __bpf_try_make_writable(struct sk_buff *skb, -- cgit v1.2.3 From 909f7bf9b080c10df3c3b38533906dbf09ff1d8b Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 15 Apr 2026 17:56:06 +0200 Subject: PCI: Update saved_config_space upon resource assignment Bernd reports passthrough failure of a Digital Devices Cine S2 V6 DVB adapter plugged into an ASRock X570S PG Riptide board with BIOS version P5.41 (09/07/2023): ddbridge 0000:05:00.0: detected Digital Devices Cine S2 V6 DVB adapter ddbridge 0000:05:00.0: cannot read registers ddbridge 0000:05:00.0: fail BIOS assigns an incorrect BAR to the DVB adapter which doesn't fit into the upstream bridge window. The kernel corrects the BAR assignment: pci 0000:07:00.0: BAR 0 [mem 0xfffffffffc500000-0xfffffffffc50ffff 64bit]: can't claim; no compatible bridge window pci 0000:07:00.0: BAR 0 [mem 0xfc500000-0xfc50ffff 64bit]: assigned Correction of the BAR assignment happens in an x86-specific fs_initcall, pcibios_assign_resources(), after device enumeration in a subsys_initcall. This order was introduced at the behest of Linus in 2004: https://git.kernel.org/tglx/history/c/a06a30144bbc No other architecture performs such a late BAR correction. Bernd bisected the issue to commit a2f1e22390ac ("PCI/ERR: Ensure error recoverability at all times"), but it only occurs in the absence of commit 4d4c10f763d7 ("PCI: Explicitly put devices into D0 when initializing"). This combination exists in stable kernel v6.12.70, but not in mainline, hence Bernd cannot reproduce the issue with mainline. Since a2f1e22390ac, config space is saved on enumeration, prior to BAR correction. Upon passthrough, the corrected BAR is overwritten with the incorrect saved value by: vfio_pci_core_register_device() vfio_pci_set_power_state() pci_restore_state() But only if the device's current_state is PCI_UNKNOWN, as it was prior to commit 4d4c10f763d7. Since the commit, it is PCI_D0, which changes the behavior of vfio_pci_set_power_state() to no longer restore the state without saving it first. Alexandre is reporting the same issue as Bernd, but in his case, mainline is affected as well. The difference is that on Alexandre's system, the host kernel binds a driver to the device which is unbound prior to passthrough, whereas on Bernd's system no driver gets bound by the host kernel. Unbinding sets current_state to PCI_UNKNOWN in pci_device_remove(), so when vfio-pci is subsequently bound to the device, pci_restore_state() is once again called without invoking pci_save_state() first. To robustly fix the issue, always update saved_config_space upon resource assignment. Reported-by: Bernd Schumacher Closes: https://lore.kernel.org/r/acfZrlP0Ua_5D3U4@eldamar.lan/ Reported-by: Alexandre N. Closes: https://lore.kernel.org/r/dd3c3358-de0f-4a56-9c81-04aceaab4058@mailo.com/ Fixes: a2f1e22390ac ("PCI/ERR: Ensure error recoverability at all times") Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Tested-by: Bernd Schumacher Tested-by: Alexandre N. Cc: stable@vger.kernel.org # v6.12+ Link: https://patch.msgid.link/febc3f354e0c1f5a9f5b3ee9ffddaa44caccf651.1776268054.git.lukas@wunner.de --- drivers/pci/setup-res.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index fbc05cda96ee..991d3ed543f5 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -102,6 +102,7 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno) } pci_write_config_dword(dev, reg, new); + dev->saved_config_space[reg / 4] = new; pci_read_config_dword(dev, reg, &check); if ((new ^ check) & mask) { @@ -112,6 +113,7 @@ static void pci_std_update_resource(struct pci_dev *dev, int resno) if (res->flags & IORESOURCE_MEM_64) { new = region.start >> 16 >> 16; pci_write_config_dword(dev, reg + 4, new); + dev->saved_config_space[(reg + 4) / 4] = new; pci_read_config_dword(dev, reg + 4, &check); if (check != new) { pci_err(dev, "%s: error updating (high %#010x != %#010x)\n", -- cgit v1.2.3 From f45a49a2380a47332817b7248c61a0ebbc6f0d00 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Tue, 5 May 2026 23:43:27 +0000 Subject: PCI: Initialize temporary device in new_id_store() When setting new_id of a PCI device driver using sysfs a lockdep splat occurs. This is because new_id_store() builds a temporary pci_dev for pci_match_device(), which calls device_match_driver_override(). That depends on the driver_override.lock added by cb3d1049f4ea ("driver core: generalize driver_override in struct device"). The new driver_override.lock was not initialized in the temporary pci_dev, resulting in this lockdep splat. Initialize the temporary pci_dev to fix this. Repro: Build with CONFIG_LOCKDEP=y, boot with QEMU, and add a new ID: # echo "8086 10f5" > /sys/bus/pci/drivers/e1000e/new_id INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. CPU: 2 UID: 0 PID: 177 Comm: liveupdate-iomm Not tainted 7.0.0+ #9 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x5d/0x80 register_lock_class+0x77e/0x790 lock_acquire+0xbf/0x2e0 pci_match_device+0x24/0x180 new_id_store+0x189/0x1d0 kernfs_fop_write_iter+0x14f/0x210 vfs_write+0x263/0x5e0 ksys_write+0x79/0xf0 do_syscall_64+0x117/0xf80 Fixes: 10a4206a2401 ("PCI: use generic driver_override infrastructure") Fixes: 8895d3bcb8ba ("PCI: Fail new_id for vendor/device values already built into driver") Signed-off-by: Samiullah Khawaja [bhelgaas: add commit log details and repro, trim backtrace] Signed-off-by: Bjorn Helgaas Reviewed-by: Danilo Krummrich Link: https://patch.msgid.link/20260505234327.716630-1-skhawaja@google.com --- drivers/pci/pci-driver.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d10ece0889f0..e3f59001785a 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -179,6 +179,11 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv, return NULL; } +static void _pci_free_device(struct device *dev) +{ + kfree(to_pci_dev(dev)); +} + /** * new_id_store - sysfs frontend to pci_add_dynid() * @driver: target device driver @@ -214,11 +219,13 @@ static ssize_t new_id_store(struct device_driver *driver, const char *buf, pdev->subsystem_vendor = subvendor; pdev->subsystem_device = subdevice; pdev->class = class; + pdev->dev.release = _pci_free_device; + device_initialize(&pdev->dev); if (pci_match_device(pdrv, pdev)) retval = -EEXIST; - kfree(pdev); + put_device(&pdev->dev); if (retval) return retval; -- cgit v1.2.3 From bf5421b3d8d3d9216d045af89d4f7e2f75375554 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 28 Apr 2026 07:19:54 +0200 Subject: MAINTAINERS: Update Marek Vasut email for PCIe R-Car Use up to date address. No functional change. Signed-off-by: Marek Vasut Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20260428052030.51101-1-marek.vasut+renesas@mailbox.org --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2fb1c75afd16..db06875cfb46 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20433,7 +20433,7 @@ F: drivers/pci/controller/plda/pcie-plda-host.c F: drivers/pci/controller/plda/pcie-plda.h PCI DRIVER FOR RENESAS R-CAR -M: Marek Vasut +M: Marek Vasut M: Yoshihiro Shimoda L: linux-pci@vger.kernel.org L: linux-renesas-soc@vger.kernel.org -- cgit v1.2.3 From 78e115d806b0eea83a0e4df182a689823aa80511 Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Fri, 8 May 2026 10:30:06 +0800 Subject: MAINTAINERS: Update Hans Zhang email for PCIe CIX Sky1 Update my email address as my work email account is no longer in use. Signed-off-by: Hans Zhang <18255117159@163.com> Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20260508023006.1787674-1-18255117159@163.com --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index db06875cfb46..b10ee9a3b5f5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20321,7 +20321,7 @@ F: Documentation/devicetree/bindings/pci/cdns,* F: drivers/pci/controller/cadence/*cadence* PCI DRIVER FOR CIX Sky1 -M: Hans Zhang +M: Hans Zhang <18255117159@163.com> L: linux-pci@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pci/cix,sky1-pcie-*.yaml -- cgit v1.2.3 From 9ef40a09c5de18f0d275a451f5c2a7fd4f07158b Mon Sep 17 00:00:00 2001 From: Aksh Garg Date: Fri, 8 May 2026 11:39:51 +0530 Subject: MAINTAINERS: Add Aksh Garg as PCIe CADENCE reviewer I wish to contribute to the review process for Cadence PCIe IP drivers, hence add myself as a reviewer. Signed-off-by: Aksh Garg Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20260508060951.840233-1-a-garg7@ti.com --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index b10ee9a3b5f5..0cafa37693a6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20315,10 +20315,11 @@ F: Documentation/devicetree/bindings/pci/marvell,armada8k-pcie.yaml F: drivers/pci/controller/dwc/pcie-armada8k.c PCI DRIVER FOR CADENCE PCIE IP +R: Aksh Garg L: linux-pci@vger.kernel.org S: Orphan F: Documentation/devicetree/bindings/pci/cdns,* -F: drivers/pci/controller/cadence/*cadence* +F: drivers/pci/controller/cadence/ PCI DRIVER FOR CIX Sky1 M: Hans Zhang <18255117159@163.com> -- cgit v1.2.3 From 97c8a3c1f73d828de43a5a88e8a9a143efb2b661 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 May 2026 03:59:18 +0000 Subject: tcp: Fix potential UAF in reqsk_timer_handler(). When TCP socket migration fails at inet_ehash_insert() in reqsk_timer_handler(), we jump to the no_ownership: label and free the new reqsk immediately with __reqsk_free(). Thus, we must stop the new reqsk's timer before jumping to the label, but the timer might be missed since the cited commit, resulting in UAF. As we are in the original reqsk's timer context, we can safely call timer_delete_sync() for the new reqsk. Let's pass false to __inet_csk_reqsk_queue_drop() to stop the new reqsk's timer. Fixes: 83fccfc3940c ("inet: fix potential deadlock in reqsk_queue_unlink()") Reported-by: Damiano Melotti Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260506035954.1563147-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_connection_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 928654c34156..971f9db2c586 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1108,7 +1108,7 @@ static void reqsk_timer_handler(struct timer_list *t) if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) { /* delete timer */ - __inet_csk_reqsk_queue_drop(sk_listener, nreq, true); + __inet_csk_reqsk_queue_drop(sk_listener, nreq, false); goto no_ownership; } -- cgit v1.2.3 From 7eca3292cac7c26dad4c236f51ba225c39a0523f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 6 May 2026 03:59:19 +0000 Subject: tcp: Fix imbalanced icsk_accept_queue count. When TCP socket migration happens in reqsk_timer_handler(), @sk_listener will be updated with the new listener. When we call __inet_csk_reqsk_queue_drop(), the listener must be the one stored in req->rsk_listener. The cited commit accidentally replaced oreq->rsk_listener with sk_listener, leading to imbalanced icsk_accept_queue count. Let's pass the correct listener to __inet_csk_reqsk_queue_drop(). Fixes: e8c526f2bdf1 ("tcp/dccp: Don't use timer_pending() in reqsk_queue_unlink().") Reported-by: Damiano Melotti Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260506035954.1563147-3-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_connection_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 971f9db2c586..dbcd37dfdc15 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1134,7 +1134,7 @@ no_ownership: } drop: - __inet_csk_reqsk_queue_drop(sk_listener, oreq, true); + __inet_csk_reqsk_queue_drop(oreq->rsk_listener, oreq, true); reqsk_put(oreq); } -- cgit v1.2.3 From e539acf9f9c2550452914fb85aeb8fda67dd762f Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 27 Apr 2026 11:23:17 +0100 Subject: MAINTAINERS: Add self for the 3c509 network driver It appears there's a need for a maintainer for the 3Com EtherLink III family of Ethernet network adapters. There is documentation available and the driver is very mature so the task ought to be of little hassle, so I think I should be able to squeeze in any issues to be addressed. Signed-off-by: Maciej W. Rozycki Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/alpine.DEB.2.21.2604271056460.28583@angie.orcam.me.uk Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0650fa014f24..1a485549ee96 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -68,6 +68,12 @@ Maintainers List first. When adding to this list, please keep the entries in alphabetical order. +3C509 NETWORK DRIVER +M: "Maciej W. Rozycki" +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/3com/3c509.c + 3C59X NETWORK DRIVER M: Steffen Klassert L: netdev@vger.kernel.org -- cgit v1.2.3 From 7ce5556f255a680d80daa31b1cedecf7f89e2c22 Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Wed, 6 May 2026 16:24:15 +0800 Subject: ipv6: flowlabel: take ip6_fl_lock across mem_check and fl_intern mem_check() in net/ipv6/ip6_flowlabel.c reads fl_size without holding ip6_fl_lock. fl_intern() takes the lock immediately afterwards. The two checks therefore race against concurrent fl_intern, ip6_fl_gc and ip6_fl_purge writers, which makes the mem_check budget check approximate. Move spin_lock_bh(&ip6_fl_lock) and the matching unlock from fl_intern() into its only caller ipv6_flowlabel_get(). The mem_check() call now runs under the same critical section as the fl_intern() insert, so the budget check is exact. With all writers and the read of fl_size under ip6_fl_lock, convert fl_size from atomic_t to plain int. The four sites that update or read fl_size are fl_intern (insert path), ip6_fl_gc (garbage collector, the !sched check and the per-entry decrement), ip6_fl_purge (per-netns purge), and mem_check (budget check), and all four now run under ip6_fl_lock. This is a prerequisite for adding a per-netns budget alongside fl_size. The follow-up patch adds netns_ipv6::flowlabel_count and folds it into mem_check(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Suggested-by: Willem de Bruijn Reviewed-by: Willem de Bruijn Signed-off-by: Maoyi Xie Link: https://patch.msgid.link/20260506082416.2259567-2-maoyixie.tju@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_flowlabel.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index c92f98c6f6ec..a8974643195a 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -40,7 +40,7 @@ #define FL_HASH_MASK 255 #define FL_HASH(l) (ntohl(l)&FL_HASH_MASK) -static atomic_t fl_size = ATOMIC_INIT(0); +static int fl_size; static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; static void ip6_fl_gc(struct timer_list *unused); @@ -163,7 +163,7 @@ static void ip6_fl_gc(struct timer_list *unused) if (time_after_eq(now, ttd)) { *flp = fl->next; fl_free(fl); - atomic_dec(&fl_size); + fl_size--; continue; } if (!sched || time_before(ttd, sched)) @@ -172,7 +172,7 @@ static void ip6_fl_gc(struct timer_list *unused) flp = &fl->next; } } - if (!sched && atomic_read(&fl_size)) + if (!sched && fl_size) sched = now + FL_MAX_LINGER; if (sched) { mod_timer(&ip6_fl_gc_timer, sched); @@ -196,7 +196,7 @@ static void __net_exit ip6_fl_purge(struct net *net) atomic_read(&fl->users) == 0) { *flp = fl->next; fl_free(fl); - atomic_dec(&fl_size); + fl_size--; continue; } flp = &fl->next; @@ -210,10 +210,10 @@ static struct ip6_flowlabel *fl_intern(struct net *net, { struct ip6_flowlabel *lfl; + lockdep_assert_held(&ip6_fl_lock); + fl->label = label & IPV6_FLOWLABEL_MASK; - rcu_read_lock(); - spin_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { fl->label = htonl(get_random_u32())&IPV6_FLOWLABEL_MASK; @@ -235,8 +235,6 @@ static struct ip6_flowlabel *fl_intern(struct net *net, lfl = __fl_lookup(net, fl->label); if (lfl) { atomic_inc(&lfl->users); - spin_unlock_bh(&ip6_fl_lock); - rcu_read_unlock(); return lfl; } } @@ -244,9 +242,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, fl->lastuse = jiffies; fl->next = fl_ht[FL_HASH(fl->label)]; rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); - atomic_inc(&fl_size); - spin_unlock_bh(&ip6_fl_lock); - rcu_read_unlock(); + fl_size++; return NULL; } @@ -464,10 +460,14 @@ done: static int mem_check(struct sock *sk) { - int room = FL_MAX_SIZE - atomic_read(&fl_size); + int room; struct ipv6_fl_socklist *sfl; int count = 0; + lockdep_assert_held(&ip6_fl_lock); + + room = FL_MAX_SIZE - fl_size; + if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; @@ -692,11 +692,19 @@ release: if (!sfl1) goto done; + rcu_read_lock(); + spin_lock_bh(&ip6_fl_lock); err = mem_check(sk); + if (err == 0) + fl1 = fl_intern(net, fl, freq->flr_label); + else + fl1 = NULL; + spin_unlock_bh(&ip6_fl_lock); + rcu_read_unlock(); + if (err != 0) goto done; - fl1 = fl_intern(net, fl, freq->flr_label); if (fl1) goto recheck; -- cgit v1.2.3 From e68eadffb724b36ffd3d5619e0efcaf29ec2a175 Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Wed, 6 May 2026 16:24:16 +0800 Subject: ipv6: flowlabel: enforce per-netns limit for unprivileged callers fl_size, fl_ht and ip6_fl_lock in net/ipv6/ip6_flowlabel.c are file scope and shared across netns. mem_check() reads fl_size to decide whether to deny non-CAP_NET_ADMIN callers. capable() runs against init_user_ns, so an unprivileged user in any non-init userns can push fl_size past FL_MAX_SIZE - FL_MAX_SIZE / 4 and starve every other unprivileged userns on the host. Add struct netns_ipv6::flowlabel_count, bumped and decremented next to fl_size in fl_intern, ip6_fl_gc and ip6_fl_purge. The new field fills the existing 4-byte hole after ipmr_seq, so struct netns_ipv6 stays the same size on 64-bit builds. Bump FL_MAX_SIZE from 4096 to 8192. It has been 4096 since the file was added. Machines and connection counts have grown. mem_check() folds an extra per-netns ceiling into the existing non-CAP_NET_ADMIN conditional. The ceiling is half of the total budget that unprivileged callers have ever been able to use, i.e. (FL_MAX_SIZE - FL_MAX_SIZE / 4) / 2 = 3072 entries. With FL_MAX_SIZE doubled, this preserves the original per-user reach of 3K (what an unprivileged caller could already obtain before this change), while forcing an attacker to spread allocations across at least two netns to exhaust the global non-CAP_NET_ADMIN budget. CAP_NET_ADMIN against init_user_ns still bypasses both caps. The previous patch took ip6_fl_lock across mem_check and fl_intern, so the new flowlabel_count read in mem_check and the new flowlabel_count++ in fl_intern run under the same critical section. flowlabel_count is therefore plain int, like fl_size. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Suggested-by: Willem de Bruijn Reviewed-by: Willem de Bruijn Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Maoyi Xie Link: https://patch.msgid.link/20260506082416.2259567-3-maoyixie.tju@gmail.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv6.h | 1 + net/ipv6/ip6_flowlabel.c | 14 +++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 499e4288170f..875916d60bfe 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -119,6 +119,7 @@ struct netns_ipv6 { struct fib_notifier_ops *notifier_ops; struct fib_notifier_ops *ip6mr_notifier_ops; atomic_t ipmr_seq; + int flowlabel_count; struct { struct hlist_head head; spinlock_t lock; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index a8974643195a..b1ccdf0dc646 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -36,7 +36,7 @@ /* FL hash table */ #define FL_MAX_PER_SOCK 32 -#define FL_MAX_SIZE 4096 +#define FL_MAX_SIZE 8192 #define FL_HASH_MASK 255 #define FL_HASH(l) (ntohl(l)&FL_HASH_MASK) @@ -162,8 +162,9 @@ static void ip6_fl_gc(struct timer_list *unused) ttd = fl->expires; if (time_after_eq(now, ttd)) { *flp = fl->next; - fl_free(fl); fl_size--; + fl->fl_net->ipv6.flowlabel_count--; + fl_free(fl); continue; } if (!sched || time_before(ttd, sched)) @@ -197,6 +198,7 @@ static void __net_exit ip6_fl_purge(struct net *net) *flp = fl->next; fl_free(fl); fl_size--; + net->ipv6.flowlabel_count--; continue; } flp = &fl->next; @@ -243,6 +245,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, fl->next = fl_ht[FL_HASH(fl->label)]; rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); fl_size++; + net->ipv6.flowlabel_count++; return NULL; } @@ -460,6 +463,9 @@ done: static int mem_check(struct sock *sk) { + const int unpriv_total_limit = FL_MAX_SIZE - (FL_MAX_SIZE / 4); + const int unpriv_user_limit = unpriv_total_limit / 2; + struct net *net = sock_net(sk); int room; struct ipv6_fl_socklist *sfl; int count = 0; @@ -478,7 +484,9 @@ static int mem_check(struct sock *sk) if (room <= 0 || ((count >= FL_MAX_PER_SOCK || - (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) && + (count > 0 && room < FL_MAX_SIZE / 2) || + room < FL_MAX_SIZE / 4 || + net->ipv6.flowlabel_count >= unpriv_user_limit) && !capable(CAP_NET_ADMIN))) return -ENOBUFS; -- cgit v1.2.3 From 58e2330bd45572a6e3d46ea94cf7a9641f43591a Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 6 May 2026 09:08:08 +0000 Subject: net: napi: Avoid gro timer misfiring at end of busypoll When in irq deferral mode (defer-hard-irqs > 0), a short enough gro-flush timeout can trigger before NAPI_STATE_SCHED is cleared if the last poll in busy_poll_stop() takes too long. This can have the effect of leaving the queue stuck with interrupts disabled and no timer armed which results in a tx timeout if there is no subsequent busypoll cycle. To prevent this, defer the gro-flush timer arm after the last poll. Fixes: 7fd3253a7de6 ("net: Introduce preferred busy-polling") Co-developed-by: Martin Karsten Signed-off-by: Martin Karsten Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Reviewed-by: Cosmin Ratiu Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260506090808.820559-2-dtatulea@nvidia.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 8bfa8313ef62..0c6c270d9f7d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6862,9 +6862,9 @@ static void skb_defer_free_flush(void) #if defined(CONFIG_NET_RX_BUSY_POLL) -static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) +static void __busy_poll_stop(struct napi_struct *napi, unsigned long timeout) { - if (!skip_schedule) { + if (!timeout) { gro_normal_list(&napi->gro); __napi_schedule(napi); return; @@ -6874,6 +6874,8 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) gro_flush_normal(&napi->gro, HZ >= 1000); clear_bit(NAPI_STATE_SCHED, &napi->state); + hrtimer_start(&napi->timer, ns_to_ktime(timeout), + HRTIMER_MODE_REL_PINNED); } enum { @@ -6885,8 +6887,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, unsigned flags, u16 budget) { struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; - bool skip_schedule = false; - unsigned long timeout; + unsigned long timeout = 0; int rc; /* Busy polling means there is a high chance device driver hard irq @@ -6906,10 +6907,12 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, if (flags & NAPI_F_PREFER_BUSY_POLL) { napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi); - timeout = napi_get_gro_flush_timeout(napi); - if (napi->defer_hard_irqs_count && timeout) { - hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); - skip_schedule = true; + if (napi->defer_hard_irqs_count) { + /* A short enough gro flush timeout and long enough + * poll can result in timer firing too early. + * Timer will be armed later if necessary. + */ + timeout = napi_get_gro_flush_timeout(napi); } } @@ -6924,7 +6927,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, trace_napi_poll(napi, rc, budget); netpoll_poll_unlock(have_poll_lock); if (rc == budget) - __busy_poll_stop(napi, skip_schedule); + __busy_poll_stop(napi, timeout); bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); } -- cgit v1.2.3 From 0a549298f452a83ae57e6582e6ca389357f9355d Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Thu, 7 May 2026 14:04:44 +0200 Subject: MAINTAINERS: change maintainers for macb Ethernet driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I would like to hand over the macb maintenance to Théo, as I'm unable to keep up with the recent flow of patches for this driver. After speaking with Claudiu, he indicated that he is in the same position as me. To help with this work, Conor has agreed to act as a reviewer. I was given responsibility for this driver years ago, and I'm glad to see it continue with talented developers. Signed-off-by: Nicolas Ferre Acked-by: Claudiu Beznea Acked-by: Conor Dooley Link: https://patch.msgid.link/20260507120444.9733-1-nicolas.ferre@microchip.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1a485549ee96..8a134cf6e9bb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4187,8 +4187,8 @@ F: include/uapi/linux/sonet.h F: net/atm/ ATMEL MACB ETHERNET DRIVER -M: Nicolas Ferre -M: Claudiu Beznea +M: Théo Lebrun +R: Conor Dooley S: Maintained F: drivers/net/ethernet/cadence/ -- cgit v1.2.3 From 4908f1395fb1b832ceec11584af649874a2732ea Mon Sep 17 00:00:00 2001 From: Quan Sun <2022090917019@std.uestc.edu.cn> Date: Thu, 7 May 2026 21:17:38 +0800 Subject: net: ethtool: fix NULL pointer dereference in phy_reply_size In phy_prepare_data(), several strings such as 'name', 'drvname', 'upstream_sfp_name', and 'downstream_sfp_name' are allocated using kstrdup(). However, these allocations were not checked for failure. If kstrdup() fails for 'name', it returns NULL while the function continues. This leads to a kernel NULL pointer dereference and panic later in phy_reply_size() when it unconditionally calls strlen() on the NULL pointer. While other strings like 'upstream_sfp_name' might be checked before access in certain code paths, failing to handle these allocations consistently can lead to incomplete data reporting or hidden bugs. Fix this by adding proper NULL checks for all kstrdup() calls in phy_prepare_data() and implement a centralized error handling path using goto labels to ensure all previously allocated resources are freed on failure. Fixes: 9dd2ad5e92b9 ("net: ethtool: phy: Convert the PHY_GET command to generic phy dump") Signed-off-by: Quan Sun <2022090917019@std.uestc.edu.cn> Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20260507131738.1173835-1-2022090917019@std.uestc.edu.cn Signed-off-by: Jakub Kicinski --- net/ethtool/phy.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index d4e6887055ab..f76d94d848d6 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -76,6 +76,7 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info, struct nlattr **tb = info->attrs; struct phy_device_node *pdn; struct phy_device *phydev; + int ret; /* RTNL is held by the caller */ phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PHY_HEADER, @@ -88,8 +89,17 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info, return -EOPNOTSUPP; rep_data->phyindex = phydev->phyindex; + rep_data->name = kstrdup(dev_name(&phydev->mdio.dev), GFP_KERNEL); + if (!rep_data->name) + return -ENOMEM; + rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL); + if (!rep_data->drvname) { + ret = -ENOMEM; + goto err_free_name; + } + rep_data->upstream_type = pdn->upstream_type; if (pdn->upstream_type == PHY_UPSTREAM_PHY) { @@ -97,15 +107,33 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info, rep_data->upstream_index = upstream->phyindex; } - if (pdn->parent_sfp_bus) + if (pdn->parent_sfp_bus) { rep_data->upstream_sfp_name = kstrdup(sfp_get_name(pdn->parent_sfp_bus), GFP_KERNEL); + if (!rep_data->upstream_sfp_name) { + ret = -ENOMEM; + goto err_free_drvname; + } + } - if (phydev->sfp_bus) + if (phydev->sfp_bus) { rep_data->downstream_sfp_name = kstrdup(sfp_get_name(phydev->sfp_bus), GFP_KERNEL); + if (!rep_data->downstream_sfp_name) { + ret = -ENOMEM; + goto err_free_upstream_sfp; + } + } return 0; + +err_free_upstream_sfp: + kfree(rep_data->upstream_sfp_name); +err_free_drvname: + kfree(rep_data->drvname); +err_free_name: + kfree(rep_data->name); + return ret; } static int phy_fill_reply(struct sk_buff *skb, -- cgit v1.2.3 From f2ab4fd02777c4081be38c35f939e4dc529b8952 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 7 May 2026 14:04:26 +0200 Subject: net: nsh: fix incorrect header length macros NSH header length is a 6-bit field that encodes the total length of the header in 4-byte words. So the maximum length is 0b111111 * 4, which is 252 and not 256. The maximum context length is the same number minus the length of the base header (8), so 244. These macros are used to validate push_nsh() action in openvswitch. Miscalculation here doesn't cause any real issues. In the worst case the oversized context is truncated while building the header, so we'll construct and send a broken packet, which is not a big problem, as any receiver should validate the fields. No invalid memory accesses will happen during the header push. But we should fix the macros to reject the incorrect actions in the first place. Using previously defined values and calculating the length instead of defining numbers directly, so it's easier to understand where they come from and harder to make a mistake. Fixes: 1f0b7744c505 ("net: add NSH header structures and helpers") Signed-off-by: Ilya Maximets Reviewed-by: Aaron Conole Link: https://patch.msgid.link/20260507120434.2962505-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- include/net/nsh.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/nsh.h b/include/net/nsh.h index 16a751093896..15a26c590815 100644 --- a/include/net/nsh.h +++ b/include/net/nsh.h @@ -247,10 +247,10 @@ struct nshhdr { #define NSH_M_TYPE1_LEN 24 /* NSH header maximum Length. */ -#define NSH_HDR_MAX_LEN 256 +#define NSH_HDR_MAX_LEN ((NSH_LEN_MASK >> NSH_LEN_SHIFT) * 4) /* NSH context headers maximum Length. */ -#define NSH_CTX_HDRS_MAX_LEN 248 +#define NSH_CTX_HDRS_MAX_LEN (NSH_HDR_MAX_LEN - NSH_BASE_HDR_LEN) static inline struct nshhdr *nsh_hdr(struct sk_buff *skb) { -- cgit v1.2.3 From efda25ee84325385f859d10872590e90ce837243 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 6 May 2026 20:07:13 +0000 Subject: genetlink: free the skb on 'group >= family->n_mcgrps' These methods generally consume ownership of the provided skb, so even if an error path is encountered, the skb is freed. This is because the very first thing they do after some initial setup is to unconditionally consume the skb via consume_skb(skb). Any subsequent errors lead to the core netlink layer freeing the skb. However, there is one check that occurs before ownership is passed, which is the check for the group index. So if this error condition is encountered, then the skb is leaked. This error condition is generally considered a violation of the netlink API, so it's not expected to occur under normal circumstances. For the same reason, no callers check for this error condition, and no callers need to be adjusted. However, we should still follow the same ownership semantics of the rest of the function. Thus, free the skb in this codepath. Suggested-by: Andrew Lunn Suggested-by: Matthew Maurer Fixes: 2a94fe48f32c ("genetlink: make multicast groups const, prevent abuse") Link: https://lore.kernel.org/r/845b36ba-7b3a-41f2-acb2-b284f253e2ca@lunn.ch Signed-off-by: Alice Ryhl Link: https://patch.msgid.link/20260506-genlmsg-return-v2-1-a63ee2a055d6@google.com Signed-off-by: Jakub Kicinski --- include/net/genetlink.h | 4 +++- net/netlink/genetlink.c | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 7b84f2cef8b1..d70510ac31ab 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -489,8 +489,10 @@ genlmsg_multicast_netns_filtered(const struct genl_family *family, netlink_filter_fn filter, void *filter_data) { - if (WARN_ON_ONCE(group >= family->n_mcgrps)) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) { + nlmsg_free(skb); return -EINVAL; + } group = family->mcgrp_offset + group; return nlmsg_multicast_filtered(net->genl_sock, skb, portid, group, flags, filter, filter_data); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index d251d894afd4..0da39eaed255 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1972,8 +1972,10 @@ int genlmsg_multicast_allns(const struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group) { - if (WARN_ON_ONCE(group >= family->n_mcgrps)) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) { + kfree_skb(skb); return -EINVAL; + } group = family->mcgrp_offset + group; return genlmsg_mcast(skb, portid, group); @@ -1986,8 +1988,10 @@ void genl_notify(const struct genl_family *family, struct sk_buff *skb, struct net *net = genl_info_net(info); struct sock *sk = net->genl_sock; - if (WARN_ON_ONCE(group >= family->n_mcgrps)) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) { + kfree_skb(skb); return; + } group = family->mcgrp_offset + group; nlmsg_notify(sk, skb, info->snd_portid, group, -- cgit v1.2.3 From a77d5a069d959dc45f5f472d48cba37d8cba0f1c Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Wed, 6 May 2026 16:37:45 -0700 Subject: net: shaper: Reject reparenting of existing nodes When an existing node-scope shaper is moved to a different parent via the group operation, the framework fails to update the leaves count on both the old and new parent shapers. Only newly created nodes (handle.id == NET_SHAPER_ID_UNSPEC) trigger the parent leaves increment at line 1039. This causes the parent's leaves counter to diverge from the actual number of children in the xarray. When the node is later deleted, pre_del_node() allocates an array sized by the stale leaves count, but the xarray iteration finds more children than expected, hitting the WARN_ON_ONCE guard and returning -EINVAL. Rather than adding reparenting support with complex leaves count bookkeeping, reject group calls that attempt to change an existing node's parent. Updates to an existing node's rate or leaves under the same parent remain permitted. We expect that for any modification of the topology user should always create new groups and let the kernel garbage collect the leaf-less nodes. Fixes: 5d5d4700e75d ("net-shapers: implement NL group operation") Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20260506233745.111895-1-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- net/shaper/shaper.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 94bc9c7382ea..1069fa4eb9f6 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -964,15 +964,22 @@ static int __net_shaper_group(struct net_shaper_binding *binding, int i, ret; if (node->handle.scope == NET_SHAPER_SCOPE_NODE) { + struct net_shaper *cur = NULL; + new_node = node->handle.id == NET_SHAPER_ID_UNSPEC; - if (!new_node && !net_shaper_lookup(binding, &node->handle)) { - /* The related attribute is not available when - * reaching here from the delete() op. - */ - NL_SET_ERR_MSG_FMT(extack, "Node shaper %d:%d does not exists", - node->handle.scope, node->handle.id); - return -ENOENT; + if (!new_node) { + cur = net_shaper_lookup(binding, &node->handle); + if (!cur) { + /* The related attribute is not available + * when reaching here from the delete() op. + */ + NL_SET_ERR_MSG_FMT(extack, + "Node shaper %d:%d does not exist", + node->handle.scope, + node->handle.id); + return -ENOENT; + } } /* When unspecified, the node parent scope is inherited from @@ -986,6 +993,15 @@ static int __net_shaper_group(struct net_shaper_binding *binding, return ret; } + if (cur && net_shaper_handle_cmp(&cur->parent, + &node->parent)) { + NL_SET_ERR_MSG_FMT(extack, + "Cannot reparent node shaper %d:%d", + node->handle.scope, + node->handle.id); + return -EOPNOTSUPP; + } + } else { net_shaper_default_parent(&node->handle, &node->parent); } -- cgit v1.2.3 From 1619553b0a6ba7a966b17b0226f3acb9dd4d5380 Mon Sep 17 00:00:00 2001 From: Matt Vollrath Date: Wed, 6 May 2026 14:48:10 -0700 Subject: i40e: Cleanup PTP registration on probe failure Fix two conditions which would leak PTP registration on probe failure: 1. i40e_setup_pf_switch can encounter an error in i40e_setup_pf_filter_control, call i40e_ptp_init, then return non-zero, sending i40e_probe to err_vsis. 2. i40e_setup_misc_vector can return non-zero, sending i40e_probe to err_vsis. Both of these conditions have been present since PTP was introduced in this driver. Found with coccinelle. Fixes: beb0dff1251db ("i40e: enable PTP") Signed-off-by: Matt Vollrath Tested-by: Sunitha Mekala Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260506-jk-iwl-net-2026-05-04-v2-1-a5ea4dc837a9@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 028bd500603a..f06fcef644e5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16108,6 +16108,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Unwind what we've done if something failed in the setup */ err_vsis: set_bit(__I40E_DOWN, pf->state); + i40e_ptp_stop(pf); i40e_clear_interrupt_scheme(pf); kfree(pf->vsi); err_switch_setup: -- cgit v1.2.3 From 678b713ece1e853f11e670a84cb887c35e1381b7 Mon Sep 17 00:00:00 2001 From: Matt Vollrath Date: Wed, 6 May 2026 14:48:11 -0700 Subject: i40e: Cleanup PTP pins on probe failure PTP pin structs are allocated early in probe, but never cleaned up. Fix this by calling i40e_ptp_free_pins in the error path. To support this, i40e_ptp_free_pins is added to the header and pin_config is correctly nullified after being freed. This has been an issue since i40e_ptp_alloc_pins was introduced. Fixes: 1050713026a08 ("i40e: add support for PTP external synchronization clock") Reported-by: Kohei Enju Cc: stable@vger.kernel.org Signed-off-by: Matt Vollrath Reviewed-by: Paul Menzel Reviewed-by: Aleksandr Loktionov Reviewed-by: Kohei Enju Tested-by: Sunitha Mekala Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260506-jk-iwl-net-2026-05-04-v2-2-a5ea4dc837a9@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + drivers/net/ethernet/intel/i40e/i40e_ptp.c | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index dcb50c2e1aa2..83e780919ac9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1318,6 +1318,7 @@ void i40e_ptp_restore_hw_time(struct i40e_pf *pf); void i40e_ptp_init(struct i40e_pf *pf); void i40e_ptp_stop(struct i40e_pf *pf); int i40e_ptp_alloc_pins(struct i40e_pf *pf); +void i40e_ptp_free_pins(struct i40e_pf *pf); int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset); int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi); int i40e_get_partition_bw_setting(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f06fcef644e5..6d4f9218dc68 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16112,6 +16112,7 @@ err_vsis: i40e_clear_interrupt_scheme(pf); kfree(pf->vsi); err_switch_setup: + i40e_ptp_free_pins(pf); i40e_reset_interrupt_capability(pf); timer_shutdown_sync(&pf->service_timer); err_mac_addr: diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 404a716db8da..7d07c389bb23 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -940,12 +940,13 @@ int i40e_ptp_hwtstamp_get(struct net_device *netdev, * * Release memory allocated for PTP pins. **/ -static void i40e_ptp_free_pins(struct i40e_pf *pf) +void i40e_ptp_free_pins(struct i40e_pf *pf) { if (i40e_is_ptp_pin_dev(&pf->hw)) { kfree(pf->ptp_pins); kfree(pf->ptp_caps.pin_config); pf->ptp_pins = NULL; + pf->ptp_caps.pin_config = NULL; } } -- cgit v1.2.3 From da4f76b6a84ede14a71282ef841768299ead0221 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Wed, 6 May 2026 14:48:12 -0700 Subject: idpf: fix read_dev_clk_lock spinlock init in idpf_ptp_init() In idpf_ptp_init(), read_dev_clk_lock is initialized after ptp_schedule_worker() had already been called (and after idpf_ptp_settime64() could reach the lock). The PTP aux worker fires immediately upon scheduling and can call into idpf_ptp_read_src_clk_reg_direct(), which takes spin_lock(&ptp->read_dev_clk_lock) on an uninitialized lock, triggering the lockdep "non-static key" warning: [12973.796587] idpf 0000:83:00.0: Device HW Reset initiated [12974.094507] INFO: trying to register non-static key. ... [12974.097208] Call Trace: [12974.097213] [12974.097218] dump_stack_lvl+0x93/0xe0 [12974.097234] register_lock_class+0x4c4/0x4e0 [12974.097249] ? __lock_acquire+0x427/0x2290 [12974.097259] __lock_acquire+0x98/0x2290 [12974.097272] lock_acquire+0xc6/0x310 [12974.097281] ? idpf_ptp_read_src_clk_reg+0xb7/0x150 [idpf] [12974.097311] ? lockdep_hardirqs_on_prepare+0xde/0x190 [12974.097318] ? finish_task_switch.isra.0+0xd2/0x350 [12974.097330] ? __pfx_ptp_aux_kworker+0x10/0x10 [ptp] [12974.097343] _raw_spin_lock+0x30/0x40 [12974.097353] ? idpf_ptp_read_src_clk_reg+0xb7/0x150 [idpf] [12974.097373] idpf_ptp_read_src_clk_reg+0xb7/0x150 [idpf] [12974.097391] ? kthread_worker_fn+0x88/0x3d0 [12974.097404] ? kthread_worker_fn+0x4e/0x3d0 [12974.097411] idpf_ptp_update_cached_phctime+0x26/0x120 [idpf] [12974.097428] ? _raw_spin_unlock_irq+0x28/0x50 [12974.097436] idpf_ptp_do_aux_work+0x15/0x20 [idpf] [12974.097454] ptp_aux_kworker+0x20/0x40 [ptp] [12974.097464] kthread_worker_fn+0xd5/0x3d0 [12974.097474] ? __pfx_kthread_worker_fn+0x10/0x10 [12974.097482] kthread+0xf4/0x130 [12974.097489] ? __pfx_kthread+0x10/0x10 [12974.097498] ret_from_fork+0x32c/0x410 [12974.097512] ? __pfx_kthread+0x10/0x10 [12974.097519] ret_from_fork_asm+0x1a/0x30 [12974.097540] Move the call to spin_lock_init() up a bit to make sure read_dev_clk_lock is not touched before it's been initialized. Fixes: 5cb8805d2366 ("idpf: negotiate PTP capabilities and get PTP clock") Signed-off-by: Emil Tantilov Reviewed-by: Madhu Chittim Reviewed-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260506-jk-iwl-net-2026-05-04-v2-3-a5ea4dc837a9@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_ptp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_ptp.c b/drivers/net/ethernet/intel/idpf/idpf_ptp.c index eec91c4f0a75..4a51d2727547 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ptp.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ptp.c @@ -952,6 +952,8 @@ int idpf_ptp_init(struct idpf_adapter *adapter) goto free_ptp; } + spin_lock_init(&adapter->ptp->read_dev_clk_lock); + err = idpf_ptp_create_clock(adapter); if (err) goto free_ptp; @@ -977,8 +979,6 @@ int idpf_ptp_init(struct idpf_adapter *adapter) goto remove_clock; } - spin_lock_init(&adapter->ptp->read_dev_clk_lock); - pci_dbg(adapter->pdev, "PTP init successful\n"); return 0; -- cgit v1.2.3 From 6c77b9510829a424d1b74409b7db9456e3522871 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 6 May 2026 14:48:13 -0700 Subject: idpf: fix double free and use-after-free in aux device error paths When auxiliary_device_add() fails in idpf_plug_vport_aux_dev() or idpf_plug_core_aux_dev(), the err_aux_dev_add label calls auxiliary_device_uninit() and falls through to err_aux_dev_init. The uninit call will trigger put_device(), which invokes the release callback (idpf_vport_adev_release / idpf_core_adev_release) that frees iadev. The fall-through then reads adev->id from the freed iadev for ida_free() and double-frees iadev with kfree(). Free the IDA slot and clear the back-pointer before uninit, while adev is still valid, then return immediately. Commit 65637c3a1811 ("idpf: fix UAF in RDMA core aux dev deinitialization") fixed the same use-after-free in the matching unplug path in this file but missed both probe error paths. Cc: Tony Nguyen Cc: Przemek Kitszel Cc: Andrew Lunn Cc: stable@kernel.org Fixes: be91128c579c ("idpf: implement RDMA vport auxiliary dev create, init, and destroy") Fixes: f4312e6bfa2a ("idpf: implement core RDMA auxiliary dev create, init, and destroy") Signed-off-by: Greg Kroah-Hartman Reviewed-by: Aleksandr Loktionov Reviewed-by: Paul Menzel Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260506-jk-iwl-net-2026-05-04-v2-4-a5ea4dc837a9@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_idc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c index 7e4f4ac92653..b7d6b08fc89e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_idc.c +++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c @@ -90,7 +90,10 @@ static int idpf_plug_vport_aux_dev(struct iidc_rdma_core_dev_info *cdev_info, return 0; err_aux_dev_add: + ida_free(&idpf_idc_ida, adev->id); + vdev_info->adev = NULL; auxiliary_device_uninit(adev); + return ret; err_aux_dev_init: ida_free(&idpf_idc_ida, adev->id); err_ida_alloc: @@ -228,7 +231,10 @@ static int idpf_plug_core_aux_dev(struct iidc_rdma_core_dev_info *cdev_info) return 0; err_aux_dev_add: + ida_free(&idpf_idc_ida, adev->id); + cdev_info->adev = NULL; auxiliary_device_uninit(adev); + return ret; err_aux_dev_init: ida_free(&idpf_idc_ida, adev->id); err_ida_alloc: -- cgit v1.2.3 From b3cda96feb60d91fe88d52b974ff110dcfa91239 Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Wed, 6 May 2026 14:48:14 -0700 Subject: ice: fix setting RSS VSI hash for E830 ice_set_rss_hfunc() performs a VSI update, in which it sets hashing function, leaving other VSI options unchanged. However, ::q_opt_flags is mistakenly set to the value of another field, instead of its original value, probably due to a typo. What happens next is hardware-dependent: On E810, only the first bit is meaningful (see ICE_AQ_VSI_Q_OPT_PE_FLTR_EN) and can potentially end up in a different state than before VSI update. On E830, some of the remaining bits are not reserved. Setting them to some unrelated values can cause the firmware to reject the update because of invalid settings, or worse - succeed. Reproducer: sudo ethtool -X $PF1 equal 8 Output in dmesg: Failed to configure RSS hash for VSI 6, error -5 Fixes: 352e9bf23813 ("ice: enable symmetric-xor RSS for Toeplitz hash function") Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Signed-off-by: Marcin Szycik Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260506-jk-iwl-net-2026-05-04-v2-5-a5ea4dc837a9@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 1d1947a7fe11..c52c465280f7 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -8046,7 +8046,7 @@ int ice_set_rss_hfunc(struct ice_vsi *vsi, u8 hfunc) ctx->info.q_opt_rss |= FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hfunc); ctx->info.q_opt_tc = vsi->info.q_opt_tc; - ctx->info.q_opt_flags = vsi->info.q_opt_rss; + ctx->info.q_opt_flags = vsi->info.q_opt_flags; err = ice_update_vsi(hw, vsi->idx, ctx, NULL); if (err) { -- cgit v1.2.3 From 0ded1f36ba4021cba50513e80be6b6e173710168 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 6 May 2026 14:48:15 -0700 Subject: ice: fix locking in ice_dcb_rebuild() Move the mutex_lock() call up to prevent that DCB settings change after the first ice_query_port_ets() call. The second ice_query_port_ets() call in ice_dcb_rebuild() is already protected by pf->tc_mutex. This also fixes a bug in an error path, as before taking the first "goto dcb_error" in the function jumped over mutex_lock() to mutex_unlock(). This bug has been detected by the clang thread-safety analyzer. Cc: intel-wired-lan@lists.osuosl.org Fixes: 242b5e068b25 ("ice: Fix DCB rebuild after reset") Signed-off-by: Bart Van Assche Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Tested-by: Arpana Arland Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260506-jk-iwl-net-2026-05-04-v2-6-a5ea4dc837a9@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 16aa25535152..0bc6dd375687 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -537,14 +537,14 @@ void ice_dcb_rebuild(struct ice_pf *pf) struct ice_dcbx_cfg *err_cfg; int ret; + mutex_lock(&pf->tc_mutex); + ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { dev_err(dev, "Query Port ETS failed\n"); goto dcb_error; } - mutex_lock(&pf->tc_mutex); - if (!pf->hw.port_info->qos_cfg.is_sw_lldp) ice_cfg_etsrec_defaults(pf->hw.port_info); -- cgit v1.2.3 From cce709d8df6ba6d2a0a0dbf34acc2cdd9e23bd46 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 6 May 2026 14:48:16 -0700 Subject: ice: dpll: fix rclk pin state get for E810 The refactoring of ice_dpll_rclk_state_on_pin_get() to use ice_dpll_pin_get_parent_idx() omitted the base_rclk_idx adjustment that was correctly added in the ice_dpll_rclk_state_on_pin_set() path. This breaks E810 devices where base_rclk_idx is non-zero, causing the wrong hardware index to be used for pin state lookup and incorrect recovered clock state to be reported via the DPLL subsystem. E825C is unaffected as its base_rclk_idx is 0. While at it, add bounds check against ICE_DPLL_RCLK_NUM_MAX on hw_idx after the base_rclk_idx subtraction in both ice_dpll_rclk_state_on_pin_{get,set}() to prevent out-of-bounds access on the pin state array. Fixes: ad1df4f2d591 ("ice: dpll: Support E825-C SyncE and dynamic pin discovery") Signed-off-by: Ivan Vecera Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260506-jk-iwl-net-2026-05-04-v2-7-a5ea4dc837a9@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_dpll.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 27b460926bac..892bc7c2e28b 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -2523,6 +2523,8 @@ ice_dpll_rclk_state_on_pin_set(const struct dpll_pin *pin, void *pin_priv, if (hw_idx < 0) goto unlock; hw_idx -= pf->dplls.base_rclk_idx; + if (hw_idx >= ICE_DPLL_RCLK_NUM_MAX) + goto unlock; if ((enable && p->state[hw_idx] == DPLL_PIN_STATE_CONNECTED) || (!enable && p->state[hw_idx] == DPLL_PIN_STATE_DISCONNECTED)) { @@ -2586,6 +2588,9 @@ ice_dpll_rclk_state_on_pin_get(const struct dpll_pin *pin, void *pin_priv, hw_idx = ice_dpll_pin_get_parent_idx(p, parent_pin); if (hw_idx < 0) goto unlock; + hw_idx -= pf->dplls.base_rclk_idx; + if (hw_idx >= ICE_DPLL_RCLK_NUM_MAX) + goto unlock; ret = ice_dpll_pin_state_update(pf, p, ICE_DPLL_PIN_TYPE_RCLK_INPUT, extack); -- cgit v1.2.3 From 30f1658fc5387384c7a60b9d15c79cb959512c1a Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 6 May 2026 14:48:17 -0700 Subject: ice: dpll: fix misplaced header macros The CGU register definitions (ICE_CGU_R10, ICE_CGU_R11 and related field masks) were placed after the #endif of the _ICE_DPLL_H_ include guard, leaving them unprotected. Move them inside the guard. Fixes: ad1df4f2d591 ("ice: dpll: Support E825-C SyncE and dynamic pin discovery") Signed-off-by: Ivan Vecera Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20260506-jk-iwl-net-2026-05-04-v2-8-a5ea4dc837a9@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_dpll.h | 32 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.h b/drivers/net/ethernet/intel/ice/ice_dpll.h index ae42cdea0ee1..8678575359b9 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.h +++ b/drivers/net/ethernet/intel/ice/ice_dpll.h @@ -8,6 +8,22 @@ #define ICE_DPLL_RCLK_NUM_MAX 4 +#define ICE_CGU_R10 0x28 +#define ICE_CGU_R10_SYNCE_CLKO_SEL GENMASK(8, 5) +#define ICE_CGU_R10_SYNCE_CLKODIV_M1 GENMASK(13, 9) +#define ICE_CGU_R10_SYNCE_CLKODIV_LOAD BIT(14) +#define ICE_CGU_R10_SYNCE_DCK_RST BIT(15) +#define ICE_CGU_R10_SYNCE_ETHCLKO_SEL GENMASK(18, 16) +#define ICE_CGU_R10_SYNCE_ETHDIV_M1 GENMASK(23, 19) +#define ICE_CGU_R10_SYNCE_ETHDIV_LOAD BIT(24) +#define ICE_CGU_R10_SYNCE_DCK2_RST BIT(25) +#define ICE_CGU_R10_SYNCE_S_REF_CLK GENMASK(31, 27) + +#define ICE_CGU_R11 0x2C +#define ICE_CGU_R11_SYNCE_S_BYP_CLK GENMASK(6, 1) + +#define ICE_CGU_BYPASS_MUX_OFFSET_E825C 3 + /** * enum ice_dpll_pin_sw - enumerate ice software pin indices: * @ICE_DPLL_PIN_SW_1_IDX: index of first SW pin @@ -157,19 +173,3 @@ static inline void ice_dpll_deinit(struct ice_pf *pf) { } #endif #endif - -#define ICE_CGU_R10 0x28 -#define ICE_CGU_R10_SYNCE_CLKO_SEL GENMASK(8, 5) -#define ICE_CGU_R10_SYNCE_CLKODIV_M1 GENMASK(13, 9) -#define ICE_CGU_R10_SYNCE_CLKODIV_LOAD BIT(14) -#define ICE_CGU_R10_SYNCE_DCK_RST BIT(15) -#define ICE_CGU_R10_SYNCE_ETHCLKO_SEL GENMASK(18, 16) -#define ICE_CGU_R10_SYNCE_ETHDIV_M1 GENMASK(23, 19) -#define ICE_CGU_R10_SYNCE_ETHDIV_LOAD BIT(24) -#define ICE_CGU_R10_SYNCE_DCK2_RST BIT(25) -#define ICE_CGU_R10_SYNCE_S_REF_CLK GENMASK(31, 27) - -#define ICE_CGU_R11 0x2C -#define ICE_CGU_R11_SYNCE_S_BYP_CLK GENMASK(6, 1) - -#define ICE_CGU_BYPASS_MUX_OFFSET_E825C 3 -- cgit v1.2.3 From c4f3d6eb1fcf6cd9ce4644f604d5aad1ce594dfc Mon Sep 17 00:00:00 2001 From: Myeonghun Pak Date: Wed, 6 May 2026 21:43:11 +0900 Subject: net: lan966x: avoid unregistering netdev on register failure lan966x_probe_port() stores the newly allocated net_device in the port before calling register_netdev(). If register_netdev() fails, the probe error path calls lan966x_cleanup_ports(), which sees port->dev and calls unregister_netdev() for a device that was never registered. Destroy the phylink instance created for this port and clear port->dev before returning the registration error. The common cleanup path now skips ports without port->dev before reaching the registered netdev cleanup, so it only handles ports that reached the registered-netdev lifetime. This also avoids treating an uninitialized FDMA netdev and the failed port as a NULL == NULL match in the common cleanup path. Fixes: d28d6d2e37d1 ("net: lan966x: add port module support") Co-developed-by: Ijae Kim Signed-off-by: Ijae Kim Signed-off-by: Myeonghun Pak Link: https://patch.msgid.link/20260506124331.31945-1-mhun512@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 47752d3fde0b..1179a6e127c5 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -749,11 +749,10 @@ static void lan966x_cleanup_ports(struct lan966x *lan966x) for (p = 0; p < lan966x->num_phys_ports; p++) { port = lan966x->ports[p]; - if (!port) + if (!port || !port->dev) continue; - if (port->dev) - unregister_netdev(port->dev); + unregister_netdev(port->dev); lan966x_xdp_port_deinit(port); if (lan966x->fdma && lan966x->fdma_ndev == port->dev) @@ -873,6 +872,9 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p, err = register_netdev(dev); if (err) { dev_err(lan966x->dev, "register_netdev failed\n"); + phylink_destroy(phylink); + port->phylink = NULL; + port->dev = NULL; return err; } -- cgit v1.2.3 From 6635fa84403c3a59455b66007c019a7cc632db30 Mon Sep 17 00:00:00 2001 From: Shitalkumar Gandhi Date: Thu, 7 May 2026 01:28:13 +0530 Subject: net: ti: icssm-prueth: fix eth_ports_node leak in probe The error path on of_property_read_u32() failure inside icssm_prueth_probe() returns without putting eth_ports_node, which was acquired before the for_each_child_of_node() loop. Drop it before returning. Fixes: 511f6c1ae093 ("net: ti: icssm-prueth: Adds ICSSM Ethernet driver") Signed-off-by: Shitalkumar Gandhi Link: https://patch.msgid.link/20260506195813.641610-1-shitalkumar.gandhi@cambiumnetworks.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssm/icssm_prueth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ti/icssm/icssm_prueth.c b/drivers/net/ethernet/ti/icssm/icssm_prueth.c index 53bbd9290904..b7e94244355a 100644 --- a/drivers/net/ethernet/ti/icssm/icssm_prueth.c +++ b/drivers/net/ethernet/ti/icssm/icssm_prueth.c @@ -1825,6 +1825,7 @@ static int icssm_prueth_probe(struct platform_device *pdev) dev_err(dev, "%pOF error reading port_id %d\n", eth_node, ret); of_node_put(eth_node); + of_node_put(eth_ports_node); return ret; } -- cgit v1.2.3 From abb5f36771cc4c05899b34000829a787572a8817 Mon Sep 17 00:00:00 2001 From: Ben Morris Date: Thu, 7 May 2026 17:14:55 -0700 Subject: sctp: revalidate list cursor after sctp_sendmsg_to_asoc() in SCTP_SENDALL The SCTP_SENDALL path in sctp_sendmsg() iterates ep->asocs with list_for_each_entry_safe(), which caches the next entry in @tmp before the loop body runs. The body calls sctp_sendmsg_to_asoc(), which may drop the socket lock inside sctp_wait_for_sndbuf(). While the lock is dropped, another thread can SCTP_SOCKOPT_PEELOFF the association cached in @tmp, migrating it to a new endpoint via sctp_sock_migrate() (list_del_init() + list_add_tail() to newep->asocs), and optionally close the new socket which frees the association via kfree_rcu(). The cached @tmp can also be freed by a network ABORT for that association, processed in softirq while the lock is dropped. sctp_wait_for_sndbuf() revalidates @asoc (the current entry) on re-lock via the "sk != asoc->base.sk" and "asoc->base.dead" checks, but nothing revalidates @tmp. After a successful return, the iterator advances to the stale @tmp, yielding either a use-after-free (if the peeled socket was closed) or a list-walk onto the new endpoint's list head (type confusion of &newep->asocs as a struct sctp_association *). Both are reachable from CapEff=0; the type-confusion path gives controlled indirect call via the outqueue.sched->init_sid pointer. Fix by re-deriving @tmp from @asoc after sctp_sendmsg_to_asoc() returns. @asoc is known to still be on ep->asocs at that point: the only callers that list_del an association from ep->asocs are sctp_association_free() (which sets asoc->base.dead) and sctp_assoc_migrate() (which changes asoc->base.sk), and sctp_wait_for_sndbuf() checks both under the lock before any successful return; a tripped check propagates as err < 0 and the loop bails before the re-derive. The SCTP_ABORT path in sctp_sendmsg_check_sflags() returns 0 and the loop hits 'continue' before sctp_sendmsg_to_asoc() is ever called, so the @tmp cached by list_for_each_entry_safe() still covers the lock-held free that ba59fb027307 ("sctp: walk the list of asoc safely") was added for. Fixes: 4910280503f3 ("sctp: add support for snd flag SCTP_SENDALL process in sendmsg") Cc: stable@vger.kernel.org Signed-off-by: Ben Morris Acked-by: Xin Long Link: https://patch.msgid.link/20260508001455.3137-1-joycathacker@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/socket.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 58d0d9747f0b..1d2568bb6bc2 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1986,6 +1986,15 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) goto out_unlock; iov_iter_revert(&msg->msg_iter, err); + + /* sctp_sendmsg_to_asoc() may have released the socket + * lock (sctp_wait_for_sndbuf), during which other + * associations on ep->asocs could have been peeled + * off or freed. @asoc itself is revalidated by the + * base.dead and base.sk checks in sctp_wait_for_sndbuf, + * so re-derive the cached cursor from it. + */ + tmp = list_next_entry(asoc, asocs); } goto out_unlock; -- cgit v1.2.3 From 496c0c4c53bbe1bad97e82cd12103df61a6e459d Mon Sep 17 00:00:00 2001 From: Holger Brunck Date: Thu, 7 May 2026 17:53:32 +0200 Subject: net: wan: fsl_ucc_hdlc: free tx_skbuff in uhdlc_memclean When the device is removed all allocated resources should be freed. In uhdlc_memclean the netdev transmit queue was already stopped. But at this point we may have pending skb in the transmit queue which must be freed. Therefore iterate over the tx_skbuff pointers and free all pending skb. The issue was discovered by sashiko. Tested on a ls1043a board running HDLC in bus mode on kernel 6.12. https: //sashiko.dev/#/patchset/20260429114208.941011-1-holger.brunck%40hitachienergy.com Fixes: c19b6d246a35 ("drivers/net: support hdlc function for QE-UCC") Signed-off-by: Holger Brunck Link: https://patch.msgid.link/20260507155332.3452319-1-holger.brunck@hitachienergy.com Signed-off-by: Jakub Kicinski --- drivers/net/wan/fsl_ucc_hdlc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 15bfb78381d4..809f21fb93f5 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -740,6 +740,8 @@ static int uhdlc_open(struct net_device *dev) static void uhdlc_memclean(struct ucc_hdlc_private *priv) { + int i; + qe_muram_free(ioread16be(&priv->ucc_pram->riptr)); qe_muram_free(ioread16be(&priv->ucc_pram->tiptr)); @@ -770,6 +772,11 @@ static void uhdlc_memclean(struct ucc_hdlc_private *priv) kfree(priv->rx_skbuff); priv->rx_skbuff = NULL; + for (i = 0; i < TX_BD_RING_LEN; i++) { + dev_kfree_skb(priv->tx_skbuff[i]); + priv->tx_skbuff[i] = NULL; + } + kfree(priv->tx_skbuff); priv->tx_skbuff = NULL; -- cgit v1.2.3 From d1aabc2132d29224caa3c994dadd8224dc473ed9 Mon Sep 17 00:00:00 2001 From: Zhan Xusheng Date: Fri, 8 May 2026 15:29:34 +0800 Subject: ntfs: fix missing kstrdup() error check in ntfs_write_volume_label() ntfs_write_volume_label() does not check the return value of kstrdup(). If the allocation fails, vol->volume_label is set to NULL while the function returns success. A subsequent FS_IOC_GETFSLABEL then returns an empty string even though the on-disk label was updated correctly. Fix by allocating the new label before taking vol_ni->mrec_lock and updating any on-disk metadata, so an -ENOMEM from kstrdup() leaves both the in-memory and on-disk labels untouched and consistent. On success the preallocated copy replaces the old vol->volume_label. Also move mark_inode_dirty_sync() into the success path so that it is not called when no metadata was actually modified. Fixes: 6251f0b0de7d ("ntfs: update super block operations") Suggested-by: Hyunchul Lee Signed-off-by: Zhan Xusheng Reviewed-by: Hyunchul Lee Signed-off-by: Namjae Jeon --- fs/ntfs/super.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index e9de84fb8297..d282cf6e712e 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -413,6 +413,7 @@ int ntfs_write_volume_label(struct ntfs_volume *vol, char *label) { struct ntfs_inode *vol_ni = NTFS_I(vol->vol_ino); struct ntfs_attr_search_ctx *ctx; + char *new_label; __le16 *uname; int uname_len, ret; @@ -425,7 +426,7 @@ int ntfs_write_volume_label(struct ntfs_volume *vol, char *label) return uname_len; } - if (uname_len > NTFS_MAX_LABEL_LEN) { + if (uname_len > NTFS_MAX_LABEL_LEN) { ntfs_error(vol->sb, "Volume label is too long (max %d characters).", NTFS_MAX_LABEL_LEN); @@ -433,11 +434,22 @@ int ntfs_write_volume_label(struct ntfs_volume *vol, char *label) return -EINVAL; } + /* + * Allocate the in-memory label copy up front. If kstrdup() fails we + * bail out before touching on-disk metadata, so the in-memory label + * and the on-disk label stay in sync. + */ + new_label = kstrdup(label, GFP_KERNEL); + if (!new_label) { + kvfree(uname); + return -ENOMEM; + } + mutex_lock(&vol_ni->mrec_lock); ctx = ntfs_attr_get_search_ctx(vol_ni, NULL); if (!ctx) { ret = -ENOMEM; - goto out; + goto out; } if (!ntfs_attr_lookup(AT_VOLUME_NAME, NULL, 0, 0, 0, NULL, 0, @@ -450,12 +462,14 @@ int ntfs_write_volume_label(struct ntfs_volume *vol, char *label) out: mutex_unlock(&vol_ni->mrec_lock); kvfree(uname); - mark_inode_dirty_sync(vol->vol_ino); if (ret >= 0) { kfree(vol->volume_label); - vol->volume_label = kstrdup(label, GFP_KERNEL); + vol->volume_label = new_label; + mark_inode_dirty_sync(vol->vol_ino); ret = 0; + } else { + kfree(new_label); } return ret; } -- cgit v1.2.3 From 6098790c403d5e95a35bb6bf938591ca8c8e224f Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Sat, 9 May 2026 15:12:35 +0900 Subject: ntfs: validate MFT attrs_offset against bytes_in_use ntfs_mft_record_check() verifies that attrs_offset is aligned and that the resulting pointer stays within the allocated MFT record buffer, but it does not check that the first attribute header starts within the bytes_in_use area. A malformed record with attrs_offset greater than bytes_in_use can pass this check as long as attrs_offset is still within bytes_allocated. The attribute parser then computes the remaining record space by subtracting the attribute pointer from bytes_in_use. Because that value is unsigned, the subtraction can underflow and allow bytes after bytes_in_use to be interpreted as an attribute. Reject records where attrs_offset is outside bytes_in_use or where the used area does not even contain the four-byte attribute type/AT_END terminator at attrs_offset. A small userspace model with attrs_offset=128 and bytes_in_use=64 shows the current check accepts the record and the parser space calculation underflows to 0xffffffc0. With this change the same malformed record is rejected before the attribute walker is entered. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: DaeMyung Kang Signed-off-by: Namjae Jeon --- fs/ntfs/mft.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 68f6fc8b7b62..729b259974eb 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -30,6 +30,8 @@ int ntfs_mft_record_check(const struct ntfs_volume *vol, struct mft_record *m, { struct attr_record *a; struct super_block *sb = vol->sb; + u16 attrs_offset; + u32 bytes_in_use; if (!ntfs_is_file_record(m->magic)) { ntfs_error(sb, "Record %llu has no FILE magic (0x%x)\n", @@ -65,7 +67,16 @@ int ntfs_mft_record_check(const struct ntfs_volume *vol, struct mft_record *m, goto err_out; } - a = (struct attr_record *)((char *)m + le16_to_cpu(m->attrs_offset)); + attrs_offset = le16_to_cpu(m->attrs_offset); + bytes_in_use = le32_to_cpu(m->bytes_in_use); + + if (attrs_offset > bytes_in_use || + bytes_in_use - attrs_offset < sizeof_field(struct attr_record, type)) { + ntfs_error(sb, "Record %llu has corrupt attribute offset\n", mft_no); + goto err_out; + } + + a = (struct attr_record *)((char *)m + attrs_offset); if ((char *)a < (char *)m || (char *)a > (char *)m + vol->mft_record_size) { ntfs_error(sb, "Record %llu is corrupt\n", mft_no); goto err_out; -- cgit v1.2.3 From 679ee5afd5b4764911656b4d4b83b9abee2b5572 Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Sat, 9 May 2026 15:12:36 +0900 Subject: ntfs: fix MFT bitmap scan 2^32 boundary check NTFS MFT record numbers are limited to the 32-bit range, and ntfs_mft_record_layout() rejects mft_no >= 2^32. The free-MFT-record bitmap scan in ntfs_mft_bitmap_find_and_alloc_free_rec_nolock() also guards against this overflow but uses a strict greater than comparison, allowing record number 2^32 itself through this earlier check. Every other 2^32 boundary check in fs/ntfs/mft.c uses '>=', so the strict greater than here is both a real off-by-one and an internal inconsistency. A model with ll == 2^32 confirms the current check accepts the value while the corrected check rejects it. Use '>=' so the boundary matches the layout-time rejection and the surrounding bitmap-scan checks. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: DaeMyung Kang Signed-off-by: Namjae Jeon --- fs/ntfs/mft.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 729b259974eb..a7d10ee41b34 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -1064,7 +1064,7 @@ static s64 ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(struct ntfs_volume *vo b = ffz((unsigned long)*byte); if (b < 8 && b >= (bit & 7)) { ll = data_pos + (bit & ~7ull) + b; - if (unlikely(ll > (1ll << 32))) { + if (unlikely(ll >= (1ll << 32))) { folio_unlock(folio); kunmap_local(buf); folio_put(folio); -- cgit v1.2.3 From b64f0ae5d47c0bd9581eb9cd59375a87f748dc00 Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Sat, 9 May 2026 15:12:37 +0900 Subject: ntfs: validate attribute name bounds before returning it ntfs_attr_find() validates a named attribute before comparing it with the requested name, but that check is currently after the AT_UNUSED handling. When callers enumerate attributes with AT_UNUSED, ntfs_attr_find() can return a malformed named attribute before checking whether name_offset and name_length stay within the attribute record. Some enumeration callers use the returned attribute name pointer directly. For example, one path passes (attr + name_offset, name_length) to ntfs_attr_iget(), where the name can later be copied according to name_length. A malformed on-disk name_offset/name_length pair should not be exposed to those callers. Move the existing name bounds validation before returning attributes during AT_UNUSED enumeration, and write it as an offset/remaining-size check so the subtraction cannot underflow. Extract the converted values into local variables (name_offset, attr_len, name_size) to make the intent explicit and avoid repeating the endian conversions inside the bounds check. This keeps matching attributes on the same checked path while also covering attribute enumeration. A small userspace ASAN model with attr length=32, name_offset=124 and name_length=8 reproduces a heap-buffer-overflow read in the old enumeration path. With this change the same malformed attribute is rejected before the name pointer is returned to the caller. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: DaeMyung Kang Signed-off-by: Namjae Jeon --- fs/ntfs/attrib.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index d60d0c686718..421c6cdcbb53 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -661,6 +661,9 @@ static int ntfs_attr_find(const __le32 type, const __le16 *name, __le16 *upcase = vol->upcase; u32 upcase_len = vol->upcase_len; unsigned int space; + u16 name_offset; + u32 attr_len; + u32 name_size; /* * Iterate over attributes in mft record starting at @ctx->attr, or the @@ -688,6 +691,20 @@ static int ntfs_attr_find(const __le32 type, const __le16 *name, return -ENOENT; if (unlikely(!a->length)) break; + if (a->name_length) { + name_offset = le16_to_cpu(a->name_offset); + attr_len = le32_to_cpu(a->length); + name_size = a->name_length * sizeof(__le16); + + if (name_offset > attr_len || + attr_len - name_offset < name_size) { + ntfs_error(vol->sb, + "Corrupt attribute name in MFT record %llu\n", + ctx->ntfs_ino->mft_no); + break; + } + } + if (type == AT_UNUSED) return 0; if (a->type != type) @@ -701,14 +718,6 @@ static int ntfs_attr_find(const __le32 type, const __le16 *name, if (a->name_length) return -ENOENT; } else { - if (a->name_length && ((le16_to_cpu(a->name_offset) + - a->name_length * sizeof(__le16)) > - le32_to_cpu(a->length))) { - ntfs_error(vol->sb, "Corrupt attribute name in MFT record %llu\n", - ctx->ntfs_ino->mft_no); - break; - } - if (!ntfs_are_names_equal(name, name_len, (__le16 *)((u8 *)a + le16_to_cpu(a->name_offset)), a->name_length, ic, upcase, upcase_len)) { -- cgit v1.2.3 From 512809bb8a370d071f66fc53abe67368e171dec5 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Thu, 7 May 2026 20:22:06 +0200 Subject: bpf: Don't run arg-tracking analysis twice on main subprog Because subprog 0, the main subprog, is considered a global function, we end up running the arg-tracking dataflow analysis twice on it. That results in slightly longer verification but mostly in more verbose verifier logs. This patch fixes it by keeping only the iteration over global subprogs. When running over all of Cilium's programs with BPF_LOG_LEVEL2, this reduces verbosity by ~20% on average. Fixes: bf0c571f7feb6 ("bpf: introduce forward arg-tracking dataflow analysis") Signed-off-by: Paul Chaignon Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/e4d7b53d4963ef520541a782f5fc8108a168877c.1778176504.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/liveness.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c index 332e6e003f27..58197d73b120 100644 --- a/kernel/bpf/liveness.c +++ b/kernel/bpf/liveness.c @@ -1914,26 +1914,15 @@ int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env) return -ENOMEM; } - instance = call_instance(env, NULL, 0, 0); - if (IS_ERR(instance)) { - err = PTR_ERR(instance); - goto out; - } - err = analyze_subprog(env, NULL, info, instance, callsites); - if (err) - goto out; - /* - * Subprogs and callbacks that don't receive FP-derived arguments - * cannot access ancestor stack frames, so they were skipped during - * the recursive walk above. Async callbacks (timer, workqueue) are - * also not reachable from the main program's call graph. Analyze - * all unvisited subprogs as independent roots at depth 0. + * Analyze every subprog in reverse topological order (callers + * before callees) so that each subprog is analyzed before its + * callees, allowing the recursive walk inside analyze_subprog() + * to naturally reach callees that receive FP-derived args. * - * Use reverse topological order (callers before callees) so that - * each subprog is analyzed before its callees, allowing the - * recursive walk inside analyze_subprog() to naturally - * reach nested callees that also lack FP-derived args. + * Subprogs and callbacks that don't receive FP-derived arguments + * cannot access ancestor stack frames are analyzed independently. + * Async callbacks (timer, workqueue) are handled the same way. */ for (k = env->subprog_cnt - 1; k >= 0; k--) { int sub = env->subprog_topo_order[k]; -- cgit v1.2.3 From bf6d507f7e3c65751d52fd8caf1ea4e003922624 Mon Sep 17 00:00:00 2001 From: Linpu Yu Date: Fri, 8 May 2026 22:43:43 +0800 Subject: xskmap: reject TX-only AF_XDP sockets XSKMAP entries are used as redirect targets for incoming XDP frames. A TX-only AF_XDP socket lacks an Rx ring and cannot handle redirected traffic, but xsk_map_update_elem() currently allows such sockets to be inserted into the map. Redirecting packets to such a socket on the veth generic-XDP path causes a kernel crash in xsk_generic_rcv(). This became possible after xsk_is_setup_for_bpf_map() was removed from the XSKMAP update path, which allowed bound TX-only sockets to be inserted into the map. Reject TX-only sockets during XSKMAP updates to avoid the crash. They remain fully operational for pure Tx purposes outside XSKMAP. Fixes: 968be23ceaca ("xsk: Fix possible segfault at xskmap entry insertion") Reported-by: Juefei Pu Reported-by: Yuan Tan Reported-by: Xin Liu Signed-off-by: Yifan Wu Signed-off-by: Linpu Yu Reviewed-by: Jason Xing Link: https://lore.kernel.org/r/20260508144344.694-1-linpu5433@gmail.com Signed-off-by: Alexei Starovoitov --- net/xdp/xskmap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index afa457506274..3bff346308d0 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -184,6 +184,10 @@ static long xsk_map_update_elem(struct bpf_map *map, void *key, void *value, } xs = (struct xdp_sock *)sock->sk; + if (!READ_ONCE(xs->rx)) { + sockfd_put(sock); + return -ENOBUFS; + } map_entry = &m->xsk_map[i]; node = xsk_map_node_alloc(m, map_entry); -- cgit v1.2.3 From 3ac1a467e37683f602221e243fa3c59b0de81165 Mon Sep 17 00:00:00 2001 From: Junyoung Jang Date: Mon, 27 Apr 2026 02:25:05 +0900 Subject: bpf: Fix off-by-one boundary validation in arena direct-value access BPF_MAP_TYPE_ARENA accepts BPF_PSEUDO_MAP_VALUE offsets at exactly the end of the arena mapping (off == arena_size). The boundary check in arena_map_direct_value_addr() uses `>` instead of `>=`, which incorrectly allows a one-past-end pointer to be accepted. Change the condition to `>=` to correctly reject offsets that fall outside the valid arena user_vm range. Fixes: 317460317a02 ("bpf: Introduce bpf_arena.") Signed-off-by: Junyoung Jang Reviewed-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20260426172505.1947915-1-graypanda.inzag@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 802656c6fd3c..49a8f7b1beef 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -511,7 +511,7 @@ static int arena_map_direct_value_addr(const struct bpf_map *map, u64 *imm, u32 { struct bpf_arena *arena = container_of(map, struct bpf_arena, map); - if ((u64)off > arena->user_vm_end - arena->user_vm_start) + if ((u64)off >= arena->user_vm_end - arena->user_vm_start) return -ERANGE; *imm = (unsigned long)arena->user_vm_start; return 0; -- cgit v1.2.3 From 8c16c1c00167134f15ca8e9defdf38b1cac08c36 Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Sun, 10 May 2026 11:13:11 +0900 Subject: ntfs: fix empty_buf and ra lifetime bugs in ntfs_empty_logfile() ntfs_empty_logfile() has three related allocator bugs around the @empty_buf and @ra buffers it uses inside the per-cluster loop. When the loop encounters a runlist entry with LCN_RL_NOT_MAPPED, the function kvfrees @empty_buf and goes to map_vcn to remap. @empty_buf is not cleared. If ntfs_map_runlist_nolock() fails on re-entry, control jumps to the err label which kvfrees @empty_buf a second time. In the same branch, @ra is left allocated. When the remap succeeds the function falls through the @empty_buf re-allocation and the @ra re-allocation, overwriting the previous @ra pointer and leaking it. The success path frees @empty_buf with kfree() instead of kvfree(). kvzalloc() may fall back to vmalloc(), in which case kfree() does not correctly release the memory. A KASAN-enabled QEMU harness mirroring this control flow reports "BUG: KASAN: double-free" when the second ntfs_map_runlist_nolock() fails. Clear both @empty_buf and @ra after the in-loop releases so the err path is a no-op when the buffers have already been freed and so the remap-success path does not leak the previous @ra. Switch the success path to kvfree() to match the @empty_buf allocator. Fixes: 5218cd102aec ("ntfs: update misc operations") Signed-off-by: DaeMyung Kang Signed-off-by: Namjae Jeon --- fs/ntfs/logfile.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 3f8d1640f1d5..d3f25d8e29f9 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -710,6 +710,9 @@ map_vcn: if (unlikely(lcn == LCN_RL_NOT_MAPPED)) { vcn = rl->vcn; kvfree(empty_buf); + empty_buf = NULL; + kfree(ra); + ra = NULL; goto map_vcn; } /* If this run is not valid abort with an error. */ @@ -753,7 +756,7 @@ map_vcn: } while (start < end); } while ((++rl)->vcn < end_vcn); up_write(&log_ni->runlist.lock); - kfree(empty_buf); + kvfree(empty_buf); kfree(ra); truncate_inode_pages(log_vi->i_mapping, 0); /* Set the flag so we do not have to do it again on remount. */ -- cgit v1.2.3 From 91ddf6f722084383fb05be731c0107814b055c0c Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Sat, 21 Mar 2026 15:42:32 +0100 Subject: phy: marvell: mvebu-a3700-utmi: fix incorrect USB2_PHY_CTRL register access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mvebu_a3700_utmi_phy_power_off() function tries to modify the USB2_PHY_CTRL register by using the IO address of the PHY IP block along with the readl/writel IO accessors. However, the register exist in the USB miscellaneous register space, and as such it must be accessed via regmap like it is done in the mvebu_a3700_utmi_phy_power_on() function. Change the code to use regmap_update_bits() for modífying the register to fix this. Fixes: cc8b7a0ae866 ("phy: add A3700 UTMI PHY driver") Signed-off-by: Gabor Juhos Reviewed-by: Miquel Raynal Link: https://patch.msgid.link/20260321-a3700-utmi-fix-usb2_phy_ctrl-access-v1-1-6005ff4b5058@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/marvell/phy-mvebu-a3700-utmi.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/phy/marvell/phy-mvebu-a3700-utmi.c b/drivers/phy/marvell/phy-mvebu-a3700-utmi.c index 04f4fb4bed70..f882bc57649c 100644 --- a/drivers/phy/marvell/phy-mvebu-a3700-utmi.c +++ b/drivers/phy/marvell/phy-mvebu-a3700-utmi.c @@ -168,9 +168,8 @@ static int mvebu_a3700_utmi_phy_power_off(struct phy *phy) u32 reg; /* Disable PHY pull-up and enable USB2 suspend */ - reg = readl(utmi->regs + USB2_PHY_CTRL(usb32)); - reg &= ~(RB_USB2PHY_PU | RB_USB2PHY_SUSPM(usb32)); - writel(reg, utmi->regs + USB2_PHY_CTRL(usb32)); + regmap_update_bits(utmi->usb_misc, USB2_PHY_CTRL(usb32), + RB_USB2PHY_PU | RB_USB2PHY_SUSPM(usb32), 0); /* Power down OTG module */ if (usb32) { -- cgit v1.2.3 From da110228b54f2e2143d97ea7151e0dc22e539d67 Mon Sep 17 00:00:00 2001 From: Wayne Chang Date: Mon, 4 May 2026 11:33:05 +0800 Subject: phy: tegra: xusb: Fix per-pad high-speed termination calibration The existing code reads a single hs_term_range_adj value from bit field [10:7] of FUSE_SKU_CALIB_0 and applies it to all USB2 pads uniformly. However, on SoCs that support per-pad termination, each pad has its own hs_term_range_adj field: pad 0 in FUSE_SKU_CALIB_0[10:7], and pads 1-3 in FUSE_USB_CALIB_EXT_0 at bit offsets [8:5], [12:9], and [16:13] respectively. Fix the calibration by reading per-pad values from the appropriate fuse registers. For SoCs that do not support per-pad termination, replicate pad 0's value to all pads to maintain existing behavior. Add a has_per_pad_term flag to the SoC data to indicate whether per-pad termination values are available in FUSE_USB_CALIB_EXT_0. Fixes: 1ef535c6ba8e ("phy: tegra: xusb: Add Tegra194 support") Cc: stable@vger.kernel.org Signed-off-by: Wayne Chang Signed-off-by: Wei-Cheng Chen Reviewed-by: Jon Hunter Tested-by: Jon Hunter Link: https://patch.msgid.link/20260504033305.2283145-1-weichengc@nvidia.com Signed-off-by: Vinod Koul --- drivers/phy/tegra/xusb-tegra186.c | 33 ++++++++++++++++++++++++++------- drivers/phy/tegra/xusb.h | 1 + 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c index 1ddf11265974..60156aea2707 100644 --- a/drivers/phy/tegra/xusb-tegra186.c +++ b/drivers/phy/tegra/xusb-tegra186.c @@ -20,8 +20,8 @@ /* FUSE USB_CALIB registers */ #define HS_CURR_LEVEL_PADX_SHIFT(x) ((x) ? (11 + (x - 1) * 6) : 0) #define HS_CURR_LEVEL_PAD_MASK 0x3f -#define HS_TERM_RANGE_ADJ_SHIFT 7 -#define HS_TERM_RANGE_ADJ_MASK 0xf +#define HS_TERM_RANGE_ADJ_PADX_SHIFT(x) ((x) ? (5 + (x - 1) * 4) : 7) +#define HS_TERM_RANGE_ADJ_PAD_MASK 0xf #define HS_SQUELCH_SHIFT 29 #define HS_SQUELCH_MASK 0x7 @@ -253,7 +253,7 @@ struct tegra_xusb_fuse_calibration { u32 *hs_curr_level; u32 hs_squelch; - u32 hs_term_range_adj; + u32 *hs_term_range_adj; u32 rpd_ctrl; }; @@ -930,7 +930,7 @@ static int tegra186_utmi_phy_power_on(struct phy *phy) value = padctl_readl(padctl, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index)); value &= ~TERM_RANGE_ADJ(~0); - value |= TERM_RANGE_ADJ(priv->calib.hs_term_range_adj); + value |= TERM_RANGE_ADJ(priv->calib.hs_term_range_adj[index]); value &= ~RPD_CTRL(~0); value |= RPD_CTRL(priv->calib.rpd_ctrl); padctl_writel(padctl, value, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index)); @@ -1464,17 +1464,23 @@ static const char * const tegra186_usb3_functions[] = { static int tegra186_xusb_read_fuse_calibration(struct tegra186_xusb_padctl *padctl) { + const struct tegra_xusb_padctl_soc *soc = padctl->base.soc; struct device *dev = padctl->base.dev; unsigned int i, count; u32 value, *level; + u32 *hs_term_range_adj; int err; - count = padctl->base.soc->ports.usb2.count; + count = soc->ports.usb2.count; level = devm_kcalloc(dev, count, sizeof(u32), GFP_KERNEL); if (!level) return -ENOMEM; + hs_term_range_adj = devm_kcalloc(dev, count, sizeof(u32), GFP_KERNEL); + if (!hs_term_range_adj) + return -ENOMEM; + err = tegra_fuse_readl(TEGRA_FUSE_SKU_CALIB_0, &value); if (err) return dev_err_probe(dev, err, @@ -1490,8 +1496,8 @@ tegra186_xusb_read_fuse_calibration(struct tegra186_xusb_padctl *padctl) padctl->calib.hs_squelch = (value >> HS_SQUELCH_SHIFT) & HS_SQUELCH_MASK; - padctl->calib.hs_term_range_adj = (value >> HS_TERM_RANGE_ADJ_SHIFT) & - HS_TERM_RANGE_ADJ_MASK; + hs_term_range_adj[0] = (value >> HS_TERM_RANGE_ADJ_PADX_SHIFT(0)) & + HS_TERM_RANGE_ADJ_PAD_MASK; err = tegra_fuse_readl(TEGRA_FUSE_USB_CALIB_EXT_0, &value); if (err) { @@ -1503,6 +1509,17 @@ tegra186_xusb_read_fuse_calibration(struct tegra186_xusb_padctl *padctl) padctl->calib.rpd_ctrl = (value >> RPD_CTRL_SHIFT) & RPD_CTRL_MASK; + for (i = 1; i < count; i++) { + if (soc->has_per_pad_term) + hs_term_range_adj[i] = + (value >> HS_TERM_RANGE_ADJ_PADX_SHIFT(i)) & + HS_TERM_RANGE_ADJ_PAD_MASK; + else + hs_term_range_adj[i] = hs_term_range_adj[0]; + } + + padctl->calib.hs_term_range_adj = hs_term_range_adj; + return 0; } @@ -1708,6 +1725,7 @@ const struct tegra_xusb_padctl_soc tegra194_xusb_padctl_soc = { .num_supplies = ARRAY_SIZE(tegra194_xusb_padctl_supply_names), .supports_gen2 = true, .poll_trk_completed = true, + .has_per_pad_term = true, }; EXPORT_SYMBOL_GPL(tegra194_xusb_padctl_soc); @@ -1732,6 +1750,7 @@ const struct tegra_xusb_padctl_soc tegra234_xusb_padctl_soc = { .trk_hw_mode = false, .trk_update_on_idle = true, .supports_lp_cfg_en = true, + .has_per_pad_term = true, }; EXPORT_SYMBOL_GPL(tegra234_xusb_padctl_soc); #endif diff --git a/drivers/phy/tegra/xusb.h b/drivers/phy/tegra/xusb.h index cd277d0ed9e1..77609e54de66 100644 --- a/drivers/phy/tegra/xusb.h +++ b/drivers/phy/tegra/xusb.h @@ -435,6 +435,7 @@ struct tegra_xusb_padctl_soc { bool trk_hw_mode; bool trk_update_on_idle; bool supports_lp_cfg_en; + bool has_per_pad_term; }; struct tegra_xusb_padctl { -- cgit v1.2.3 From 5a759b120e31aa3ed914d98b51eb1755235250f2 Mon Sep 17 00:00:00 2001 From: Łukasz Lebiedziński Date: Mon, 6 Apr 2026 15:56:27 +0200 Subject: phy: exynos5-usbdrd: fix USB 2.0 HS PHY tuning values for Exynos7870 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing PHYPARAM0 tuning values for Exynos7870 are incorrect, causing the USB 2.0 PHY to fail high-speed negotiation and fall back to full-speed (12Mbps) operation. Fix TXVREFTUNE (transmitter voltage reference) from 14 to 3, TXRESTUNE (transmitter impedance) from 3 to 2, and SQRXTUNE (squelch threshold) from 6 to 5. Also explicitly set TXPREEMPPULSETUNE to 0, which was previously missing from the tuning table despite being included in the register mask. All values are derived from the vendor kernel for the Samsung Galaxy A6 (SM-A600FN), as no public hardware documentation is available for the Exynos7870 USB DRD PHY. With these corrections, the PHY successfully negotiates high-speed (480Mbps) operation. Fixes: 588d5d20ca8d ("phy: exynos5-usbdrd: add exynos7870 USBDRD support") Cc: stable@vger.kernel.org Tested-by: Kaustabh Chakraborty Reviewed-by: Krzysztof Kozlowski Signed-off-by: Łukasz Lebiedziński Link: https://patch.msgid.link/20260406135627.234835-1-kernel@lvkasz.us Signed-off-by: Vinod Koul --- drivers/phy/samsung/phy-exynos5-usbdrd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/phy/samsung/phy-exynos5-usbdrd.c b/drivers/phy/samsung/phy-exynos5-usbdrd.c index 5a181cb4597e..8711a3b62c8e 100644 --- a/drivers/phy/samsung/phy-exynos5-usbdrd.c +++ b/drivers/phy/samsung/phy-exynos5-usbdrd.c @@ -1958,13 +1958,14 @@ const struct exynos5_usbdrd_phy_tuning exynos7870_tunes_utmi_postinit[] = { PHYPARAM0_TXPREEMPAMPTUNE | PHYPARAM0_TXHSXVTUNE | PHYPARAM0_TXFSLSTUNE | PHYPARAM0_SQRXTUNE | PHYPARAM0_OTGTUNE | PHYPARAM0_COMPDISTUNE), - (FIELD_PREP_CONST(PHYPARAM0_TXVREFTUNE, 14) | + (FIELD_PREP_CONST(PHYPARAM0_TXVREFTUNE, 3) | FIELD_PREP_CONST(PHYPARAM0_TXRISETUNE, 1) | - FIELD_PREP_CONST(PHYPARAM0_TXRESTUNE, 3) | + FIELD_PREP_CONST(PHYPARAM0_TXRESTUNE, 2) | + FIELD_PREP_CONST(PHYPARAM0_TXPREEMPPULSETUNE, 0) | FIELD_PREP_CONST(PHYPARAM0_TXPREEMPAMPTUNE, 0) | FIELD_PREP_CONST(PHYPARAM0_TXHSXVTUNE, 0) | FIELD_PREP_CONST(PHYPARAM0_TXFSLSTUNE, 3) | - FIELD_PREP_CONST(PHYPARAM0_SQRXTUNE, 6) | + FIELD_PREP_CONST(PHYPARAM0_SQRXTUNE, 5) | FIELD_PREP_CONST(PHYPARAM0_OTGTUNE, 2) | FIELD_PREP_CONST(PHYPARAM0_COMPDISTUNE, 3))), PHY_TUNING_ENTRY_LAST -- cgit v1.2.3 From 80305760d7a55b884fb9023c490b75568d1ea0b1 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Wed, 15 Apr 2026 16:18:51 +0530 Subject: phy: qcom-qmp-ufs: Fix kaanapali PHY PLL lock failure after SM8650 G4 fix Commit 81af9e40e2e4 ("phy: qcom: qmp-ufs: Fix SM8650 PCS table for Gear 4") moved QPHY_V6_PCS_UFS_PLL_CNTL register configuration from the shared sm8650_ufsphy_g5_pcs table to the SM8650-specific sm8650_ufsphy_pcs base table to fix Gear 4 operation on SM8650. However, this change inadvertently broke kaanapali and SM8750 SoCs which also rely on the shared sm8650_ufsphy_g5_pcs table for Gear 5 configuration but use their own sm8750_ufsphy_pcs base table. After the change, kaanapali PHYs are left without the required PLL_CNTL = 0x33 setting, causing the PHY PLL to remain at its hardware reset default value, preventing PLL lock and resulting in DME_LINKSTARTUP timeouts. Fix this by adding the missing QPHY_V6_PCS_UFS_PLL_CNTL = 0x33 entry to the sm8750_ufsphy_pcs table, mirroring what the original commit already did for sm8650_ufsphy_pcs. Cc: stable@vger.kernel.org # v6.19.12 Fixes: 81af9e40e2e4 ("phy: qcom: qmp-ufs: Fix SM8650 PCS table for Gear 4") Signed-off-by: Nitin Rawat Reviewed-by: Abel Vesa Reviewed-by: Konrad Dybcio Reviewed-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260415104851.2763238-1-nitin.rawat@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 771bc7c2ab50..b87314c8379d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1112,6 +1112,7 @@ static const struct qmp_phy_init_tbl sm8750_ufsphy_pcs[] = { QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_MULTI_LANE_CTRL1, 0x02), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_MID_TERM_CTRL1, 0x43), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PCS_CTRL1, 0x40), + QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x33), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_LARGE_AMP_DRV_LVL, 0x0f), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_SIGDET_CTRL2, 0x68), QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_POST_EMP_LVL_S4, 0x0e), -- cgit v1.2.3 From c2cd08e8f150738515c8df415ad7ecfa3d38124a Mon Sep 17 00:00:00 2001 From: Yulin Lu Date: Mon, 13 Apr 2026 15:00:33 +0800 Subject: phy: eswin: Fix incorrect error check in probe() devm_ioremap() returns NULL on failure, not an ERR_PTR. Using IS_ERR() to check the return value is incorrect. Fix this by checking for NULL and returning -ENOMEM. Fixes: 67ee9ccaa34a ("phy: eswin: Create eswin directory and add EIC7700 SATA PHY driver") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-phy/adjNbuWoc1B-3Ok1@stanley.mountain/ Signed-off-by: Yulin Lu Link: https://patch.msgid.link/20260413070033.128-1-luyulin@eswincomputing.com Signed-off-by: Vinod Koul --- drivers/phy/eswin/phy-eic7700-sata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/eswin/phy-eic7700-sata.c b/drivers/phy/eswin/phy-eic7700-sata.c index c33653d48daa..76774b9e391b 100644 --- a/drivers/phy/eswin/phy-eic7700-sata.c +++ b/drivers/phy/eswin/phy-eic7700-sata.c @@ -216,8 +216,8 @@ static int eic7700_sata_phy_probe(struct platform_device *pdev) return -ENOENT; regs = devm_ioremap(dev, res->start, resource_size(res)); - if (IS_ERR(regs)) - return PTR_ERR(regs); + if (!regs) + return -ENOMEM; sata_phy->regmap = devm_regmap_init_mmio (dev, regs, &eic7700_sata_phy_regmap_config); -- cgit v1.2.3 From a4058c09dd6e28ec33316fd6eb45ddae4cab1f31 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Thu, 26 Mar 2026 00:23:58 +0800 Subject: phy: spacemit: Remove incorrect clk_disable() in spacemit_usb2phy_init() When clk_enable() fails, the clock was never enabled. Calling clk_disable() in this error path is incorrect. Remove the spurious clk_disable() call from the error handling in spacemit_usb2phy_init(). Fixes: fe4bc1a08638 ("phy: spacemit: support K1 USB2.0 PHY controller") Signed-off-by: Felix Gu Reviewed-by: Ze Huang Link: https://patch.msgid.link/20260326-k1-usb3-v1-1-0c2b6adf5185@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/spacemit/phy-k1-usb2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/phy/spacemit/phy-k1-usb2.c b/drivers/phy/spacemit/phy-k1-usb2.c index 9215d0b223b2..e8c1e26428a9 100644 --- a/drivers/phy/spacemit/phy-k1-usb2.c +++ b/drivers/phy/spacemit/phy-k1-usb2.c @@ -97,7 +97,6 @@ static int spacemit_usb2phy_init(struct phy *phy) ret = clk_enable(sphy->clk); if (ret) { dev_err(&phy->dev, "failed to enable clock\n"); - clk_disable(sphy->clk); return ret; } -- cgit v1.2.3 From aa54b1d27fe0c2b78e664a34fd0fdf7cd1960d71 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Fri, 8 May 2026 17:53:09 +0900 Subject: rxrpc: Also unshare DATA/RESPONSE packets when paged frags are present The DATA-packet handler in rxrpc_input_call_event() and the RESPONSE handler in rxrpc_verify_response() copy the skb to a linear one before calling into the security ops only when skb_cloned() is true. An skb that is not cloned but still carries externally-owned paged fragments (e.g. SKBFL_SHARED_FRAG set by splice() into a UDP socket via __ip_append_data, or a chained skb_has_frag_list()) falls through to the in-place decryption path, which binds the frag pages directly into the AEAD/skcipher SGL via skb_to_sgvec(). Extend the gate to also unshare when skb_has_frag_list() or skb_has_shared_frag() is true. This catches the splice-loopback vector and other externally-shared frag sources while preserving the zero-copy fast path for skbs whose frags are kernel-private (e.g. NIC page_pool RX, GRO). The OOM/trace handling already in place is reused. Fixes: d0d5c0cd1e71 ("rxrpc: Use skb_unshare() rather than skb_cow_data()") Cc: stable@vger.kernel.org Signed-off-by: Hyunwoo Kim Reviewed-by: Jiayuan Chen Acked-by: David Howells Signed-off-by: Linus Torvalds --- net/rxrpc/call_event.c | 4 +++- net/rxrpc/conn_event.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index fdd683261226..2b19b252225e 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -334,7 +334,9 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.securityIndex != 0 && - skb_cloned(skb)) { + (skb_cloned(skb) || + skb_has_frag_list(skb) || + skb_has_shared_frag(skb))) { /* Unshare the packet so that it can be * modified by in-place decryption. */ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index a2130d25aaa9..442414d90ba1 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -245,7 +245,8 @@ static int rxrpc_verify_response(struct rxrpc_connection *conn, { int ret; - if (skb_cloned(skb)) { + if (skb_cloned(skb) || skb_has_frag_list(skb) || + skb_has_shared_frag(skb)) { /* Copy the packet if shared so that we can do in-place * decryption. */ -- cgit v1.2.3 From 46e9b0224475abc739612ef72c35b7c90211a0c1 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 8 May 2026 13:41:13 -0700 Subject: tools/ynl: add missing uapi header deps in Makefile.deps ethtool.h includes linux/typelimits.h which is a relatively new header not yet shipped in most distro kernel-header packages. Without the explicit entry, the build silently falls through to -idirafter. dev_energymodel.h is a new YNL family whose uapi header is not in system paths at all and was missing a CFLAGS entry entirely. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20260508204114.205896-2-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/net/ynl/Makefile.deps | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index 08205f9fc525..cc53b2f21c44 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -15,9 +15,11 @@ UAPI_PATH:=../../../../include/uapi/ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2) get_hdr_inc2=-D$(1) -D$(2) -include $(UAPI_PATH)/linux/$(3) +CFLAGS_dev-energymodel:=$(call get_hdr_inc,_LINUX_DEV_ENERGYMODEL_H,dev_energymodel.h) CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h) -CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \ +CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_TYPELIMITS_H,typelimits.h) \ + $(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \ $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \ $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) -- cgit v1.2.3 From 304d81a2fbf2b454def4debcb38ea173911b72cd Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 7 Apr 2026 18:08:57 -0400 Subject: nfsd: fix file change detection in CB_GETATTR RFC 8881, section 10.4.3 doesn't say anything about caching the file size in the delegation record, nor does it say anything about comparing a cached file size with the size reported by the client in the CB_GETATTR reply for the purpose of determining if the client holds modified data for the file. What section 10.4.3 of RFC 8881 does say is that the server should compare the *current* file size with the size reported by the client holding the delegation in the CB_GETATTR reply, and if they differ to treat it as a modification regardless of the change attribute retrieved via the CB_GETATTR. Doing otherwise would cause the server to believe the client holding the delegation has a modified version of the file, even if the client flushed the modifications to the server prior to the CB_GETATTR. This would have the added side effect of subsequent CB_GETATTRs causing updates to the mtime, ctime, and change attribute even if the client holding the delegation makes no further updates to the file. Modify nfsd4_deleg_getattr_conflict() to obtain the current file size via i_size_read(). Retain the ncf_cur_fsize field, since it's a convenient way to return the file size back to nfsd4_encode_fattr4(), but don't use it for the purpose of detecting file changes. Remove the unnecessary initialization of ncf_cur_fsize in nfs4_open_delegation(). Also, if we recall the delegation (because the client didn't respond to the CB_GETATTR), then skip the logic that checks the nfs4_cb_fattr fields. Fixes: c5967721e106 ("NFSD: handle GETATTR conflict with write delegation") Cc: stable@vger.kernel.org Signed-off-by: Scott Mayhew Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b4d0e82b2690..a9f7bd491b8c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6378,7 +6378,6 @@ nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open, } open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE; - dp->dl_cb_fattr.ncf_cur_fsize = stat.size; dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat); dp->dl_atime = stat.atime; dp->dl_ctime = stat.ctime; @@ -9429,11 +9428,15 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, if (status != nfserr_jukebox || !nfsd_wait_for_delegreturn(rqstp, inode)) goto out_status; + status = nfs_ok; + goto out_status; + } + if (!ncf->ncf_file_modified) { + if (ncf->ncf_initial_cinfo != ncf->ncf_cb_change) + ncf->ncf_file_modified = true; + else if (i_size_read(inode) != ncf->ncf_cb_fsize) + ncf->ncf_file_modified = true; } - if (!ncf->ncf_file_modified && - (ncf->ncf_initial_cinfo != ncf->ncf_cb_change || - ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) - ncf->ncf_file_modified = true; if (ncf->ncf_file_modified) { int err; -- cgit v1.2.3 From 2863bac7f49c4acd80a048ce52506a2b9c8db015 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 10 Apr 2026 12:09:19 -0400 Subject: nfsd: update mtime/ctime on CLONE in presense of delegated attributes When delegated attributes are given on open, the file is opened with NOCMTIME and modifying operations do not update mtime/ctime as to not get out-of-sync with the client's delegated view. However, for CLONE operation, the server should update its view of mtime/ctime and reflect that in any GETATTR queries. Fixes: e5e9b24ab8fa ("nfsd: freeze c/mtime updates with outstanding WRITE_ATTRS delegation") Cc: stable@vger.kernel.org Signed-off-by: Olga Kornievskaia Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 3 +++ fs/nfsd/nfs4state.c | 44 +++++++++++++++++++++++++++++--------------- fs/nfsd/state.h | 1 + 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2797da8cc950..5de8f37df78a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1413,6 +1413,9 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dst, clone->cl_dst_pos, clone->cl_count, EX_ISSYNC(cstate->current_fh.fh_export)); + if (!status && (READ_ONCE(dst->nf_file->f_mode) & FMODE_NOCMTIME) != 0) + nfsd_update_cmtime_attr(dst->nf_file, 0); + nfsd_file_put(dst); nfsd_file_put(src); out: diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a9f7bd491b8c..3cd8b3b59de4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1221,10 +1221,6 @@ static void put_deleg_file(struct nfs4_file *fp) static void nfsd4_finalize_deleg_timestamps(struct nfs4_delegation *dp, struct file *f) { - struct iattr ia = { .ia_valid = ATTR_ATIME | ATTR_CTIME | ATTR_MTIME | ATTR_DELEG }; - struct inode *inode = file_inode(f); - int ret; - /* don't do anything if FMODE_NOCMTIME isn't set */ if ((READ_ONCE(f->f_mode) & FMODE_NOCMTIME) == 0) return; @@ -1242,17 +1238,7 @@ static void nfsd4_finalize_deleg_timestamps(struct nfs4_delegation *dp, struct f return; /* Stamp everything to "now" */ - inode_lock(inode); - ret = notify_change(&nop_mnt_idmap, f->f_path.dentry, &ia, NULL); - inode_unlock(inode); - if (ret) { - struct inode *inode = file_inode(f); - - pr_notice_ratelimited("nfsd: Unable to update timestamps on inode %02x:%02x:%lu: %d\n", - MAJOR(inode->i_sb->s_dev), - MINOR(inode->i_sb->s_dev), - inode->i_ino, ret); - } + nfsd_update_cmtime_attr(f, ATTR_ATIME); } static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp) @@ -9563,3 +9549,31 @@ out_delegees: put_nfs4_file(fp); return ERR_PTR(status); } + +/** + * nfsd_update_cmtime_attr - update file's delegated ctime/mtime, + * and optionally other attributes (ie ATTR_ATIME). + * @f: pointer to an opened file + * @flags: any additional flags that should be updated + * + * Given upon opening a file delegated attributes were issues, update + * @f attributes to current times. + */ +void nfsd_update_cmtime_attr(struct file *f, unsigned int flags) +{ + int ret; + struct inode *inode = file_inode(f); + struct iattr attr = { + .ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_DELEG | flags, + }; + + inode_lock(inode); + ret = notify_change(&nop_mnt_idmap, f->f_path.dentry, &attr, NULL); + inode_unlock(inode); + if (ret) + pr_notice_ratelimited("nfsd: Unable to update timestamps on " + "inode %02x:%02x:%lu: %d\n", + MAJOR(inode->i_sb->s_dev), + MINOR(inode->i_sb->s_dev), + inode->i_ino, ret); +} diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 953675eba5c3..c5ccea64c281 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -843,6 +843,7 @@ extern void nfsd4_shutdown_copy(struct nfs4_client *clp); void nfsd4_put_client(struct nfs4_client *clp); void nfsd4_async_copy_reaper(struct nfsd_net *nn); bool nfsd4_has_active_async_copies(struct nfs4_client *clp); +void nfsd_update_cmtime_attr(struct file *f, unsigned int flags); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); -- cgit v1.2.3 From 4183cf383b6faec17a0882b84cd2d901dba62b16 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 10 Apr 2026 12:09:20 -0400 Subject: nfsd: update mtime/ctime on COPY in presence of delegated attributes When delegated attributes are given on open, the file is opened with NOCMTIME and modifying operations do not update mtime/ctime as to not get out-of-sync with the client's delegated view. However, for COPY operation, the server should update its view of mtime/ctime and reflect that in any GETATTR queries. Fixes: e5e9b24ab8fa ("nfsd: freeze c/mtime updates with outstanding WRITE_ATTRS delegation") Cc: stable@vger.kernel.org Signed-off-by: Olga Kornievskaia Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 11 ++++++++++- fs/nfsd/xdr4.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5de8f37df78a..ab39ec885440 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2121,8 +2121,10 @@ do_callback: set_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags); trace_nfsd_copy_async_done(copy); - nfsd4_send_cb_offload(copy); atomic_dec(©->cp_nn->pending_async_copies); + if (copy->cp_res.wr_bytes_written > 0 && copy->attr_update) + nfsd_update_cmtime_attr(copy->nf_dst->nf_file, 0); + nfsd4_send_cb_offload(copy); return 0; } @@ -2182,6 +2184,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, memcpy(&result->cb_stateid, ©->cp_stateid.cs_stid, sizeof(result->cb_stateid)); dup_copy_fields(copy, async_copy); + if ((READ_ONCE(copy->nf_dst->nf_file->f_mode) & + FMODE_NOCMTIME) != 0) + async_copy->attr_update = true; memcpy(async_copy->cp_cb_offload.co_referring_sessionid.data, cstate->session->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); @@ -2200,6 +2205,10 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } else { status = nfsd4_do_copy(copy, copy->nf_src->nf_file, copy->nf_dst->nf_file, true); + if ((READ_ONCE(copy->nf_dst->nf_file->f_mode) & + FMODE_NOCMTIME) != 0 && + copy->cp_res.wr_bytes_written > 0) + nfsd_update_cmtime_attr(copy->nf_dst->nf_file, 0); } out: trace_nfsd_copy_done(copy, status); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 417e9ad9fbb3..9a4124c77e04 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -752,6 +752,7 @@ struct nfsd4_copy { struct nfsd_file *nf_src; struct nfsd_file *nf_dst; + bool attr_update; copy_stateid_t cp_stateid; -- cgit v1.2.3 From c00b472a1322d4f5424cd7b6c7d00270eae673bd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Apr 2026 17:12:16 -0400 Subject: sunrpc: start cache request seqno at 1 to fix netlink GET_REQS sunrpc_cache_requests_snapshot() filters requests with crq->seqno <= min_seqno. The min_seqno for the first netlink dump call is cb->args[0] which is 0. Since next_seqno was initialized to 0, the very first cache request got seqno=0 and was silently skipped by the snapshot (0 <= 0 is true). This caused netlink-based GET_REQS to return 0 pending requests even when a request was queued, preventing mountd from resolving cache entries (particularly expkey/nfsd.fh). The unresolved CACHE_PENDING state blocked all further notifications for the entry, leading to permanent NFS4ERR_DELAY hangs. Start next_seqno at 1 so all requests have seqno >= 1 and pass the snapshot filter when min_seqno is 0. Fixes: facc4e3c8042 ("sunrpc: split cache_detail queue into request and reader lists") Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 7081c1214e6c..b5474ce534fb 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -403,7 +403,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd) INIT_LIST_HEAD(&cd->readers); spin_lock_init(&cd->queue_lock); init_waitqueue_head(&cd->queue_wait); - cd->next_seqno = 0; + cd->next_seqno = 1; spin_lock(&cache_list_lock); cd->nextcheck = 0; cd->entries = 0; -- cgit v1.2.3 From 4f8ef58c10bfe5f86a643c7c8331b37e69e3dae1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 19 Apr 2026 14:52:59 -0400 Subject: NFSD: Fix infinite loop in layout state revocation find_one_sb_stid() skips stids whose sc_status is non-zero, but the SC_TYPE_LAYOUT case in nfsd4_revoke_states() never sets sc_status before calling nfsd4_close_layout(). The retry loop therefore finds the same layout stid on every iteration, hanging the revoker indefinitely. Fixes: 1e33e1414bec ("nfsd: allow layout state to be admin-revoked.") Reported-by: Dai Ngo Reviewed-by: Jeff Layton Tested-by: Dai Ngo Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3cd8b3b59de4..c92bc0c11065 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1851,6 +1851,13 @@ void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *sb) break; case SC_TYPE_LAYOUT: ls = layoutstateid(stid); + spin_lock(&clp->cl_lock); + if (stid->sc_status == 0) { + stid->sc_status |= + SC_STATUS_ADMIN_REVOKED; + atomic_inc(&clp->cl_admin_revoked); + } + spin_unlock(&clp->cl_lock); nfsd4_close_layout(ls); break; } -- cgit v1.2.3 From e42c755582f0960e684298762f0ab927b3778376 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Fri, 8 May 2026 06:21:21 +0000 Subject: net: ena: PHC: Fix potential use-after-free in get_timestamp Move the phc->active check and resp pointer assignment to after acquiring the spinlock. Previously, phc->active was checked without holding the lock, and resp was cached from ena_dev->phc.virt_addr before the lock was acquired. If ena_com_phc_destroy() runs between the lockless active check and the lock acquisition, it sets active=false, releases the lock, frees the DMA memory, and sets virt_addr=NULL. The get_timestamp path would then read a NULL virt_addr and dereference it. With both the active check and the pointer read under the lock, destroy cannot free the memory while get_timestamp is using it. Fixes: e0ea34158ee8 ("net: ena: Add PHC support in the ENA driver") Cc: stable@vger.kernel.org Signed-off-by: Arthur Kiyanovski Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20260508062126.7273-1-akiyano@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_com.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index e67b592e5697..8c86789d867a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -1782,20 +1782,23 @@ void ena_com_phc_destroy(struct ena_com_dev *ena_dev) int ena_com_phc_get_timestamp(struct ena_com_dev *ena_dev, u64 *timestamp) { - volatile struct ena_admin_phc_resp *resp = ena_dev->phc.virt_addr; const ktime_t zero_system_time = ktime_set(0, 0); struct ena_com_phc_info *phc = &ena_dev->phc; + volatile struct ena_admin_phc_resp *resp; ktime_t expire_time; ktime_t block_time; unsigned long flags = 0; int ret = 0; + spin_lock_irqsave(&phc->lock, flags); + if (!phc->active) { + spin_unlock_irqrestore(&phc->lock, flags); netdev_err(ena_dev->net_device, "PHC feature is not active in the device\n"); return -EOPNOTSUPP; } - spin_lock_irqsave(&phc->lock, flags); + resp = ena_dev->phc.virt_addr; /* Check if PHC is in blocked state */ if (unlikely(ktime_compare(phc->system_time, zero_system_time))) { -- cgit v1.2.3 From a450063ef86b9967234ca1f896c0d77400c74f11 Mon Sep 17 00:00:00 2001 From: Shitalkumar Gandhi Date: Thu, 7 May 2026 19:50:24 +0530 Subject: net: xgene: fix mdio_np leak in xgene_mdiobus_register() The for_each_child_of_node() loop captures mdio_np via break, holding the refcount. of_mdiobus_register() does not consume the reference, so it leaks on success. Put it after registration. Fixes: e6ad767305eb ("drivers: net: Add APM X-Gene SoC ethernet driver support.") Signed-off-by: Shitalkumar Gandhi Link: https://patch.msgid.link/20260507142024.811543-1-shitalkumar.gandhi@cambiumnetworks.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/apm/xgene/xgene_enet_hw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index b854b6b42d77..2926e1e59941 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -910,7 +910,9 @@ static int xgene_mdiobus_register(struct xgene_enet_pdata *pdata, return -ENXIO; } - return of_mdiobus_register(mdio, mdio_np); + ret = of_mdiobus_register(mdio, mdio_np); + of_node_put(mdio_np); + return ret; } /* Mask out all PHYs from auto probing. */ -- cgit v1.2.3 From 6947bea4b79115f50138882512f85fa9c93b2827 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 10 May 2026 10:08:16 -1000 Subject: sched_ext: Cleanups in preparation for the SCX_TASK_INIT_BEGIN/DEAD work Cleanups in preparation for the state-machine work that follows: - Convert three sub-sched call sites that open-code rcu_assign_pointer(p->scx.sched, ...) to scx_set_task_sched(). - Move scx_get_task_state()/scx_set_task_state() above the SCX task iter section so scx_task_iter_next_locked() can use them without a forward declaration. No functional change. Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 76 +++++++++++++++++++++++++++--------------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index df305712a2d4..10c6e0261f11 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -711,6 +711,41 @@ struct bpf_iter_scx_dsq { } __attribute__((aligned(8))); +static u32 scx_get_task_state(const struct task_struct *p) +{ + return p->scx.flags & SCX_TASK_STATE_MASK; +} + +static void scx_set_task_state(struct task_struct *p, u32 state) +{ + u32 prev_state = scx_get_task_state(p); + bool warn = false; + + switch (state) { + case SCX_TASK_NONE: + break; + case SCX_TASK_INIT: + warn = prev_state != SCX_TASK_NONE; + break; + case SCX_TASK_READY: + warn = prev_state == SCX_TASK_NONE; + break; + case SCX_TASK_ENABLED: + warn = prev_state != SCX_TASK_READY; + break; + default: + WARN_ONCE(1, "sched_ext: Invalid task state %d -> %d for %s[%d]", + prev_state, state, p->comm, p->pid); + return; + } + + WARN_ONCE(warn, "sched_ext: Invalid task state transition 0x%x -> 0x%x for %s[%d]", + prev_state, state, p->comm, p->pid); + + p->scx.flags &= ~SCX_TASK_STATE_MASK; + p->scx.flags |= state; +} + /* * SCX task iterator. */ @@ -3499,41 +3534,6 @@ static struct cgroup *tg_cgrp(struct task_group *tg) #endif /* CONFIG_EXT_GROUP_SCHED */ -static u32 scx_get_task_state(const struct task_struct *p) -{ - return p->scx.flags & SCX_TASK_STATE_MASK; -} - -static void scx_set_task_state(struct task_struct *p, u32 state) -{ - u32 prev_state = scx_get_task_state(p); - bool warn = false; - - switch (state) { - case SCX_TASK_NONE: - break; - case SCX_TASK_INIT: - warn = prev_state != SCX_TASK_NONE; - break; - case SCX_TASK_READY: - warn = prev_state == SCX_TASK_NONE; - break; - case SCX_TASK_ENABLED: - warn = prev_state != SCX_TASK_READY; - break; - default: - WARN_ONCE(1, "sched_ext: Invalid task state %d -> %d for %s[%d]", - prev_state, state, p->comm, p->pid); - return; - } - - WARN_ONCE(warn, "sched_ext: Invalid task state transition 0x%x -> 0x%x for %s[%d]", - prev_state, state, p->comm, p->pid); - - p->scx.flags &= ~SCX_TASK_STATE_MASK; - p->scx.flags |= state; -} - static int __scx_init_task(struct scx_sched *sch, struct task_struct *p, bool fork) { int ret; @@ -5696,7 +5696,7 @@ static void scx_fail_parent(struct scx_sched *sch, scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) { scx_disable_and_exit_task(sch, p); - rcu_assign_pointer(p->scx.sched, parent); + scx_set_task_sched(p, parent); } } scx_task_iter_stop(&sti); @@ -5783,7 +5783,7 @@ static void scx_sub_disable(struct scx_sched *sch) */ scx_disable_and_exit_task(sch, p); scx_set_task_state(p, SCX_TASK_INIT); - rcu_assign_pointer(p->scx.sched, parent); + scx_set_task_sched(p, parent); scx_set_task_state(p, SCX_TASK_READY); scx_enable_task(parent, p); } @@ -7212,7 +7212,7 @@ static void scx_sub_enable_workfn(struct kthread_work *work) * $p is now only initialized for @sch and READY, which * is what we want. Assign it to @sch and enable. */ - rcu_assign_pointer(p->scx.sched, sch); + scx_set_task_sched(p, sch); scx_enable_task(sch, p); p->scx.flags &= ~SCX_TASK_SUB_INIT; -- cgit v1.2.3 From 938dd9ab2bd7df0a7e58ce4249794156be9530b4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 10 May 2026 10:08:16 -1000 Subject: sched_ext: Inline scx_init_task() and move RESET_RUNNABLE_AT into scx_set_task_state() Prepare for the SCX_TASK_INIT_BEGIN/DEAD work that follows by collapsing the scx_init_task() helper. Move the SCX_TASK_RESET_RUNNABLE_AT setting into scx_set_task_state() on the INIT transition (it was set unconditionally at every INIT site through the scx_init_task() helper), inline scx_init_task() into scx_fork() and scx_root_enable_workfn(), and drop the helper. As a side effect, scx_sub_disable() migration sequence now also sets RESET_RUNNABLE_AT (it previously wrote INIT directly without going through scx_init_task()). The flag triggers a runnable_at reset on the next set_task_runnable(), which is harmless on a task that has just been moved between scheds. On root-enable, p->scx.flags is written without the task's rq lock. The task isn't visible to scx yet, and a follow-up patch restores the lock-held write. v2: Note p->scx.flags rq-lock relaxation on root-enable path. (Andrea) Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 10c6e0261f11..81841277a54f 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -726,6 +726,7 @@ static void scx_set_task_state(struct task_struct *p, u32 state) break; case SCX_TASK_INIT: warn = prev_state != SCX_TASK_NONE; + p->scx.flags |= SCX_TASK_RESET_RUNNABLE_AT; break; case SCX_TASK_READY: warn = prev_state == SCX_TASK_NONE; @@ -3585,22 +3586,6 @@ static int __scx_init_task(struct scx_sched *sch, struct task_struct *p, bool fo return 0; } -static int scx_init_task(struct scx_sched *sch, struct task_struct *p, bool fork) -{ - int ret; - - ret = __scx_init_task(sch, p, fork); - if (!ret) { - /* - * While @p's rq is not locked. @p is not visible to the rest of - * SCX yet and it's safe to update the flags and state. - */ - p->scx.flags |= SCX_TASK_RESET_RUNNABLE_AT; - scx_set_task_state(p, SCX_TASK_INIT); - } - return ret; -} - static void __scx_enable_task(struct scx_sched *sch, struct task_struct *p) { struct rq *rq = task_rq(p); @@ -3763,10 +3748,11 @@ int scx_fork(struct task_struct *p, struct kernel_clone_args *kargs) #else struct scx_sched *sch = scx_root; #endif - ret = scx_init_task(sch, p, true); - if (!ret) - scx_set_task_sched(p, sch); - return ret; + ret = __scx_init_task(sch, p, true); + if (unlikely(ret)) + return ret; + scx_set_task_state(p, SCX_TASK_INIT); + scx_set_task_sched(p, sch); } return 0; @@ -6897,8 +6883,8 @@ static void scx_root_enable_workfn(struct kthread_work *work) scx_task_iter_unlock(&sti); - ret = scx_init_task(sch, p, false); - if (ret) { + ret = __scx_init_task(sch, p, false); + if (unlikely(ret)) { put_task_struct(p); scx_task_iter_stop(&sti); scx_error(sch, "ops.init_task() failed (%d) for %s[%d]", @@ -6906,6 +6892,7 @@ static void scx_root_enable_workfn(struct kthread_work *work) goto err_disable_unlock_all; } + scx_set_task_state(p, SCX_TASK_INIT); scx_set_task_sched(p, sch); scx_set_task_state(p, SCX_TASK_READY); -- cgit v1.2.3 From cceb8fa9cb2cf98e31d81ecf6353b6ba5ac57744 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 10 May 2026 10:08:16 -1000 Subject: sched_ext: Replace SCX_TASK_OFF_TASKS flag with SCX_TASK_DEAD state SCX_TASK_OFF_TASKS marked tasks already through sched_ext_dead() so cgroup task iteration would skip them. This can be expressed better with a task state. Replace the flag with SCX_TASK_DEAD. scx_disable_and_exit_task() resets state to NONE on its way out, so sched_ext_dead() now sets DEAD after the wrapper returns. The validation matrix grows NONE -> DEAD, warns on DEAD -> NONE, and tightens READY's predecessor to INIT or ENABLED so the new DEAD value cannot silently transition to READY. Prepares for the following enable vs dead race fix. Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- include/linux/sched/ext.h | 9 +++++---- kernel/sched/ext.c | 17 +++++++++++------ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index adb9a4de068a..9f1a326ad03e 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -101,24 +101,25 @@ enum scx_ent_flags { SCX_TASK_DEQD_FOR_SLEEP = 1 << 3, /* last dequeue was for SLEEP */ SCX_TASK_SUB_INIT = 1 << 4, /* task being initialized for a sub sched */ SCX_TASK_IMMED = 1 << 5, /* task is on local DSQ with %SCX_ENQ_IMMED */ - SCX_TASK_OFF_TASKS = 1 << 6, /* removed from scx_tasks by sched_ext_dead() */ /* - * Bits 8 and 9 are used to carry task state: + * Bits 8 to 10 are used to carry task state: * * NONE ops.init_task() not called yet * INIT ops.init_task() succeeded, but task can be cancelled * READY fully initialized, but not in sched_ext * ENABLED fully initialized and in sched_ext + * DEAD terminal state set by sched_ext_dead() */ - SCX_TASK_STATE_SHIFT = 8, /* bits 8 and 9 are used to carry task state */ - SCX_TASK_STATE_BITS = 2, + SCX_TASK_STATE_SHIFT = 8, + SCX_TASK_STATE_BITS = 3, SCX_TASK_STATE_MASK = ((1 << SCX_TASK_STATE_BITS) - 1) << SCX_TASK_STATE_SHIFT, SCX_TASK_NONE = 0 << SCX_TASK_STATE_SHIFT, SCX_TASK_INIT = 1 << SCX_TASK_STATE_SHIFT, SCX_TASK_READY = 2 << SCX_TASK_STATE_SHIFT, SCX_TASK_ENABLED = 3 << SCX_TASK_STATE_SHIFT, + SCX_TASK_DEAD = 4 << SCX_TASK_STATE_SHIFT, /* * Bits 12 and 13 are used to carry reenqueue reason. In addition to diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 81841277a54f..2fc4a12711f9 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -723,17 +723,22 @@ static void scx_set_task_state(struct task_struct *p, u32 state) switch (state) { case SCX_TASK_NONE: + warn = prev_state == SCX_TASK_DEAD; break; case SCX_TASK_INIT: warn = prev_state != SCX_TASK_NONE; p->scx.flags |= SCX_TASK_RESET_RUNNABLE_AT; break; case SCX_TASK_READY: - warn = prev_state == SCX_TASK_NONE; + warn = !(prev_state == SCX_TASK_INIT || + prev_state == SCX_TASK_ENABLED); break; case SCX_TASK_ENABLED: warn = prev_state != SCX_TASK_READY; break; + case SCX_TASK_DEAD: + warn = prev_state != SCX_TASK_NONE; + break; default: WARN_ONCE(1, "sched_ext: Invalid task state %d -> %d for %s[%d]", prev_state, state, p->comm, p->pid); @@ -972,11 +977,11 @@ static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter) /* * cgroup_task_dead() removes the dead tasks from cset->tasks * after sched_ext_dead() and cgroup iteration may see tasks - * which already finished sched_ext_dead(). %SCX_TASK_OFF_TASKS - * is set by sched_ext_dead() under @p's rq lock. Test it to + * which already finished sched_ext_dead(). %SCX_TASK_DEAD is + * set by sched_ext_dead() under @p's rq lock. Test it to * avoid visiting tasks which are already dead from SCX POV. */ - if (p->scx.flags & SCX_TASK_OFF_TASKS) { + if (scx_get_task_state(p) == SCX_TASK_DEAD) { __scx_task_iter_rq_unlock(iter); continue; } @@ -3847,7 +3852,7 @@ void sched_ext_dead(struct task_struct *p) * @p is off scx_tasks and wholly ours. scx_root_enable()'s READY -> * ENABLED transitions can't race us. Disable ops for @p. * - * %SCX_TASK_OFF_TASKS synchronizes against cgroup task iteration - see + * %SCX_TASK_DEAD synchronizes against cgroup task iteration - see * scx_task_iter_next_locked(). NONE tasks need no marking: cgroup * iteration is only used from sub-sched paths, which require root * enabled. Root enable transitions every live task to at least READY. @@ -3858,7 +3863,7 @@ void sched_ext_dead(struct task_struct *p) rq = task_rq_lock(p, &rf); scx_disable_and_exit_task(scx_task_sched(p), p); - p->scx.flags |= SCX_TASK_OFF_TASKS; + scx_set_task_state(p, SCX_TASK_DEAD); task_rq_unlock(rq, p, &rf); } } -- cgit v1.2.3 From c941d7391f258d5d06e0f7e962a52f99a547a83e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 10 May 2026 10:08:16 -1000 Subject: sched_ext: Close root-enable vs sched_ext_dead() race with SCX_TASK_INIT_BEGIN scx_root_enable_workfn() drops the iter rq lock for ops.init_task() and a TASK_DEAD @p can fall through sched_ext_dead() in that window. The race hits when sched_ext_dead() observes SCX_TASK_INIT (the intermediate state before @p->scx.sched is published) and dereferences NULL via SCX_HAS_OP(NULL, exit_task), or observes SCX_TASK_NONE during the unlocked init window and skips cleanup so exit_task() never runs. Add SCX_TASK_INIT_BEGIN. The enable path writes NONE -> INIT_BEGIN under the iter rq lock, then takes the rq lock again after init to walk INIT_BEGIN -> INIT -> READY. sched_ext_dead() that wins the rq-lock race observes INIT_BEGIN and sets DEAD without calling into ops; the post-init recheck unwinds via scx_sub_init_cancel_task(). scx_fork() runs single-threaded against sched_ext_dead() (the task is not on scx_tasks until scx_post_fork() adds it) so its INIT_BEGIN -> INIT walk needs no rq-lock pairing; it rolls back to NONE on ops.init_task() failure. The validation matrix grows the INIT_BEGIN row and the INIT_BEGIN -> DEAD edge; INIT now requires INIT_BEGIN as the predecessor. scx_sub_disable()'s migration writes INIT_BEGIN as a synthetic predecessor to satisfy the tightened verification. The sub-sched paths still race with sched_ext_dead() during the unlocked init window. This will be fixed by the next patch. Reported-by: zhidao su Link: https://lore.kernel.org/all/20260429133155.3825247-1-suzhidao@xiaomi.com/ Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- include/linux/sched/ext.h | 10 +++++---- kernel/sched/ext.c | 56 +++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 9f1a326ad03e..2129e18ada58 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -106,6 +106,7 @@ enum scx_ent_flags { * Bits 8 to 10 are used to carry task state: * * NONE ops.init_task() not called yet + * INIT_BEGIN ops.init_task() in flight; see sched_ext_dead() * INIT ops.init_task() succeeded, but task can be cancelled * READY fully initialized, but not in sched_ext * ENABLED fully initialized and in sched_ext @@ -116,10 +117,11 @@ enum scx_ent_flags { SCX_TASK_STATE_MASK = ((1 << SCX_TASK_STATE_BITS) - 1) << SCX_TASK_STATE_SHIFT, SCX_TASK_NONE = 0 << SCX_TASK_STATE_SHIFT, - SCX_TASK_INIT = 1 << SCX_TASK_STATE_SHIFT, - SCX_TASK_READY = 2 << SCX_TASK_STATE_SHIFT, - SCX_TASK_ENABLED = 3 << SCX_TASK_STATE_SHIFT, - SCX_TASK_DEAD = 4 << SCX_TASK_STATE_SHIFT, + SCX_TASK_INIT_BEGIN = 1 << SCX_TASK_STATE_SHIFT, + SCX_TASK_INIT = 2 << SCX_TASK_STATE_SHIFT, + SCX_TASK_READY = 3 << SCX_TASK_STATE_SHIFT, + SCX_TASK_ENABLED = 4 << SCX_TASK_STATE_SHIFT, + SCX_TASK_DEAD = 5 << SCX_TASK_STATE_SHIFT, /* * Bits 12 and 13 are used to carry reenqueue reason. In addition to diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 2fc4a12711f9..29fa9ffe7c7b 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -725,8 +725,11 @@ static void scx_set_task_state(struct task_struct *p, u32 state) case SCX_TASK_NONE: warn = prev_state == SCX_TASK_DEAD; break; - case SCX_TASK_INIT: + case SCX_TASK_INIT_BEGIN: warn = prev_state != SCX_TASK_NONE; + break; + case SCX_TASK_INIT: + warn = prev_state != SCX_TASK_INIT_BEGIN; p->scx.flags |= SCX_TASK_RESET_RUNNABLE_AT; break; case SCX_TASK_READY: @@ -737,7 +740,8 @@ static void scx_set_task_state(struct task_struct *p, u32 state) warn = prev_state != SCX_TASK_READY; break; case SCX_TASK_DEAD: - warn = prev_state != SCX_TASK_NONE; + warn = !(prev_state == SCX_TASK_NONE || + prev_state == SCX_TASK_INIT_BEGIN); break; default: WARN_ONCE(1, "sched_ext: Invalid task state %d -> %d for %s[%d]", @@ -3753,9 +3757,12 @@ int scx_fork(struct task_struct *p, struct kernel_clone_args *kargs) #else struct scx_sched *sch = scx_root; #endif + scx_set_task_state(p, SCX_TASK_INIT_BEGIN); ret = __scx_init_task(sch, p, true); - if (unlikely(ret)) + if (unlikely(ret)) { + scx_set_task_state(p, SCX_TASK_NONE); return ret; + } scx_set_task_state(p, SCX_TASK_INIT); scx_set_task_sched(p, sch); } @@ -3856,13 +3863,18 @@ void sched_ext_dead(struct task_struct *p) * scx_task_iter_next_locked(). NONE tasks need no marking: cgroup * iteration is only used from sub-sched paths, which require root * enabled. Root enable transitions every live task to at least READY. + * + * %INIT_BEGIN means ops.init_task() is running for @p. Don't call + * into ops; transition to %DEAD so the post-init recheck unwinds + * via scx_sub_init_cancel_task(). */ if (scx_get_task_state(p) != SCX_TASK_NONE) { struct rq_flags rf; struct rq *rq; rq = task_rq_lock(p, &rf); - scx_disable_and_exit_task(scx_task_sched(p), p); + if (scx_get_task_state(p) != SCX_TASK_INIT_BEGIN) + scx_disable_and_exit_task(scx_task_sched(p), p); scx_set_task_state(p, SCX_TASK_DEAD); task_rq_unlock(rq, p, &rf); } @@ -5773,6 +5785,7 @@ static void scx_sub_disable(struct scx_sched *sch) * $p having already been initialized, and then enable. */ scx_disable_and_exit_task(sch, p); + scx_set_task_state(p, SCX_TASK_INIT_BEGIN); scx_set_task_state(p, SCX_TASK_INIT); scx_set_task_sched(p, parent); scx_set_task_state(p, SCX_TASK_READY); @@ -6878,6 +6891,9 @@ static void scx_root_enable_workfn(struct kthread_work *work) scx_task_iter_start(&sti, NULL); while ((p = scx_task_iter_next_locked(&sti))) { + struct rq_flags rf; + struct rq *rq; + /* * @p may already be dead, have lost all its usages counts and * be waiting for RCU grace period before being freed. @p can't @@ -6886,10 +6902,26 @@ static void scx_root_enable_workfn(struct kthread_work *work) if (!tryget_task_struct(p)) continue; + /* + * Set %INIT_BEGIN under the iter's rq lock so that a concurrent + * sched_ext_dead() does not call ops.exit_task() on @p while + * ops.init_task() is running. If sched_ext_dead() runs before + * this store, it has already removed @p from scx_tasks and the + * iter won't visit @p; if it runs after, it observes + * %INIT_BEGIN and transitions to %DEAD without calling ops, + * leaving the post-init recheck below to unwind. + */ + scx_set_task_state(p, SCX_TASK_INIT_BEGIN); scx_task_iter_unlock(&sti); ret = __scx_init_task(sch, p, false); + + rq = task_rq_lock(p, &rf); + if (unlikely(ret)) { + if (scx_get_task_state(p) != SCX_TASK_DEAD) + scx_set_task_state(p, SCX_TASK_NONE); + task_rq_unlock(rq, p, &rf); put_task_struct(p); scx_task_iter_stop(&sti); scx_error(sch, "ops.init_task() failed (%d) for %s[%d]", @@ -6897,10 +6929,20 @@ static void scx_root_enable_workfn(struct kthread_work *work) goto err_disable_unlock_all; } - scx_set_task_state(p, SCX_TASK_INIT); - scx_set_task_sched(p, sch); - scx_set_task_state(p, SCX_TASK_READY); + if (scx_get_task_state(p) == SCX_TASK_DEAD) { + /* + * sched_ext_dead() observed %INIT_BEGIN and set %DEAD. + * ops.exit_task() is owed to the sched __scx_init_task() + * ran against; call it now. + */ + scx_sub_init_cancel_task(sch, p); + } else { + scx_set_task_state(p, SCX_TASK_INIT); + scx_set_task_sched(p, sch); + scx_set_task_state(p, SCX_TASK_READY); + } + task_rq_unlock(rq, p, &rf); put_task_struct(p); } scx_task_iter_stop(&sti); -- cgit v1.2.3 From cd6aab736702f981ac4d128e04a4e33105ea797d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 10 May 2026 10:08:16 -1000 Subject: sched_ext: Close sub-sched init race with post-init DEAD recheck scx_sub_enable_workfn()'s init pass and scx_sub_disable() migration both drop the rq lock to call __scx_init_task() against the other sched. A TASK_DEAD @p can fall through sched_ext_dead() in that window. sched_ext_dead() runs ops.exit_task() on the sched @p was attached to, not on the sched whose init just completed, so the new allocation leaks. Reuse the DEAD signal set by sched_ext_dead(). After __scx_init_task() returns, take task_rq_lock(p) and check for DEAD; on hit, call scx_sub_init_cancel_task() against the sub sched the init ran for and drop @p; on miss, proceed as before. Reported-by: zhidao su Link: https://lore.kernel.org/all/20260429133155.3825247-1-suzhidao@xiaomi.com/ Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 29fa9ffe7c7b..6fbe3160eccd 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5777,6 +5777,21 @@ static void scx_sub_disable(struct scx_sched *sch) } rq = task_rq_lock(p, &rf); + + if (scx_get_task_state(p) == SCX_TASK_DEAD) { + /* + * sched_ext_dead() raced us between __scx_init_task() + * and this rq lock and ran exit_task() on @sch (the + * sched @p was on at that point), not on $parent. + * $parent's just-completed init is owed an exit_task() + * and we issue it here. + */ + scx_sub_init_cancel_task(parent, p); + task_rq_unlock(rq, p, &rf); + put_task_struct(p); + continue; + } + scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) { /* * $p is initialized for $parent and still attached to @@ -5791,8 +5806,8 @@ static void scx_sub_disable(struct scx_sched *sch) scx_set_task_state(p, SCX_TASK_READY); scx_enable_task(parent, p); } - task_rq_unlock(rq, p, &rf); + task_rq_unlock(rq, p, &rf); put_task_struct(p); } scx_task_iter_stop(&sti); @@ -7212,6 +7227,21 @@ static void scx_sub_enable_workfn(struct kthread_work *work) goto abort; rq = task_rq_lock(p, &rf); + + if (scx_get_task_state(p) == SCX_TASK_DEAD) { + /* + * sched_ext_dead() raced us between __scx_init_task() + * and this rq lock and ran exit_task() on $parent (the + * sched @p was on at that point), not on @sch. @sch's + * just-completed init is owed an exit_task() and we + * issue it here. + */ + scx_sub_init_cancel_task(sch, p); + task_rq_unlock(rq, p, &rf); + put_task_struct(p); + continue; + } + p->scx.flags |= SCX_TASK_SUB_INIT; task_rq_unlock(rq, p, &rf); -- cgit v1.2.3 From d3e73a0808ddfb91ac36cd548643cbbeb00ad4db Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 10 May 2026 10:08:16 -1000 Subject: sched_ext: Handle SCX_TASK_NONE in disable/switched_from paths scx_fail_parent() leaves cgroup tasks at (state=NONE, sched=parent, sched_class=ext) until the parent itself is torn down by the scx_error() it raised. When the later root_disable iterates them, two paths trip on NONE. scx_disable_and_exit_task() re-enters the wrapper at NONE: the inner switch returns early but the trailing scx_set_task_sched(p, NULL) clobbers the parent sched left by scx_fail_parent(), and scx_set_task_state(p, NONE) wastes a write on an already-NONE task. switched_from_scx() then calls scx_disable_task(), which WARNs on non-ENABLED state and writes state=READY, producing a NONE -> READY transition the validation matrix rejects. Treat NONE as "nothing to do" in both paths. Add a NONE early-return at the top of scx_disable_and_exit_task() and a parallel NONE check in switched_from_scx() next to task_dead_and_done(). Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 6fbe3160eccd..4efe0099f79a 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3703,6 +3703,15 @@ static void scx_sub_init_cancel_task(struct scx_sched *sch, struct task_struct * static void scx_disable_and_exit_task(struct scx_sched *sch, struct task_struct *p) { + /* + * %NONE means @p is already detached at the SCX level (e.g. handed + * back to the parent by scx_fail_parent() with no init to undo). + * Skip to avoid clobbering scx_task_sched() and writing %NONE again + * on a state that's already %NONE. + */ + if (scx_get_task_state(p) == SCX_TASK_NONE) + return; + __scx_disable_and_exit_task(sch, p); /* @@ -3921,6 +3930,16 @@ static void switched_from_scx(struct rq *rq, struct task_struct *p) if (task_dead_and_done(p)) return; + /* + * %NONE means SCX is no longer tracking @p at the task level (e.g. + * scx_fail_parent() handed @p back to the parent at NONE pending the + * parent's own teardown). There is nothing to disable; calling + * scx_disable_task() would WARN on the non-%ENABLED state and trigger a + * NONE -> READY validation failure. + */ + if (scx_get_task_state(p) == SCX_TASK_NONE) + return; + scx_disable_task(scx_task_sched(p), p); } -- cgit v1.2.3 From 5d6919055dec134de3c40167a490f33c74c12581 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 May 2026 14:08:09 -0700 Subject: Linux 7.1-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 93eded47ccd2..b7b80e84e1eb 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 7 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* -- cgit v1.2.3 From b9d16482bebdeded6e61891a5158b51f4ef04f5f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 5 May 2026 19:47:44 +0300 Subject: MAINTAINERS: ASoC/ti: Remove myself and add Sen Wang as maintainer As I cannot spend adequate time to fulfill my role as maintainer for the TI ASoC drivers, it is for the better if I resign and hand over the role to Sen Wang. Signed-off-by: Peter Ujfalusi Acked-by: Nishanth Menon Link: https://patch.msgid.link/20260505164744.16134-1-peter.ujfalusi@gmail.com Signed-off-by: Mark Brown --- MAINTAINERS | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index b2040011a386..c9495b60f31d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19448,7 +19448,6 @@ F: include/misc/ocxl* F: include/uapi/misc/ocxl.h OMAP AUDIO SUPPORT -M: Peter Ujfalusi M: Jarkko Nikula L: linux-sound@vger.kernel.org L: linux-omap@vger.kernel.org @@ -26351,7 +26350,7 @@ F: arch/xtensa/ F: drivers/irqchip/irq-xtensa-* TEXAS INSTRUMENTS ASoC DRIVERS -M: Peter Ujfalusi +M: Sen Wang L: linux-sound@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml @@ -26853,12 +26852,6 @@ S: Maintained F: Documentation/devicetree/bindings/iio/adc/ti,tsc2046.yaml F: drivers/iio/adc/ti-tsc2046.c -TI TWL4030 SERIES SOC CODEC DRIVER -M: Peter Ujfalusi -L: linux-sound@vger.kernel.org -S: Maintained -F: sound/soc/codecs/twl4030* - TI VPE/CAL DRIVERS M: Yemike Abhilash Chandra L: linux-media@vger.kernel.org -- cgit v1.2.3 From cb196d50a78ddae227f09b3cd0b145f74a70d241 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Fri, 8 May 2026 13:24:37 -0500 Subject: ASoC; dt-bindings: mediatek,mt8173-rt5650-rt5514: Fix mediatek,audio-codec constraints A phandle-array is really a matrix and needs constraints on the number of elements for both the inner and outer dimensions. Add the missing inner constraints. Fixes: 472d77bdc511 ("ASoC: dt-bindings: mediatek,mt8173-rt5650-rt5514: convert to DT schema") Signed-off-by: Rob Herring (Arm) Link: https://patch.msgid.link/20260508182438.1757394-1-robh@kernel.org Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/mediatek,mt8173-rt5650-rt5514.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/sound/mediatek,mt8173-rt5650-rt5514.yaml b/Documentation/devicetree/bindings/sound/mediatek,mt8173-rt5650-rt5514.yaml index ed698c9ff42b..becc7a11f8dc 100644 --- a/Documentation/devicetree/bindings/sound/mediatek,mt8173-rt5650-rt5514.yaml +++ b/Documentation/devicetree/bindings/sound/mediatek,mt8173-rt5650-rt5514.yaml @@ -18,7 +18,9 @@ properties: description: Phandles of rt5650 and rt5514 codecs items: - description: phandle of rt5650 codec + maxItems: 1 - description: phandle of rt5514 codec + maxItems: 1 mediatek,platform: $ref: /schemas/types.yaml#/definitions/phandle -- cgit v1.2.3 From 422bd00b71ab42163aa3b8f8370276fe4c1581e7 Mon Sep 17 00:00:00 2001 From: Krishnamoorthi M Date: Thu, 7 May 2026 23:30:51 +0530 Subject: spi: amd: Set correct bus number in ACPI probe path On platforms where the HID2 SPI controller (AMDI0063) is enumerated via ACPI instead of PCI, amd_spi_probe() unconditionally sets bus_num to 0, while the PCI probe path assigns bus_num 2 for HID2 controller. Align the ACPI probe path to use the same bus number so that userspace and SPI client drivers see a consistent bus assignment regardless of the enumeration method. Fixes: b644c2776652 ("spi: spi_amd: Add PCI-based driver for AMD HID2 SPI controller") Cc: stable@vger.kernel.org # v6.16+ Signed-off-by: Krishnamoorthi M Link: https://patch.msgid.link/20260507180051.4158674-1-krishnamoorthi.m@amd.com Signed-off-by: Mark Brown --- drivers/spi/spi-amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c index 4d1dce4f4974..71a6e5c475b0 100644 --- a/drivers/spi/spi-amd.c +++ b/drivers/spi/spi-amd.c @@ -868,7 +868,7 @@ static int amd_spi_probe(struct platform_device *pdev) dev_dbg(dev, "io_remap_address: %p\n", amd_spi->io_remap_addr); amd_spi->version = (uintptr_t)device_get_match_data(dev); - host->bus_num = 0; + host->bus_num = (amd_spi->version == AMD_HID2_SPI) ? 2 : 0; return amd_spi_probe_common(dev, host); } -- cgit v1.2.3 From cbdbfba9e8907bea923874d05d6a35ff429a5544 Mon Sep 17 00:00:00 2001 From: Ihor Matushchak Date: Fri, 8 May 2026 10:49:33 +0200 Subject: regulator: Kconfig: fix a typo in help Fixes a typo in Kconfig, 'protectorvia' -> 'protector via'. Signed-off-by: Ihor Matushchak Link: https://patch.msgid.link/20260508084933.4076-1-ihor.matushchak@foobox.net Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index d71dac9436e3..78076ac6eac4 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -757,7 +757,7 @@ config REGULATOR_MAX20086 select REGMAP_I2C help This driver controls a Maxim MAX20086-MAX20089 camera power - protectorvia I2C bus. The regulator has 2 or 4 outputs depending on + protector via I2C bus. The regulator has 2 or 4 outputs depending on the device model. This driver is only capable to turn on/off them. config REGULATOR_MAX20411 -- cgit v1.2.3 From ac2f21ceddeec5553285e9fc4837a1f23d5e6a37 Mon Sep 17 00:00:00 2001 From: Mac Chiang Date: Fri, 8 May 2026 18:42:37 +0800 Subject: ASoC: Intel: soc-acpi-intel-arl-match: Reorder ACPI machine tables When the SOF device driver enumerates the machine tables, it selects the entry with the most numbers of matched links in ascending order. Align the ordering with commit 08095e20995ad6e3648af7416c90163627fe7e44 ("ASoC: Intel: soc-acpi-intel-ptl-match: Sort ACPI link/machine tables"). Signed-off-by: Mac Chiang Signed-off-by: Bard Liao Link: https://patch.msgid.link/20260508104239.1247525-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-arl-match.c | 30 +++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-arl-match.c b/sound/soc/intel/common/soc-acpi-intel-arl-match.c index c952f7d2b2c0..cd4023ccadeb 100644 --- a/sound/soc/intel/common/soc-acpi-intel-arl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-arl-match.c @@ -483,12 +483,18 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[] = { .get_function_tplg_files = sof_sdw_get_tplg_files, }, { - .link_mask = BIT(0), - .links = arl_cs42l43_l0, + .link_mask = BIT(0) | BIT(2), + .links = arl_rt722_l0_rt1320_l2, .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-arl-cs42l43-l0.tplg", + .sof_tplg_filename = "sof-arl-rt722-l0_rt1320-l2.tplg", .get_function_tplg_files = sof_sdw_get_tplg_files, }, + { + .link_mask = BIT(0) | BIT(3), + .links = arl_rt711_l0_rt1316_l3, + .drv_name = "sof_sdw", + .sof_tplg_filename = "sof-arl-rt711-l0-rt1316-l3.tplg", + }, { .link_mask = BIT(2) | BIT(3), .links = arl_cs42l43_l2_cs35l56_l3, @@ -497,18 +503,12 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[] = { .get_function_tplg_files = sof_sdw_get_tplg_files, }, { - .link_mask = BIT(2), - .links = arl_cs42l43_l2, + .link_mask = BIT(0), + .links = arl_cs42l43_l0, .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-arl-cs42l43-l2.tplg", + .sof_tplg_filename = "sof-arl-cs42l43-l0.tplg", .get_function_tplg_files = sof_sdw_get_tplg_files, }, - { - .link_mask = BIT(0) | BIT(3), - .links = arl_rt711_l0_rt1316_l3, - .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-arl-rt711-l0-rt1316-l3.tplg", - }, { .link_mask = 0x1, /* link0 required */ .links = arl_rvp, @@ -522,10 +522,10 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[] = { .sof_tplg_filename = "sof-arl-rt711-l0.tplg", }, { - .link_mask = BIT(0) | BIT(2), - .links = arl_rt722_l0_rt1320_l2, + .link_mask = BIT(2), + .links = arl_cs42l43_l2, .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-arl-rt722-l0_rt1320-l2.tplg", + .sof_tplg_filename = "sof-arl-cs42l43-l2.tplg", .get_function_tplg_files = sof_sdw_get_tplg_files, }, {}, -- cgit v1.2.3 From 242200c297030d9bab62c0ea65f2094981bcf013 Mon Sep 17 00:00:00 2001 From: Gary C Wang Date: Fri, 8 May 2026 18:42:38 +0800 Subject: ASoC: soc-acpi-intel-arl-match: add rt712_l0_rt1320_l3 support Add support for using the rt712 multi-function codec on link 0 and the rt1320 amplifier on link 3 on ARL platforms. Signed-off-by: Gary C Wang Co-developed-by: Mac Chiang Signed-off-by: Mac Chiang Signed-off-by: Bard Liao Link: https://patch.msgid.link/20260508104239.1247525-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-arl-match.c | 41 +++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/sound/soc/intel/common/soc-acpi-intel-arl-match.c b/sound/soc/intel/common/soc-acpi-intel-arl-match.c index cd4023ccadeb..52c5b5719f51 100644 --- a/sound/soc/intel/common/soc-acpi-intel-arl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-arl-match.c @@ -8,6 +8,7 @@ #include #include #include +#include "soc-acpi-intel-sdca-quirks.h" #include "sof-function-topology-lib.h" static const struct snd_soc_acpi_endpoint single_endpoint = { @@ -237,6 +238,15 @@ static const struct snd_soc_acpi_adr_device rt722_0_agg_adr[] = { } }; +static const struct snd_soc_acpi_adr_device rt712_0_agg_adr[] = { + { + .adr = 0x000030025D071201ull, + .num_endpoints = ARRAY_SIZE(jack_amp_g1_dmic_endpoints), + .endpoints = jack_amp_g1_dmic_endpoints, + .name_prefix = "rt712" + } +}; + static const struct snd_soc_acpi_adr_device rt1316_3_single_adr[] = { { .adr = 0x000330025D131601ull, @@ -255,6 +265,15 @@ static const struct snd_soc_acpi_adr_device rt1320_2_single_adr[] = { } }; +static const struct snd_soc_acpi_adr_device rt1320_3_group1_adr[] = { + { + .adr = 0x000330025D132001ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "rt1320-1" + } +}; + static const struct snd_soc_acpi_link_adr arl_cs42l43_l0[] = { { .mask = BIT(0), @@ -404,6 +423,20 @@ static const struct snd_soc_acpi_link_adr arl_rt722_l0_rt1320_l2[] = { {} }; +static const struct snd_soc_acpi_link_adr arl_rt712_l0_rt1320_l3[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(rt712_0_agg_adr), + .adr_d = rt712_0_agg_adr, + }, + { + .mask = BIT(3), + .num_adr = ARRAY_SIZE(rt1320_3_group1_adr), + .adr_d = rt1320_3_group1_adr, + }, + {} +}; + static const struct snd_soc_acpi_codecs arl_essx_83x6 = { .num_codecs = 3, .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, @@ -495,6 +528,14 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[] = { .drv_name = "sof_sdw", .sof_tplg_filename = "sof-arl-rt711-l0-rt1316-l3.tplg", }, + { + .link_mask = BIT(0) | BIT(3), + .links = arl_rt712_l0_rt1320_l3, + .drv_name = "sof_sdw", + .machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb, + .sof_tplg_filename = "sof-arl-rt712-l0-rt1320-l3.tplg", + .get_function_tplg_files = sof_sdw_get_tplg_files, + }, { .link_mask = BIT(2) | BIT(3), .links = arl_cs42l43_l2_cs35l56_l3, -- cgit v1.2.3 From d714913b61d55b936e9060e51e7b78216d34f6b1 Mon Sep 17 00:00:00 2001 From: Jang Pyohwan Date: Sat, 9 May 2026 17:53:10 +0900 Subject: ASoC: Intel: soc-acpi: add LG Gram 16Z90U RT713 + single RT1320 quirk Add a SoundWire machine table entry for the LG Gram Pro 2026 (16Z90U-KU7BK), which has an unusual configuration: sdw:0:1:025d:1320:01 single stereo RT1320 SmartAmp on link 1 sdw:0:3:025d:0713:01 RT713 jack/headset codec on link 3 Existing rt713-rt1320 boards have two RT1320 amps on different links ("link_mask = BIT(1) | BIT(2) | BIT(3)"). The LG Gram uses a single stereo RT1320 chip, so the new entry uses "link_mask = BIT(1) | BIT(3)" with the existing rt1320_1_group2_adr structure, leaving the two-channel routing to the topology. The RT713 on this board does not expose a SMART_MIC function in ACPI, so the .machine_check callback used by the existing entries (snd_soc_acpi_intel_sdca_is_device_rt712_vb) would reject this board. Drop machine_check for the new entry; speaker output and the headset jack do not depend on the SMART_MIC presence check. The corresponding topology source has been submitted to the SOF project at https://github.com/thesofproject/sof/pull/10760 . The generated sof-ptl-rt713-l3-rt1320-l1-2ch.tplg and nhlt-sof-ptl-rt713-l3-rt1320-l1.bin will follow in linux-firmware once that lands. Tested on Ubuntu 26.04 with kernel 7.0.0-15: speaker (RT1320 stereo), headphone jack with auto-routing, headset mic, and the internal NHLT DMIC array all work via the UCM HiFi profile. Signed-off-by: Jang Pyohwan Link: https://patch.msgid.link/20260509175317.DnhjxHczQay7kkp5z6t4lg@vhgksl.daum.net Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-ptl-match.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index 3b7818355ff6..ad3af8834e43 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -493,6 +493,20 @@ static const struct snd_soc_acpi_link_adr ptl_sdw_rt713_vb_l3_rt1320_l12[] = { {} }; +static const struct snd_soc_acpi_link_adr ptl_sdw_rt713_vb_l3_rt1320_l1[] = { + { + .mask = BIT(3), + .num_adr = ARRAY_SIZE(rt713_vb_3_adr), + .adr_d = rt713_vb_3_adr, + }, + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(rt1320_1_group2_adr), + .adr_d = rt1320_1_group2_adr, + }, + {} +}; + static const struct snd_soc_acpi_link_adr ptl_sdw_rt712_vb_l2_rt1320_l1[] = { { .mask = BIT(2), @@ -578,6 +592,13 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_sdw_machines[] = { .sof_tplg_filename = "sof-ptl-rt713-l3-rt1320-l12.tplg", .get_function_tplg_files = sof_sdw_get_tplg_files, }, + { + .link_mask = BIT(1) | BIT(3), + .links = ptl_sdw_rt713_vb_l3_rt1320_l1, + .drv_name = "sof_sdw", + .sof_tplg_filename = "sof-ptl-rt713-l3-rt1320-l1.tplg", + .get_function_tplg_files = sof_sdw_get_tplg_files, + }, { .link_mask = BIT(1) | BIT(2) | BIT(3), .links = ptl_cs42l43_l2_cs35l56x6_l13, -- cgit v1.2.3 From 9c37daee7c17fa17e8d41089ee1f658b06cb672a Mon Sep 17 00:00:00 2001 From: Mac Chiang Date: Fri, 8 May 2026 17:32:23 +0800 Subject: ASoC: sdw_utils: Add quirk to ignore RT712 CODEC_MIC Some devices do not use CODEC_MIC but use the host PCH_DMIC instead. Add a quirk to skip the CODEC_MIC DAI when it is not present in disco table, ensuring the correct capture device is used. If CODEC_MIC is present, it continues to be used as default. Fixes: 9489db97f6f0 ("ASoC: sdw_utils: add SmartMic DAI for RT712 VB") Signed-off-by: Mac Chiang Signed-off-by: Bard Liao Link: https://patch.msgid.link/20260508093224.1246282-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_utils.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index 849ae876d7a4..863ded2ceda5 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -194,6 +194,8 @@ struct asoc_sdw_codec_info codec_info_list[] = { .dai_type = SOC_SDW_DAI_TYPE_MIC, .dailink = {SOC_SDW_UNUSED_DAI_ID, SOC_SDW_DMIC_DAI_ID}, .rtd_init = asoc_sdw_rt_dmic_rtd_init, + .quirk = SOC_SDW_CODEC_MIC, + .quirk_exclude = true, }, }, .dai_num = 3, -- cgit v1.2.3 From fa749a77bdc50f0d695aaf81f1bd55967d77d10f Mon Sep 17 00:00:00 2001 From: Mac Chiang Date: Fri, 8 May 2026 17:32:24 +0800 Subject: ASoC: sdw_utils: Add quirk to ignore RT721 CODEC_MIC Add a quirk to skip the CODEC_MIC DAI when it is not present. This ensures PCH_DMIC is used as the fallback; otherwise, CODEC_MIC remains the default. Fixes: 846a8d3cf3ba ("ASoC: Intel: soc-acpi-intel-ptl-match: Add rt721 support") Signed-off-by: Mac Chiang Signed-off-by: Bard Liao Link: https://patch.msgid.link/20260508093224.1246282-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_utils.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index 863ded2ceda5..be45a2e62d3e 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -503,6 +503,8 @@ struct asoc_sdw_codec_info codec_info_list[] = { .dai_type = SOC_SDW_DAI_TYPE_MIC, .dailink = {SOC_SDW_UNUSED_DAI_ID, SOC_SDW_DMIC_DAI_ID}, .rtd_init = asoc_sdw_rt_dmic_rtd_init, + .quirk = SOC_SDW_CODEC_MIC, + .quirk_exclude = true, }, }, .dai_num = 3, -- cgit v1.2.3 From 796ad622040f7f955ccc3973085e953415920496 Mon Sep 17 00:00:00 2001 From: Guopeng Zhang Date: Mon, 11 May 2026 09:31:50 +0800 Subject: cgroup/dmem: Return -ENOMEM on failed pool preallocation get_cg_pool_unlocked() handles allocation failures under dmemcg_lock by dropping the lock, preallocating a pool with GFP_KERNEL, and retrying the locked lookup and creation path. If the fallback allocation fails too, pool remains NULL. Since the loop condition is while (!pool), the function can keep retrying instead of propagating the allocation failure to the caller. Set pool to ERR_PTR(-ENOMEM) when the fallback allocation fails so the loop exits through the existing common return path. The callers already handle ERR_PTR() from get_cg_pool_unlocked(), so this restores the expected error path. Fixes: b168ed458dde ("kernel/cgroup: Add "dmem" memory accounting cgroup") Cc: stable@vger.kernel.org # v6.14+ Signed-off-by: Guopeng Zhang Signed-off-by: Tejun Heo --- kernel/cgroup/dmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c index 1ab1fb47f271..4753a67d0f0f 100644 --- a/kernel/cgroup/dmem.c +++ b/kernel/cgroup/dmem.c @@ -602,6 +602,7 @@ get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region) pool = NULL; continue; } + pool = ERR_PTR(-ENOMEM); } } -- cgit v1.2.3 From e32e6f02168f2ad7991eb5d160d312d2001520c8 Mon Sep 17 00:00:00 2001 From: Hongfu Li Date: Sat, 9 May 2026 16:03:28 +0800 Subject: selftests/cgroup: Fix cg_read_strcmp() empty string comparison cg_read_strcmp() allocated a buffer sized to strlen(expected) + 1, then passed it to read_text() which calls read(fd, buf, size-1). When comparing against an empty string (""), strlen("") = 0 gives a 1-byte buffer, and read() is asked to read 0 bytes. The file content is never actually read, so strcmp("", buf) always returns 0 regardless of the real content. This caused cg_test_proc_killed() to always report the cgroup as empty immediately, making OOM tests pass without verifying that processes were killed. Signed-off-by: Hongfu Li Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/lib/cgroup_util.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c index 6a7295347e90..42f54936f4bb 100644 --- a/tools/testing/selftests/cgroup/lib/cgroup_util.c +++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c @@ -106,8 +106,9 @@ int cg_read_strcmp(const char *cgroup, const char *control, /* Handle the case of comparing against empty string */ if (!expected) return -1; - else - size = strlen(expected) + 1; + + /* needs size > 1, otherwise cg_read() reads 0 bytes */ + size = (expected[0] == '\0') ? 2 : strlen(expected) + 1; buf = malloc(size); if (!buf) -- cgit v1.2.3 From 2a3d7256faf06d1a15bb5b07e851ac4e1680c26d Mon Sep 17 00:00:00 2001 From: Hongfu Li Date: Mon, 11 May 2026 09:39:57 +0800 Subject: selftests/cgroup: Fix string comparison in write_test Use string comparison (!=) instead of numeric comparison (-ne) for cpuset values like "0-1". For example: $ [[ "0-1" != "2-3" ]] && echo "true" || echo "false" true $ [[ "0-1" -ne "2-3" ]] && echo "true" || echo "false" false Signed-off-by: Hongfu Li Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpuset_v1_base.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh index 42a6628fb8bc..1c0444729e70 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh @@ -18,7 +18,7 @@ write_test() { echo "testing $interface $value" echo $value > $dir/$interface new=$(cat $dir/$interface) - [[ $value -ne $(cat $dir/$interface) ]] && { + [[ "$value" != "$new" ]] && { echo "$interface write $value failed: new:$new" exit 1 } -- cgit v1.2.3 From 3788e32516530dee66cf9186f846480a16799b05 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sun, 10 May 2026 19:52:11 +0200 Subject: selftests/sched_ext: Fix build error in dequeue selftest Building the dequeue selftest with newer compilers (e.g., gcc 16) triggers the following error: dequeue.c:28:22: error: variable 'sum' set but not used The 'volatile' qualifier prevents the writes from being optimized away, but does not silence the unused variable 'sum' is indeed only written and never read. Consume 'sum' via an empty asm() with a register input constraint. This forces the compiler to keep the accumulated value (preserving the CPU stress loop) and avoiding the build error. Fixes: 658ad2259b3e ("selftests/sched_ext: Add test to validate ops.dequeue() semantics") Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/dequeue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/sched_ext/dequeue.c b/tools/testing/selftests/sched_ext/dequeue.c index 4e93262703ca..383d06e972a4 100644 --- a/tools/testing/selftests/sched_ext/dequeue.c +++ b/tools/testing/selftests/sched_ext/dequeue.c @@ -33,6 +33,7 @@ static void worker_fn(int id) /* Do some work to trigger scheduling events */ for (j = 0; j < 10000; j++) sum += j; + asm volatile("" : : "r"(sum)); /* Sleep to trigger dequeue */ usleep(1000 + (id * 100)); -- cgit v1.2.3 From bbf30b383cf6e87f2fe57c292fbd640b1d88b4c3 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 11 May 2026 08:18:12 +0200 Subject: sched_ext: Fix ops->priv clobber on concurrent attach/detach Under heavy concurrent attach/detach operations, scx_claim_exit() can trigger a NULL pointer dereference. This can be reproduced running the reload_loop kselftests inside a virtme-ng session: $ vng -v -- ./tools/testing/selftests/sched_ext/runner -t reload_loop ... BUG: kernel NULL pointer dereference, address: 0000000000000400 RIP: 0010:scx_claim_exit+0x3b/0x120 Call Trace: bpf_scx_unreg+0x45/0xb0 bpf_struct_ops_map_link_dealloc+0x39/0x50 bpf_link_release+0x18/0x20 __fput+0x10b/0x2e0 __x64_sys_close+0x47/0xa0 The underlying race (diagnosed by Tejun Heo) is a stomp of @ops->priv, not a missing NULL check: T2 unreg(K) T1 reg(K) ----------- --------- sch = ops->priv = sch_b800 scx_disable; flush_disable_work [scx_root_disable: scx_root=NULL, mutex_unlock, state=DISABLED] mutex_lock; state ok scx_alloc_and_add_sched: ops->priv = sch_a800 scx_root = sch_a800; init=0 state=ENABLED; mutex_unlock [flush returns] RCU_INIT_POINTER(ops->priv, NULL) <-- clobbers sch_a800 kobject_put(sch_b800) T1 acquires scx_enable_mutex inside scx_root_disable()'s mutex_unlock window and starts a fresh attach on the same kdata, assigning sch_a800 to @ops->priv. T2 then continues out of scx_disable()/flush_disable_work and clobbers @ops->priv to NULL, leaking sch_a800; the bpf_link is gone but state stays SCX_ENABLED, so all future attaches fail with -EBUSY permanently. The next bpf_scx_unreg() on that kdata then reads NULL @ops->priv and dereferences it in scx_claim_exit(). Make @ops->priv the lifecycle binding: in scx_root_enable_workfn() and scx_sub_enable_workfn(), after the existing state check and still under scx_enable_mutex, refuse with -EBUSY if @ops->priv is non-NULL. This rejects an attempt to reuse a kdata that is still bound to a previous scheduler instance, closing the race without changing the unreg side. Fixes: 105dcd005be2 ("sched_ext: Introduce scx_prog_sched()") Suggested-by: Tejun Heo Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 4efe0099f79a..8e06694094d7 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6803,6 +6803,19 @@ static void scx_root_enable_workfn(struct kthread_work *work) goto err_unlock; } + /* + * @ops->priv binds @ops to its scx_sched instance. It is set here by + * scx_alloc_and_add_sched() and cleared at the tail of bpf_scx_unreg(), + * which runs after scx_root_disable() has dropped scx_enable_mutex. If + * it's still non-NULL here, a previous attachment on @ops has not + * finished tearing down; proceeding would let the in-flight unreg's + * RCU_INIT_POINTER(NULL) clobber the @ops->priv we are about to assign. + */ + if (rcu_access_pointer(ops->priv)) { + ret = -EBUSY; + goto err_unlock; + } + ret = alloc_kick_syncs(); if (ret) goto err_unlock; @@ -7120,6 +7133,12 @@ static void scx_sub_enable_workfn(struct kthread_work *work) goto out_unlock; } + /* See scx_root_enable_workfn() for the @ops->priv check. */ + if (rcu_access_pointer(ops->priv)) { + ret = -EBUSY; + goto out_unlock; + } + cgrp = cgroup_get_from_id(ops->sub_cgroup_id); if (IS_ERR(cgrp)) { ret = PTR_ERR(cgrp); -- cgit v1.2.3 From 8dfd3d8d74435344ee8dc9237596959c8b2a6cbe Mon Sep 17 00:00:00 2001 From: Eder Zulian Date: Fri, 10 Apr 2026 14:55:50 +0200 Subject: iommu/amd: Remove latent out-of-bounds access in IOMMU debugfs In iommu_mmio_write() and iommu_capability_write(), the variables dbg_mmio_offset and dbg_cap_offset are declared as int. However, they are populated using kstrtou32_from_user(). If a user provides a sufficiently large value, it can become a negative integer. Prior to this patch, the AMD IOMMU debugfs implementation was already protected by different mechanisms. 1. #define OFS_IN_SZ 8 ensures the user string <= 8 bytes, so e.g. 0xffffffff isn't a valid input. if (cnt > OFS_IN_SZ) return -EINVAL; 2. Implicit type promotion in iommu_mmio_write(), dbg_mmio_offset is int and iommu->mmio_phys_end is u64 if (dbg_mmio_offset > iommu->mmio_phys_end - sizeof(u64)) return -EINVAL; 3. The show handlers would currently catch the negative number and refuse to perform the read. Replace kstrtou32_from_user() with kstrtos32_from_user() to parse the input, and check for negative values to explicitly prevent out-of-bounds memory accesses directly in iommu_mmio_write() and iommu_capability_write(). Signed-off-by: Eder Zulian Fixes: 7a4ee419e8c1 ("iommu/amd: Add debugfs support to dump IOMMU MMIO registers") Cc: stable@vger.kernel.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd/debugfs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c index 4e66473d7cea..4c53b6361314 100644 --- a/drivers/iommu/amd/debugfs.c +++ b/drivers/iommu/amd/debugfs.c @@ -31,11 +31,12 @@ static ssize_t iommu_mmio_write(struct file *filp, const char __user *ubuf, if (cnt > OFS_IN_SZ) return -EINVAL; - ret = kstrtou32_from_user(ubuf, cnt, 0, &dbg_mmio_offset); + ret = kstrtos32_from_user(ubuf, cnt, 0, &dbg_mmio_offset); if (ret) return ret; - if (dbg_mmio_offset > iommu->mmio_phys_end - sizeof(u64)) + if (dbg_mmio_offset < 0 || dbg_mmio_offset > + iommu->mmio_phys_end - sizeof(u64)) return -EINVAL; iommu->dbg_mmio_offset = dbg_mmio_offset; @@ -71,12 +72,12 @@ static ssize_t iommu_capability_write(struct file *filp, const char __user *ubuf if (cnt > OFS_IN_SZ) return -EINVAL; - ret = kstrtou32_from_user(ubuf, cnt, 0, &dbg_cap_offset); + ret = kstrtos32_from_user(ubuf, cnt, 0, &dbg_cap_offset); if (ret) return ret; /* Capability register at offset 0x14 is the last IOMMU capability register. */ - if (dbg_cap_offset > 0x14) + if (dbg_cap_offset < 0 || dbg_cap_offset > 0x14) return -EINVAL; iommu->dbg_cap_offset = dbg_cap_offset; -- cgit v1.2.3 From 07d0f496fe7ec5abe3bee7e38be709521567bb33 Mon Sep 17 00:00:00 2001 From: "Jose Fernandez (Anthropic)" Date: Tue, 21 Apr 2026 19:26:13 +0000 Subject: iommu/amd: Bounds-check devid in __rlookup_amd_iommu() iommu_device_register() walks every device on the PCI bus via bus_for_each_dev() and calls amd_iommu_probe_device() for each. The inlined check_device() path computes the device's sbdf, calls rlookup_amd_iommu() to find the owning IOMMU, and only afterwards verifies devid <= pci_seg->last_bdf. __rlookup_amd_iommu() indexes rlookup_table[devid] with no bounds check of its own, so for a PCI device whose BDF is not described by the IVRS, the lookup reads past the end of the allocation before the caller's bounds check can run. This was harmless before commit e874c666b15b ("iommu/amd: Change rlookup, irq_lookup, and alias to use kvalloc()"): the table was a zeroed page-order allocation, so the over-read returned NULL and the caller's NULL check skipped the device. After that commit the table is a tight kvcalloc() and the over-read returns adjacent slab contents, which check_device() then dereferences as a struct amd_iommu *, causing a boot-time GPF. Seen on Google Compute Engine ct6e VMs, where the virtualized IVRS describes only the four TPU endpoints 00:04.0-07.0; the gVNIC at 00:08.0 (devid 0x40) indexes 56 bytes past the 456-byte allocation, into the adjacent kmalloc-512 slab object: pci 0000:00:04.0: Adding to iommu group 0 pci 0000:00:05.0: Adding to iommu group 1 pci 0000:00:06.0: Adding to iommu group 2 pci 0000:00:07.0: Adding to iommu group 3 Oops: general protection fault, probably for non-canonical address 0x3a64695f78746382: 0000 [#1] SMP NOPTI CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.18.22 #1 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 12/06/2025 RIP: 0010:amd_iommu_probe_device+0x54/0x3a0 Call Trace: __iommu_probe_device+0x107/0x520 probe_iommu_group+0x29/0x50 bus_for_each_dev+0x7e/0xe0 iommu_device_register+0xc9/0x240 iommu_go_to_state+0x9c0/0x1c60 amd_iommu_init+0x14/0x40 pci_iommu_init+0x16/0x60 do_one_initcall+0x47/0x2f0 Guard the array access in __rlookup_amd_iommu(). With the fix applied on 6.18.22, the gVNIC at 00:08.0 is skipped cleanly and the VM boots. Fixes: e874c666b15b ("iommu/amd: Change rlookup, irq_lookup, and alias to use kvalloc()") Cc: stable@vger.kernel.org Reported-by: Ziyuan Chen Tested-by: Ziyuan Chen Reviewed-by: Josef Bacik Assisted-by: Claude:unspecified Signed-off-by: Jose Fernandez (Anthropic) Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f78e23f03938..57dc8fabc7d9 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -351,8 +351,12 @@ static struct amd_iommu *__rlookup_amd_iommu(u16 seg, u16 devid) struct amd_iommu_pci_seg *pci_seg; for_each_pci_segment(pci_seg) { - if (pci_seg->id == seg) - return pci_seg->rlookup_table[devid]; + if (pci_seg->id != seg) + continue; + /* IVRS may not describe every device on the bus */ + if (devid > pci_seg->last_bdf) + return NULL; + return pci_seg->rlookup_table[devid]; } return NULL; } -- cgit v1.2.3 From d769711fcddd005f1e654b3bde547140917fe696 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 24 Apr 2026 18:15:20 -0700 Subject: iommu: Fix NULL group->domain dereference in pci_dev_reset_iommu_done() Local sashiko review pointed it out that group->domain could be NULL when a default domain fails to allocate during the first probe, which can crash at domain->ops->attach_dev dereference in __iommu_attach_device() invoked by pci_dev_reset_iommu_done(). pci_dev_reset_iommu_prepare() is fine as an old_domain pointer can be NULL. Skip the re-attach in pci_dev_reset_iommu_done() to fix the bug. Fixes: c279e83953d9 ("iommu: Introduce pci_dev_reset_iommu_prepare/done()") Cc: stable@vger.kernel.org Signed-off-by: Nicolin Chen Reviewed-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 61c12ba78206..b8847cc43e76 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -4073,8 +4073,13 @@ void pci_dev_reset_iommu_done(struct pci_dev *pdev) if (WARN_ON(!group->blocking_domain)) return; - /* Re-attach RID domain back to group->domain */ - if (group->domain != group->blocking_domain) { + /* + * Re-attach RID domain back to group->domain + * + * Leave the device parked in the blocking_domain if group->domain isn't + * initialized yet + */ + if (group->domain && group->domain != group->blocking_domain) { WARN_ON(__iommu_attach_device(group->domain, &pdev->dev, group->blocking_domain)); } -- cgit v1.2.3 From 834ab85aa96656f87bb215a8285d34af870f4b66 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 24 Apr 2026 18:15:21 -0700 Subject: iommu: Fix kdocs of pci_dev_reset_iommu_done() Remove the duplicated word. No functional change. Fixes: c279e83953d9 ("iommu: Introduce pci_dev_reset_iommu_prepare/done()") Reviewed-by: Shuai Xue Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Nicolin Chen Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index b8847cc43e76..66659e5d1ec2 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -4045,9 +4045,9 @@ EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_prepare); * @pdev: PCI device that has finished a reset routine * * After a PCIe device finishes a reset routine, it wants to restore its IOMMU - * IOMMU activity, including new translation as well as cache invalidation, by - * re-attaching all RID/PASID of the device's back to the domains retained in - * the core-level structure. + * activity, including new translation and cache invalidation, by re-attaching + * all RID/PASID of the device back to the domains retained in the core-level + * structure. * * Caller must pair it with a successful pci_dev_reset_iommu_prepare(). * -- cgit v1.2.3 From b296ca1fb43aa435edd86131f5230f70c03b2829 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 24 Apr 2026 18:15:22 -0700 Subject: iommu: Replace per-group resetting_domain with per-gdev blocked flag The core tracks device resetting states with a per-group resetting_domain, while a reset is actually per group-device. Such a mismatch might lead to confusion and even difficulty to untangle per-gdev handling requirement. Shuai found that cxl_reset_bus_function() calls pci_reset_bus_function() internally while both are calling pci_dev_reset_iommu_prepare/done(). And the solution requires the core to track at the group_device level as well. Introduce a 'blocked' flag to struct group_device, to allow a multi-device group to isolate concurrent device resets independently. As the reset routine is per gdev, it cannot clear group->resetting_domain without iterating over the device list to ensure no other device is being reset. Simplify it by replacing the resetting_domain with a 'recovery_cnt' in the struct iommu_group. No functional change. But this is essential to apply following bug fixes. Fixes: c279e83953d9 ("iommu: Introduce pci_dev_reset_iommu_prepare/done()") Cc: stable@vger.kernel.org Reported-by: Shuai Xue Closes: https://lore.kernel.org/all/absKsk7qQOwzhpzv@Asurada-Nvidia/ Reviewed-by: Shuai Xue Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Nicolin Chen Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 102 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 78 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 66659e5d1ec2..2515786d4156 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -62,14 +62,14 @@ struct iommu_group { int id; struct iommu_domain *default_domain; struct iommu_domain *blocking_domain; - /* - * During a group device reset, @resetting_domain points to the physical - * domain, while @domain points to the attached domain before the reset. - */ - struct iommu_domain *resetting_domain; struct iommu_domain *domain; struct list_head entry; unsigned int owner_cnt; + /* + * Number of devices in the group undergoing or awaiting recovery. + * If non-zero, concurrent domain attachments are rejected. + */ + unsigned int recovery_cnt; void *owner; }; @@ -77,12 +77,32 @@ struct group_device { struct list_head list; struct device *dev; char *name; + /* + * Device is blocked for a pending recovery while its group->domain is + * retained. This can happen when: + * - Device is undergoing a reset + */ + bool blocked; }; /* Iterate over each struct group_device in a struct iommu_group */ #define for_each_group_device(group, pos) \ list_for_each_entry(pos, &(group)->devices, list) +static struct group_device *__dev_to_gdev(struct device *dev) +{ + struct iommu_group *group = dev->iommu_group; + struct group_device *gdev; + + lockdep_assert_held(&group->mutex); + + for_each_group_device(group, gdev) { + if (gdev->dev == dev) + return gdev; + } + return NULL; +} + struct iommu_group_attribute { struct attribute attr; ssize_t (*show)(struct iommu_group *group, char *buf); @@ -2196,6 +2216,8 @@ EXPORT_SYMBOL_GPL(iommu_attach_device); int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) { + struct group_device *gdev; + /* * This is called on the dma mapping fast path so avoid locking. This is * racy, but we have an expectation that the driver will setup its DMAs @@ -2206,14 +2228,18 @@ int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) guard(mutex)(&dev->iommu_group->mutex); + gdev = __dev_to_gdev(dev); + if (WARN_ON(!gdev)) + return -ENODEV; + /* - * This is a concurrent attach during a device reset. Reject it until + * This is a concurrent attach during device recovery. Reject it until * pci_dev_reset_iommu_done() attaches the device to group->domain. * * Note that this might fail the iommu_dma_map(). But there's nothing * more we can do here. */ - if (dev->iommu_group->resetting_domain) + if (gdev->blocked) return -EBUSY; return __iommu_attach_device(domain, dev, NULL); } @@ -2270,19 +2296,24 @@ EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); struct iommu_domain *iommu_driver_get_domain_for_dev(struct device *dev) { struct iommu_group *group = dev->iommu_group; + struct group_device *gdev; lockdep_assert_held(&group->mutex); + gdev = __dev_to_gdev(dev); + if (WARN_ON(!gdev)) + return NULL; + /* * Driver handles the low-level __iommu_attach_device(), including the * one invoked by pci_dev_reset_iommu_done() re-attaching the device to * the cached group->domain. In this case, the driver must get the old - * domain from group->resetting_domain rather than group->domain. This + * domain from group->blocking_domain rather than group->domain. This * prevents it from re-attaching the device from group->domain (old) to * group->domain (new). */ - if (group->resetting_domain) - return group->resetting_domain; + if (gdev->blocked) + return group->blocking_domain; return group->domain; } @@ -2441,10 +2472,10 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group, return -EINVAL; /* - * This is a concurrent attach during a device reset. Reject it until + * This is a concurrent attach during device recovery. Reject it until * pci_dev_reset_iommu_done() attaches the device to group->domain. */ - if (group->resetting_domain) + if (group->recovery_cnt) return -EBUSY; /* @@ -3615,10 +3646,10 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, mutex_lock(&group->mutex); /* - * This is a concurrent attach during a device reset. Reject it until + * This is a concurrent attach during device recovery. Reject it until * pci_dev_reset_iommu_done() attaches the device to group->domain. */ - if (group->resetting_domain) { + if (group->recovery_cnt) { ret = -EBUSY; goto out_unlock; } @@ -3708,10 +3739,10 @@ int iommu_replace_device_pasid(struct iommu_domain *domain, mutex_lock(&group->mutex); /* - * This is a concurrent attach during a device reset. Reject it until + * This is a concurrent attach during device recovery. Reject it until * pci_dev_reset_iommu_done() attaches the device to group->domain. */ - if (group->resetting_domain) { + if (group->recovery_cnt) { ret = -EBUSY; goto out_unlock; } @@ -3982,12 +4013,12 @@ EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL"); * routine wants to block any IOMMU activity: translation and ATS invalidation. * * This function attaches the device's RID/PASID(s) the group->blocking_domain, - * setting the group->resetting_domain. This allows the IOMMU driver pausing any + * incrementing the group->recovery_cnt, to allow the IOMMU driver pausing any * IOMMU activity while leaving the group->domain pointer intact. Later when the * reset is finished, pci_dev_reset_iommu_done() can restore everything. * * Caller must use pci_dev_reset_iommu_prepare() with pci_dev_reset_iommu_done() - * before/after the core-level reset routine, to unset the resetting_domain. + * before/after the core-level reset routine, to decrement the recovery_cnt. * * Return: 0 on success or negative error code if the preparation failed. * @@ -4000,6 +4031,7 @@ EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL"); int pci_dev_reset_iommu_prepare(struct pci_dev *pdev) { struct iommu_group *group = pdev->dev.iommu_group; + struct group_device *gdev; unsigned long pasid; void *entry; int ret; @@ -4009,8 +4041,12 @@ int pci_dev_reset_iommu_prepare(struct pci_dev *pdev) guard(mutex)(&group->mutex); + gdev = __dev_to_gdev(&pdev->dev); + if (WARN_ON(!gdev)) + return -ENODEV; + /* Re-entry is not allowed */ - if (WARN_ON(group->resetting_domain)) + if (WARN_ON(gdev->blocked)) return -EBUSY; ret = __iommu_group_alloc_blocking_domain(group); @@ -4025,6 +4061,13 @@ int pci_dev_reset_iommu_prepare(struct pci_dev *pdev) return ret; } + /* + * Update gdev->blocked upon the domain change, as it is used to return + * the correct domain in iommu_driver_get_domain_for_dev() that might be + * called in a set_dev_pasid callback function. + */ + gdev->blocked = true; + /* * Stage PASID domains at blocking_domain while retaining pasid_array. * @@ -4035,7 +4078,7 @@ int pci_dev_reset_iommu_prepare(struct pci_dev *pdev) iommu_remove_dev_pasid(&pdev->dev, pasid, pasid_array_entry_to_domain(entry)); - group->resetting_domain = group->blocking_domain; + group->recovery_cnt++; return ret; } EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_prepare); @@ -4057,6 +4100,7 @@ EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_prepare); void pci_dev_reset_iommu_done(struct pci_dev *pdev) { struct iommu_group *group = pdev->dev.iommu_group; + struct group_device *gdev; unsigned long pasid; void *entry; @@ -4065,11 +4109,13 @@ void pci_dev_reset_iommu_done(struct pci_dev *pdev) guard(mutex)(&group->mutex); - /* pci_dev_reset_iommu_prepare() was bypassed for the device */ - if (!group->resetting_domain) + gdev = __dev_to_gdev(&pdev->dev); + if (WARN_ON(!gdev)) + return; + + if (!gdev->blocked) return; - /* pci_dev_reset_iommu_prepare() was not successfully called */ if (WARN_ON(!group->blocking_domain)) return; @@ -4084,6 +4130,13 @@ void pci_dev_reset_iommu_done(struct pci_dev *pdev) group->blocking_domain)); } + /* + * Update gdev->blocked upon the domain change, as it is used to return + * the correct domain in iommu_driver_get_domain_for_dev() that might be + * called in a set_dev_pasid callback function. + */ + gdev->blocked = false; + /* * Re-attach PASID domains back to the domains retained in pasid_array. * @@ -4095,7 +4148,8 @@ void pci_dev_reset_iommu_done(struct pci_dev *pdev) pasid_array_entry_to_domain(entry), group, pasid, group->blocking_domain)); - group->resetting_domain = NULL; + if (!WARN_ON(group->recovery_cnt == 0)) + group->recovery_cnt--; } EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_done); -- cgit v1.2.3 From 1615e8896a8f6d7b2adf6495e538a81bf6cea3e0 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 24 Apr 2026 18:15:23 -0700 Subject: iommu: Fix pasid attach in pci_dev_reset_iommu_prepare/done() Now the helpers handle per-gdev resets. Replace __iommu_set_group_pasid() with set_dev_pasid() accordingly, in the pci_dev_reset_iommu_done(). Also add max_pasids check as other callers. Fixes: c279e83953d9 ("iommu: Introduce pci_dev_reset_iommu_prepare/done()") Cc: stable@vger.kernel.org Reported-by: Shuai Xue Closes: https://lore.kernel.org/all/ad858513-09fc-455e-bbc5-fe38a225cc78@linux.alibaba.com/ Reviewed-by: Shuai Xue Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Reviewed-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 2515786d4156..221be84db5ad 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -4074,9 +4074,14 @@ int pci_dev_reset_iommu_prepare(struct pci_dev *pdev) * The pasid_array is mostly fenced by group->mutex, except one reader * in iommu_attach_handle_get(), so it's safe to read without xa_lock. */ - xa_for_each_start(&group->pasid_array, pasid, entry, 1) - iommu_remove_dev_pasid(&pdev->dev, pasid, - pasid_array_entry_to_domain(entry)); + if (pdev->dev.iommu->max_pasids > 0) { + xa_for_each_start(&group->pasid_array, pasid, entry, 1) { + struct iommu_domain *pasid_dom = + pasid_array_entry_to_domain(entry); + + iommu_remove_dev_pasid(&pdev->dev, pasid, pasid_dom); + } + } group->recovery_cnt++; return ret; @@ -4143,10 +4148,16 @@ void pci_dev_reset_iommu_done(struct pci_dev *pdev) * The pasid_array is mostly fenced by group->mutex, except one reader * in iommu_attach_handle_get(), so it's safe to read without xa_lock. */ - xa_for_each_start(&group->pasid_array, pasid, entry, 1) - WARN_ON(__iommu_set_group_pasid( - pasid_array_entry_to_domain(entry), group, pasid, - group->blocking_domain)); + if (pdev->dev.iommu->max_pasids > 0) { + xa_for_each_start(&group->pasid_array, pasid, entry, 1) { + struct iommu_domain *pasid_dom = + pasid_array_entry_to_domain(entry); + + WARN_ON(pasid_dom->ops->set_dev_pasid( + pasid_dom, &pdev->dev, pasid, + group->blocking_domain)); + } + } if (!WARN_ON(group->recovery_cnt == 0)) group->recovery_cnt--; -- cgit v1.2.3 From 0d5fd7a9323ce6bedd170e21e1e90b8904917c75 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 24 Apr 2026 18:15:24 -0700 Subject: iommu: Fix nested pci_dev_reset_iommu_prepare/done() Shuai found that cxl_reset_bus_function() calls pci_reset_bus_function() internally while both are calling pci_dev_reset_iommu_prepare/done(). As pci_dev_reset_iommu_prepare() doesn't support re-entry, the inner call will trigger a WARN_ON and return -EBUSY, resulting in failing the entire device reset. On the other hand, removing the outer calls in the PCI callers is unsafe. As pointed out by Kevin, device-specific quirks like reset_hinic_vf_dev() execute custom firmware waits after their inner pcie_flr() completes. If the IOMMU protection relies solely on the inner reset, the IOMMU will be unblocked prematurely while the device is still resetting. Instead, fix this by making pci_dev_reset_iommu_prepare/done() reentrant. Introduce gdev->reset_depth to handle the re-entries on the same device. Fixes: c279e83953d9 ("iommu: Introduce pci_dev_reset_iommu_prepare/done()") Cc: stable@vger.kernel.org Reported-by: Shuai Xue Closes: https://lore.kernel.org/all/absKsk7qQOwzhpzv@Asurada-Nvidia/ Suggested-by: Kevin Tian Reviewed-by: Shuai Xue Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Nicolin Chen Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 221be84db5ad..301c76c40e3d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -83,6 +83,7 @@ struct group_device { * - Device is undergoing a reset */ bool blocked; + unsigned int reset_depth; }; /* Iterate over each struct group_device in a struct iommu_group */ @@ -4045,20 +4046,23 @@ int pci_dev_reset_iommu_prepare(struct pci_dev *pdev) if (WARN_ON(!gdev)) return -ENODEV; - /* Re-entry is not allowed */ - if (WARN_ON(gdev->blocked)) - return -EBUSY; + if (gdev->reset_depth++) + return 0; ret = __iommu_group_alloc_blocking_domain(group); - if (ret) + if (ret) { + gdev->reset_depth--; return ret; + } /* Stage RID domain at blocking_domain while retaining group->domain */ if (group->domain != group->blocking_domain) { ret = __iommu_attach_device(group->blocking_domain, &pdev->dev, group->domain); - if (ret) + if (ret) { + gdev->reset_depth--; return ret; + } } /* @@ -4118,7 +4122,10 @@ void pci_dev_reset_iommu_done(struct pci_dev *pdev) if (WARN_ON(!gdev)) return; - if (!gdev->blocked) + /* Unbalanced done() calls would underflow the counter */ + if (WARN_ON(gdev->reset_depth == 0)) + return; + if (--gdev->reset_depth) return; if (WARN_ON(!group->blocking_domain)) -- cgit v1.2.3 From fc3523b16d2b4b88e61e69504b0ae0b18b869c8f Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 24 Apr 2026 18:15:25 -0700 Subject: iommu: Fix ATS invalidation timeouts during __iommu_remove_group_pasid() If a device is blocked, its PASID domains are already detached. Repeating iommu_remove_dev_pasid() is unnecessary and might trigger ATS invalidation timeouts. Skip the iommu_remove_dev_pasid() call upon gdev->blocked. Fixes: c279e83953d9 ("iommu: Introduce pci_dev_reset_iommu_prepare/done()") Cc: stable@vger.kernel.org Closes: https://sashiko.dev/#/patchset/20260407194644.171304-1-nicolinc%40nvidia.com Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Reviewed-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 301c76c40e3d..a5e3e90883f8 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3602,7 +3602,12 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, struct group_device *device; for_each_group_device(group, device) { - if (device->dev->iommu->max_pasids > 0) + /* + * A group-level detach cannot fail, even if there is a blocked + * device. In fact, blocked devices must be already detached for + * a pending device recovery. + */ + if (!device->blocked && device->dev->iommu->max_pasids > 0) iommu_remove_dev_pasid(device->dev, pasid, domain); } } -- cgit v1.2.3 From 5474e6e17a262db45c60575c73f70210f5c7001f Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 24 Apr 2026 18:15:26 -0700 Subject: iommu: Fix WARN_ON in __iommu_group_set_domain_nofail() due to reset In __iommu_group_set_domain_internal(), concurrent domain attachments are rejected when any device in the group is recovering. This is necessary to fence concurrent attachments to a multi-device group where devices might share the same RID due to PCI DMA alias quirks, but triggers the WARN_ON in __iommu_group_set_domain_nofail(). Other IOMMU_SET_DOMAIN_MUST_SUCCEED callers in detach/teardown paths, such as __iommu_group_set_core_domain and __iommu_release_dma_ownership, should not be rejected, as the domain would be freed anyway in these nofail paths while group->domain is still pointing to it. So pci_dev_reset_iommu_done() could trigger a UAF when re-attaching group->domain. Honor the IOMMU_SET_DOMAIN_MUST_SUCCEED flag, allowing the callers through the group->recovery_cnt fence, so as to update the group->domain pointer. Instead add a gdev->blocked check in the device iteration loop, to prevent any concurrent per-device detachment. Fixes: c279e83953d9 ("iommu: Introduce pci_dev_reset_iommu_prepare/done()") Cc: stable@vger.kernel.org Closes: https://sashiko.dev/#/patchset/20260407194644.171304-1-nicolinc%40nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Nicolin Chen Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index a5e3e90883f8..845284a9eeaf 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2474,9 +2474,10 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group, /* * This is a concurrent attach during device recovery. Reject it until - * pci_dev_reset_iommu_done() attaches the device to group->domain. + * pci_dev_reset_iommu_done() attaches the device to group->domain, if + * IOMMU_SET_DOMAIN_MUST_SUCCEED is not set. */ - if (group->recovery_cnt) + if (group->recovery_cnt && !(flags & IOMMU_SET_DOMAIN_MUST_SUCCEED)) return -EBUSY; /* @@ -2487,6 +2488,13 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group, */ result = 0; for_each_group_device(group, gdev) { + /* + * Device under recovery is attached to group->blocking_domain. + * Don't change that. pci_dev_reset_iommu_done() will re-attach + * its domain to the updated group->domain, after the recovery. + */ + if (gdev->blocked) + continue; ret = __iommu_device_set_domain(group, gdev->dev, new_domain, group->domain, flags); if (ret) { -- cgit v1.2.3 From 15dd29ca620648a18a0334a3f6b26af181154a23 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 24 Apr 2026 18:15:27 -0700 Subject: iommu: Warn on premature unblock during DMA aliased sibling reset When two aliased siblings are in the same iommu_group, they might share the same RID. The reset functions don't support this case, though it is unclear whether there is a real case of having an ATS capable device on a PCI/PCI-X bus. Theoretically, however, if two aliased devices are resetting concurrently, one might be unblocked prematurely in the middle of the reset by the other sibling who completes the reset first. This isn't a regression from this series but it's better to spit a warning, so we can know if such use case is common enough for us to make subsequent patches for its coverage. Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 845284a9eeaf..e7bd28cc77ee 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -4105,6 +4105,41 @@ int pci_dev_reset_iommu_prepare(struct pci_dev *pdev) } EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_prepare); +static int __group_device_cmp_dma_alias(struct pci_dev *dev, u16 alias, + void *data) +{ + return alias == *(u16 *)data; +} + +static int group_device_cmp_dma_alias(struct pci_dev *dev, u16 alias, + void *data) +{ + return pci_for_each_dma_alias(data, __group_device_cmp_dma_alias, + &alias); +} + +static bool group_device_dma_alias_is_blocked(struct iommu_group *group, + struct group_device *gdev) +{ + struct group_device *sibling; + + lockdep_assert_held(&group->mutex); + + if (!dev_is_pci(gdev->dev)) + return false; + + for_each_group_device(group, sibling) { + if (sibling == gdev || !sibling->blocked || + !dev_is_pci(sibling->dev)) + continue; + if (pci_for_each_dma_alias(to_pci_dev(gdev->dev), + group_device_cmp_dma_alias, + to_pci_dev(sibling->dev))) + return true; + } + return false; +} + /** * pci_dev_reset_iommu_done() - Restore IOMMU after a PCI device reset is done * @pdev: PCI device that has finished a reset routine @@ -4144,6 +4179,20 @@ void pci_dev_reset_iommu_done(struct pci_dev *pdev) if (WARN_ON(!group->blocking_domain)) return; + if (group_device_dma_alias_is_blocked(group, gdev)) { + /* + * FIXME: DMA aliased devices share the same RID, which would be + * convoluted to handle, as "gdev->blocked" is not sufficient: + * - "blocked" state is effectively shared across these devices + * - if the core skipped the blocking on the second device, the + * IOMMU driver's attachment state would diverge from the HW + * state + * For now, just warn and see whether real ATS use cases hit it. + */ + pci_warn(pdev, + "DMA-aliased sibling may be prematurely unblocked\n"); + } + /* * Re-attach RID domain back to group->domain * -- cgit v1.2.3 From 4a39eda5fdd867fc39f3c039714dd432cee00268 Mon Sep 17 00:00:00 2001 From: Guopeng Zhang Date: Sat, 9 May 2026 18:20:30 +0800 Subject: cgroup/cpuset: Reset DL migration state on can_attach() failure cpuset_can_attach() accumulates temporary SCHED_DEADLINE migration state in the destination cpuset while walking the taskset. If a later task_can_attach() or security_task_setscheduler() check fails, cgroup_migrate_execute() treats cpuset as the failing subsystem and does not call cpuset_cancel_attach() for it. The partially accumulated state is then left behind and can be consumed by a later attach, corrupting cpuset DL task accounting and pending DL bandwidth accounting. Reset the pending DL migration state from the common error exit when ret is non-zero. Successful can_attach() keeps the state for cpuset_attach() or cpuset_cancel_attach(). Fixes: 2ef269ef1ac0 ("cgroup/cpuset: Free DL BW in case can_attach() fails") Cc: stable@vger.kernel.org # v6.10+ Signed-off-by: Guopeng Zhang Signed-off-by: Tejun Heo Reviewed-by: Chen Ridong Reviewed-by: Waiman Long --- kernel/cgroup/cpuset.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index a48901a0416a..3fbf6e7f68c3 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3050,16 +3050,13 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus); if (unlikely(cpu >= nr_cpu_ids)) { - reset_migrate_dl_data(cs); ret = -EINVAL; goto out_unlock; } ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw); - if (ret) { - reset_migrate_dl_data(cs); + if (ret) goto out_unlock; - } cs->dl_bw_cpu = cpu; } @@ -3070,7 +3067,10 @@ out_success: * changes which zero cpus/mems_allowed. */ cs->attach_in_progress++; + out_unlock: + if (ret) + reset_migrate_dl_data(cs); mutex_unlock(&cpuset_mutex); return ret; } -- cgit v1.2.3 From 2cda2e10dc8343ae01eae9e999a876b7e7d37861 Mon Sep 17 00:00:00 2001 From: Naval Alcalá Date: Sat, 9 May 2026 10:43:44 +0800 Subject: iommu/vt-d: Disable DMAR for Intel Q35 IGFX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel Q35 integrated graphics (8086:29b2) exhibits broken DMAR behaviour similar to other G4x/GM45 devices for which DMAR is already disabled via quirks. When DMAR is enabled, the system may hard lock up during boot or early device initialization, requiring a reset. Add the missing PCI ID to the existing quirk list to disable DMAR for this device. Fixes: 1f76249cc3be ("iommu/vt-d: Declare Broadwell igfx dmar support snafu") Cc: stable@vger.kernel.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=201185 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=216064 Signed-off-by: Naval Alcalá Link: https://lore.kernel.org/r/20260410161622.13549-1-ari@naval.cat Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index c3d18cd77d2f..2a6b6813a78d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3937,6 +3937,9 @@ static void quirk_iommu_igfx(struct pci_dev *dev) disable_igfx_iommu = 1; } +/* Q35 integrated gfx dmar support is totally busted. */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x29b2, quirk_iommu_igfx); + /* G4x/GM45 integrated gfx dmar support is totally busted. */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx); -- cgit v1.2.3 From a6dea58d8625c06b9654c0555f101742481335c3 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Sat, 9 May 2026 10:43:45 +0800 Subject: iommu/vt-d: Fix oops due to out of scope access Below oops triggers when kill QEMU process: Oops: general protection fault, probably for non-canonical address 0x7fffffff844eaaa7: 0000 [#1] SMP NOPTI Call Trace: do_raw_spin_lock+0xaa/0xc0 _raw_spin_lock_irqsave+0x21/0x40 domain_remove_dev_pasid+0x52/0x160 intel_nested_set_dev_pasid+0x1b9/0x1e0 __iommu_set_group_pasid+0x56/0x120 pci_dev_reset_iommu_done+0xe3/0x180 pcie_flr+0x65/0x160 __pci_reset_function_locked+0x5b/0x120 vfio_pci_core_close_device+0x63/0xe0 [vfio_pci_core] vfio_df_close+0x4f/0xa0 vfio_df_unbind_iommufd+0x2d/0x60 vfio_device_fops_release+0x3e/0x40 __fput+0xe5/0x2c0 task_work_run+0x58/0xa0 do_exit+0x2c8/0x600 do_group_exit+0x2f/0xa0 get_signal+0x863/0x8c0 arch_do_signal_or_restart+0x24/0x100 exit_to_user_mode_loop+0x87/0x380 do_syscall_64+0x2ff/0x11e0 entry_SYSCALL_64_after_hwframe+0x76/0x7e The global static blocked domain is a dummy domain without corresponding dmar_domain structure, accessing beyond iommu_domain structure triggers oops easily. Fix it by return early in domain_remove_dev_pasid() like identity domain. Fixes: 7d0c9da6c150 ("iommu/vt-d: Add set_dev_pasid callback for dma domain") Cc: stable@vger.kernel.org Signed-off-by: Zhenzhong Duan Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20260421031347.1408890-1-zhenzhong.duan@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2a6b6813a78d..a4b123c33022 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3530,8 +3530,8 @@ void domain_remove_dev_pasid(struct iommu_domain *domain, if (!domain) return; - /* Identity domain has no meta data for pasid. */ - if (domain->type == IOMMU_DOMAIN_IDENTITY) + /* Identity domain and blocked domain have no meta data for pasid. */ + if (domain->type == IOMMU_DOMAIN_IDENTITY || domain->type == IOMMU_DOMAIN_BLOCKED) return; dmar_domain = to_dmar_domain(domain); -- cgit v1.2.3 From 79ea2feb917b05366b49d85573c9c5331f043b2c Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Sat, 9 May 2026 10:43:46 +0800 Subject: iommu/vt-d: Avoid NULL pointer dereference or refcount corruption Commit 60f030f7418d ("iommu/vt-d: Avoid use of NULL after WARN_ON_ONCE") fixed a NULL pointer dereference in an unlikely situation partly. If dev_pasid is not found in the dev_pasids list, it remains NULL. However, the teardown operations are executed unconditionally, this lead to a NULL pointer dereference or refcount corruption. If the domain was never attached to this IOMMU, info will be NULL, which would cause an immediate dereference when checking --info->refcnt. Even if info is not NULL, decrementing the refcount without having removed a valid PASID might unbalance the count. This could lead to premature dropping of the refcount to 0, potentially causing a use-after-free for the remaining active devices sharing the domain. Fix it by returning early if dev_pasid is NULL, before executing the teardown operations. Issue found by AI review and suggested by Kevin Tian. https://sashiko.dev/#/patchset/20260421031347.1408890-1-zhenzhong.duan%40intel.com Fixes: 60f030f7418d ("iommu/vt-d: Avoid use of NULL after WARN_ON_ONCE") Cc: stable@vger.kernel.org Suggested-by: Kevin Tian Signed-off-by: Zhenzhong Duan Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20260422033538.95000-1-zhenzhong.duan@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index a4b123c33022..4d0e65bc131d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3545,12 +3545,13 @@ void domain_remove_dev_pasid(struct iommu_domain *domain, } spin_unlock_irqrestore(&dmar_domain->lock, flags); + if (WARN_ON_ONCE(!dev_pasid)) + return; + cache_tag_unassign_domain(dmar_domain, dev, pasid); domain_detach_iommu(dmar_domain, iommu); - if (!WARN_ON_ONCE(!dev_pasid)) { - intel_iommu_debugfs_remove_dev_pasid(dev_pasid); - kfree(dev_pasid); - } + intel_iommu_debugfs_remove_dev_pasid(dev_pasid); + kfree(dev_pasid); } static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, -- cgit v1.2.3 From 7dbac7680eb629b3b4dc7e98c34f943b8814c0c8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 May 2026 21:23:28 +0800 Subject: xfrm: ipcomp: Free destination pages on acomp errors Move the out_free_req label up by a couple of lines so that the allocated dst SG list gets freed on error as well as success. Fixes: eb2953d26971 ("xfrm: ipcomp: Use crypto_acomp interface") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Reported-by: Yilin Zhu Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_ipcomp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 5f38dff16177..671d48f8c937 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -51,11 +51,15 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen) struct scatterlist *dsg; int len, dlen; - if (unlikely(err)) - goto out_free_req; + if (unlikely(!req)) + return err; extra = acomp_request_extra(req); dsg = extra->sg; + + if (unlikely(err)) + goto out_free_req; + dlen = req->dlen; pskb_trim_unique(skb, 0); @@ -84,10 +88,10 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen) skb_shinfo(skb)->nr_frags++; } while ((dlen -= len)); - for (; dsg; dsg = sg_next(dsg)) +out_free_req: + for (; dsg && sg_page(dsg); dsg = sg_next(dsg)) __free_page(sg_page(dsg)); -out_free_req: acomp_request_free(req); return err; } -- cgit v1.2.3 From 4c79fc2d598694bda845b46229c9d48b65042970 Mon Sep 17 00:00:00 2001 From: Raphael Zimmer Date: Wed, 22 Apr 2026 10:47:13 +0200 Subject: libceph: Fix potential out-of-bounds access in crush_decode() A message of type CEPH_MSG_OSD_MAP containing a crush map with at least one bucket has two fields holding the bucket algorithm. If the values in these two fields differ, an out-of-bounds access can occur. This is the case because the first algorithm field (alg) is used to allocate the correct amount of memory for a bucket of this type, while the second algorithm field inside the bucket (b->alg) is used in the subsequent processing. This patch fixes the issue by adding a check that compares alg and b->alg and aborts the processing in case they differ. Furthermore, b->alg is set to 0 in this case, because the destruction of the crush map also uses this field to determine the bucket type, which can again result in an out-of-bounds access when trying to free the memory pointed to by the fields of the bucket. To correctly free the memory allocated for the bucket in such a case, the corresponding call to kfree is moved from the algorithm-specific crush_destroy_bucket functions to the generic crush_destroy_bucket(). Cc: stable@vger.kernel.org Signed-off-by: Raphael Zimmer Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- net/ceph/crush/crush.c | 6 +----- net/ceph/osdmap.c | 4 ++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c index 254ded0b05f6..521aec1d5fc0 100644 --- a/net/ceph/crush/crush.c +++ b/net/ceph/crush/crush.c @@ -47,7 +47,6 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p) void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b) { kfree(b->h.items); - kfree(b); } void crush_destroy_bucket_list(struct crush_bucket_list *b) @@ -55,14 +54,12 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b) kfree(b->item_weights); kfree(b->sum_weights); kfree(b->h.items); - kfree(b); } void crush_destroy_bucket_tree(struct crush_bucket_tree *b) { kfree(b->h.items); kfree(b->node_weights); - kfree(b); } void crush_destroy_bucket_straw(struct crush_bucket_straw *b) @@ -70,14 +67,12 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b) kfree(b->straws); kfree(b->item_weights); kfree(b->h.items); - kfree(b); } void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b) { kfree(b->item_weights); kfree(b->h.items); - kfree(b); } void crush_destroy_bucket(struct crush_bucket *b) @@ -99,6 +94,7 @@ void crush_destroy_bucket(struct crush_bucket *b) crush_destroy_bucket_straw2((struct crush_bucket_straw2 *)b); break; } + kfree(b); } /** diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index c89e66d4fcb7..a87268058e61 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -516,6 +516,10 @@ static struct crush_map *crush_decode(void *pbyval, void *end) b->id = ceph_decode_32(p); b->type = ceph_decode_16(p); b->alg = ceph_decode_8(p); + if (b->alg != alg) { + b->alg = 0; + goto bad; + } b->hash = ceph_decode_8(p); b->weight = ceph_decode_32(p); b->size = ceph_decode_32(p); -- cgit v1.2.3 From 596f91294b351866956808b1ecb8dfae15382a6d Mon Sep 17 00:00:00 2001 From: Raphael Zimmer Date: Fri, 24 Apr 2026 15:37:37 +0200 Subject: libceph: Fix unnecessarily high ceph_decode_need() for uniform bucket In crush_decode_uniform_bucket(), the item_weight field of the bucket is set. This is a single field of type u32 since the uniform bucket uses the same weight for all items. The value in ceph_decode_need() is set to (1+b->h.size) * sizeof(u32), which is higher than actually needed. This patch removes the call to ceph_decode_need() with the unnecessarily high value and switches the subsequent operation from ceph_decode_32() to ceph_decode_32_safe(), which already includes the correct bounds check. Signed-off-by: Raphael Zimmer Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- net/ceph/osdmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index a87268058e61..669348d883f0 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -72,8 +72,7 @@ static int crush_decode_uniform_bucket(void **p, void *end, struct crush_bucket_uniform *b) { dout("crush_decode_uniform_bucket %p to %p\n", *p, end); - ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad); - b->item_weight = ceph_decode_32(p); + ceph_decode_32_safe(p, end, b->item_weight, bad); return 0; bad: return -EINVAL; -- cgit v1.2.3 From 5d3cc36b4e77a27ce7b686b7c59c7072bcb3fa8e Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Thu, 9 Apr 2026 12:26:02 -0700 Subject: ceph: fix a buffer leak in __ceph_setxattr() The old_blob in __ceph_setxattr() can store ci->i_xattrs.prealloc_blob value during the retry. However, it is never called the ceph_buffer_put() for the old_blob object. This patch fixes the issue of the buffer leak. Cc: stable@vger.kernel.org Signed-off-by: Viacheslav Dubeyko Reviewed-by: Alex Markuze Signed-off-by: Ilya Dryomov --- fs/ceph/xattr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 5f87f62091a1..c6fcbf428317 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -1294,6 +1294,7 @@ retry: do_sync: spin_unlock(&ci->i_ceph_lock); + ceph_buffer_put(old_blob); do_sync_unlocked: if (lock_snap_rwsem) up_read(&mdsc->snap_rwsem); -- cgit v1.2.3 From 0c22d9511cbde746622f8e4c11aaa63fe76d45f9 Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Thu, 9 Apr 2026 12:43:40 -0700 Subject: ceph: fix BUG_ON in __ceph_build_xattrs_blob() due to stale blob size The generic/642 test-case can reproduce the kernel crash: [40243.605254] ------------[ cut here ]------------ [40243.605956] kernel BUG at fs/ceph/xattr.c:918! [40243.607142] Oops: invalid opcode: 0000 [#1] SMP PTI [40243.608067] CPU: 7 UID: 0 PID: 498762 Comm: kworker/7:1 Not tainted 7.0.0-rc7+ #3 PREEMPT(full) [40243.609700] Hardware name: QEMU Ubuntu 25.10 PC v2 (i440FX + PIIX, + 10.1 machine, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [40243.611820] Workqueue: ceph-msgr ceph_con_workfn [40243.612715] RIP: 0010:__ceph_build_xattrs_blob+0x1b8/0x1e0 [40243.613731] Code: 0f 84 82 fe ff ff e9 cf 8e 56 ff 48 8d 65 e8 31 c0 5b 41 5c 41 5d 5d 31 d2 31 c9 31 f6 31 ff 45 31 c0 45 31 c9 c3 cc cc cc cc <0f> 0b 4c 8b 62 08 41 8b 85 24 07 00 00 49 83 c4 04 41 89 44 24 fc [40243.616888] RSP: 0018:ffffcc80c4d4b688 EFLAGS: 00010287 [40243.617773] RAX: 0000000000010026 RBX: 0000000000000001 RCX: 0000000000000000 [40243.618928] RDX: ffff8a773798dee0 RSI: 0000000000000000 RDI: 0000000000000000 [40243.620158] RBP: ffffcc80c4d4b6a0 R08: 0000000000000000 R09: 0000000000000000 [40243.621573] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8a75f3b58000 [40243.622907] R13: ffff8a75f3b58000 R14: 0000000000000080 R15: 000000000000bffd [40243.624054] FS: 0000000000000000(0000) GS:ffff8a787d1b4000(0000) knlGS:0000000000000000 [40243.625331] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [40243.626269] CR2: 000072f390b623c0 CR3: 000000011c02a003 CR4: 0000000000372ef0 [40243.627408] Call Trace: [40243.627839] [40243.628188] __prep_cap+0x3fd/0x4a0 [40243.628789] ? do_raw_spin_unlock+0x4e/0xe0 [40243.629474] ceph_check_caps+0x46a/0xc80 [40243.630094] ? __lock_acquire+0x4a2/0x2650 [40243.630773] ? find_held_lock+0x31/0x90 [40243.631347] ? handle_cap_grant+0x79f/0x1060 [40243.632068] ? lock_release+0xd9/0x300 [40243.632696] ? __mutex_unlock_slowpath+0x3e/0x340 [40243.633429] ? lock_release+0xd9/0x300 [40243.634052] handle_cap_grant+0xcf6/0x1060 [40243.634745] ceph_handle_caps+0x122b/0x2110 [40243.635415] mds_dispatch+0x5bd/0x2160 [40243.636034] ? ceph_con_process_message+0x65/0x190 [40243.636828] ? lock_release+0xd9/0x300 [40243.637431] ceph_con_process_message+0x7a/0x190 [40243.638184] ? kfree+0x311/0x4f0 [40243.638749] ? kfree+0x311/0x4f0 [40243.639268] process_message+0x16/0x1a0 [40243.639915] ? sg_free_table+0x39/0x90 [40243.640572] ceph_con_v2_try_read+0xf58/0x2120 [40243.641255] ? lock_acquire+0xc8/0x300 [40243.641863] ceph_con_workfn+0x151/0x820 [40243.642493] process_one_work+0x22f/0x630 [40243.643093] ? process_one_work+0x254/0x630 [40243.643770] worker_thread+0x1e2/0x400 [40243.644332] ? __pfx_worker_thread+0x10/0x10 [40243.645020] kthread+0x109/0x140 [40243.645560] ? __pfx_kthread+0x10/0x10 [40243.646125] ret_from_fork+0x3f8/0x480 [40243.646752] ? __pfx_kthread+0x10/0x10 [40243.647316] ? __pfx_kthread+0x10/0x10 [40243.647919] ret_from_fork_asm+0x1a/0x30 [40243.648556] [40243.648902] Modules linked in: overlay hctr2 libpolyval chacha libchacha adiantum libnh libpoly1305 essiv intel_rapl_msr intel_rapl_common intel_uncore_frequency_common skx_edac_common nfit kvm_intel kvm irqbypass joydev ghash_clmulni_intel aesni_intel rapl input_leds mac_hid psmouse vga16fb serio_raw vgastate floppy i2c_piix4 pata_acpi bochs qemu_fw_cfg i2c_smbus sch_fq_codel rbd dm_crypt msr parport_pc ppdev lp parport efi_pstore [40243.654766] ---[ end trace 0000000000000000 ]--- Commit d93231a6bc8a ("ceph: prevent a client from exceeding the MDS maximum xattr size") moved the required_blob_size computation to before the __build_xattrs() call, introducing a race. __build_xattrs() releases and reacquires i_ceph_lock during execution. In that window, handle_cap_grant() may update i_xattrs.blob with a newer MDS-provided blob and bump i_xattrs.version. When __build_xattrs() detects that index_version < version, it destroys and rebuilds the entire xattr rb-tree from the new blob, potentially increasing count, names_size, and vals_size. The prealloc_blob size check that follows still uses the stale required_blob_size computed before the rebuild, so it passes even when prealloc_blob is too small for the now-larger tree. After __set_xattr() adds one more xattr on top, __ceph_build_xattrs_blob() is called from the cap flush path and hits: BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len); Fix this by recomputing required_blob_size after __build_xattrs() returns, using the current tree state. Also re-validate against m_max_xattr_size to fall back to the sync path if the rebuilt tree now exceeds the MDS limit. Cc: stable@vger.kernel.org Fixes: d93231a6bc8a ("ceph: prevent a client from exceeding the MDS maximum xattr size") Signed-off-by: Viacheslav Dubeyko Reviewed-by: Alex Markuze Signed-off-by: Ilya Dryomov --- fs/ceph/xattr.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index c6fcbf428317..e773be07f767 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -1254,6 +1254,22 @@ retry: ceph_vinop(inode), name, ceph_cap_string(issued)); __build_xattrs(inode); + /* + * __build_xattrs() may have released and reacquired i_ceph_lock, + * during which handle_cap_grant() could have replaced i_xattrs.blob + * with a newer MDS-provided blob and bumped i_xattrs.version. If that + * caused __build_xattrs() to rebuild the rb-tree from the new blob, + * count/names_size/vals_size may now be larger than when + * required_blob_size was computed above. Recompute it here so the + * prealloc_blob size check below reflects the current tree state. + */ + required_blob_size = __get_required_blob_size(ci, name_len, val_len); + if (required_blob_size > mdsc->mdsmap->m_max_xattr_size) { + doutc(cl, "sync (size too large): %d > %llu\n", + required_blob_size, mdsc->mdsmap->m_max_xattr_size); + goto do_sync; + } + if (!ci->i_xattrs.prealloc_blob || required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { struct ceph_buffer *blob; -- cgit v1.2.3 From 821365487aa58d06bda65c676ba215d506ba9768 Mon Sep 17 00:00:00 2001 From: Raphael Zimmer Date: Tue, 28 Apr 2026 14:15:46 +0200 Subject: libceph: Fix potential out-of-bounds access in __ceph_x_decrypt() In __ceph_x_decrypt(), a part of the buffer p is interpreted as a ceph_x_encrypt_header, and the magic field of this struct is accessed. This happens without any guarantee that the buffer is large enough to hold this struct. The function parameter ciphertext_len represents the length of the ciphertext to decrypt and is guaranteed to be at most the remaining size of the allocated buffer p. However, this value is not necessarily greater than sizeof(ceph_x_encrypt_header). E.g., a message frame of type FRAME_TAG_AUTH_REPLY_MORE, that is just as long to hold the ciphertext at its end with a ciphertext_len of 8 or less, can trigger an out-of-bounds memory access when accessing hdr->magic. This patch fixes the issue by adding a check to ensure that the decrypted plaintext in the buffer is large enough to represent at least the ceph_x_encrypt_header. Cc: stable@vger.kernel.org Signed-off-by: Raphael Zimmer Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- net/ceph/auth_x.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 692e0b868822..9e64e82d0b63 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -115,6 +115,11 @@ static int __ceph_x_decrypt(const struct ceph_crypto_key *key, int usage_slot, if (ret) return ret; + if (plaintext_len < sizeof(*hdr)) { + pr_err("%s plaintext too small %d\n", __func__, plaintext_len); + return -EINVAL; + } + hdr = p + ceph_crypt_data_offset(key); if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) { pr_err("%s bad magic\n", __func__); -- cgit v1.2.3 From 10d9be401108a0fc8b3bc99ba07bdee8fff875ac Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Thu, 9 Apr 2026 11:33:23 -0700 Subject: ceph: add ceph_has_realms_with_quotas() check to ceph_quota_update_statfs() When MDS rejects a session, remove_session_caps() -> __ceph_remove_cap() -> ceph_change_snap_realm() clears i_snap_realm for every inode that loses its last cap. The realm is restored once caps are re-granted after reconnect. It is not a real error and this patch changes pr_err_ratelimited_client() on doutc(). Every quota methods ceph_quota_is_max_files_exceeded(), ceph_quota_is_max_bytes_exceeded(), ceph_quota_is_max_bytes_approaching() calls ceph_has_realms_with_quotas() check. This patch adds the missing ceph_has_realms_with_quotas() call into ceph_quota_update_statfs(). [ idryomov: add braces around both arms of multiline ifs ] Signed-off-by: Viacheslav Dubeyko Reviewed-by: Alex Markuze Signed-off-by: Ilya Dryomov --- fs/ceph/quota.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 4dc9426643e8..053d5bf0c9f0 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -228,12 +228,19 @@ static int get_quota_realm(struct ceph_mds_client *mdsc, struct inode *inode, restart: realm = ceph_inode(inode)->i_snap_realm; - if (realm) + if (realm) { ceph_get_snap_realm(mdsc, realm); - else - pr_err_ratelimited_client(cl, - "%p %llx.%llx null i_snap_realm\n", - inode, ceph_vinop(inode)); + } else { + /* + * i_snap_realm is NULL when all caps have been released, e.g. + * after an MDS session rejection. This is a transient state; + * the realm will be restored once caps are re-granted. + * Treat it as "no quota realm found". + */ + doutc(cl, "%p %llx.%llx null i_snap_realm\n", + inode, ceph_vinop(inode)); + } + while (realm) { bool has_inode; @@ -340,12 +347,19 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, down_read(&mdsc->snap_rwsem); restart: realm = ceph_inode(inode)->i_snap_realm; - if (realm) + if (realm) { ceph_get_snap_realm(mdsc, realm); - else - pr_err_ratelimited_client(cl, - "%p %llx.%llx null i_snap_realm\n", - inode, ceph_vinop(inode)); + } else { + /* + * i_snap_realm is NULL when all caps have been released, e.g. + * after an MDS session rejection. This is a transient state; + * the realm will be restored once caps are re-granted. + * Treat it as "quota not exceeded". + */ + doutc(cl, "%p %llx.%llx null i_snap_realm\n", + inode, ceph_vinop(inode)); + } + while (realm) { bool has_inode; @@ -496,6 +510,9 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) u64 total = 0, used, free; bool is_updated = false; + if (!ceph_has_realms_with_quotas(d_inode(fsc->sb->s_root))) + return false; + down_read(&mdsc->snap_rwsem); get_quota_realm(mdsc, d_inode(fsc->sb->s_root), QUOTA_GET_MAX_BYTES, &realm, true); -- cgit v1.2.3 From 544576f0f05c4a759806acddfaaeb686f14fb4b0 Mon Sep 17 00:00:00 2001 From: Hristo Venev Date: Mon, 4 May 2026 18:54:45 +0300 Subject: ceph: put folios not suitable for writeback The batch holds references to the folios (see `filemap_get_folios`, `folio_batch_release`), so we need to `folio_put` the folios we remove. Tested on v6.18. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/74156 Signed-off-by: Hristo Venev Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 1454760332ff..0a86f672cc09 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1336,6 +1336,7 @@ void ceph_process_folio_batch(struct address_space *mapping, ceph_wbc, folio); if (rc == -ENODATA) { folio_unlock(folio); + folio_put(folio); ceph_wbc->fbatch.folios[i] = NULL; continue; } else if (rc == -E2BIG) { @@ -1346,6 +1347,7 @@ void ceph_process_folio_batch(struct address_space *mapping, if (!folio_clear_dirty_for_io(folio)) { doutc(cl, "%p !folio_clear_dirty_for_io\n", folio); folio_unlock(folio); + folio_put(folio); ceph_wbc->fbatch.folios[i] = NULL; continue; } -- cgit v1.2.3 From 86ecb1c1a1f5c1bf4a45b91f54f8220c3121bd3b Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 11 May 2026 10:31:30 +0200 Subject: sched_ext: Clear ops->priv on scx_alloc_and_add_sched() error paths scx_alloc_and_add_sched() can fail after @sch has been assigned to ops->priv. In those cases @sch is torn down (either via kfree() through the err_free_* chain or via kobject_put() -> scx_kobj_release() -> RCU work), but @ops->priv is left pointing at the about-to-be-freed pointer. With the recent -EBUSY gate in scx_root_enable_workfn() and scx_sub_enable_workfn() that rejects an attach when @ops->priv is still non-NULL, see commit bbf30b383cf6 ("sched_ext: Fix ops->priv clobber on concurrent attach/detach"), a dangling @ops->priv permanently locks the kdata out: every future attach attempt sees a stale binding and returns -EBUSY even though no scheduler is actually attached. Clear @ops->priv on the post-assign failure paths so that the kdata returns to its pre-attach state when the function returns ERR_PTR(). Fixes: bbf30b383cf6 ("sched_ext: Fix ops->priv clobber on concurrent attach/detach") Suggested-by: Tejun Heo Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 8e06694094d7..1efd5d82b08b 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6670,6 +6670,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops, ret = kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root"); if (ret < 0) { + RCU_INIT_POINTER(ops->priv, NULL); kobject_put(&sch->kobj); return ERR_PTR(ret); } @@ -6677,6 +6678,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops, if (ops->sub_attach) { sch->sub_kset = kset_create_and_add("sub", NULL, &sch->kobj); if (!sch->sub_kset) { + RCU_INIT_POINTER(ops->priv, NULL); kobject_put(&sch->kobj); return ERR_PTR(-ENOMEM); } @@ -6684,6 +6686,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops, #else /* CONFIG_EXT_SUB_SCHED */ ret = kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root"); if (ret < 0) { + RCU_INIT_POINTER(ops->priv, NULL); kobject_put(&sch->kobj); return ERR_PTR(ret); } @@ -6692,6 +6695,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops, #ifdef CONFIG_EXT_SUB_SCHED err_free_lb_resched: + RCU_INIT_POINTER(ops->priv, NULL); free_cpumask_var(sch->bypass_lb_resched_cpumask); #endif err_free_lb_cpumask: -- cgit v1.2.3 From 01deda0152066c6c955f0619114ea6afa070aaec Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 10 May 2026 19:16:56 -0400 Subject: thunderbolt: property: Reject u32 wrap in tb_property_entry_valid() entry->value is u32 and entry->length is u16; the sum is performed in u32 and wraps. A malicious XDomain peer can pick value = 0xffffff00, length = 0x100 so the sum 0x100000000 wraps to 0 and passes the > block_len check. tb_property_parse() then passes entry->value to parse_dwdata() as a dword offset into the property block, reading attacker-directed memory far past the allocation. For TEXT-typed entries with the "deviceid" or "vendorid" keys this lands in xd->device_name / xd->vendor_name and is readable back via the per-XDomain device_name / vendor_name sysfs attributes; the leak is NUL-bounded (kstrdup() stops at the first zero byte) and untargeted (the attacker picks a delta, not an absolute address). DATA-typed entries are parsed into property->value.data but not generically surfaced to userspace. Use check_add_overflow() so a wrapped sum is rejected. Fixes: cdae7c07e3e3 ("thunderbolt: Add support for XDomain properties") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-6 Assisted-by: Codex:gpt-5-4 Signed-off-by: Michael Bommarito Signed-off-by: Mika Westerberg --- drivers/thunderbolt/property.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/thunderbolt/property.c b/drivers/thunderbolt/property.c index 50cbfc92fe65..29cd60c11ac4 100644 --- a/drivers/thunderbolt/property.c +++ b/drivers/thunderbolt/property.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -52,13 +53,16 @@ static inline void format_dwdata(void *dst, const void *src, size_t dwords) static bool tb_property_entry_valid(const struct tb_property_entry *entry, size_t block_len) { + u32 end; + switch (entry->type) { case TB_PROPERTY_TYPE_DIRECTORY: case TB_PROPERTY_TYPE_DATA: case TB_PROPERTY_TYPE_TEXT: if (entry->length > block_len) return false; - if (entry->value + entry->length > block_len) + if (check_add_overflow(entry->value, entry->length, &end) || + end > block_len) return false; break; -- cgit v1.2.3 From de21b59c29e31c5108ddc04210631bbfab81b997 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 10 May 2026 19:16:57 -0400 Subject: thunderbolt: property: Reject dir_len < 4 to prevent size_t underflow On the non-root path, __tb_property_parse_dir() takes dir_len from entry->length (u16 widened to size_t). Two distinct OOB conditions follow when entry->length < 4: 1. The non-root path begins with kmemdup(&block[dir_offset], sizeof(*dir->uuid), ...) which always reads 4 dwords from dir_offset. tb_property_entry_valid() only enforces dir_offset + entry->length <= block_len, so a crafted entry with dir_offset close to the end of the property block and entry->length in 0..3 passes that gate but lets the UUID copy run off the block (e.g. dir_offset = 497, dir_len = 3 in a 500-dword block reads block[497..501]). 2. After the kmemdup, content_len = dir_len - 4 underflows size_t to ~SIZE_MAX, nentries becomes SIZE_MAX / 4, and the entry walk runs OOB on each iteration until an entry fails validation or the kernel oopses on an unmapped page. Reject dir_len < 4 on the non-root path *before* the UUID kmemdup, which closes both holes. Also move INIT_LIST_HEAD(&dir->properties) up to immediately after the dir allocation so the new error-return path (and the existing uuid-alloc failure path) calling tb_property_free_dir() sees a walkable list rather than the zero-initialized NULL next/prev that list_for_each_entry_safe() would oops on. Fixes: cdae7c07e3e3 ("thunderbolt: Add support for XDomain properties") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-6 Assisted-by: Codex:gpt-5-4 Signed-off-by: Michael Bommarito Signed-off-by: Mika Westerberg --- drivers/thunderbolt/property.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/property.c b/drivers/thunderbolt/property.c index 29cd60c11ac4..74c92f9801ff 100644 --- a/drivers/thunderbolt/property.c +++ b/drivers/thunderbolt/property.c @@ -174,10 +174,16 @@ static struct tb_property_dir *__tb_property_parse_dir(const u32 *block, if (!dir) return NULL; + INIT_LIST_HEAD(&dir->properties); + if (is_root) { content_offset = dir_offset + 2; content_len = dir_len; } else { + if (dir_len < 4) { + tb_property_free_dir(dir); + return NULL; + } dir->uuid = kmemdup(&block[dir_offset], sizeof(*dir->uuid), GFP_KERNEL); if (!dir->uuid) { @@ -191,8 +197,6 @@ static struct tb_property_dir *__tb_property_parse_dir(const u32 *block, entries = (const struct tb_property_entry *)&block[content_offset]; nentries = content_len / (sizeof(*entries) / 4); - INIT_LIST_HEAD(&dir->properties); - for (i = 0; i < nentries; i++) { struct tb_property *property; -- cgit v1.2.3 From 928abe19fbf0127003abcb1ea69cabc1c897d0ab Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 10 May 2026 19:16:58 -0400 Subject: thunderbolt: property: Cap recursion depth in __tb_property_parse_dir() A DIRECTORY entry's value field is used as the dir_offset for a recursive call into __tb_property_parse_dir() with no depth counter. A crafted peer that chains DIRECTORY entries into a back-reference loop drives the parser until the kernel stack is exhausted and the guard page fires. Any untrusted XDomain peer (cable, dock, in-line inspector, adjacent host) that reaches the PROPERTIES_REQUEST control-plane exchange can trigger this without authentication. Thread a depth counter through tb_property_parse() and __tb_property_parse_dir(), and reject blocks that exceed TB_PROPERTY_MAX_DEPTH = 8. That is comfortably larger than any observed legitimate XDomain layout. Operators who do not need XDomain host-to-host discovery can disable the path entirely with thunderbolt.xdomain=0 on the kernel command line. Fixes: cdae7c07e3e3 ("thunderbolt: Add support for XDomain properties") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-6 Assisted-by: Codex:gpt-5-4 Signed-off-by: Michael Bommarito Signed-off-by: Mika Westerberg --- drivers/thunderbolt/property.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/thunderbolt/property.c b/drivers/thunderbolt/property.c index 74c92f9801ff..da2c59a17db5 100644 --- a/drivers/thunderbolt/property.c +++ b/drivers/thunderbolt/property.c @@ -35,10 +35,11 @@ struct tb_property_dir_entry { }; #define TB_PROPERTY_ROOTDIR_MAGIC 0x55584401 +#define TB_PROPERTY_MAX_DEPTH 8 static struct tb_property_dir *__tb_property_parse_dir(const u32 *block, size_t block_len, unsigned int dir_offset, size_t dir_len, - bool is_root); + bool is_root, unsigned int depth); static inline void parse_dwdata(void *dst, const void *src, size_t dwords) { @@ -97,7 +98,8 @@ tb_property_alloc(const char *key, enum tb_property_type type) } static struct tb_property *tb_property_parse(const u32 *block, size_t block_len, - const struct tb_property_entry *entry) + const struct tb_property_entry *entry, + unsigned int depth) { char key[TB_PROPERTY_KEY_SIZE + 1]; struct tb_property *property; @@ -118,7 +120,7 @@ static struct tb_property *tb_property_parse(const u32 *block, size_t block_len, switch (property->type) { case TB_PROPERTY_TYPE_DIRECTORY: dir = __tb_property_parse_dir(block, block_len, entry->value, - entry->length, false); + entry->length, false, depth + 1); if (!dir) { kfree(property); return NULL; @@ -163,13 +165,17 @@ static struct tb_property *tb_property_parse(const u32 *block, size_t block_len, } static struct tb_property_dir *__tb_property_parse_dir(const u32 *block, - size_t block_len, unsigned int dir_offset, size_t dir_len, bool is_root) + size_t block_len, unsigned int dir_offset, size_t dir_len, bool is_root, + unsigned int depth) { const struct tb_property_entry *entries; size_t i, content_len, nentries; unsigned int content_offset; struct tb_property_dir *dir; + if (depth > TB_PROPERTY_MAX_DEPTH) + return NULL; + dir = kzalloc_obj(*dir); if (!dir) return NULL; @@ -200,7 +206,7 @@ static struct tb_property_dir *__tb_property_parse_dir(const u32 *block, for (i = 0; i < nentries; i++) { struct tb_property *property; - property = tb_property_parse(block, block_len, &entries[i]); + property = tb_property_parse(block, block_len, &entries[i], depth); if (!property) { tb_property_free_dir(dir); return NULL; @@ -239,7 +245,7 @@ struct tb_property_dir *tb_property_parse_dir(const u32 *block, return NULL; return __tb_property_parse_dir(block, block_len, 0, rootdir->length, - true); + true, 0); } /** -- cgit v1.2.3 From 657b594b2084b39a4bc6d8493aa2140cb00cea49 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Thu, 7 May 2026 16:46:29 +0900 Subject: fprobe: Fix unregister_fprobe() to wait for RCU grace period Commit 4346ba1604093 ("fprobe: Rewrite fprobe on function-graph tracer") changed fprobe to register struct fprobe to an rcu-hlist, but it forgot to wait for RCU GP. Thus there can be use-after-free if the fprobe is released right after unregistering. This can be happened on fprobe event and sample module code. To fix this issue, add synchronize_rcu() in unregister_fprobe(). Note that BPF is OK because fprobe is used as a part of bpf_kprobe_multi_link. This unregisters its fprobe in bpf_kprobe_multi_link_release() and it is deallocated via bpf_kprobe_multi_link_dealloc(), which is invoked from bpf_link_defer_dealloc_rcu_gp() RCU callback. For BPF, this also introduced unregister_fprobe_async() which does NOT wait for RCU grace priod. Link: https://lore.kernel.org/all/177813998919.256460.2809243930741138224.stgit@mhiramat.tok.corp.google.com/ Fixes: 4346ba1604093 ("fprobe: Rewrite fprobe on function-graph tracer") Signed-off-by: Masami Hiramatsu (Google) --- include/linux/fprobe.h | 5 +++++ kernel/trace/bpf_trace.c | 3 ++- kernel/trace/fprobe.c | 23 +++++++++++++++++++++-- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h index 0a3bcd1718f3..be1b38c981d4 100644 --- a/include/linux/fprobe.h +++ b/include/linux/fprobe.h @@ -94,6 +94,7 @@ int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num); int register_fprobe_syms(struct fprobe *fp, const char **syms, int num); int unregister_fprobe(struct fprobe *fp); +int unregister_fprobe_async(struct fprobe *fp); bool fprobe_is_registered(struct fprobe *fp); int fprobe_count_ips_from_filter(const char *filter, const char *notfilter); #else @@ -113,6 +114,10 @@ static inline int unregister_fprobe(struct fprobe *fp) { return -EOPNOTSUPP; } +static inline int unregister_fprobe_async(struct fprobe *fp) +{ + return -EOPNOTSUPP; +} static inline bool fprobe_is_registered(struct fprobe *fp) { return false; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index af7079aa0f36..a02bd258677e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2384,7 +2384,8 @@ static void bpf_kprobe_multi_link_release(struct bpf_link *link) struct bpf_kprobe_multi_link *kmulti_link; kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); - unregister_fprobe(&kmulti_link->fp); + /* Don't wait for RCU GP here. */ + unregister_fprobe_async(&kmulti_link->fp); kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt); } diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index cc49ebd2a773..f378613ad120 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -1093,14 +1093,15 @@ static int unregister_fprobe_nolock(struct fprobe *fp) } /** - * unregister_fprobe() - Unregister fprobe. + * unregister_fprobe_async() - Unregister fprobe without RCU GP wait * @fp: A fprobe data structure to be unregistered. * * Unregister fprobe (and remove ftrace hooks from the function entries). + * This function will NOT wait until the fprobe is no longer used. * * Return 0 if @fp is unregistered successfully, -errno if not. */ -int unregister_fprobe(struct fprobe *fp) +int unregister_fprobe_async(struct fprobe *fp) { guard(mutex)(&fprobe_mutex); if (!fp || !fprobe_registered(fp)) @@ -1108,6 +1109,24 @@ int unregister_fprobe(struct fprobe *fp) return unregister_fprobe_nolock(fp); } + +/** + * unregister_fprobe() - Unregister fprobe with RCU GP wait + * @fp: A fprobe data structure to be unregistered. + * + * Unregister fprobe (and remove ftrace hooks from the function entries). + * This function will block until the fprobe is no longer used. + * + * Return 0 if @fp is unregistered successfully, -errno if not. + */ +int unregister_fprobe(struct fprobe *fp) +{ + int ret = unregister_fprobe_async(fp); + + if (!ret) + synchronize_rcu(); + return ret; +} EXPORT_SYMBOL_GPL(unregister_fprobe); static int __init fprobe_initcall(void) -- cgit v1.2.3 From 5082d8835070fe63f3c61fe574cbb5319bd94575 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 May 2026 07:24:57 +0200 Subject: xfs: fix the "limiting open zones" message The xfs logging macros include a newline, remove the \n, which adds an extra one. Signed-off-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Reviewed-by: Andrey Albershteyn Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index c64f9ab743a6..5e297b75a85f 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -1170,7 +1170,7 @@ xfs_calc_open_zones( if (bdev_open_zones && bdev_open_zones < mp->m_max_open_zones) { mp->m_max_open_zones = bdev_open_zones; - xfs_info(mp, "limiting open zones to %u due to hardware limit.\n", + xfs_info(mp, "limiting open zones to %u due to hardware limit.", bdev_open_zones); } -- cgit v1.2.3 From 509fdeb3326be0db055e88d0f689a3888f147f90 Mon Sep 17 00:00:00 2001 From: Md Shofiqul Islam Date: Wed, 6 May 2026 19:36:58 +0300 Subject: xfs: Fix typo in comment Fix spelling mistake in comment: - occured -> occurred Reviewed-by: Darrick J. Wong Signed-off-by: Md Shofiqul Islam Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_notify_failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index 64c8afb935c2..b994ff15d5e4 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c @@ -350,7 +350,7 @@ xfs_dax_notify_dev_failure( /* * Shutdown fs from a force umount in pre-remove case which won't fail, * so errors can be ignored. Otherwise, shutdown the filesystem with - * CORRUPT flag if error occured or notify.want_shutdown was set during + * CORRUPT flag if error occurred or notify.want_shutdown was set during * RMAP querying. */ if (mf_flags & MF_MEM_PRE_REMOVE) -- cgit v1.2.3 From 0c3887a134f191723b53e2a47e501b534c8723ee Mon Sep 17 00:00:00 2001 From: Rong Zhang Date: Sun, 10 May 2026 04:25:31 +0000 Subject: platform/x86: lenovo-wmi-helpers: Fix memory leak in lwmi_dev_evaluate_int() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lwmi_dev_evaluate_int() leaks output.pointer when retval == NULL (found by sashiko.dev [1]). Fix it by moving `ret_obj = output.pointer' outside of the `if (retval)' block so that it is always freed by the __free cleanup callback. No functional change intended. Reviewed-by: Mark Pearson Fixes: e521d16e76cd ("platform/x86: Add lenovo-wmi-helpers") Cc: stable@vger.kernel.org Link: https://sashiko.dev/#/patchset/20260331181208.421552-1-derekjohn.clark%40gmail.com [1] Signed-off-by: Rong Zhang Signed-off-by: Derek J. Clark Link: https://patch.msgid.link/20260510042546.436874-2-derekjohn.clark@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/lenovo/wmi-helpers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/lenovo/wmi-helpers.c b/drivers/platform/x86/lenovo/wmi-helpers.c index 7379defac500..018d7642e2bd 100644 --- a/drivers/platform/x86/lenovo/wmi-helpers.c +++ b/drivers/platform/x86/lenovo/wmi-helpers.c @@ -46,7 +46,6 @@ int lwmi_dev_evaluate_int(struct wmi_device *wdev, u8 instance, u32 method_id, unsigned char *buf, size_t size, u32 *retval) { struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object *ret_obj __free(kfree) = NULL; struct acpi_buffer input = { size, buf }; acpi_status status; @@ -55,8 +54,9 @@ int lwmi_dev_evaluate_int(struct wmi_device *wdev, u8 instance, u32 method_id, if (ACPI_FAILURE(status)) return -EIO; + union acpi_object *ret_obj __free(kfree) = output.pointer; + if (retval) { - ret_obj = output.pointer; if (!ret_obj) return -ENODATA; -- cgit v1.2.3 From 55a279ae819adaea99a94c609f31970b70e0ec0c Mon Sep 17 00:00:00 2001 From: Rong Zhang Date: Sun, 10 May 2026 04:25:32 +0000 Subject: platform/x86: lenovo-wmi-other: Balance IDA id allocation and free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the IDA id is only freed on wmi-other device removal or failure to create firmware-attributes device, kset, or attributes. It leaks IDA ids if the wmi-other device is bound multiple times, as the unbind callback never frees the previously allocated IDA id. Additionally, if the wmi-other device has failed to create a firmware-attributes device before it gets removed, the wmi-device removal callback double frees the same IDA id. These bugs were found by sashiko.dev [1]. Fix them by moving ida_free() into lwmi_om_fw_attr_remove() so it is balanced with ida_alloc() in lwmi_om_fw_attr_add(). With them fixed, properly set and utilize the validity of priv->ida_id to balance firmware-attributes registration and removal, without relying on propagating the registration error to the component framework, which is more reliable and aligns with the hwmon device registration and removal sequences. No functional change intended. Reviewed-by: Mark Pearson Fixes: edc4b183b794 ("platform/x86: Add Lenovo Other Mode WMI Driver") Cc: stable@vger.kernel.org Link: https://sashiko.dev/#/patchset/20260331181208.421552-1-derekjohn.clark%40gmail.com [1] Signed-off-by: Rong Zhang Signed-off-by: Derek J. Clark Link: https://patch.msgid.link/20260510042546.436874-3-derekjohn.clark@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/lenovo/wmi-other.c | 36 +++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/platform/x86/lenovo/wmi-other.c b/drivers/platform/x86/lenovo/wmi-other.c index 6c2febe1a595..ebef3649d0a7 100644 --- a/drivers/platform/x86/lenovo/wmi-other.c +++ b/drivers/platform/x86/lenovo/wmi-other.c @@ -959,17 +959,17 @@ static struct capdata01_attr_group cd01_attr_groups[] = { /** * lwmi_om_fw_attr_add() - Register all firmware_attributes_class members * @priv: The Other Mode driver data. - * - * Return: Either 0, or an error code. */ -static int lwmi_om_fw_attr_add(struct lwmi_om_priv *priv) +static void lwmi_om_fw_attr_add(struct lwmi_om_priv *priv) { unsigned int i; int err; - priv->ida_id = ida_alloc(&lwmi_om_ida, GFP_KERNEL); - if (priv->ida_id < 0) - return priv->ida_id; + err = ida_alloc(&lwmi_om_ida, GFP_KERNEL); + if (err < 0) + goto err_no_ida; + + priv->ida_id = err; priv->fw_attr_dev = device_create(&firmware_attributes_class, NULL, MKDEV(0, 0), NULL, "%s-%u", @@ -995,7 +995,7 @@ static int lwmi_om_fw_attr_add(struct lwmi_om_priv *priv) cd01_attr_groups[i].tunable_attr->dev = &priv->wdev->dev; } - return 0; + return; err_remove_groups: while (i--) @@ -1009,7 +1009,12 @@ err_destroy_classdev: err_free_ida: ida_free(&lwmi_om_ida, priv->ida_id); - return err; + +err_no_ida: + priv->ida_id = -EIDRM; + + dev_warn(&priv->wdev->dev, + "failed to register firmware-attributes device: %d\n", err); } /** @@ -1018,12 +1023,17 @@ err_free_ida: */ static void lwmi_om_fw_attr_remove(struct lwmi_om_priv *priv) { + if (priv->ida_id < 0) + return; + for (unsigned int i = 0; i < ARRAY_SIZE(cd01_attr_groups) - 1; i++) sysfs_remove_group(&priv->fw_attr_kset->kobj, cd01_attr_groups[i].attr_group); kset_unregister(priv->fw_attr_kset); device_unregister(priv->fw_attr_dev); + ida_free(&lwmi_om_ida, priv->ida_id); + priv->ida_id = -EIDRM; } /* ======== Self (master: lenovo-wmi-other) ======== */ @@ -1065,7 +1075,9 @@ static int lwmi_om_master_bind(struct device *dev) lwmi_om_fan_info_collect_cd00(priv); - return lwmi_om_fw_attr_add(priv); + lwmi_om_fw_attr_add(priv); + + return 0; } /** @@ -1117,13 +1129,7 @@ static int lwmi_other_probe(struct wmi_device *wdev, const void *context) static void lwmi_other_remove(struct wmi_device *wdev) { - struct lwmi_om_priv *priv = dev_get_drvdata(&wdev->dev); - component_master_del(&wdev->dev, &lwmi_om_master_ops); - - /* No IDA to free if the driver is never bound to its components. */ - if (priv->ida_id >= 0) - ida_free(&lwmi_om_ida, priv->ida_id); } static const struct wmi_device_id lwmi_other_id_table[] = { -- cgit v1.2.3 From 2fe2504abcfa4f82a4208e8d0c21ec0f22baca43 Mon Sep 17 00:00:00 2001 From: Rong Zhang Date: Sun, 10 May 2026 04:25:33 +0000 Subject: platform/x86: lenovo-wmi-other: Balance component bind and unbind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When lwmi_om_master_bind() fails, the master device's components are left bound, with the aggregate device destroyed due to the failure (found by sashiko.dev [1]). Balance calls to component_bind_all() and component_unbind_all() when an error is propagated to the component framework. No functional change intended. Reviewed-by: Mark Pearson Reviewed-by: Ilpo Järvinen Fixes: edc4b183b794 ("platform/x86: Add Lenovo Other Mode WMI Driver") Cc: stable@vger.kernel.org Link: https://sashiko.dev/#/patchset/20260331181208.421552-1-derekjohn.clark%40gmail.com [1] Signed-off-by: Rong Zhang Signed-off-by: Derek J. Clark Link: https://patch.msgid.link/20260510042546.436874-4-derekjohn.clark@gmail.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/lenovo/wmi-other.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/lenovo/wmi-other.c b/drivers/platform/x86/lenovo/wmi-other.c index ebef3649d0a7..70fcb8406c27 100644 --- a/drivers/platform/x86/lenovo/wmi-other.c +++ b/drivers/platform/x86/lenovo/wmi-other.c @@ -1070,8 +1070,11 @@ static int lwmi_om_master_bind(struct device *dev) priv->cd00_list = binder.cd00_list; priv->cd01_list = binder.cd01_list; - if (!priv->cd00_list || !priv->cd01_list) + if (!priv->cd00_list || !priv->cd01_list) { + component_unbind_all(dev, NULL); + return -ENODEV; + } lwmi_om_fan_info_collect_cd00(priv); -- cgit v1.2.3 From 816fbd5dacee977ca56bab79bf97f71f2f7ac24e Mon Sep 17 00:00:00 2001 From: "Derek J. Clark" Date: Sun, 10 May 2026 04:25:34 +0000 Subject: platform/x86: lenovo-wmi-other: Zero initialize WMI arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds explicit initialization of wmi_method_args_32 declarations with zero values to prevent uninitialized data from being sent to the device BIOS when passed. No functional change intended. Reviewed-by: Mark Pearson Fixes: 22024ac5366f ("platform/x86: Add Lenovo Gamezone WMI Driver") Fixes: edc4b183b794 ("platform/x86: Add Lenovo Other Mode WMI Driver") Reported-by: Rong Zhang Closes: https://lore.kernel.org/platform-driver-x86/95c7e7b539dd0af41189c754fcd35cec5b6fe182.camel@rong.moe/ Cc: stable@vger.kernel.org Reviewed-by: Rong Zhang Tested-by: Rong Zhang Signed-off-by: Derek J. Clark Link: https://patch.msgid.link/20260510042546.436874-5-derekjohn.clark@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/lenovo/wmi-gamezone.c | 2 +- drivers/platform/x86/lenovo/wmi-other.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/lenovo/wmi-gamezone.c b/drivers/platform/x86/lenovo/wmi-gamezone.c index c7fe7e3c9f17..098239c9311a 100644 --- a/drivers/platform/x86/lenovo/wmi-gamezone.c +++ b/drivers/platform/x86/lenovo/wmi-gamezone.c @@ -201,7 +201,7 @@ static int lwmi_gz_profile_set(struct device *dev, enum platform_profile_option profile) { struct lwmi_gz_priv *priv = dev_get_drvdata(dev); - struct wmi_method_args_32 args; + struct wmi_method_args_32 args = {}; enum thermal_mode mode; int ret; diff --git a/drivers/platform/x86/lenovo/wmi-other.c b/drivers/platform/x86/lenovo/wmi-other.c index 70fcb8406c27..c1b429269f89 100644 --- a/drivers/platform/x86/lenovo/wmi-other.c +++ b/drivers/platform/x86/lenovo/wmi-other.c @@ -166,7 +166,7 @@ MODULE_PARM_DESC(relax_fan_constraint, */ static int lwmi_om_fan_get_set(struct lwmi_om_priv *priv, int channel, u32 *val, bool set) { - struct wmi_method_args_32 args; + struct wmi_method_args_32 args = {}; u32 method_id, retval; int err; @@ -775,7 +775,7 @@ static ssize_t attr_current_value_store(struct kobject *kobj, struct tunable_attr_01 *tunable_attr) { struct lwmi_om_priv *priv = dev_get_drvdata(tunable_attr->dev); - struct wmi_method_args_32 args; + struct wmi_method_args_32 args = {}; struct capdata01 capdata; enum thermal_mode mode; u32 attribute_id; @@ -838,7 +838,7 @@ static ssize_t attr_current_value_show(struct kobject *kobj, struct tunable_attr_01 *tunable_attr) { struct lwmi_om_priv *priv = dev_get_drvdata(tunable_attr->dev); - struct wmi_method_args_32 args; + struct wmi_method_args_32 args = {}; enum thermal_mode mode; u32 attribute_id; int retval; -- cgit v1.2.3 From 71f3843e0f81e3c097a088c1121154bb9a44da0a Mon Sep 17 00:00:00 2001 From: "Derek J. Clark" Date: Sun, 10 May 2026 04:25:35 +0000 Subject: platform/x86: lenovo-wmi-other: Fix tunable_attr_01 struct members MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In struct tunable_attr_01 the capdata pointer is unused and the size of the id members is u32 when it should be u8. Fix these prior to adding additional members. No functional change intended. Reviewed-by: Mark Pearson Cc: stable@vger.kernel.org Reviewed-by: Rong Zhang Tested-by: Rong Zhang Signed-off-by: Derek J. Clark Link: https://patch.msgid.link/20260510042546.436874-6-derekjohn.clark@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/lenovo/wmi-other.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/lenovo/wmi-other.c b/drivers/platform/x86/lenovo/wmi-other.c index c1b429269f89..526075ff52d0 100644 --- a/drivers/platform/x86/lenovo/wmi-other.c +++ b/drivers/platform/x86/lenovo/wmi-other.c @@ -548,11 +548,10 @@ out: /* ======== fw_attributes (component: lenovo-wmi-capdata 01) ======== */ struct tunable_attr_01 { - struct capdata01 *capdata; struct device *dev; - u32 feature_id; - u32 device_id; - u32 type_id; + u8 feature_id; + u8 device_id; + u8 type_id; }; static struct tunable_attr_01 ppt_pl1_spl = { -- cgit v1.2.3 From e8d5460ad3fd22409f2566ecbe2c82d94aabc246 Mon Sep 17 00:00:00 2001 From: Rong Zhang Date: Sun, 10 May 2026 04:25:36 +0000 Subject: platform/x86: lenovo: Decouple lenovo-wmi-gamezone and lenovo-wmi-other MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, lenovo-wmi-gamezone depends on lenovo-wmi-other as the former imports symbols from the latter. The imported symbols are just used to register a notifier block. However, there is no runtime dependency between both drivers, and either of them can run without the other, which is the major purpose of using the notifier framework. Such a link-time dependency is non-optimal. A previous attempt to "fix" it made LENOVO_WMI_GAMEZONE select LENOVO_WMI_TUNING, which was fundamentally broken and resulted in undefined Kconfig behavior, as `select' cannot be used on a symbol with potentially unmet dependencies. Decouple both drivers by moving the thermal mode notifier chain to lenovo-wmi-helpers. Methods for notifier block (un)registration are exported for lenovo-wmi-gamezone, while a method for querying the current thermal mode are exported for lenovo-wmi-other. This turns the dependency graph from +------------ lenovo-wmi-gamezone | | v | lenovo-wmi-helpers | ^ | | V +------------ lenovo-wmi-other into +------------ lenovo-wmi-gamezone | v lenovo-wmi-helpers ^ | +------------ lenovo-wmi-other To make it clear, the name of the notifier chain is also renamed from `om_chain_head' to `tm_chain_head', indicating that it's used to query the current thermal mode. No functional change intended. Reviewed-by: Mark Pearson Fixes: 6e38b9fcbfa3 ("platform/x86: lenovo: gamezone needs "other mode"") Cc: stable@vger.kernel.org Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202603252259.gHvJDyh3-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202603260302.X0NjQOda-lkp@intel.com/ Signed-off-by: Rong Zhang Signed-off-by: Derek J. Clark Link: https://patch.msgid.link/20260510042546.436874-7-derekjohn.clark@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/lenovo/Kconfig | 1 - drivers/platform/x86/lenovo/wmi-gamezone.c | 4 +- drivers/platform/x86/lenovo/wmi-helpers.c | 101 ++++++++++++++++++++++++++++ drivers/platform/x86/lenovo/wmi-helpers.h | 8 +++ drivers/platform/x86/lenovo/wmi-other.c | 104 +---------------------------- drivers/platform/x86/lenovo/wmi-other.h | 16 ----- 6 files changed, 112 insertions(+), 122 deletions(-) delete mode 100644 drivers/platform/x86/lenovo/wmi-other.h diff --git a/drivers/platform/x86/lenovo/Kconfig b/drivers/platform/x86/lenovo/Kconfig index f885127b007f..09b1b055d2e0 100644 --- a/drivers/platform/x86/lenovo/Kconfig +++ b/drivers/platform/x86/lenovo/Kconfig @@ -252,7 +252,6 @@ config LENOVO_WMI_GAMEZONE select ACPI_PLATFORM_PROFILE select LENOVO_WMI_EVENTS select LENOVO_WMI_HELPERS - select LENOVO_WMI_TUNING help Say Y here if you have a WMI aware Lenovo Legion device and would like to use the platform-profile firmware interface to manage power usage. diff --git a/drivers/platform/x86/lenovo/wmi-gamezone.c b/drivers/platform/x86/lenovo/wmi-gamezone.c index 098239c9311a..c28785d25673 100644 --- a/drivers/platform/x86/lenovo/wmi-gamezone.c +++ b/drivers/platform/x86/lenovo/wmi-gamezone.c @@ -23,7 +23,6 @@ #include "wmi-events.h" #include "wmi-gamezone.h" #include "wmi-helpers.h" -#include "wmi-other.h" #define LENOVO_GAMEZONE_GUID "887B54E3-DDDC-4B2C-8B88-68A26A8835D0" @@ -383,7 +382,7 @@ static int lwmi_gz_probe(struct wmi_device *wdev, const void *context) return ret; priv->mode_nb.notifier_call = lwmi_gz_mode_call; - return devm_lwmi_om_register_notifier(&wdev->dev, &priv->mode_nb); + return devm_lwmi_tm_register_notifier(&wdev->dev, &priv->mode_nb); } static const struct wmi_device_id lwmi_gz_id_table[] = { @@ -405,7 +404,6 @@ module_wmi_driver(lwmi_gz_driver); MODULE_IMPORT_NS("LENOVO_WMI_EVENTS"); MODULE_IMPORT_NS("LENOVO_WMI_HELPERS"); -MODULE_IMPORT_NS("LENOVO_WMI_OTHER"); MODULE_DEVICE_TABLE(wmi, lwmi_gz_id_table); MODULE_AUTHOR("Derek J. Clark "); MODULE_DESCRIPTION("Lenovo GameZone WMI Driver"); diff --git a/drivers/platform/x86/lenovo/wmi-helpers.c b/drivers/platform/x86/lenovo/wmi-helpers.c index 018d7642e2bd..7a198259e393 100644 --- a/drivers/platform/x86/lenovo/wmi-helpers.c +++ b/drivers/platform/x86/lenovo/wmi-helpers.c @@ -21,11 +21,15 @@ #include #include #include +#include #include #include #include "wmi-helpers.h" +/* Thermal mode notifier chain. */ +static BLOCKING_NOTIFIER_HEAD(tm_chain_head); + /** * lwmi_dev_evaluate_int() - Helper function for calling WMI methods that * return an integer. @@ -84,6 +88,103 @@ int lwmi_dev_evaluate_int(struct wmi_device *wdev, u8 instance, u32 method_id, }; EXPORT_SYMBOL_NS_GPL(lwmi_dev_evaluate_int, "LENOVO_WMI_HELPERS"); +/** + * lwmi_tm_register_notifier() - Add a notifier to the blocking notifier chain + * @nb: The notifier_block struct to register + * + * Call blocking_notifier_chain_register to register the notifier block to the + * thermal mode notifier chain. + * + * Return: 0 on success, %-EEXIST on error. + */ +int lwmi_tm_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&tm_chain_head, nb); +} +EXPORT_SYMBOL_NS_GPL(lwmi_tm_register_notifier, "LENOVO_WMI_HELPERS"); + +/** + * lwmi_tm_unregister_notifier() - Remove a notifier from the blocking notifier + * chain. + * @nb: The notifier_block struct to register + * + * Call blocking_notifier_chain_unregister to unregister the notifier block from the + * thermal mode notifier chain. + * + * Return: 0 on success, %-ENOENT on error. + */ +int lwmi_tm_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&tm_chain_head, nb); +} +EXPORT_SYMBOL_NS_GPL(lwmi_tm_unregister_notifier, "LENOVO_WMI_HELPERS"); + +/** + * devm_lwmi_tm_unregister_notifier() - Remove a notifier from the blocking + * notifier chain. + * @data: Void pointer to the notifier_block struct to register. + * + * Call lwmi_tm_unregister_notifier to unregister the notifier block from the + * thermal mode notifier chain. + * + * Return: 0 on success, %-ENOENT on error. + */ +static void devm_lwmi_tm_unregister_notifier(void *data) +{ + struct notifier_block *nb = data; + + lwmi_tm_unregister_notifier(nb); +} + +/** + * devm_lwmi_tm_register_notifier() - Add a notifier to the blocking notifier + * chain. + * @dev: The parent device of the notifier_block struct. + * @nb: The notifier_block struct to register + * + * Call lwmi_tm_register_notifier to register the notifier block to the + * thermal mode notifier chain. Then add devm_lwmi_tm_unregister_notifier + * as a device managed action to automatically unregister the notifier block + * upon parent device removal. + * + * Return: 0 on success, or an error code. + */ +int devm_lwmi_tm_register_notifier(struct device *dev, + struct notifier_block *nb) +{ + int ret; + + ret = lwmi_tm_register_notifier(nb); + if (ret < 0) + return ret; + + return devm_add_action_or_reset(dev, devm_lwmi_tm_unregister_notifier, + nb); +} +EXPORT_SYMBOL_NS_GPL(devm_lwmi_tm_register_notifier, "LENOVO_WMI_HELPERS"); + +/** + * lwmi_tm_notifier_call() - Call functions for the notifier call chain. + * @mode: Pointer to a thermal mode enum to retrieve the data from. + * + * Call blocking_notifier_call_chain to retrieve the thermal mode from the + * lenovo-wmi-gamezone driver. + * + * Return: 0 on success, or an error code. + */ +int lwmi_tm_notifier_call(enum thermal_mode *mode) +{ + int ret; + + ret = blocking_notifier_call_chain(&tm_chain_head, + LWMI_GZ_GET_THERMAL_MODE, &mode); + if ((ret & ~NOTIFY_STOP_MASK) != NOTIFY_OK) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(lwmi_tm_notifier_call, "LENOVO_WMI_HELPERS"); + MODULE_AUTHOR("Derek J. Clark "); MODULE_DESCRIPTION("Lenovo WMI Helpers Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/lenovo/wmi-helpers.h b/drivers/platform/x86/lenovo/wmi-helpers.h index 20fd21749803..651a039228ed 100644 --- a/drivers/platform/x86/lenovo/wmi-helpers.h +++ b/drivers/platform/x86/lenovo/wmi-helpers.h @@ -7,6 +7,8 @@ #include +struct device; +struct notifier_block; struct wmi_device; struct wmi_method_args_32 { @@ -17,4 +19,10 @@ struct wmi_method_args_32 { int lwmi_dev_evaluate_int(struct wmi_device *wdev, u8 instance, u32 method_id, unsigned char *buf, size_t size, u32 *retval); +int lwmi_tm_register_notifier(struct notifier_block *nb); +int lwmi_tm_unregister_notifier(struct notifier_block *nb); +int devm_lwmi_tm_register_notifier(struct device *dev, + struct notifier_block *nb); +int lwmi_tm_notifier_call(enum thermal_mode *mode); + #endif /* !_LENOVO_WMI_HELPERS_H_ */ diff --git a/drivers/platform/x86/lenovo/wmi-other.c b/drivers/platform/x86/lenovo/wmi-other.c index 526075ff52d0..292fed8bcc80 100644 --- a/drivers/platform/x86/lenovo/wmi-other.c +++ b/drivers/platform/x86/lenovo/wmi-other.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -49,7 +48,6 @@ #include "wmi-events.h" #include "wmi-gamezone.h" #include "wmi-helpers.h" -#include "wmi-other.h" #include "../firmware_attributes_class.h" #define LENOVO_OTHER_MODE_GUID "DC2A8805-3A8C-41BA-A6F7-092E0089CD3B" @@ -81,7 +79,6 @@ #define LWMI_OM_FW_ATTR_BASE_PATH "lenovo-wmi-other" #define LWMI_OM_HWMON_NAME "lenovo_wmi_other" -static BLOCKING_NOTIFIER_HEAD(om_chain_head); static DEFINE_IDA(lwmi_om_ida); enum attribute_property { @@ -109,7 +106,6 @@ struct lwmi_om_priv { struct device *hwmon_dev; struct device *fw_attr_dev; struct kset *fw_attr_kset; - struct notifier_block nb; struct wmi_device *wdev; int ida_id; @@ -577,102 +573,6 @@ struct capdata01_attr_group { struct tunable_attr_01 *tunable_attr; }; -/** - * lwmi_om_register_notifier() - Add a notifier to the blocking notifier chain - * @nb: The notifier_block struct to register - * - * Call blocking_notifier_chain_register to register the notifier block to the - * lenovo-wmi-other driver notifier chain. - * - * Return: 0 on success, %-EEXIST on error. - */ -int lwmi_om_register_notifier(struct notifier_block *nb) -{ - return blocking_notifier_chain_register(&om_chain_head, nb); -} -EXPORT_SYMBOL_NS_GPL(lwmi_om_register_notifier, "LENOVO_WMI_OTHER"); - -/** - * lwmi_om_unregister_notifier() - Remove a notifier from the blocking notifier - * chain. - * @nb: The notifier_block struct to register - * - * Call blocking_notifier_chain_unregister to unregister the notifier block from the - * lenovo-wmi-other driver notifier chain. - * - * Return: 0 on success, %-ENOENT on error. - */ -int lwmi_om_unregister_notifier(struct notifier_block *nb) -{ - return blocking_notifier_chain_unregister(&om_chain_head, nb); -} -EXPORT_SYMBOL_NS_GPL(lwmi_om_unregister_notifier, "LENOVO_WMI_OTHER"); - -/** - * devm_lwmi_om_unregister_notifier() - Remove a notifier from the blocking - * notifier chain. - * @data: Void pointer to the notifier_block struct to register. - * - * Call lwmi_om_unregister_notifier to unregister the notifier block from the - * lenovo-wmi-other driver notifier chain. - * - * Return: 0 on success, %-ENOENT on error. - */ -static void devm_lwmi_om_unregister_notifier(void *data) -{ - struct notifier_block *nb = data; - - lwmi_om_unregister_notifier(nb); -} - -/** - * devm_lwmi_om_register_notifier() - Add a notifier to the blocking notifier - * chain. - * @dev: The parent device of the notifier_block struct. - * @nb: The notifier_block struct to register - * - * Call lwmi_om_register_notifier to register the notifier block to the - * lenovo-wmi-other driver notifier chain. Then add devm_lwmi_om_unregister_notifier - * as a device managed action to automatically unregister the notifier block - * upon parent device removal. - * - * Return: 0 on success, or an error code. - */ -int devm_lwmi_om_register_notifier(struct device *dev, - struct notifier_block *nb) -{ - int ret; - - ret = lwmi_om_register_notifier(nb); - if (ret < 0) - return ret; - - return devm_add_action_or_reset(dev, devm_lwmi_om_unregister_notifier, - nb); -} -EXPORT_SYMBOL_NS_GPL(devm_lwmi_om_register_notifier, "LENOVO_WMI_OTHER"); - -/** - * lwmi_om_notifier_call() - Call functions for the notifier call chain. - * @mode: Pointer to a thermal mode enum to retrieve the data from. - * - * Call blocking_notifier_call_chain to retrieve the thermal mode from the - * lenovo-wmi-gamezone driver. - * - * Return: 0 on success, or an error code. - */ -static int lwmi_om_notifier_call(enum thermal_mode *mode) -{ - int ret; - - ret = blocking_notifier_call_chain(&om_chain_head, - LWMI_GZ_GET_THERMAL_MODE, &mode); - if ((ret & ~NOTIFY_STOP_MASK) != NOTIFY_OK) - return -EINVAL; - - return 0; -} - /* Attribute Methods */ /** @@ -781,7 +681,7 @@ static ssize_t attr_current_value_store(struct kobject *kobj, u32 value; int ret; - ret = lwmi_om_notifier_call(&mode); + ret = lwmi_tm_notifier_call(&mode); if (ret) return ret; @@ -843,7 +743,7 @@ static ssize_t attr_current_value_show(struct kobject *kobj, int retval; int ret; - ret = lwmi_om_notifier_call(&mode); + ret = lwmi_tm_notifier_call(&mode); if (ret) return ret; diff --git a/drivers/platform/x86/lenovo/wmi-other.h b/drivers/platform/x86/lenovo/wmi-other.h deleted file mode 100644 index 8ebf5602bb99..000000000000 --- a/drivers/platform/x86/lenovo/wmi-other.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ - -/* Copyright (C) 2025 Derek J. Clark */ - -#ifndef _LENOVO_WMI_OTHER_H_ -#define _LENOVO_WMI_OTHER_H_ - -struct device; -struct notifier_block; - -int lwmi_om_register_notifier(struct notifier_block *nb); -int lwmi_om_unregister_notifier(struct notifier_block *nb); -int devm_lwmi_om_register_notifier(struct device *dev, - struct notifier_block *nb); - -#endif /* !_LENOVO_WMI_OTHER_H_ */ -- cgit v1.2.3 From 7e27896e16a1c450085c3fe020eeb1b223880f37 Mon Sep 17 00:00:00 2001 From: "Derek J. Clark" Date: Sun, 10 May 2026 04:25:37 +0000 Subject: platform/x86: lenovo-wmi-helpers: Move gamezone enums to wmi-helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a later patch in the series the thermal mode enum will be accessed across three separate drivers (wmi-capdata, wmi-gamezonem and wmi-other). An additional patch in the series will also add a function prototype that needs to reference this enum in wmi-helpers.h. To avoid having all these drivers begin to import each others headers, and to avoid declaring an opaque enum to hande the second case, move the thermal mode enum to helpers where it can be safely accessed by everything that needs it from a single import. While at it, since the gamezone_events_type enum is the only remaining item in the header, move that as well and remove the gamezone header entirely. Cc: stable@vger.kernel.org Reviewed-by: Mark Pearson Reviewed-by: Rong Zhang Tested-by: Rong Zhang Signed-off-by: Derek J. Clark Link: https://patch.msgid.link/20260510042546.436874-8-derekjohn.clark@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/lenovo/wmi-events.c | 2 +- drivers/platform/x86/lenovo/wmi-gamezone.c | 1 - drivers/platform/x86/lenovo/wmi-gamezone.h | 20 -------------------- drivers/platform/x86/lenovo/wmi-helpers.h | 13 +++++++++++++ drivers/platform/x86/lenovo/wmi-other.c | 1 - 5 files changed, 14 insertions(+), 23 deletions(-) delete mode 100644 drivers/platform/x86/lenovo/wmi-gamezone.h diff --git a/drivers/platform/x86/lenovo/wmi-events.c b/drivers/platform/x86/lenovo/wmi-events.c index 4a6a2c82413a..fc25bba68a7c 100644 --- a/drivers/platform/x86/lenovo/wmi-events.c +++ b/drivers/platform/x86/lenovo/wmi-events.c @@ -17,7 +17,7 @@ #include #include "wmi-events.h" -#include "wmi-gamezone.h" +#include "wmi-helpers.h" #define THERMAL_MODE_EVENT_GUID "D320289E-8FEA-41E0-86F9-911D83151B5F" diff --git a/drivers/platform/x86/lenovo/wmi-gamezone.c b/drivers/platform/x86/lenovo/wmi-gamezone.c index c28785d25673..109c0b564a9f 100644 --- a/drivers/platform/x86/lenovo/wmi-gamezone.c +++ b/drivers/platform/x86/lenovo/wmi-gamezone.c @@ -21,7 +21,6 @@ #include #include "wmi-events.h" -#include "wmi-gamezone.h" #include "wmi-helpers.h" #define LENOVO_GAMEZONE_GUID "887B54E3-DDDC-4B2C-8B88-68A26A8835D0" diff --git a/drivers/platform/x86/lenovo/wmi-gamezone.h b/drivers/platform/x86/lenovo/wmi-gamezone.h deleted file mode 100644 index 6b163a5eeb95..000000000000 --- a/drivers/platform/x86/lenovo/wmi-gamezone.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ - -/* Copyright (C) 2025 Derek J. Clark */ - -#ifndef _LENOVO_WMI_GAMEZONE_H_ -#define _LENOVO_WMI_GAMEZONE_H_ - -enum gamezone_events_type { - LWMI_GZ_GET_THERMAL_MODE = 1, -}; - -enum thermal_mode { - LWMI_GZ_THERMAL_MODE_QUIET = 0x01, - LWMI_GZ_THERMAL_MODE_BALANCED = 0x02, - LWMI_GZ_THERMAL_MODE_PERFORMANCE = 0x03, - LWMI_GZ_THERMAL_MODE_EXTREME = 0xE0, /* Ver 6+ */ - LWMI_GZ_THERMAL_MODE_CUSTOM = 0xFF, -}; - -#endif /* !_LENOVO_WMI_GAMEZONE_H_ */ diff --git a/drivers/platform/x86/lenovo/wmi-helpers.h b/drivers/platform/x86/lenovo/wmi-helpers.h index 651a039228ed..ed7db3ebba6c 100644 --- a/drivers/platform/x86/lenovo/wmi-helpers.h +++ b/drivers/platform/x86/lenovo/wmi-helpers.h @@ -16,6 +16,19 @@ struct wmi_method_args_32 { u32 arg1; }; +enum lwmi_event_type { + LWMI_GZ_GET_THERMAL_MODE = 0x01, +}; + +enum thermal_mode { + LWMI_GZ_THERMAL_MODE_NONE = 0x00, + LWMI_GZ_THERMAL_MODE_QUIET = 0x01, + LWMI_GZ_THERMAL_MODE_BALANCED = 0x02, + LWMI_GZ_THERMAL_MODE_PERFORMANCE = 0x03, + LWMI_GZ_THERMAL_MODE_EXTREME = 0xE0, /* Ver 6+ */ + LWMI_GZ_THERMAL_MODE_CUSTOM = 0xFF, +}; + int lwmi_dev_evaluate_int(struct wmi_device *wdev, u8 instance, u32 method_id, unsigned char *buf, size_t size, u32 *retval); diff --git a/drivers/platform/x86/lenovo/wmi-other.c b/drivers/platform/x86/lenovo/wmi-other.c index 292fed8bcc80..19f48aeda228 100644 --- a/drivers/platform/x86/lenovo/wmi-other.c +++ b/drivers/platform/x86/lenovo/wmi-other.c @@ -46,7 +46,6 @@ #include "wmi-capdata.h" #include "wmi-events.h" -#include "wmi-gamezone.h" #include "wmi-helpers.h" #include "../firmware_attributes_class.h" -- cgit v1.2.3 From 30a4ad208a7f7bdb790cd31d368595890045334f Mon Sep 17 00:00:00 2001 From: "Derek J. Clark" Date: Sun, 10 May 2026 04:25:38 +0000 Subject: platform/x86: lenovo-wmi-other: Add Attribute ID helper functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds lwmi_attr_id() function. In the same vein as LWMI_ATTR_ID_FAN_RPM(), but as a generic, to de-duplicate attribute_id assignment boilerplate. Adds tunable_attr_01_id() function that breaks out the members of a tunable_attr_01 struct and passes them to lwmi_attr_id(). No functional change intended. Cc: stable@vger.kernel.org Reviewed-by: Rong Zhang Tested-by: Rong Zhang Reviewed-by: Mark Pearson Signed-off-by: Derek J. Clark Link: https://patch.msgid.link/20260510042546.436874-9-derekjohn.clark@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/lenovo/wmi-capdata.c | 8 ++--- drivers/platform/x86/lenovo/wmi-capdata.h | 20 +++++++++++++ drivers/platform/x86/lenovo/wmi-other.c | 49 +++++++++++++------------------ 3 files changed, 44 insertions(+), 33 deletions(-) diff --git a/drivers/platform/x86/lenovo/wmi-capdata.c b/drivers/platform/x86/lenovo/wmi-capdata.c index b73d378f0e8b..714aa6fd6f1f 100644 --- a/drivers/platform/x86/lenovo/wmi-capdata.c +++ b/drivers/platform/x86/lenovo/wmi-capdata.c @@ -27,7 +27,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include #include @@ -48,6 +47,7 @@ #include #include "wmi-capdata.h" +#include "wmi-helpers.h" #define LENOVO_CAPABILITY_DATA_00_GUID "362A3AFE-3D96-4665-8530-96DAD5BB300E" #define LENOVO_CAPABILITY_DATA_01_GUID "7A8F5407-CB67-4D6E-B547-39B3BE018154" @@ -57,9 +57,9 @@ #define LWMI_FEATURE_ID_FAN_TEST 0x05 -#define LWMI_ATTR_ID_FAN_TEST \ - (FIELD_PREP(LWMI_ATTR_DEV_ID_MASK, LWMI_DEVICE_ID_FAN) | \ - FIELD_PREP(LWMI_ATTR_FEAT_ID_MASK, LWMI_FEATURE_ID_FAN_TEST)) +#define LWMI_ATTR_ID_FAN_TEST \ + lwmi_attr_id(LWMI_DEVICE_ID_FAN, LWMI_FEATURE_ID_FAN_TEST, \ + LWMI_GZ_THERMAL_MODE_NONE, LWMI_TYPE_ID_NONE) enum lwmi_cd_type { LENOVO_CAPABILITY_DATA_00, diff --git a/drivers/platform/x86/lenovo/wmi-capdata.h b/drivers/platform/x86/lenovo/wmi-capdata.h index 8c1df3efcc55..c3e760b8c3c3 100644 --- a/drivers/platform/x86/lenovo/wmi-capdata.h +++ b/drivers/platform/x86/lenovo/wmi-capdata.h @@ -6,6 +6,7 @@ #define _LENOVO_WMI_CAPDATA_H_ #include +#include #include #define LWMI_SUPP_VALID BIT(0) @@ -19,6 +20,8 @@ #define LWMI_DEVICE_ID_FAN 0x04 +#define LWMI_TYPE_ID_NONE 0x00 + struct component_match; struct device; struct cd_list; @@ -57,6 +60,23 @@ struct lwmi_cd_binder { cd_list_cb_t cd_fan_list_cb; }; +/** + * lwmi_attr_id() - Formats a capability data attribute ID + * @dev_id: The u8 corresponding to the device ID. + * @feat_id: The u8 corresponding to the feature ID on the device. + * @mode_id: The u8 corresponding to the wmi-gamezone mode for set/get. + * @type_id: The u8 corresponding to the sub-device. + * + * Return: encoded capability data attribute ID. + */ +static inline u32 lwmi_attr_id(u8 dev_id, u8 feat_id, u8 mode_id, u8 type_id) +{ + return (FIELD_PREP(LWMI_ATTR_DEV_ID_MASK, dev_id) | + FIELD_PREP(LWMI_ATTR_FEAT_ID_MASK, feat_id) | + FIELD_PREP(LWMI_ATTR_MODE_ID_MASK, mode_id) | + FIELD_PREP(LWMI_ATTR_TYPE_ID_MASK, type_id)); +} + void lwmi_cd_match_add_all(struct device *master, struct component_match **matchptr); int lwmi_cd00_get_data(struct cd_list *list, u32 attribute_id, struct capdata00 *output); int lwmi_cd01_get_data(struct cd_list *list, u32 attribute_id, struct capdata01 *output); diff --git a/drivers/platform/x86/lenovo/wmi-other.c b/drivers/platform/x86/lenovo/wmi-other.c index 19f48aeda228..a06b188eadac 100644 --- a/drivers/platform/x86/lenovo/wmi-other.c +++ b/drivers/platform/x86/lenovo/wmi-other.c @@ -59,8 +59,6 @@ #define LWMI_FEATURE_ID_FAN_RPM 0x03 -#define LWMI_TYPE_ID_NONE 0x00 - #define LWMI_FEATURE_VALUE_GET 17 #define LWMI_FEATURE_VALUE_SET 18 @@ -68,13 +66,12 @@ #define LWMI_FAN_NR 4 #define LWMI_FAN_ID(x) ((x) + LWMI_FAN_ID_BASE) -#define LWMI_ATTR_ID_FAN_RPM(x) \ - (FIELD_PREP(LWMI_ATTR_DEV_ID_MASK, LWMI_DEVICE_ID_FAN) | \ - FIELD_PREP(LWMI_ATTR_FEAT_ID_MASK, LWMI_FEATURE_ID_FAN_RPM) | \ - FIELD_PREP(LWMI_ATTR_TYPE_ID_MASK, LWMI_FAN_ID(x))) - #define LWMI_FAN_DIV 100 +#define LWMI_ATTR_ID_FAN_RPM(x) \ + lwmi_attr_id(LWMI_DEVICE_ID_FAN, LWMI_FEATURE_ID_FAN_RPM, \ + LWMI_GZ_THERMAL_MODE_NONE, LWMI_FAN_ID(x)) + #define LWMI_OM_FW_ATTR_BASE_PATH "lenovo-wmi-other" #define LWMI_OM_HWMON_NAME "lenovo_wmi_other" @@ -549,6 +546,18 @@ struct tunable_attr_01 { u8 type_id; }; +/** + * tunable_attr_01_id() - Formats a tunable_attr_01 to a capdata attribute ID + * @attr: The tunable_attr_01 to format. + * @mode: The u8 corresponding to the wmi-gamezone mode for set/get. + * + * Return: encoded capability data attribute ID. + */ +static u32 tunable_attr_01_id(struct tunable_attr_01 *attr, u8 mode) +{ + return lwmi_attr_id(attr->device_id, attr->feature_id, mode, attr->type_id); +} + static struct tunable_attr_01 ppt_pl1_spl = { .device_id = LWMI_DEVICE_ID_CPU, .feature_id = LWMI_FEATURE_ID_CPU_SPL, @@ -616,12 +625,7 @@ static ssize_t attr_capdata01_show(struct kobject *kobj, u32 attribute_id; int value, ret; - attribute_id = - FIELD_PREP(LWMI_ATTR_DEV_ID_MASK, tunable_attr->device_id) | - FIELD_PREP(LWMI_ATTR_FEAT_ID_MASK, tunable_attr->feature_id) | - FIELD_PREP(LWMI_ATTR_MODE_ID_MASK, - LWMI_GZ_THERMAL_MODE_CUSTOM) | - FIELD_PREP(LWMI_ATTR_TYPE_ID_MASK, tunable_attr->type_id); + attribute_id = tunable_attr_01_id(tunable_attr, LWMI_GZ_THERMAL_MODE_CUSTOM); ret = lwmi_cd01_get_data(priv->cd01_list, attribute_id, &capdata); if (ret) @@ -676,7 +680,6 @@ static ssize_t attr_current_value_store(struct kobject *kobj, struct wmi_method_args_32 args = {}; struct capdata01 capdata; enum thermal_mode mode; - u32 attribute_id; u32 value; int ret; @@ -687,13 +690,9 @@ static ssize_t attr_current_value_store(struct kobject *kobj, if (mode != LWMI_GZ_THERMAL_MODE_CUSTOM) return -EBUSY; - attribute_id = - FIELD_PREP(LWMI_ATTR_DEV_ID_MASK, tunable_attr->device_id) | - FIELD_PREP(LWMI_ATTR_FEAT_ID_MASK, tunable_attr->feature_id) | - FIELD_PREP(LWMI_ATTR_MODE_ID_MASK, mode) | - FIELD_PREP(LWMI_ATTR_TYPE_ID_MASK, tunable_attr->type_id); + args.arg0 = tunable_attr_01_id(tunable_attr, mode); - ret = lwmi_cd01_get_data(priv->cd01_list, attribute_id, &capdata); + ret = lwmi_cd01_get_data(priv->cd01_list, args.arg0, &capdata); if (ret) return ret; @@ -704,7 +703,6 @@ static ssize_t attr_current_value_store(struct kobject *kobj, if (value < capdata.min_value || value > capdata.max_value) return -EINVAL; - args.arg0 = attribute_id; args.arg1 = value; ret = lwmi_dev_evaluate_int(priv->wdev, 0x0, LWMI_FEATURE_VALUE_SET, @@ -738,7 +736,6 @@ static ssize_t attr_current_value_show(struct kobject *kobj, struct lwmi_om_priv *priv = dev_get_drvdata(tunable_attr->dev); struct wmi_method_args_32 args = {}; enum thermal_mode mode; - u32 attribute_id; int retval; int ret; @@ -746,13 +743,7 @@ static ssize_t attr_current_value_show(struct kobject *kobj, if (ret) return ret; - attribute_id = - FIELD_PREP(LWMI_ATTR_DEV_ID_MASK, tunable_attr->device_id) | - FIELD_PREP(LWMI_ATTR_FEAT_ID_MASK, tunable_attr->feature_id) | - FIELD_PREP(LWMI_ATTR_MODE_ID_MASK, mode) | - FIELD_PREP(LWMI_ATTR_TYPE_ID_MASK, tunable_attr->type_id); - - args.arg0 = attribute_id; + args.arg0 = tunable_attr_01_id(tunable_attr, mode); ret = lwmi_dev_evaluate_int(priv->wdev, 0x0, LWMI_FEATURE_VALUE_GET, (unsigned char *)&args, sizeof(args), -- cgit v1.2.3 From 03bb5147da083cb91e5c8c2599fcb2f8fd05cb8f Mon Sep 17 00:00:00 2001 From: "Derek J. Clark" Date: Sun, 10 May 2026 04:25:39 +0000 Subject: platform/x86: lenovo-wmi-other: Limit adding attributes to supported devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds lwmi_is_attr_01_supported, and only creates the attribute subfolder if the attribute is supported by the hardware. Due to some poorly implemented BIOS this is a multi-step sequence of events. This is because: - Some BIOS support getting the capability data from custom mode (0xff), while others only support it in no-mode (0x00). - Some BIOS support get/set for the current value from custom mode (0xff), while others only support it in no-mode (0x00). - Some BIOS report capability data for a method that is not fully implemented. - Some BIOS have methods fully implemented, but no complimentary capability data. To ensure we only expose fully implemented methods with corresponding capability data, we check each outcome before reporting that an attribute can be supported. Checking for lwmi_is_attr_01_supported during remove is not done to ensure that we don't attempt to call cd01 or send WMI events if one of the interfaces being removed was the cause of the driver unloading. Fixes: edc4b183b794 ("platform/x86: Add Lenovo Other Mode WMI Driver") Reported-by: Kurt Borja Closes: https://lore.kernel.org/platform-driver-x86/DG60P3SHXR8H.3NSEHMZ6J7XRC@gmail.com/ Cc: stable@vger.kernel.org Reviewed-by: Rong Zhang Tested-by: Rong Zhang Reviewed-by: Mark Pearson Signed-off-by: Derek J. Clark Link: https://patch.msgid.link/20260510042546.436874-10-derekjohn.clark@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/lenovo/wmi-other.c | 92 +++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/lenovo/wmi-other.c b/drivers/platform/x86/lenovo/wmi-other.c index a06b188eadac..d318ba432fdc 100644 --- a/drivers/platform/x86/lenovo/wmi-other.c +++ b/drivers/platform/x86/lenovo/wmi-other.c @@ -544,6 +544,8 @@ struct tunable_attr_01 { u8 feature_id; u8 device_id; u8 type_id; + u8 cd_mode_id; /* mode arg for searching capdata */ + u8 cv_mode_id; /* mode arg for set/get current_value */ }; /** @@ -625,7 +627,7 @@ static ssize_t attr_capdata01_show(struct kobject *kobj, u32 attribute_id; int value, ret; - attribute_id = tunable_attr_01_id(tunable_attr, LWMI_GZ_THERMAL_MODE_CUSTOM); + attribute_id = tunable_attr_01_id(tunable_attr, tunable_attr->cd_mode_id); ret = lwmi_cd01_get_data(priv->cd01_list, attribute_id, &capdata); if (ret) @@ -690,7 +692,7 @@ static ssize_t attr_current_value_store(struct kobject *kobj, if (mode != LWMI_GZ_THERMAL_MODE_CUSTOM) return -EBUSY; - args.arg0 = tunable_attr_01_id(tunable_attr, mode); + args.arg0 = tunable_attr_01_id(tunable_attr, tunable_attr->cd_mode_id); ret = lwmi_cd01_get_data(priv->cd01_list, args.arg0, &capdata); if (ret) @@ -703,6 +705,7 @@ static ssize_t attr_current_value_store(struct kobject *kobj, if (value < capdata.min_value || value > capdata.max_value) return -EINVAL; + args.arg0 = tunable_attr_01_id(tunable_attr, tunable_attr->cv_mode_id); args.arg1 = value; ret = lwmi_dev_evaluate_int(priv->wdev, 0x0, LWMI_FEATURE_VALUE_SET, @@ -743,6 +746,10 @@ static ssize_t attr_current_value_show(struct kobject *kobj, if (ret) return ret; + /* If "no-mode" is the supported mode, ensure we never send current mode */ + if (tunable_attr->cv_mode_id == LWMI_GZ_THERMAL_MODE_NONE) + mode = tunable_attr->cv_mode_id; + args.arg0 = tunable_attr_01_id(tunable_attr, mode); ret = lwmi_dev_evaluate_int(priv->wdev, 0x0, LWMI_FEATURE_VALUE_GET, @@ -754,6 +761,81 @@ static ssize_t attr_current_value_show(struct kobject *kobj, return sysfs_emit(buf, "%d\n", retval); } +/** + * lwmi_attr_01_is_supported() - Determine if the given attribute is supported. + * @tunable_attr: The attribute to verify. + * + * For an attribute to be supported it must have a functional get/set method, + * as well as associated capability data stored in the capdata01 table. + * + * First check if the attribute has a corresponding data table under custom mode + * (0xff), then under no mode (0x00). If either of those passes, check if the + * supported field of the capdata struct is > 0. If it is supported, store the + * successful mode in the cd_mode_id field of tunable_attr. + * + * If the attribute capdata shows it is supported, attempt to determine the mode + * for the current value property get/set methods using a similar pattern to the + * capdata table check. If the value returned by either mode is 0 or an error, + * assume that mode is not supported. Otherwise, store the successful mode in the + * cv_mode_id field of tunable_attr. + * + * If any of the above checks fail then the attribute is not fully supported. + * + * Return: true if capdata and set/get modes are found, otherwise false. + */ +static bool lwmi_attr_01_is_supported(struct tunable_attr_01 *tunable_attr) +{ + u8 modes[2] = { LWMI_GZ_THERMAL_MODE_CUSTOM, LWMI_GZ_THERMAL_MODE_NONE }; + struct lwmi_om_priv *priv = dev_get_drvdata(tunable_attr->dev); + struct wmi_method_args_32 args = {}; + bool cd_mode_found = false; + bool cv_mode_found = false; + struct capdata01 capdata; + int retval, ret, i; + + /* Determine tunable_attr->cd_mode_id */ + for (i = 0; i < ARRAY_SIZE(modes); i++) { + args.arg0 = tunable_attr_01_id(tunable_attr, modes[i]); + + ret = lwmi_cd01_get_data(priv->cd01_list, args.arg0, &capdata); + if (ret || !capdata.supported) + continue; + + tunable_attr->cd_mode_id = modes[i]; + cd_mode_found = true; + break; + } + + if (!cd_mode_found) + return cd_mode_found; + + dev_dbg(tunable_attr->dev, + "cd_mode_id: %#010x\n", args.arg0); + + /* Determine tunable_attr->cv_mode_id, returns 1 if supported */ + for (i = 0; i < ARRAY_SIZE(modes); i++) { + args.arg0 = tunable_attr_01_id(tunable_attr, modes[i]); + + ret = lwmi_dev_evaluate_int(priv->wdev, 0x0, LWMI_FEATURE_VALUE_GET, + (u8 *)&args, sizeof(args), + &retval); + if (ret || !retval) + continue; + + tunable_attr->cv_mode_id = modes[i]; + cv_mode_found = true; + break; + } + + if (!cv_mode_found) + return cv_mode_found; + + dev_dbg(tunable_attr->dev, "cv_mode_id: %#010x, attribute support level: %#010x\n", + args.arg0, capdata.supported); + + return capdata.supported > 0; +} + /* Lenovo WMI Other Mode Attribute macros */ #define __LWMI_ATTR_RO(_func, _name) \ { \ @@ -877,12 +959,14 @@ static void lwmi_om_fw_attr_add(struct lwmi_om_priv *priv) } for (i = 0; i < ARRAY_SIZE(cd01_attr_groups) - 1; i++) { + cd01_attr_groups[i].tunable_attr->dev = &priv->wdev->dev; + if (!lwmi_attr_01_is_supported(cd01_attr_groups[i].tunable_attr)) + continue; + err = sysfs_create_group(&priv->fw_attr_kset->kobj, cd01_attr_groups[i].attr_group); if (err) goto err_remove_groups; - - cd01_attr_groups[i].tunable_attr->dev = &priv->wdev->dev; } return; -- cgit v1.2.3 From a3bf0f28d4ba16e1f35f8c983bb04426b87e2a78 Mon Sep 17 00:00:00 2001 From: Junyoung Jang Date: Mon, 4 May 2026 20:26:49 +0900 Subject: fs/statmount: fix slab out-of-bounds write in statmount_mnt_idmap statmount_mnt_idmap() writes one mapping with seq_printf() and then manually advances seq->count to include the NUL separator. If seq_printf() overflows, seq_set_overflow() sets seq->count to seq->size. The manual seq->count++ changes this to seq->size + 1. seq_has_overflowed() then no longer detects the overflow. The corrupted count returns to statmount_string(), which later executes: seq->buf[seq->count++] = '\0'; This causes a 1-byte NULL out-of-bounds write on the dynamically allocated seq buffer. Fix this by checking for overflow immediately after seq_printf(). Fixes: 37c4a9590e1e ("statmount: allow to retrieve idmappings") Signed-off-by: Junyoung Jang Link: https://patch.msgid.link/20260504112649.1862936-1-graypanda.inzag@gmail.com Signed-off-by: Christian Brauner --- fs/mnt_idmapping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c index 6472c4ea3d1e..cb61fbdb52e9 100644 --- a/fs/mnt_idmapping.c +++ b/fs/mnt_idmapping.c @@ -375,6 +375,8 @@ int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_ continue; seq_printf(seq, "%u %u %u", extent->first, lower, extent->count); + if (seq_has_overflowed(seq)) + return -EAGAIN; seq->count++; /* mappings are separated by \0 */ if (seq_has_overflowed(seq)) -- cgit v1.2.3 From a7cf1da7ac016490d6a1106f2aa6b602d34e9a12 Mon Sep 17 00:00:00 2001 From: Hongling Zeng Date: Fri, 1 May 2026 15:10:58 +0800 Subject: fs: Fix return in jfs_mkdir and orangefs_mkdir Return NULL instead of passing to ERR_PTR while err is zero Fixes these smatch warnings: - fs/jfs/namei.c:311 jfs_mkdir() warn: passing zero to 'ERR_PTR' - fs/orangefs/namei.c:369 orangefs_mkdir() warn: passing zero to 'ERR_PTR' Fixes: 88d5baf69082 ("Change inode_operations.mkdir to return struct dentry *") Signed-off-by: Hongling Zeng Link: https://patch.msgid.link/20260501071058.1243245-1-zenghongling@kylinos.cn Reviewed-by: Jori Koolstra Signed-off-by: Christian Brauner --- fs/jfs/namei.c | 2 +- fs/orangefs/namei.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 60c4a0e0fca5..442d62679262 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -309,7 +309,7 @@ static struct dentry *jfs_mkdir(struct mnt_idmap *idmap, struct inode *dip, out1: jfs_info("jfs_mkdir: rc:%d", rc); - return ERR_PTR(rc); + return rc ? ERR_PTR(rc) : NULL; } /* diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index bec5475de094..75e65e72c2d6 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -362,7 +362,7 @@ static struct dentry *orangefs_mkdir(struct mnt_idmap *idmap, struct inode *dir, __orangefs_setattr(dir, &iattr); out: op_release(new_op); - return ERR_PTR(ret); + return ret ? ERR_PTR(ret) : NULL; } static int orangefs_rename(struct mnt_idmap *idmap, -- cgit v1.2.3 From c3880a7b10e487e033dc6f388bda118436566f7a Mon Sep 17 00:00:00 2001 From: Junxi Qian Date: Wed, 6 May 2026 20:24:15 +0800 Subject: fuse: fix writeback array overflow when max_pages is one fuse_iomap_writeback_range() appends one folio pointer and one fuse_folio_desc for every dirty range that is merged into the current writeback request. The merge decision checks the byte budget against fc->max_pages and fc->max_write, but it does not check whether the folio and descriptor arrays still have another free slot. This is not sufficient for fuseblk, where the filesystem block size can be smaller than PAGE_SIZE. With writeback cache enabled and max_pages negotiated as one, contiguous sub-page dirty ranges can fit within the byte budget while spanning more than one folio. The next append can then write past the one-slot folios and descs arrays. Split the request when the number of already attached folios has reached fc->max_pages. This keeps the folio/descriptor slot accounting in sync with the send decision. Fixes: ef7e7cbb323f ("fuse: use iomap for writeback") Cc: stable@vger.kernel.org Reviewed-by: Joanne Koong Signed-off-by: Junxi Qian Link: https://patch.msgid.link/20260506122415.205340-1-qjx1298677004@gmail.com Acked-by: Miklos Szeredi Signed-off-by: Christian Brauner --- fs/fuse/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c59452d60b8d..f94f3dc082c6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2176,7 +2176,10 @@ static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos, WARN_ON(!ap->num_folios); - /* Reached max pages */ + /* Reached max pages or max folio slots */ + if (ap->num_folios >= fc->max_pages) + return true; + if (DIV_ROUND_UP(bytes, PAGE_SIZE) > fc->max_pages) return true; -- cgit v1.2.3 From dec85d2fbd20de3711a71e65397dfdb40c3fa953 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 6 May 2026 09:37:02 +0000 Subject: irqchip/gic-v5: Move LPI allocation into the LPI domain The IPI and ITS MSI domains currently allocate and release LPIs directly, then pass the selected LPI ID to the parent LPI domain. This leaks the LPI domain's allocation policy into its child domains and forces each child to duplicate part of the parent domain's teardown. Make the LPI domain allocate LPIs in its .alloc() callback and release them in a matching .free() callback. Child domains can then request a parent interrupt without passing an implementation-specific LPI ID, and the LPI lifetime is tied to the domain that owns the LPI namespace. Remove the gicv5_alloc_lpi() and gicv5_free_lpi() wrappers now that no external caller needs to manage LPIs directly. This is a preparatory change for an actual leakage problem in the allocation code and therefore tagged with the same Fixes tag. Fixes: 0f0101325876 ("irqchip/gic-v5: Add GICv5 LPI/IPI support") Signed-off-by: Sascha Bischoff Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Reviewed-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260506093634.382062-2-sascha.bischoff@arm.com --- drivers/irqchip/irq-gic-v5-its.c | 14 ++-------- drivers/irqchip/irq-gic-v5.c | 53 +++++++++++++++++++------------------- include/linux/irqchip/arm-gic-v5.h | 3 --- 3 files changed, 28 insertions(+), 42 deletions(-) diff --git a/drivers/irqchip/irq-gic-v5-its.c b/drivers/irqchip/irq-gic-v5-its.c index 36a8d1368f0e..36d03f82ef68 100644 --- a/drivers/irqchip/irq-gic-v5-its.c +++ b/drivers/irqchip/irq-gic-v5-its.c @@ -929,8 +929,8 @@ static void gicv5_its_free_eventid(struct gicv5_its_dev *its_dev, u32 event_id_b static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg) { - u32 device_id, event_id_base, lpi; struct gicv5_its_dev *its_dev; + u32 device_id, event_id_base; msi_alloc_info_t *info = arg; irq_hw_number_t hwirq; struct irq_data *irqd; @@ -949,16 +949,8 @@ static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int vi device_id = its_dev->device_id; for (i = 0; i < nr_irqs; i++) { - ret = gicv5_alloc_lpi(); - if (ret < 0) { - pr_debug("Failed to find free LPI!\n"); - goto out_free_irqs; - } - lpi = ret; - - ret = irq_domain_alloc_irqs_parent(domain, virq + i, 1, &lpi); + ret = irq_domain_alloc_irqs_parent(domain, virq + i, 1, NULL); if (ret) { - gicv5_free_lpi(lpi); goto out_free_irqs; } @@ -983,7 +975,6 @@ static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int vi out_free_irqs: while (--i >= 0) { irqd = irq_domain_get_irq_data(domain, virq + i); - gicv5_free_lpi(irqd->parent_data->hwirq); irq_domain_reset_irq_data(irqd); irq_domain_free_irqs_parent(domain, virq + i, 1); } @@ -1013,7 +1004,6 @@ static void gicv5_its_irq_domain_free(struct irq_domain *domain, unsigned int vi for (i = 0; i < nr_irqs; i++) { d = irq_domain_get_irq_data(domain, virq + i); - gicv5_free_lpi(d->parent_data->hwirq); irq_domain_reset_irq_data(d); irq_domain_free_irqs_parent(domain, virq + i, 1); } diff --git a/drivers/irqchip/irq-gic-v5.c b/drivers/irqchip/irq-gic-v5.c index 6b0903be8ebf..15a2a04398d2 100644 --- a/drivers/irqchip/irq-gic-v5.c +++ b/drivers/irqchip/irq-gic-v5.c @@ -59,16 +59,6 @@ static void release_lpi(u32 lpi) ida_free(&lpi_ida, lpi); } -int gicv5_alloc_lpi(void) -{ - return alloc_lpi(); -} - -void gicv5_free_lpi(u32 lpi) -{ - release_lpi(lpi); -} - static void gicv5_ppi_priority_init(void) { write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_PRI_MI), SYS_ICC_PPI_PRIORITYR0_EL1); @@ -806,18 +796,36 @@ static void gicv5_lpi_config_reset(struct irq_data *d) gicv5_lpi_irq_write_pending_state(d, false); } +static void gicv5_irq_lpi_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d; + + if (WARN_ON_ONCE(nr_irqs != 1)) + return; + + d = irq_domain_get_irq_data(domain, virq); + + release_lpi(d->hwirq); + + irq_set_handler(virq, NULL); + irq_domain_reset_irq_data(d); +} + static int gicv5_irq_lpi_domain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg) { irq_hw_number_t hwirq; struct irq_data *irqd; - u32 *lpi = arg; int ret; if (WARN_ON_ONCE(nr_irqs != 1)) return -EINVAL; - hwirq = *lpi; + ret = alloc_lpi(); + if (ret < 0) + return ret; + hwirq = ret; irqd = irq_domain_get_irq_data(domain, virq); @@ -826,8 +834,10 @@ static int gicv5_irq_lpi_domain_alloc(struct irq_domain *domain, unsigned int vi irqd_set_single_target(irqd); ret = gicv5_irs_iste_alloc(hwirq); - if (ret < 0) + if (ret < 0) { + release_lpi(hwirq); return ret; + } gicv5_hwirq_init(hwirq, GICV5_IRQ_PRI_MI, GICV5_HWIRQ_TYPE_LPI); gicv5_lpi_config_reset(irqd); @@ -837,7 +847,7 @@ static int gicv5_irq_lpi_domain_alloc(struct irq_domain *domain, unsigned int vi static const struct irq_domain_ops gicv5_irq_lpi_domain_ops = { .alloc = gicv5_irq_lpi_domain_alloc, - .free = gicv5_irq_domain_free, + .free = gicv5_irq_lpi_domain_free, }; void __init gicv5_init_lpi_domain(void) @@ -859,21 +869,12 @@ static int gicv5_irq_ipi_domain_alloc(struct irq_domain *domain, unsigned int vi { struct irq_data *irqd; int ret, i; - u32 lpi; for (i = 0; i < nr_irqs; i++) { - ret = gicv5_alloc_lpi(); - if (ret < 0) + ret = irq_domain_alloc_irqs_parent(domain, virq + i, 1, NULL); + if (ret) return ret; - lpi = ret; - - ret = irq_domain_alloc_irqs_parent(domain, virq + i, 1, &lpi); - if (ret) { - gicv5_free_lpi(lpi); - return ret; - } - irqd = irq_domain_get_irq_data(domain, virq + i); irq_domain_set_hwirq_and_chip(domain, virq + i, i, @@ -899,8 +900,6 @@ static void gicv5_irq_ipi_domain_free(struct irq_domain *domain, unsigned int vi if (!d) return; - gicv5_free_lpi(d->parent_data->hwirq); - irq_set_handler(virq + i, NULL); irq_domain_reset_irq_data(d); irq_domain_free_irqs_parent(domain, virq + i, 1); diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h index 40d2fce68294..f78787e654f4 100644 --- a/include/linux/irqchip/arm-gic-v5.h +++ b/include/linux/irqchip/arm-gic-v5.h @@ -425,9 +425,6 @@ struct gicv5_its_itt_cfg { void gicv5_init_lpis(u32 max); void gicv5_deinit_lpis(void); -int gicv5_alloc_lpi(void); -void gicv5_free_lpi(u32 lpi); - void __init gicv5_its_of_probe(struct device_node *parent); void __init gicv5_its_acpi_probe(void); #endif -- cgit v1.2.3 From eb6f6d523813ead9dc2799194a2839d42c049734 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 6 May 2026 09:37:23 +0000 Subject: irqchip/gic-v5: Support range allocation for LPIs The per-IPI parent allocation loop returns immediately on failure and leaks any parent interrupts allocated by earlier iterations. The GICv5 LPI domain now owns LPI allocation and teardown internally, but its irq_domain callbacks still reject requests where nr_irqs is greater than one. This forces child domains to allocate and free LPIs one at a time even when the interrupt core requests a contiguous range. Handle multi-interrupt allocation and teardown in the LPI domain by iterating over the requested range and unwinding any partially allocated state on failure. Allocate the parent LPIs for the IPI domain with a single range request as well, which cures the leakage problem. Fixes: 0f0101325876 ("irqchip/gic-v5: Add GICv5 LPI/IPI support") Signed-off-by: Sascha Bischoff Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Reviewed-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260506093634.382062-3-sascha.bischoff@arm.com --- drivers/irqchip/irq-gic-v5.c | 77 ++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/drivers/irqchip/irq-gic-v5.c b/drivers/irqchip/irq-gic-v5.c index 15a2a04398d2..c1af07083cef 100644 --- a/drivers/irqchip/irq-gic-v5.c +++ b/drivers/irqchip/irq-gic-v5.c @@ -801,15 +801,14 @@ static void gicv5_irq_lpi_domain_free(struct irq_domain *domain, unsigned int vi { struct irq_data *d; - if (WARN_ON_ONCE(nr_irqs != 1)) - return; - - d = irq_domain_get_irq_data(domain, virq); + for (unsigned int i = 0; i < nr_irqs; i++, virq++) { + d = irq_domain_get_irq_data(domain, virq); - release_lpi(d->hwirq); + release_lpi(d->hwirq); - irq_set_handler(virq, NULL); - irq_domain_reset_irq_data(d); + irq_set_handler(virq, NULL); + irq_domain_reset_irq_data(d); + } } static int gicv5_irq_lpi_domain_alloc(struct irq_domain *domain, unsigned int virq, @@ -817,32 +816,39 @@ static int gicv5_irq_lpi_domain_alloc(struct irq_domain *domain, unsigned int vi { irq_hw_number_t hwirq; struct irq_data *irqd; + unsigned int i; int ret; - if (WARN_ON_ONCE(nr_irqs != 1)) - return -EINVAL; - - ret = alloc_lpi(); - if (ret < 0) - return ret; - hwirq = ret; + for (i = 0; i < nr_irqs; i++) { + ret = alloc_lpi(); + if (ret < 0) + goto out_free_lpis; + hwirq = ret; + + ret = gicv5_irs_iste_alloc(hwirq); + if (ret < 0) { + /* Undo partial state first, then clean up the rest */ + release_lpi(hwirq); + goto out_free_lpis; + } - irqd = irq_domain_get_irq_data(domain, virq); + irqd = irq_domain_get_irq_data(domain, virq + i); - irq_domain_set_info(domain, virq, hwirq, &gicv5_lpi_irq_chip, NULL, - handle_fasteoi_irq, NULL, NULL); - irqd_set_single_target(irqd); + irq_domain_set_info(domain, virq + i, hwirq, &gicv5_lpi_irq_chip, + NULL, handle_fasteoi_irq, NULL, NULL); + irqd_set_single_target(irqd); - ret = gicv5_irs_iste_alloc(hwirq); - if (ret < 0) { - release_lpi(hwirq); - return ret; + gicv5_hwirq_init(hwirq, GICV5_IRQ_PRI_MI, GICV5_HWIRQ_TYPE_LPI); + gicv5_lpi_config_reset(irqd); } - gicv5_hwirq_init(hwirq, GICV5_IRQ_PRI_MI, GICV5_HWIRQ_TYPE_LPI); - gicv5_lpi_config_reset(irqd); - return 0; + +out_free_lpis: + if (i) + gicv5_irq_lpi_domain_free(domain, virq, i); + + return ret; } static const struct irq_domain_ops gicv5_irq_lpi_domain_ops = { @@ -868,21 +874,21 @@ static int gicv5_irq_ipi_domain_alloc(struct irq_domain *domain, unsigned int vi unsigned int nr_irqs, void *arg) { struct irq_data *irqd; - int ret, i; + int ret; - for (i = 0; i < nr_irqs; i++) { - ret = irq_domain_alloc_irqs_parent(domain, virq + i, 1, NULL); - if (ret) - return ret; + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret) + return ret; - irqd = irq_domain_get_irq_data(domain, virq + i); + for (unsigned int i = 0; i < nr_irqs; i++, virq++) { + irqd = irq_domain_get_irq_data(domain, virq); - irq_domain_set_hwirq_and_chip(domain, virq + i, i, - &gicv5_ipi_irq_chip, NULL); + irq_domain_set_hwirq_and_chip(domain, virq, i, + &gicv5_ipi_irq_chip, NULL); irqd_set_single_target(irqd); - irq_set_handler(virq + i, handle_percpu_irq); + irq_set_handler(virq, handle_percpu_irq); } return 0; @@ -902,8 +908,9 @@ static void gicv5_irq_ipi_domain_free(struct irq_domain *domain, unsigned int vi irq_set_handler(virq + i, NULL); irq_domain_reset_irq_data(d); - irq_domain_free_irqs_parent(domain, virq + i, 1); } + + irq_domain_free_irqs_parent(domain, virq, nr_irqs); } static const struct irq_domain_ops gicv5_irq_ipi_domain_ops = { -- cgit v1.2.3 From a7c7e42654b6a8676610ee09d22901432c4851af Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 6 May 2026 09:37:43 +0000 Subject: irqchip/gic-v5: Allocate ITS parent LPIs as a range The ITS MSI domain no longer manages LPI allocation directly. LPIs are allocated and freed by the parent LPI domain, which can now handle a full range of interrupts and unwind partial allocations internally. Make the ITS domain request and release the parent IRQs as a single range instead of iterating over each interrupt. The ITS allocation path then only needs to reserve EventIDs, allocate the parent range, and fill in the ITS irq_data for each MSI. Since no operation in the per-MSI loop can fail, the partial parent-free unwind becomes unnecessary. On teardown, reset the ITS irq_data for the range and then release the parent range in one call, leaving LPI teardown to the LPI domain. Fixes: 0f0101325876 ("irqchip/gic-v5: Add GICv5 LPI/IPI support") Signed-off-by: Sascha Bischoff Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Reviewed-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260506093634.382062-4-sascha.bischoff@arm.com --- drivers/irqchip/irq-gic-v5-its.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/irqchip/irq-gic-v5-its.c b/drivers/irqchip/irq-gic-v5-its.c index 36d03f82ef68..28e39b065de0 100644 --- a/drivers/irqchip/irq-gic-v5-its.c +++ b/drivers/irqchip/irq-gic-v5-its.c @@ -937,6 +937,7 @@ static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int vi int ret, i; its_dev = info->scratchpad[0].ptr; + device_id = its_dev->device_id; ret = gicv5_its_alloc_eventid(its_dev, info, nr_irqs, &event_id_base); if (ret) @@ -946,14 +947,11 @@ static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int vi if (ret) goto out_eventid; - device_id = its_dev->device_id; + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, NULL); + if (ret) + goto out_eventid; for (i = 0; i < nr_irqs; i++) { - ret = irq_domain_alloc_irqs_parent(domain, virq + i, 1, NULL); - if (ret) { - goto out_free_irqs; - } - /* * Store eventid and deviceid into the hwirq for later use. * @@ -972,12 +970,6 @@ static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int vi return 0; -out_free_irqs: - while (--i >= 0) { - irqd = irq_domain_get_irq_data(domain, virq + i); - irq_domain_reset_irq_data(irqd); - irq_domain_free_irqs_parent(domain, virq + i, 1); - } out_eventid: gicv5_its_free_eventid(its_dev, event_id_base, nr_irqs); return ret; @@ -1000,14 +992,14 @@ static void gicv5_its_irq_domain_free(struct irq_domain *domain, unsigned int vi bitmap_release_region(its_dev->event_map, event_id_base, get_count_order(nr_irqs)); - /* Hierarchically free irq data */ for (i = 0; i < nr_irqs; i++) { d = irq_domain_get_irq_data(domain, virq + i); - irq_domain_reset_irq_data(d); - irq_domain_free_irqs_parent(domain, virq + i, 1); } + /* Hierarchically free irq data */ + irq_domain_free_irqs_parent(domain, virq, nr_irqs); + gicv5_its_syncr(its, its_dev); gicv5_irs_syncr(); } -- cgit v1.2.3 From 512718bbc51b851140380b7068ec7365bd039cba Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 7 May 2026 12:05:18 +0100 Subject: genirq/chip: Don't call add_interrupt_randomness() for NMIs Recently handle_percpu_devid_irq() was changed to call add_interrupt_randomness(). This introduced a potential deadlock when handle_percpu_devid_irq() is used to handle an NMI, which can be detected with lockdep, e.g. ================================ WARNING: inconsistent lock state 7.1.0-rc2-pnmi #465 Not tainted -------------------------------- inconsistent {INITIAL USE} -> {IN-NMI} usage. perf/695 [HC1[1]:SC0[0]:HE0:SE1] takes: ffff00837dfd3a18 (&base->lock){-.-.}-{2:2}, at: lock_timer_base+0x6c/0xac {INITIAL USE} state was registered at: _raw_spin_lock_irqsave+0x68/0xb0 lock_timer_base+0x6c/0xac __mod_timer+0x100/0x32c add_timer_global+0x2c/0x40 __queue_delayed_work+0xf0/0x140 queue_delayed_work_on+0x134/0x138 mem_cgroup_css_online+0x30c/0x310 online_css+0x34/0x10c cgroup_init_subsys+0x158/0x1c8 cgroup_init+0x440/0x524 start_kernel+0x888/0x998 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&base->lock); lock(&base->lock); *** DEADLOCK *** Call trace: _raw_spin_lock_irqsave+0x68/0xb0 lock_timer_base+0x6c/0xac add_timer_on+0x78/0x16c add_interrupt_randomness+0x124/0x134 handle_percpu_devid_irq+0xd4/0x16c handle_irq_desc+0x40/0x58 generic_handle_domain_nmi+0x28/0x50 __gic_handle_nmi.isra.0+0x4c/0xa0 gic_handle_irq+0x38/0x2bc call_on_irq_stack+0x30/0x48 do_interrupt_handler+0x80/0x98 el1_interrupt+0x90/0xac el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x80/0x84 [...] During review, Thomas pointed out it wouldn't be safe for handle_percpu_devid_irq() to call add_interrupt_randomness() if it was used to handle NMIs: https://lore.kernel.org/lkml/87bjgik042.ffs@tglx/ ... but evidently people missed that handle_percpu_devid_irq() *is* used for NMIs. While it might seem that NMIs should be handled with a separate handle_percpu_devid_nmi() function, for various structural reasons this was impractical, and handle_percpu_devid_irq() has been expected to be used for NMIs since commits: 21bbbc50f398f ("irqchip/gic-v3: Switch high priority PPIs over to handle_percpu_devid_irq()") 5ff78c8de9d83 ("genirq: Kill handle_percpu_devid_fasteoi_nmi()") Taking the above into account, avoid the deadlock by not calling add_interrupt_randomness() when handle_percpu_devid_irq() is called in an NMI context. This is consistent with other NNI handling flows, which do not call add_interrupt_randomness(). At the same time, update the kernel-doc comment to make it clear that handle_percpu_devid_irq() can be called in NMI context. The rest of handle_percpu_devid_irq() is currently NMI safe and doesn't need to change. Fixes: fd7400cfcbaa ("genirq/chip: Invoke add_interrupt_randomness() in handle_percpu_devid_irq()") Reported-by: Ada Couprie Diaz Signed-off-by: Mark Rutland Signed-off-by: Thomas Gleixner Reviewed-by: Jinjie Ruan Reviewed-by: Marc Zyngier Link: https://patch.msgid.link/20260507110518.3128248-1-mark.rutland@arm.com --- kernel/irq/chip.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6c9b1dc4e7d4..b635e3c5d5b6 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -893,7 +894,10 @@ void handle_percpu_irq(struct irq_desc *desc) * * action->percpu_dev_id is a pointer to percpu variables which * contain the real device id for the cpu on which this handler is - * called + * called. + * + * May be used for NMI interrupt lines, and so may be called in IRQ or NMI + * context. */ void handle_percpu_devid_irq(struct irq_desc *desc) { @@ -930,7 +934,8 @@ void handle_percpu_devid_irq(struct irq_desc *desc) enabled ? " and unmasked" : "", irq, cpu); } - add_interrupt_randomness(irq); + if (!in_nmi()) + add_interrupt_randomness(irq); if (chip->irq_eoi) chip->irq_eoi(&desc->irq_data); -- cgit v1.2.3 From 0fa10fb77069fb67aa51384868ef3702b7791465 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Wed, 6 May 2026 01:55:22 -0700 Subject: irqchip/ath79-cpu: Remove unused function ath79_cpu_irq_init() was part of the legacy pre-OF code that got removed a while back. Remove it to get rid of a missing prototype warning, reported by the kernel test robot. [ tglx: Fix the subject prefix. Sigh ... ] Fixes: 51fa4f8912c0 ("MIPS: ath79: drop legacy IRQ code") Reported-by: kernel test robot Signed-off-by: Rosen Penev Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260506085522.1210143-1-rosenp@gmail.com Closes: https://lore.kernel.org/oe-kbuild-all/202412011509.kGQkDr1y-lkp@intel.com/ --- drivers/irqchip/irq-ath79-cpu.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/irqchip/irq-ath79-cpu.c b/drivers/irqchip/irq-ath79-cpu.c index 923e4bba3776..9b7273a7f8ce 100644 --- a/drivers/irqchip/irq-ath79-cpu.c +++ b/drivers/irqchip/irq-ath79-cpu.c @@ -85,10 +85,3 @@ static int __init ar79_cpu_intc_of_init( } IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc", ar79_cpu_intc_of_init); - -void __init ath79_cpu_irq_init(unsigned irq_wb_chan2, unsigned irq_wb_chan3) -{ - irq_wb_chan[2] = irq_wb_chan2; - irq_wb_chan[3] = irq_wb_chan3; - mips_cpu_irq_init(); -} -- cgit v1.2.3 From 5363b67ac8ebcc3e227dbf59fc8061949109841d Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Fri, 8 May 2026 07:36:54 +0000 Subject: irqchip/meson-gpio: Use the correct register in meson_s4_gpio_irq_set_type() meson_s4_gpio_irq_set_type() uses the both-edge trigger register for configuring level type and single edge mode interrupts, which is not correct. Use REG_EDGE_POL instead. Fixes: bbd6fcc76b39 ("irqchip: Add support for Amlogic A4 and A5 SoCs") Signed-off-by: Xianwei Zhao Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260508-a9-gpio-irqchip-v1-1-9dc5f3e022e0@amlogic.com --- drivers/irqchip/irq-meson-gpio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c index f722e9c57e2e..74a376ef452e 100644 --- a/drivers/irqchip/irq-meson-gpio.c +++ b/drivers/irqchip/irq-meson-gpio.c @@ -415,8 +415,7 @@ static int meson_s4_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl, if (type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING)) val |= BIT(ctl->params->edge_single_offset + idx); - meson_gpio_irq_update_bits(ctl, params->edge_pol_reg, - BIT(idx) | BIT(12 + idx), val); + meson_gpio_irq_update_bits(ctl, REG_EDGE_POL, BIT(idx) | BIT(12 + idx), val); return 0; }; -- cgit v1.2.3 From cefafbd561402b0fe6447449364a30315b9b1570 Mon Sep 17 00:00:00 2001 From: Yong-Xuan Wang Date: Fri, 8 May 2026 02:31:21 -0700 Subject: irqchip/riscv-imsic: Clear interrupt move state during CPU offlining Affinity changes of IMSIC interrupts have to be careful to not lose an interrupt in the process. Each vector keeps track of an affinity change in progress with two pointers in struct imsic_vector. imsic_vector::move_prev points to the previous CPU target data and imsic_vector::move_next to the designated new CPU target data. imsic_vector::move_prev on the new CPU can only be cleared after the previous CPU has cleared imsic_vector::move_next, which ususally happens in __imsic_remote_sync(). In case of CPU hot-unplug __imsic_remote_sync() is not invoked because the CPU is already marked offline. That means imsic_vector::move_prev becomes stale until the CPU is onlined again. The stale pointer prevents further affinity changes for the affected interrupts. Solve this by clearing the imsic_vector::move_prev pointers in the CPU hotplug offline path. [ tglx: Replace word salad in change log ] Fixes: 0f67911e821c ("irqchip/riscv-imsic: Separate next and previous pointers in IMSIC vector") Signed-off-by: Yong-Xuan Wang Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260508-imsic-v2-1-e9f08dd46cf5@sifive.com --- drivers/irqchip/irq-riscv-imsic-early.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c index ba903fa689bd..a7a1852b548c 100644 --- a/drivers/irqchip/irq-riscv-imsic-early.c +++ b/drivers/irqchip/irq-riscv-imsic-early.c @@ -158,6 +158,8 @@ static int imsic_dying_cpu(unsigned int cpu) /* Cleanup IPIs */ imsic_ipi_dying_cpu(); + imsic_local_sync_all(false); + /* Mark per-CPU IMSIC state as offline */ imsic_state_offline(); -- cgit v1.2.3 From 834e98acb748025c04fed3cac9c8954454f4b520 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Mon, 11 May 2026 13:19:18 +0200 Subject: fs: fix forced iversion increment on lazytime timestamp updates When updating timestamps with lazytime enabled, if only I_DIRTY_TIME is set (pure lazytime update), inode_maybe_inc_iversion() should not be forced to increment i_version. The force parameter should only be true when actual data or metadata changes require an iversion bump. The current code uses "!!dirty" which evaluates to true whenever dirty has any bits set, including the I_DIRTY_TIME bit alone. This forces an iversion increment on every lazytime timestamp update, which then sets I_DIRTY_SYNC, triggering expensive log flushes on subsequent fdatasync calls. Andres reported this issue when he noticed a perf regression[1]. Fix this by using "dirty != I_DIRTY_TIME" as the force parameter. This passes false for pure lazytime updates (allowing the I_VERSION_QUERIED optimization to work), while still forcing the increment when dirty contains other flags indicating real changes that require iversion updates. [1] https://lore.kernel.org/linux-xfs/7ys6erh3nnyeerv2nybyfvp7dmaknuxrlxv74wx56ocdothkc6@ekfiadtkfn2r/ Fixes: 85c871a02b03 ("fs: add support for non-blocking timestamp updates") Signed-off-by: Pankaj Raghav Link: https://patch.msgid.link/20260511111918.1793689-1-p.raghav@samsung.com Reviewed-by: Jeff Layton Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Christian Brauner --- fs/inode.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index 6a3cbc7dcd28..62c579a0cf7d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2124,7 +2124,13 @@ static int inode_update_cmtime(struct inode *inode, unsigned int flags) inode_iversion_need_inc(inode)) return -EAGAIN; } else { - if (inode_maybe_inc_iversion(inode, !!dirty)) + /* + * Don't force iversion increment for pure lazytime + * updates (I_DIRTY_TIME only), let I_VERSION_QUERIED + * dictate whether the increment is needed. + */ + if (inode_maybe_inc_iversion(inode, + dirty != I_DIRTY_TIME)) dirty |= I_DIRTY_SYNC; } } -- cgit v1.2.3 From 3799c2570982577551023ae035f5a786cf39a76e Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Sun, 10 May 2026 16:41:19 +0800 Subject: io_uring/fdinfo: translate SqThread PID through caller's pid_ns SQPOLL stores current->pid (init_pid_ns view) in sqd->task_pid at thread creation. fdinfo prints it raw via seq_printf("SqThread:\t%d\n", sq_pid). A reader inside a non-initial pid_ns sees the host PID, not the kthread's PID in the reader's own pid_ns. The SQPOLL kthread is created with CLONE_THREAD and no CLONE_NEW*, so it lives in the submitter's pid_ns. An unprivileged user_ns + pid_ns submitter can read fdinfo and learn the host PID of a kthread whose in-namespace PID is different. Reproducer (mainline 7.0, KASAN): unshare CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS, mount a private /proc, then have a grandchild that is pid 1 in the new pid_ns open an io_uring ring with IORING_SETUP_SQPOLL. /proc/self/task lists {1, 2}; the SQPOLL kthread is pid 2. Before: fdinfo prints SqThread = . After: SqThread = 2. Use task_pid_nr_ns() against the proc inode's pid_ns to compute sq_pid, instead of reading the stored sq->task_pid (which holds the init_pid_ns view). pidfd_show_fdinfo() in kernel/pid.c follows the same pattern. Signed-off-by: Maoyi Xie Link: https://patch.msgid.link/20260510084119.457578-1-maoyi.xie@ntu.edu.sg Signed-off-by: Jens Axboe --- io_uring/fdinfo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index c2d3e45544bb..001fb542dc11 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -190,8 +190,9 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) get_task_struct(tsk); rcu_read_unlock(); usec = io_sq_cpu_usec(tsk); + sq_pid = task_pid_nr_ns(tsk, + proc_pid_ns(file_inode(m->file)->i_sb)); put_task_struct(tsk); - sq_pid = sq->task_pid; sq_cpu = sq->sq_cpu; sq_total_time = usec; sq_work_time = sq->work_time; -- cgit v1.2.3 From 1860c2f85922917d8a46f16a6f4bd2298ffa0fb5 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 10 May 2026 22:48:43 +0800 Subject: ublk: reject max_sectors smaller than PAGE_SECTORS in parameter validation blk_validate_limits() requires max_hw_sectors >= PAGE_SECTORS and fires a WARN_ON_ONCE if this invariant is violated. ublk_validate_params() only checked the upper bound of max_sectors against max_io_buf_bytes, allowing userspace to pass small values (including zero) that trigger the warning when blk_mq_alloc_disk() is called from ublk_ctrl_start_dev(). Before 494ea040bcb5, ublk used blk_queue_max_hw_sectors() which silently clamped small values up to PAGE_SECTORS. The conversion to passing queue_limits directly to blk_mq_alloc_disk() lost that clamping and now hits blk_validate_limits()'s WARN_ON_ONCE instead. Validate that max_sectors is at least PAGE_SECTORS in ublk_validate_params() so invalid values are rejected early with -EINVAL instead of reaching the block layer. Fixes: 494ea040bcb5 ("ublk: pass queue_limits to blk_mq_alloc_disk") Signed-off-by: Ming Lei Link: https://patch.msgid.link/20260510144843.769031-1-tom.leiming@gmail.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6d13f1481de0..6c041eaebdb9 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -920,6 +920,9 @@ static int ublk_validate_params(const struct ublk_device *ub) if (p->max_sectors > (ub->dev_info.max_io_buf_bytes >> 9)) return -EINVAL; + if (p->max_sectors < PAGE_SECTORS) + return -EINVAL; + if (ublk_dev_is_zoned(ub) && !p->chunk_sectors) return -EINVAL; } else -- cgit v1.2.3 From 725ecd80688bf3c57ca9205431f2c06174ff0756 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Thu, 7 May 2026 19:23:01 +0800 Subject: nsfs: fix wrong error code returned for pidns ioctls When executing NS_GET_PID_FROM_PIDNS (or similar pidns ioctls), if the target task cannot be found in the corresponding pid_ns, the error code should be ESRCH instead of ENOTTY. This bug was introduced when the extensible ioctl handling was added. Without proper return, ret would be overwritten by the default case in the extensible ioctl switch statement. Fixes: a1d220d9dafa8 ("nsfs: iterate through mount namespaces") Signed-off-by: Zhihao Cheng Link: https://patch.msgid.link/20260507112301.1042757-1-chengzhihao1@huawei.com Reviewed-by: Yang Erkun Signed-off-by: Christian Brauner --- fs/nsfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nsfs.c b/fs/nsfs.c index 51e8c9430477..160018c4fb36 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -266,7 +266,7 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, else tsk = find_task_by_pid_ns(arg, pid_ns); if (!tsk) - break; + return ret; switch (ioctl) { case NS_GET_PID_FROM_PIDNS: -- cgit v1.2.3 From 99269799bf2448aebccee164df56c22a7b85b02c Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 5 May 2026 12:34:33 +0200 Subject: s390/pai: Fix missing PAI counter increments under heavy load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Machines with a larger number of CPUs and under heavy load sometimes loose PAI counter increments during recording using events -e CRYPTO_ÂLL or -e NNPA_ALL. Counting is not affected. This happens when several PAI crypto counters are incremented during the same cryptographic operation. During schedule out the functions paiXXX_sched_task() (with XXX either crypt or ext) +--> pai_have_samples() +--> pai_have_sample() +--> pai_copy() +--> pai_push_sample() are called to read out PAI counter values. In pai_copy() the current values of PAI counters are read from the PMU memory mapped page and compared to the values read during last schedule out operation, which have been saved in a backup page named PAI_SAVE_AREA(event). For each PAI counter a delta is calculated and when the delta is positive, that PAI counter was incremented by hardware. This positve delta is reported as raw data record attached to a sample. After all deltas have been calculated, the new PAI counter values are saved in the backup page PAI_SAVE_AREA(event). However this is done in pai_push_sample(), leaving a small window for missing hardware triggered updates. Here is one scenario: PAI counter idx: 0 1 2 3 4 5 6 7 .... N +---+---+---+---+---+---+---+---+ +---+ PAI counter page:| | | X | | | | | |....| Y | +---+---+---+---+---+---+---+---+ +---+ In pai_copy() each PAI counter value is read and compared to its old value. This is done in a loop. When PAI counter indexed N is read, the hardware might increment PAI counter indexed 2 again, updating its value from X to X+1. Later pai_push_sample() simply mem-copies the complete PAI counter page to a backup page and the increment of X+1 is lost, because the backup page now contains the new value. Read each PAI counter and save this value in the backup page when there is a positive delta. This omits any time window between read and store. This also reduced the work load as only modified PAI counters are saved. Cc: stable@vger.kernel.org Fixes: fe861b0c8d06 ("s390/pai: save PAI counter value page in event structure") Signed-off-by: Thomas Richter Reviewed-by: Sumanth Korikkar Signed-off-by: Alexander Gordeev --- arch/s390/kernel/perf_pai.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/s390/kernel/perf_pai.c b/arch/s390/kernel/perf_pai.c index f13c5c5fbea6..cdb8006220ca 100644 --- a/arch/s390/kernel/perf_pai.c +++ b/arch/s390/kernel/perf_pai.c @@ -186,6 +186,13 @@ static u64 pai_getctr(unsigned long *page, int nr, unsigned long offset) return page[nr]; } +static void pai_setctr(unsigned long *page, int nr, unsigned long offset, u64 v) +{ + if (offset) + nr += offset / sizeof(*page); + page[nr] = v; +} + /* Read the counter values. Return value from location in CMP. For base * event xxx_ALL sum up all events. Returns counter value. */ @@ -551,6 +558,8 @@ static void paicrypt_del(struct perf_event *event, int flags) /* Create raw data and save it in buffer. Calculate the delta for each * counter between this invocation and the last invocation. * Returns number of bytes copied. + * After reading from PAI counter page, save the read value to the old + * page to calculate PAI counter deltas. * Saves only entries with positive counter difference of the form * 2 bytes: Number of counter * 8 bytes: Value of counter @@ -562,16 +571,22 @@ static size_t pai_copy(struct pai_userdata *userdata, unsigned long *page, int i, outidx = 0; for (i = 1; i <= pp->num_avail; i++) { - u64 val = 0, val_old = 0; + u64 val = 0, val_old = 0, val_k = 0, val_old_k = 0; if (!exclude_kernel) { - val += pai_getctr(page, i, pp->kernel_offset); - val_old += pai_getctr(page_old, i, pp->kernel_offset); + val_k = pai_getctr(page, i, pp->kernel_offset); + val_old_k = pai_getctr(page_old, i, pp->kernel_offset); + if (val_k != val_old_k) + pai_setctr(page_old, i, pp->kernel_offset, val_k); } if (!exclude_user) { - val += pai_getctr(page, i, 0); - val_old += pai_getctr(page_old, i, 0); + val = pai_getctr(page, i, 0); + val_old = pai_getctr(page_old, i, 0); + if (val != val_old) + pai_setctr(page_old, i, 0, val); } + val += val_k; + val_old += val_old_k; if (val >= val_old) val -= val_old; else @@ -602,8 +617,6 @@ static size_t pai_copy(struct pai_userdata *userdata, unsigned long *page, static int pai_push_sample(size_t rawsize, struct pai_map *cpump, struct perf_event *event) { - int idx = PAI_PMU_IDX(event); - struct pai_pmu *pp = &pai_pmu[idx]; struct perf_sample_data data; struct perf_raw_record raw; struct pt_regs regs; @@ -634,8 +647,6 @@ static int pai_push_sample(size_t rawsize, struct pai_map *cpump, overflow = perf_event_overflow(event, &data, ®s); perf_event_update_userpage(event); - /* Save crypto counter lowcore page after reading event data. */ - memcpy((void *)PAI_SAVE_AREA(event), cpump->area, pp->area_size); return overflow; } -- cgit v1.2.3 From 2997606dd17729404cef9821ce66dd037b6019eb Mon Sep 17 00:00:00 2001 From: Paolo Pisati Date: Fri, 8 May 2026 09:09:56 +0200 Subject: platform/x86: asus-nb-wmi: add DMI quirk for ASUS Zenbook Duo UX8407AA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the existing zenbook duo keyboard quirk for the UX8407AA model too. Signed-off-by: Paolo Pisati Reviewed-by: Denis Benato Link: https://patch.msgid.link/20260508070956.62201-1-p.pisati@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-nb-wmi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index b4677c5bba5b..8005c088e9ee 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -544,6 +544,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_zenbook_duo_kbd, }, + { + .callback = dmi_matched, + .ident = "ASUS Zenbook Duo UX8407AA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUS"), + DMI_MATCH(DMI_PRODUCT_NAME, "Zenbook Duo UX8407AA"), + }, + .driver_data = &quirk_asus_zenbook_duo_kbd, + }, { .callback = dmi_matched, .ident = "ASUS ROG Z13", -- cgit v1.2.3 From ea34567db0a6b3a7ce78ba421592344315c8f90e Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Thu, 7 May 2026 16:27:08 +0200 Subject: s390/cio: Restore GFP_DMA for CHSC allocation Re-add GFP_DMA when allocating memory for CHSC control blocks. On some supported machines, CHSC cannot access memory outside the DMA zone, causing CHSC command failures. Cc: stable@vger.kernel.org Fixes: a3a64a4def8d ("s390/cio: remove unneeded DMA zone allocation") Signed-off-by: Peter Oberparleiter Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- drivers/s390/cio/chsc.c | 4 ++-- drivers/s390/cio/chsc_sch.c | 20 ++++++++++---------- drivers/s390/cio/scm.c | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index fbb58edd6274..9689f722c863 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -1142,8 +1142,8 @@ int __init chsc_init(void) { int ret; - sei_page = (void *)get_zeroed_page(GFP_KERNEL); - chsc_page = (void *)get_zeroed_page(GFP_KERNEL); + sei_page = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); + chsc_page = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!sei_page || !chsc_page) { ret = -ENOMEM; goto out_err; diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index 73413417a2ce..b6cb8bb8bcc4 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -292,7 +292,7 @@ static int chsc_ioctl_start(void __user *user_area) if (!css_general_characteristics.dynio) /* It makes no sense to try. */ return -EOPNOTSUPP; - chsc_area = (void *)get_zeroed_page(GFP_KERNEL); + chsc_area = (void *)get_zeroed_page(GFP_DMA | GFP_KERNEL); if (!chsc_area) return -ENOMEM; request = kzalloc_obj(*request); @@ -340,7 +340,7 @@ static int chsc_ioctl_on_close_set(void __user *user_area) ret = -ENOMEM; goto out_unlock; } - on_close_chsc_area = (void *)get_zeroed_page(GFP_KERNEL); + on_close_chsc_area = (void *)get_zeroed_page(GFP_DMA | GFP_KERNEL); if (!on_close_chsc_area) { ret = -ENOMEM; goto out_free_request; @@ -392,7 +392,7 @@ static int chsc_ioctl_start_sync(void __user *user_area) struct chsc_sync_area *chsc_area; int ret, ccode; - chsc_area = (void *)get_zeroed_page(GFP_KERNEL); + chsc_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!chsc_area) return -ENOMEM; if (copy_from_user(chsc_area, user_area, PAGE_SIZE)) { @@ -438,7 +438,7 @@ static int chsc_ioctl_info_channel_path(void __user *user_cd) u8 data[PAGE_SIZE - 20]; } __attribute__ ((packed)) *scpcd_area; - scpcd_area = (void *)get_zeroed_page(GFP_KERNEL); + scpcd_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!scpcd_area) return -ENOMEM; cd = kzalloc_obj(*cd); @@ -500,7 +500,7 @@ static int chsc_ioctl_info_cu(void __user *user_cd) u8 data[PAGE_SIZE - 20]; } __attribute__ ((packed)) *scucd_area; - scucd_area = (void *)get_zeroed_page(GFP_KERNEL); + scucd_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!scucd_area) return -ENOMEM; cd = kzalloc_obj(*cd); @@ -563,7 +563,7 @@ static int chsc_ioctl_info_sch_cu(void __user *user_cud) u8 data[PAGE_SIZE - 20]; } __attribute__ ((packed)) *sscud_area; - sscud_area = (void *)get_zeroed_page(GFP_KERNEL); + sscud_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!sscud_area) return -ENOMEM; cud = kzalloc_obj(*cud); @@ -625,7 +625,7 @@ static int chsc_ioctl_conf_info(void __user *user_ci) u8 data[PAGE_SIZE - 20]; } __attribute__ ((packed)) *sci_area; - sci_area = (void *)get_zeroed_page(GFP_KERNEL); + sci_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!sci_area) return -ENOMEM; ci = kzalloc_obj(*ci); @@ -696,7 +696,7 @@ static int chsc_ioctl_conf_comp_list(void __user *user_ccl) u32 res; } __attribute__ ((packed)) *cssids_parm; - sccl_area = (void *)get_zeroed_page(GFP_KERNEL); + sccl_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!sccl_area) return -ENOMEM; ccl = kzalloc_obj(*ccl); @@ -756,7 +756,7 @@ static int chsc_ioctl_chpd(void __user *user_chpd) int ret; chpd = kzalloc_obj(*chpd); - scpd_area = (void *)get_zeroed_page(GFP_KERNEL); + scpd_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!scpd_area || !chpd) { ret = -ENOMEM; goto out_free; @@ -796,7 +796,7 @@ static int chsc_ioctl_dcal(void __user *user_dcal) u8 data[PAGE_SIZE - 36]; } __attribute__ ((packed)) *sdcal_area; - sdcal_area = (void *)get_zeroed_page(GFP_KERNEL); + sdcal_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!sdcal_area) return -ENOMEM; dcal = kzalloc_obj(*dcal); diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c index d13ed1011c03..171212a6d2d9 100644 --- a/drivers/s390/cio/scm.c +++ b/drivers/s390/cio/scm.c @@ -229,7 +229,7 @@ int scm_update_information(void) size_t num; int ret; - scm_info = (void *)__get_free_page(GFP_KERNEL); + scm_info = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); if (!scm_info) return -ENOMEM; -- cgit v1.2.3 From 91840be8f710370607f949a627e070896faeddb8 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Mon, 30 Mar 2026 15:32:29 +0800 Subject: irq_work: Fix use-after-free in irq_work_single() on PREEMPT_RT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On PREEMPT_RT, non-HARD irq_work runs in per-CPU kthreads via run_irq_workd(), so irq_work_sync() uses rcuwait() to wait for BUSY==0. After irq_work_single() clears BUSY via atomic_cmpxchg(), it still dereferences @work for irq_work_is_hard() and rcuwait_wake_up(). An irq_work_sync() caller on another CPU that enters after BUSY is cleared can observe BUSY==0 immediately, return, and free the work before those accesses complete — causing a use-after-free. Fix this by wrapping run_irq_workd() in guard(rcu)() so that the entire irq_work_single() execution is within an RCU read-side critical section. Then add synchronize_rcu() in irq_work_sync() after rcuwait_wait_event() to ensure the caller waits for the RCU grace period before returning, preventing premature frees. Fixes: 810979682ccc ("irq_work: Allow irq_work_sync() to sleep if irq_work() no IRQ support.") Suggested-by: Sebastian Andrzej Siewior Suggested-by: Steven Rostedt Signed-off-by: Jiayuan Chen Signed-off-by: Thomas Gleixner Reviewed-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20260330073234.303732-1-jiayuan.chen@linux.dev --- kernel/irq_work.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 120fd7365fbe..f7e2dc2c30c6 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -292,6 +292,12 @@ void irq_work_sync(struct irq_work *work) !arch_irq_work_has_interrupt()) { rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work), TASK_UNINTERRUPTIBLE); + /* + * Ensure irq_work_single() does not access @work + * after removing IRQ_WORK_BUSY. It is always + * accessed within a RCU-read section. + */ + synchronize_rcu(); return; } @@ -302,6 +308,7 @@ EXPORT_SYMBOL_GPL(irq_work_sync); static void run_irq_workd(unsigned int cpu) { + guard(rcu)(); irq_work_run_list(this_cpu_ptr(&lazy_list)); } -- cgit v1.2.3 From 2beaa98b46c4cc90ed8a674f27a586d7f547bbe5 Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Mon, 11 May 2026 02:11:14 +0900 Subject: ntfs: restore $MFT mirror contents check check_mft_mirror() still computes the number of bytes to validate in each mirrored MFT record, but the actual comparison against $MFTMirr was dropped when the superblock code was updated. As a result, mount misses a stale or inconsistent $MFTMirr as long as both records pass the structural baad-record checks. Restore the comparison and log an error when the primary $MFT record differs from its mirror copy. Returning false lets the existing mount error handling mark the volume as having NTFS errors and, with on_errors=remount-ro, continue read-only. The default on_errors=continue mount policy still allows the mount to proceed. Fixes: 6251f0b0de7d ("ntfs: update super block operations") Signed-off-by: DaeMyung Kang Signed-off-by: Namjae Jeon --- fs/ntfs/super.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index d282cf6e712e..9e321cc2febe 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -993,6 +993,13 @@ mft_unmap_out: ntfs_is_baad_recordp((__le32 *)kmirr)) bytes = vol->mft_record_size; } + /* Compare the two records. */ + if (memcmp(kmft, kmirr, bytes)) { + ntfs_error(sb, + "$MFT and $MFTMirr record %i do not match. Run chkdsk.", + i); + goto mm_unmap_out; + } kmft += vol->mft_record_size; kmirr += vol->mft_record_size; } while (++i < vol->mftmirr_size); -- cgit v1.2.3 From d15cd40cb1858f75846eaafa9a6bca841b790a92 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 10 Apr 2026 18:19:31 +0100 Subject: serial: zs: Fix swapped RI/DSR modem line transition counting Fix a thinko in the status interrupt handler that has caused counters for the RI and DSR modem line transitions to be used for the other line each. Fixes: 8b4a40809e53 ("zs: move to the serial subsystem") Cc: stable Signed-off-by: Maciej W. Rozycki Link: https://patch.msgid.link/alpine.DEB.2.21.2604101747110.29980@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/zs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/zs.c b/drivers/tty/serial/zs.c index 72a3c0d90f40..31e12645f66b 100644 --- a/drivers/tty/serial/zs.c +++ b/drivers/tty/serial/zs.c @@ -680,9 +680,9 @@ static void zs_status_handle(struct zs_port *zport, struct zs_port *zport_a) uart_handle_dcd_change(uport, zport->mctrl & TIOCM_CAR); if (delta & TIOCM_RNG) - uport->icount.dsr++; - if (delta & TIOCM_DSR) uport->icount.rng++; + if (delta & TIOCM_DSR) + uport->icount.dsr++; if (delta) wake_up_interruptible(&uport->state->port.delta_msr_wait); -- cgit v1.2.3 From 6fe472c1bbbe238e91141f7cabc1226e96a60d43 Mon Sep 17 00:00:00 2001 From: Zhaoyang Yu <2426767509@qq.com> Date: Thu, 9 Apr 2026 13:41:58 +0800 Subject: tty: serial: pch_uart: add check for dma_alloc_coherent() Add a check for dma_alloc_coherent() failure to prevent a potential NULL pointer dereference in dma_handle_rx(). Properly release DMA channels and the PCI device reference using a goto ladder if the allocation fails. Fixes: 3c6a483275f4 ("Serial: EG20T: add PCH_UART driver") Cc: stable Signed-off-by: Zhaoyang Yu <2426767509@qq.com> Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/tencent_E328416B7CFD436F6029F2DF02AD7ED89C08@qq.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/pch_uart.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index 6729d8e83c3c..ba1fcd663fe2 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -689,8 +689,7 @@ static void pch_request_dma(struct uart_port *port) if (!chan) { dev_err(priv->port.dev, "%s:dma_request_channel FAILS(Tx)\n", __func__); - pci_dev_put(dma_dev); - return; + goto err_pci_get; } priv->chan_tx = chan; @@ -704,18 +703,26 @@ static void pch_request_dma(struct uart_port *port) if (!chan) { dev_err(priv->port.dev, "%s:dma_request_channel FAILS(Rx)\n", __func__); - dma_release_channel(priv->chan_tx); - priv->chan_tx = NULL; - pci_dev_put(dma_dev); - return; + goto err_req_tx; } /* Get Consistent memory for DMA */ priv->rx_buf_virt = dma_alloc_coherent(port->dev, port->fifosize, &priv->rx_buf_dma, GFP_KERNEL); + if (!priv->rx_buf_virt) + goto err_req_rx; priv->chan_rx = chan; pci_dev_put(dma_dev); + return; + +err_req_rx: + dma_release_channel(chan); +err_req_tx: + dma_release_channel(priv->chan_tx); + priv->chan_tx = NULL; +err_pci_get: + pci_dev_put(dma_dev); } static void pch_dma_rx_complete(void *arg) -- cgit v1.2.3 From 92b1ea22454b08a39baef3a7290fb3ec50366616 Mon Sep 17 00:00:00 2001 From: Hongling Zeng Date: Tue, 21 Apr 2026 14:57:37 +0800 Subject: serial: sh-sci: fix memory region release in error path The sci_request_port() function uses request_mem_region() to reserve I/O memory, but in the error path when sci_remap_port() fails, it incorrectly calls release_resource() instead of release_mem_region(). This mismatch can cause resource accounting issues. Fix it by using the correct release function, consistent with sci_release_port(). Fixes: e2651647080930a1 ("serial: sh-sci: Handle port memory region reservations.") Cc: stable Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202604032356.SzEjYkBC-lkp@intel.com/ Signed-off-by: Hongling Zeng Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260421065737.724187-1-zenghongling@kylinos.cn Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 6c819b6b2425..54db019a5bfc 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -3025,7 +3025,7 @@ int sci_request_port(struct uart_port *port) ret = sci_remap_port(port); if (unlikely(ret != 0)) { - release_resource(res); + release_mem_region(port->mapbase, sport->reg_size); return ret; } -- cgit v1.2.3 From ca2584d841b69391ffc4144840563d2e1a0018df Mon Sep 17 00:00:00 2001 From: Prasanna S Date: Tue, 28 Apr 2026 09:56:13 +0530 Subject: serial: qcom-geni: fix UART_RX_PAR_EN bit position UART_RX_PAR_EN is incorrectly defined as bit 3, which triggers false framing errors (S_GP_IRQ_1_EN) and causes received data to be dropped when parity is enabled and the parity bit is 0. Define UART_RX_PAR_EN as bit 4 of the SE_UART_RX_TRANS_CFG register, as specified in the reference manual. Fixes: c4f528795d1a ("tty: serial: msm_geni_serial: Add serial driver support for GENI based QUP") Cc: stable Signed-off-by: Prasanna S Reviewed-by: Konrad Dybcio Link: https://patch.msgid.link/20260428-serial-bit-correct-v1-1-9131ad5b97d8@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index b365dd5da3cb..5139a9d21b2b 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -50,7 +50,7 @@ #define TX_STOP_BIT_LEN_2 2 /* SE_UART_RX_TRANS_CFG */ -#define UART_RX_PAR_EN BIT(3) +#define UART_RX_PAR_EN BIT(4) /* SE_UART_RX_WORD_LEN */ #define RX_WORD_LEN_MASK GENMASK(9, 0) -- cgit v1.2.3 From 1e5b50c78d10119be08bf8f7a11d8ea333dd113a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 6 May 2026 14:43:23 +0200 Subject: tty: add missing tty_driver include to tty_port.h Include the definition of struct tty_driver in tty_port.h to keep the header self-contained and avoid build breakage in case anyone includes it before tty_driver.h. Fixes: eb3b0d92c9c3 ("tty: tty_port: add workqueue to flip TTY buffer") Cc: Xin Zhao Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260506124323.186703-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index d2a7882c0b58..23cad403bb8f 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -6,10 +6,10 @@ #include #include #include +#include #include struct attribute_group; -struct tty_driver; struct tty_port; struct tty_struct; -- cgit v1.2.3 From ce28b772c3c28fb1c62a81533045ae90ed3b496c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 6 May 2026 06:05:14 -0700 Subject: nvme: make prp passthrough usage less scary The warning is a bit alarming, and it only prints for the very first non-sgl capable device that receives a passthrough command. Just log an informational message on initial discovery for every device. Reviewed-by: Jens Axboe Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 4 ++++ drivers/nvme/host/ioctl.c | 13 ++----------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dc388e24caad..1e7e42b43aa3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3749,6 +3749,10 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended) ret = nvme_hwmon_init(ctrl); if (ret == -EINTR) return ret; + + if (!nvme_ctrl_sgl_supported(ctrl)) + dev_info(ctrl->device, + "passthrough uses implicit buffer lengths\n"); } clear_bit(NVME_CTRL_DIRTY_CAPABILITY, &ctrl->flags); diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 9597a87cf05d..e232188ccd02 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -120,21 +120,12 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, struct nvme_ns *ns = q->queuedata; struct block_device *bdev = ns ? ns->disk->part0 : NULL; bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk); - struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; bool has_metadata = meta_buffer && meta_len; struct bio *bio = NULL; int ret; - if (!nvme_ctrl_sgl_supported(ctrl)) - dev_warn_once(ctrl->device, "using unchecked data buffer\n"); - if (has_metadata) { - if (!supports_metadata) - return -EINVAL; - - if (!nvme_ctrl_meta_sgl_supported(ctrl)) - dev_warn_once(ctrl->device, - "using unchecked metadata buffer\n"); - } + if (has_metadata && !supports_metadata) + return -EINVAL; if (iter) ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL); -- cgit v1.2.3 From 2279cd9c61a330e5de4d6eb0bc422820dd6fdf36 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 6 May 2026 06:16:02 -0700 Subject: nvme: fix bio leak on mapping failure The local bio is always NULL, so we'd leak the bio if the integrity mapping failed. Just get it directly from the request. Fixes: d0d1d522316e91f ("blk-map: provide the bdev to bio if one exists") Reviewed-by: Sagi Grimberg Reviewed-by: John Garry Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index e232188ccd02..08889b20e5d8 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -121,7 +121,6 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, struct block_device *bdev = ns ? ns->disk->part0 : NULL; bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk); bool has_metadata = meta_buffer && meta_len; - struct bio *bio = NULL; int ret; if (has_metadata && !supports_metadata) @@ -145,8 +144,8 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, return ret; out_unmap: - if (bio) - blk_rq_unmap_user(bio); + if (req->bio) + blk_rq_unmap_user(req->bio); return ret; } -- cgit v1.2.3 From a891962ae5e48fb5476c23631df759482e62ab16 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 30 Apr 2026 15:22:32 +0200 Subject: nvmet-auth: Do not print DH-HMAC-CHAP secrets From a security standpoint we should not allow to print out the DH-HMAC-CHAP secrets, but at the same time having them is useful for debugging authentication failures. So add a Kconfig option NVME_TARGET_AUTH_DEBUG to only enable debugging if explictly requested at build time. Reviewed-by: Sagi Grimberg Signed-off-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/target/Kconfig | 9 +++++++++ drivers/nvme/target/auth.c | 13 ++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 4904097dfd49..69bde270115e 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -117,6 +117,15 @@ config NVME_TARGET_AUTH If unsure, say N. +config NVME_TARGET_AUTH_DEBUG + bool "NVMe over Fabrics In-band Authentication debug messages" + depends on NVME_TARGET_AUTH + help + This enables additional debug messages including the generated + DH-HMAC-CHAP secrets to help debugging authentication failures. + + If unsure, say N. + config NVME_TARGET_PCI_EPF tristate "NVMe PCI Endpoint Function target support" depends on NVME_TARGET && PCI_ENDPOINT diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index 9a2eccdc8b13..edb9627d97b0 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -144,7 +144,6 @@ u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, bool reset) goto out_unlock; list_for_each_entry(p, &ctrl->subsys->hosts, entry) { - pr_debug("check %s\n", nvmet_host_name(p->host)); if (strcmp(nvmet_host_name(p->host), ctrl->hostnqn)) continue; host = p->host; @@ -189,11 +188,12 @@ u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, bool reset) ctrl->host_key = NULL; goto out_free_hash; } +#ifdef CONFIG_NVME_TARGET_AUTH_DEBUG pr_debug("%s: using hash %s key %*ph\n", __func__, ctrl->host_key->hash > 0 ? nvme_auth_hmac_name(ctrl->host_key->hash) : "none", (int)ctrl->host_key->len, ctrl->host_key->key); - +#endif nvme_auth_free_key(ctrl->ctrl_key); if (!host->dhchap_ctrl_secret) { ctrl->ctrl_key = NULL; @@ -207,11 +207,12 @@ u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, bool reset) ctrl->ctrl_key = NULL; goto out_free_hash; } +#ifdef CONFIG_NVME_TARGET_AUTH_DEBUG pr_debug("%s: using ctrl hash %s key %*ph\n", __func__, ctrl->ctrl_key->hash > 0 ? nvme_auth_hmac_name(ctrl->ctrl_key->hash) : "none", (int)ctrl->ctrl_key->len, ctrl->ctrl_key->key); - +#endif out_free_hash: if (ret) { if (ctrl->host_key) { @@ -317,7 +318,6 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, if (ret) goto out_free_challenge; } - pr_debug("ctrl %d qid %d host response seq %u transaction %d\n", ctrl->cntlid, req->sq->qid, req->sq->dhchap_s1, req->sq->dhchap_tid); @@ -434,8 +434,10 @@ int nvmet_auth_ctrl_exponential(struct nvmet_req *req, ret = -EINVAL; } else { memcpy(buf, ctrl->dh_key, buf_size); +#ifdef CONFIG_NVME_TARGET_AUTH_DEBUG pr_debug("%s: ctrl %d public key %*ph\n", __func__, ctrl->cntlid, (int)buf_size, buf); +#endif } return ret; @@ -458,11 +460,12 @@ int nvmet_auth_ctrl_sesskey(struct nvmet_req *req, ctrl->shash_id); if (ret) pr_debug("failed to compute session key, err %d\n", ret); +#ifdef CONFIG_NVME_TARGET_AUTH_DEBUG else pr_debug("%s: session key %*ph\n", __func__, (int)req->sq->dhchap_skey_len, req->sq->dhchap_skey); - +#endif return ret; } -- cgit v1.2.3 From b35a13036755c5803168a7cb93bc66035c3e65b8 Mon Sep 17 00:00:00 2001 From: "Chia-Lin Kao (AceLan)" Date: Wed, 29 Apr 2026 16:11:16 +0800 Subject: nvme-pci: fix use-after-free in nvme_free_host_mem() nvme_free_host_mem() frees dev->hmb_sgt via dma_free_noncontiguous() but never clears the pointer afterward. This leads to a use-after-free if nvme_free_host_mem() is called twice in the same error path. This can happen during nvme_probe() when nvme_setup_host_mem() succeeds in allocating the HMB (setting dev->hmb_sgt) but nvme_set_host_mem() fails with an I/O error: nvme_setup_host_mem() nvme_alloc_host_mem_single() -> sets dev->hmb_sgt nvme_set_host_mem() -> fails with -EIO nvme_free_host_mem() -> frees hmb_sgt, but does NOT NULL it return error nvme_probe() error path: nvme_free_host_mem() -> dev->hmb_sgt is stale, use-after-free The second call dereferences the freed sgt, causing a NULL pointer dereference in iommu_dma_free_noncontiguous() when it accesses sgt->sgl->dma_address (the backing memory has been freed and zeroed). This is reproducible on Thunderbolt-attached NVMe devices (e.g., OWC Envoy Express behind a Dell WD22TB4 dock) where the device intermittently returns I/O errors during HMB setup due to PCIe link instability. BUG: kernel NULL pointer dereference, address: 0000000000000010 RIP: 0010:iommu_dma_free_noncontiguous+0x22/0x80 Call Trace: dma_free_noncontiguous+0x3b/0x130 nvme_free_host_mem+0x30/0xf0 [nvme] nvme_probe.cold+0xcc/0x275 [nvme] local_pci_probe+0x43/0xa0 pci_device_probe+0xeea/0x290 really_probe+0xf9/0x3b0 __driver_probe_device+0x8b/0x170 driver_probe_device+0x24/0xd0 __driver_attach_async_helper+0x6b/0x110 async_run_entry_fn+0x37/0x170 process_one_work+0x1ac/0x3d0 worker_thread+0x1b8/0x360 kthread+0xf7/0x130 ret_from_fork+0x2d8/0x3a0 ret_from_fork_asm+0x1a/0x30 Fix this by setting dev->hmb_sgt to NULL after freeing it, so the second call takes the multi-descriptor path which safely handles the already-cleaned-up state. Fixes: 63a5c7a4b4c4 ("nvme-pci: use dma_alloc_noncontigous if possible") Signed-off-by: Chia-Lin Kao (AceLan) Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9fd04cd7c5cb..94c423bed947 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2533,11 +2533,13 @@ static void nvme_free_host_mem_multi(struct nvme_dev *dev) static void nvme_free_host_mem(struct nvme_dev *dev) { - if (dev->hmb_sgt) + if (dev->hmb_sgt) { dma_free_noncontiguous(dev->dev, dev->host_mem_size, dev->hmb_sgt, DMA_BIDIRECTIONAL); - else + dev->hmb_sgt = NULL; + } else { nvme_free_host_mem_multi(dev); + } dma_free_coherent(dev->dev, dev->host_mem_descs_size, dev->host_mem_descs, dev->host_mem_descs_dma); -- cgit v1.2.3 From dbbd07d0a7020b80f6a7028e561908f7b83b3d5a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 10 May 2026 23:30:29 +0300 Subject: nvmet-tcp: Fix potential UAF when ddgst mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shivam Kumar found via vulnerability testing: When data digest is enabled on an NVMe/TCP connection and a digest mismatch occurs on a non-final H2C_DATA PDU during an R2T-based data transfer, the digest error handler in nvmet_tcp_try_recv_ddgst() calls nvmet_req_uninit() — which performs percpu_ref_put() on the submission queue — but does NOT mark the command as completed. It does not set cqe->status, does not modify rbytes_done, and does not clear any flag. When the subsequent fatal error triggers queue teardown, nvmet_tcp_uninit_data_in_cmds() iterates all commands, checks nvmet_tcp_need_data_in() for each one, and finds that the already-uninited command still appears to need data (because rbytes_done < transfer_len and cqe->status == 0). It therefore calls nvmet_req_uninit() a second time on the same command — a double percpu_ref_put against a single percpu_ref_get. Reported-by: Shivam Kumar Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 164a564ba3b4..20f150d17a96 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1321,8 +1321,10 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) queue->idx, cmd->req.cmd->common.command_id, queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst), le32_to_cpu(cmd->exp_ddgst)); - if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED)) + if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED)) { + cmd->req.cqe->status = NVME_SC_CMD_SEQ_ERROR; nvmet_req_uninit(&cmd->req); + } nvmet_tcp_free_cmd_buffers(cmd); ret = -EPROTO; goto out; -- cgit v1.2.3 From 40df2172853ffc0f84cca3906f5c4d9a6131195d Mon Sep 17 00:00:00 2001 From: AlanCui4080 Date: Fri, 8 May 2026 17:59:12 +0800 Subject: Revert "nvme: add quirk NVME_QUIRK_IGNORE_DEV_SUBNQN for 144d:a808" This reverts commit 7f991e3f9b8f044640bcb5fa8570350a68932843 ("nvme: add quirk NVME_QUIRK_IGNORE_DEV_SUBNQN for 144d:a808") The incorrect implementations of SUBNQN is a known issue in a massive number of NVMe units. However, the warning "nvme nvmex: missing or invalid SUBNQN field." is usually appropriate and will not affect performance or behavior etc. That is because the support for SUBNQN is mandatory if the controller supports NVMe revision 1.2.1 or greater, and it reported itself without a SUBNQN field which breaks compliance with the specification. It should be not quirked by the Linux Kernel. Signed-off-by: Alan Cui Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 94c423bed947..139a10cd687f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -4109,8 +4109,6 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c5f, 0x0555), /* Memblaze Pblaze5 adapter */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, }, - { PCI_DEVICE(0x144d, 0xa808), /* Samsung PM981/983 */ - .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ -- cgit v1.2.3 From 9a9254c4a2a3ca2b3da16d173f3b0dd01f397ff6 Mon Sep 17 00:00:00 2001 From: Shitalkumar Gandhi Date: Mon, 20 Apr 2026 19:29:03 +0530 Subject: serial: fsl_lpuart: fix rx buffer and DMA map leaks in start_rx_dma lpuart_start_rx_dma() allocates sport->rx_ring.buf with kzalloc() and then maps a scatterlist via dma_map_sg(). On three subsequent error paths the function returns directly without releasing those resources: - when dma_map_sg() returns 0 (-EINVAL): ring->buf is leaked. - when dmaengine_slave_config() fails: ring->buf and the DMA mapping are leaked. - when dmaengine_prep_dma_cyclic() returns NULL: ring->buf and the DMA mapping are leaked. The sole cleanup path, lpuart_dma_rx_free(), is only reached when lpuart_dma_rx_use is set, and the caller lpuart_rx_dma_startup() clears that flag on failure of lpuart_start_rx_dma(). So these resources are permanently leaked on every failure in this function. Repeated port open/close or termios changes under error conditions will slowly consume memory and leave stale streaming DMA mappings behind. Fix it by introducing two error labels that unmap the scatterlist and free the ring buffer as appropriate. While here, replace the misleading -EFAULT (bad userspace pointer) returned when dmaengine_prep_dma_cyclic() fails with the more accurate -ENOMEM, matching how other dmaengine users in the tree treat this failure. No functional change on the success path. Fixes: 5887ad43ee02 ("tty: serial: fsl_lpuart: Use cyclic DMA for Rx") Cc: stable Signed-off-by: Shitalkumar Gandhi Reviewed-by: Frank Li Link: https://patch.msgid.link/20260420135903.2062024-1-shitalkumar.gandhi@cambiumnetworks.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 1bd7ec9c81ea..b7919c05f0fb 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1379,7 +1379,8 @@ static inline int lpuart_start_rx_dma(struct lpuart_port *sport) if (!nent) { dev_err(sport->port.dev, "DMA Rx mapping error\n"); - return -EINVAL; + ret = -EINVAL; + goto err_free_buf; } dma_rx_sconfig.src_addr = lpuart_dma_datareg_addr(sport); @@ -1391,7 +1392,7 @@ static inline int lpuart_start_rx_dma(struct lpuart_port *sport) if (ret < 0) { dev_err(sport->port.dev, "DMA Rx slave config failed, err = %d\n", ret); - return ret; + goto err_unmap_sg; } sport->dma_rx_desc = dmaengine_prep_dma_cyclic(chan, @@ -1402,7 +1403,8 @@ static inline int lpuart_start_rx_dma(struct lpuart_port *sport) DMA_PREP_INTERRUPT); if (!sport->dma_rx_desc) { dev_err(sport->port.dev, "Cannot prepare cyclic DMA\n"); - return -EFAULT; + ret = -ENOMEM; + goto err_unmap_sg; } sport->dma_rx_desc->callback = lpuart_dma_rx_complete; @@ -1426,6 +1428,13 @@ static inline int lpuart_start_rx_dma(struct lpuart_port *sport) } return 0; + +err_unmap_sg: + dma_unmap_sg(chan->device->dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE); +err_free_buf: + kfree(ring->buf); + ring->buf = NULL; + return ret; } static void lpuart_dma_rx_free(struct uart_port *port) -- cgit v1.2.3 From 452d6fa37ae9b021f4f6d397dbae077f7296f6f4 Mon Sep 17 00:00:00 2001 From: Viken Dadhaniya Date: Wed, 6 May 2026 10:15:21 +0530 Subject: serial: qcom_geni: fix kfifo underflow when flush precedes DMA completion IRQ When uart_flush_buffer() runs before the DMA completion IRQ is delivered, the following race can occur (all steps serialized by uart_port_lock): 1. DMA starts: tx_remaining = N, kfifo contains N bytes 2. DMA completes in hardware; IRQ is pending but not yet delivered 3. uart_flush_buffer() acquires the port lock and calls kfifo_reset(), making kfifo_len() = 0 while tx_remaining remains N 4. uart_flush_buffer() releases the port lock 5. DMA IRQ fires; handle_tx_dma() acquires the port lock and calls uart_xmit_advance(uport, tx_remaining) on an empty kfifo uart_xmit_advance() increments kfifo->out by tx_remaining. Since kfifo_reset() already set both in and out to 0, out wraps past in, causing kfifo_len() to return UART_XMIT_SIZE - tx_remaining. The next start_tx_dma() call then submits a DMA transfer of stale buffer data. Fix this by snapshotting kfifo_len() at the start of handle_tx_dma() and skipping uart_xmit_advance() when fifo_len < tx_remaining, which indicates the kfifo was reset by a preceding flush. Fixes: 2aaa43c70778 ("tty: serial: qcom-geni-serial: add support for serial engine DMA") Cc: stable Signed-off-by: Viken Dadhaniya Reviewed-by: Bartosz Golaszewski Link: https://patch.msgid.link/20260506-serial-dma-stale-tx-buf-v1-1-e3ccb360d719@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 5139a9d21b2b..17da115b1e78 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -1031,8 +1031,20 @@ static void qcom_geni_serial_handle_tx_dma(struct uart_port *uport) { struct qcom_geni_serial_port *port = to_dev_port(uport); struct tty_port *tport = &uport->state->port; + unsigned int fifo_len = kfifo_len(&tport->xmit_fifo); + + /* + * Only advance the kfifo if it still contains the bytes that were + * transferred. uart_flush_buffer() may have run before this IRQ + * fired: it calls kfifo_reset() under the port lock, making + * fifo_len = 0 while tx_remaining remains non-zero. Calling + * uart_xmit_advance() in that case would underflow kfifo->out past + * kfifo->in, making kfifo_len() wrap to UART_XMIT_SIZE - tx_remaining + * and triggering a spurious large DMA transfer of stale data. + */ + if (fifo_len >= port->tx_remaining) + uart_xmit_advance(uport, port->tx_remaining); - uart_xmit_advance(uport, port->tx_remaining); geni_se_tx_dma_unprep(&port->se, port->tx_dma_addr, port->tx_remaining); port->tx_dma_addr = 0; port->tx_remaining = 0; -- cgit v1.2.3 From 4314a44564eb1565349fed7a4192344c5f46fc85 Mon Sep 17 00:00:00 2001 From: Yazhou Tang Date: Wed, 6 May 2026 17:47:12 +0800 Subject: bpf: Fix out-of-bounds read in bpf_patch_call_args() The interpreters_args array only accommodates stack depths up to MAX_BPF_STACK (512 bytes). However, do_misc_fixups() may allow a larger stack depth if JIT is requested. If JIT compilation later fails and falls back to the interpreter, the verifier invokes bpf_patch_call_args() with this oversized stack depth. This causes a load-time out-of-bounds (OOB) read when calculating the interpreter function pointer index. Fix this by changing bpf_patch_call_args() to return an int and explicitly rejecting the JIT fallback (returning -EINVAL) if the stack depth exceeds MAX_BPF_STACK. Fixes: 1ea47e01ad6e ("bpf: add support for bpf_call to interpreter") Co-developed-by: Tianci Cao Signed-off-by: Tianci Cao Co-developed-by: Shenghao Yuan Signed-off-by: Shenghao Yuan Signed-off-by: Yazhou Tang Acked-by: Xu Kuohai Link: https://lore.kernel.org/r/20260506094714.419842-2-tangyazhou@zju.edu.cn Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- kernel/bpf/core.c | 6 +++++- kernel/bpf/fixups.c | 7 ++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 01e203964892..52b30e9ea431 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2917,7 +2917,7 @@ int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size, int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size); #ifndef CONFIG_BPF_JIT_ALWAYS_ON -void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); +int bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); #endif struct btf *bpf_get_btf_vmlinux(void); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8b018ff48875..63044ebe5721 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2394,13 +2394,17 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) #undef PROG_NAME_LIST #ifdef CONFIG_BPF_SYSCALL -void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth) +int bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth) { stack_depth = max_t(u32, stack_depth, 1); + /* Prevent out-of-bounds read to interpreters_args */ + if (stack_depth > MAX_BPF_STACK) + return -EINVAL; insn->off = (s16) insn->imm; insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] - __bpf_call_base_args; insn->code = BPF_JMP | BPF_CALL_ARGS; + return 0; } #endif #endif diff --git a/kernel/bpf/fixups.c b/kernel/bpf/fixups.c index fba9e8c00878..df8f48091321 100644 --- a/kernel/bpf/fixups.c +++ b/kernel/bpf/fixups.c @@ -1416,7 +1416,12 @@ int bpf_fixup_call_args(struct bpf_verifier_env *env) depth = get_callee_stack_depth(env, insn, i); if (depth < 0) return depth; - bpf_patch_call_args(insn, depth); + err = bpf_patch_call_args(insn, depth); + if (err) { + verbose(env, "stack depth %d exceeds interpreter stack depth limit\n", + depth); + return err; + } } err = 0; #endif -- cgit v1.2.3 From 58a8f3e2501dc14b8e00e883d6aaf0600a239da7 Mon Sep 17 00:00:00 2001 From: Yazhou Tang Date: Wed, 6 May 2026 17:47:13 +0800 Subject: bpf: Fix s16 truncation for large bpf-to-bpf call offsets Currently, the BPF instruction set allows bpf-to-bpf calls (or internal calls, pseudo calls) to use a 32-bit imm field to represent the relative jump offset. However, when JIT is disabled or falls back to the interpreter, the verifier invokes bpf_patch_call_args() to rewrite the call instruction. In this function, the 32-bit imm is downcast to s16 and stored in the off field. void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth) { stack_depth = max_t(u32, stack_depth, 1); insn->off = (s16) insn->imm; insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] - __bpf_call_base_args; insn->code = BPF_JMP | BPF_CALL_ARGS; } If the original imm exceeds the s16 range (i.e., a jump offset greater than 32767 instructions), this downcast silently truncates the offset, resulting in an incorrect call target. Fix this by: 1. In bpf_patch_call_args(), keeping the imm field unchanged and using the off field to store the index of the interpreter function. 2. In ___bpf_prog_run() for the JMP_CALL_ARGS case, retrieving the interpreter function pointer from the interpreters_args array using the off field as the index, and passing the original imm to calculate the last argument of the interpreter function. After these changes, the truncation issue is resolved, and __bpf_call_base_args is also no longer needed and can be removed, which makes the code cleaner. Performance: In ___bpf_prog_run() for the JMP_CALL_ARGS case, changing the retrieval of the interpreter function pointer from pointer addition to direct array indexing improves performance. The possible reason is that the latter has better instruction-level parallelism. See the v5 discussion [1] for more details. [1] https://lore.kernel.org/bpf/f120c3c4-6999-414a-b514-518bb64b4758@zju.edu.cn/ To avoid requiring bpftool changes, keep the new imm/off encoding internal and restore the legacy xlated dump layout in bpf_insn_prepare_dump(). For bpf-to-bpf call offsets that do not fit in s16, export off as 0 instead of a truncated and misleading value. Fixes: 1ea47e01ad6e ("bpf: add support for bpf_call to interpreter") Fixes: 7105e828c087 ("bpf: allow for correlation of maps and helpers in dump") Suggested-by: Xu Kuohai Suggested-by: Puranjay Mohan Co-developed-by: Tianci Cao Signed-off-by: Tianci Cao Co-developed-by: Shenghao Yuan Signed-off-by: Shenghao Yuan Signed-off-by: Yazhou Tang Link: https://lore.kernel.org/r/20260506094714.419842-3-tangyazhou@zju.edu.cn Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++++++ include/linux/filter.h | 3 --- kernel/bpf/core.c | 21 ++++++++++++++------- kernel/bpf/fixups.c | 6 +++--- kernel/bpf/syscall.c | 26 ++++++++++++++++++++++++++ 5 files changed, 49 insertions(+), 13 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 52b30e9ea431..cd191c5fdb0a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2918,6 +2918,12 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 ua #ifndef CONFIG_BPF_JIT_ALWAYS_ON int bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); +s32 bpf_call_args_imm(s16 idx); +#else +static inline s32 bpf_call_args_imm(s16 idx) +{ + return 0; +} #endif struct btf *bpf_get_btf_vmlinux(void); diff --git a/include/linux/filter.h b/include/linux/filter.h index 1ec6d5ba64cc..88a241aac36a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1151,9 +1151,6 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -#define __bpf_call_base_args \ - ((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \ - (void *)__bpf_call_base) struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 63044ebe5721..6aa2a8b24030 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1771,6 +1771,9 @@ static u32 abs_s32(s32 x) return x >= 0 ? (u32)x : -(u32)x; } +static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, + const struct bpf_insn *insn); + /** * ___bpf_prog_run - run eBPF program on a given context * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers @@ -2077,10 +2080,9 @@ select_insn: CONT; JMP_CALL_ARGS: - BPF_R0 = (__bpf_call_base_args + insn->imm)(BPF_R1, BPF_R2, - BPF_R3, BPF_R4, - BPF_R5, - insn + insn->off + 1); + BPF_R0 = interpreters_args[insn->off](BPF_R1, BPF_R2, BPF_R3, + BPF_R4, BPF_R5, + insn + insn->imm + 1); CONT; JMP_TAIL_CALL: { @@ -2400,12 +2402,17 @@ int bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth) /* Prevent out-of-bounds read to interpreters_args */ if (stack_depth > MAX_BPF_STACK) return -EINVAL; - insn->off = (s16) insn->imm; - insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] - - __bpf_call_base_args; + insn->off = (round_up(stack_depth, 32) / 32) - 1; insn->code = BPF_JMP | BPF_CALL_ARGS; return 0; } + +s32 bpf_call_args_imm(s16 idx) +{ + if (WARN_ON_ONCE(idx < 0 || idx >= ARRAY_SIZE(interpreters_args))) + return 0; + return BPF_CALL_IMM(interpreters_args[idx]); +} #endif #endif diff --git a/kernel/bpf/fixups.c b/kernel/bpf/fixups.c index df8f48091321..3692adf62558 100644 --- a/kernel/bpf/fixups.c +++ b/kernel/bpf/fixups.c @@ -1250,9 +1250,9 @@ static int jit_subprogs(struct bpf_verifier_env *env) } if (!bpf_pseudo_call(insn)) continue; - insn->off = env->insn_aux_data[i].call_imm; - subprog = bpf_find_subprog(env, i + insn->off + 1); - insn->imm = subprog; + insn->imm = env->insn_aux_data[i].call_imm; + subprog = bpf_find_subprog(env, i + insn->imm + 1); + insn->off = subprog; } prog->jited = 1; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a3c0214ca934..630d530782fe 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4919,6 +4919,29 @@ out: return map; } +static void prepare_dump_pseudo_call(struct bpf_insn *insn) +{ + s32 call_off = insn->imm; + + /* + * BPF_CALL_ARGS only exists for interpreter fallback. + * 1. For interpreter (BPF_CALL_ARGS): insn->off is the index of + * interpreters_args array, so here using bpf_call_args_imm() + * to get the real address offset. + * 2. For JIT (BPF_CALL): insn->off is the subprog id. + */ + if (insn->code == (BPF_JMP | BPF_CALL_ARGS)) + insn->imm = bpf_call_args_imm(insn->off); + else + insn->imm = insn->off; + + /* Avoid dumping a truncated and misleading pc-relative offset. */ + if (call_off > S16_MAX || call_off < S16_MIN) + insn->off = 0; + else + insn->off = call_off; +} + static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, const struct cred *f_cred) { @@ -4944,6 +4967,9 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, } if (code == (BPF_JMP | BPF_CALL) || code == (BPF_JMP | BPF_CALL_ARGS)) { + /* Restore the legacy xlated dump layout. */ + if (insns[i].src_reg == BPF_PSEUDO_CALL) + prepare_dump_pseudo_call(&insns[i]); if (code == (BPF_JMP | BPF_CALL_ARGS)) insns[i].code = BPF_JMP | BPF_CALL; if (!bpf_dump_raw_ok(f_cred)) -- cgit v1.2.3 From 344a00712ce1bce8db72b0eadc1595dede31565a Mon Sep 17 00:00:00 2001 From: Yazhou Tang Date: Wed, 6 May 2026 17:47:14 +0800 Subject: selftests/bpf: Add test for large offset bpf-to-bpf call Add a selftest to verify the verifier and JIT behavior when handling bpf-to-bpf calls with relative jump offsets exceeding the s16 boundary. The test utilizes an inline assembly block with ".rept 32765" to generate a massive dummy subprogram. By placing this padding between the main program and the target subprogram, it forces the verifier to process a bpf-to-bpf call where the imm field exceeds the s16 range. - When JIT is enabled, it asserts that the program is successfully loaded and executes correctly to return the expected value. Since the fix does not change the JIT behavior, the test passes whether the fix is applied or not. - When JIT is disabled, it also asserts that the program is successfully loaded and executes correctly to return the expected value 3. - Before the fix, the verifier rewrites the call instruction with a truncated offset (here 32768 -> -32768) and lets it pass. When the program is executed, the call instruction will go to a wrong target (the landing pad) instead of the intended subprogram, then return -1 and fail. - After the fix, the verifier correctly handles the large offset and allows it to pass. The program then executes correctly to return the expected value 3. Co-developed-by: Tianci Cao Signed-off-by: Tianci Cao Co-developed-by: Shenghao Yuan Signed-off-by: Shenghao Yuan Signed-off-by: Yazhou Tang Acked-by: Xu Kuohai Link: https://lore.kernel.org/r/20260506094714.419842-4-tangyazhou@zju.edu.cn Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_call_large_imm.c | 66 ++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/verifier_call_large_imm.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index a96b25ebff23..06cd24e37b3f 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -22,6 +22,7 @@ #include "verifier_bswap.skel.h" #include "verifier_btf_ctx_access.skel.h" #include "verifier_btf_unreliable_prog.skel.h" +#include "verifier_call_large_imm.skel.h" #include "verifier_cfg.skel.h" #include "verifier_cgroup_inv_retcode.skel.h" #include "verifier_cgroup_skb.skel.h" @@ -170,6 +171,7 @@ void test_verifier_bpf_trap(void) { RUN(verifier_bpf_trap); } void test_verifier_bswap(void) { RUN(verifier_bswap); } void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); } void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); } +void test_verifier_call_large_imm(void) { RUN(verifier_call_large_imm); } void test_verifier_cfg(void) { RUN(verifier_cfg); } void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); } void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); } diff --git a/tools/testing/selftests/bpf/progs/verifier_call_large_imm.c b/tools/testing/selftests/bpf/progs/verifier_call_large_imm.c new file mode 100644 index 000000000000..7998df07f6a6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_call_large_imm.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "bpf_misc.h" + +int call_happened = 0; + +/* + * 32765 is the exact minimum number of padding instructions needed to + * trigger the verifier failure, because: + * 1. Counting the wrapper instructions around the padding block (one + * "r0=0" and two "exit" instructions), the actual jump distance + * evaluates to N + 3. + * 2. To overflow the s16 max bound (32767), we need N + 3 > 32767. + * Thus, N = 32765 is the exact minimum padding size required. + */ +static __attribute__((noinline)) void padding_subprog(void) +{ + asm volatile ( + "r0 = 0;" + ".rept 32765;" + "r0 += 0;" + ".endr;" + ::: __clobber_all); +} + +static __attribute__((noinline)) int target_subprog(void) +{ + /* Use volatile variable here to prevent optimization. */ + volatile int magic_ret = 3; + return magic_ret; +} + +SEC("syscall") +__success __retval(3) +int call_large_imm_test(void *ctx) +{ + /* + * Landing pad to handle call error on kernel without the fix, + * preventing kernel panic. + */ + asm volatile ( + "r0 = 0;" + ".rept 32768;" + "r0 += 0;" + ".endr;" + ::: __clobber_all); + + /* + * The call_happened variable is 1 only when the call insn wrongly + * go back to the landing pad above. + */ + if (call_happened == 1) { + /* Use volatile variable here to prevent optimization. */ + volatile int flag = -1; + return flag; + } + + call_happened = 1; + + padding_subprog(); + + return target_subprog(); +} + +char LICENSE[] SEC("license") = "GPL"; -- cgit v1.2.3 From c1fa0bb633e4a6b11e83ffc57fa5abe8ebb87891 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 11 May 2026 08:55:11 -0700 Subject: exit: prevent preemption of oopsing TASK_DEAD task When an already-exiting task oopses, make_task_dead() currently calls do_task_dead() with preemption enabled. That is forbidden: do_task_dead() calls __schedule(), which has a comment saying "WARNING: must be called with preemption disabled!". If an oopsing task is preempted in do_task_dead(), between becoming TASK_DEAD and entering the scheduler explicitly, bad things happen: finish_task_switch() assumes that once the scheduler has switched away from a TASK_DEAD task, the task can never run again and its stack is no longer needed; but that assumption apparently doesn't hold if the dead task was preempted (the SM_PREEMPT case). This means that the scheduler ends up repeatedly dropping references on the dead task's stack, which can lead to use-after-free or double-free of the entire task stack; in other words, two tasks can end up running on the same stack, resulting in various kinds of memory corruption. (This does not just affect "recursively oopsing" tasks; it is enough to oops once during task exit, for example in a file_operations::release handler) Fixes: 7f80a2fd7db9 ("exit: Stop poorly open coding do_task_dead in make_task_dead") Cc: stable@kernel.org Signed-off-by: Jann Horn Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- kernel/exit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/exit.c b/kernel/exit.c index 25e9cb6de7e7..9a909993ab1d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1073,6 +1073,7 @@ void __noreturn make_task_dead(int signr) futex_exit_recursive(tsk); tsk->exit_state = EXIT_DEAD; refcount_inc(&tsk->rcu_users); + preempt_disable(); do_task_dead(); } -- cgit v1.2.3 From e4865a56d013e86e46ea6acea15bb6eae01898ff Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 8 May 2026 20:04:33 +0200 Subject: ACPI: driver: Check ACPI_COMPANION() against NULL during probe Since every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), platform drivers that rely on the existence of a device's ACPI companion object should verify its presence. Accordingly, add requisite ACPI_COMPANION() or ACPI_HANDLE() checks against NULL to 13 platform drivers handling core ACPI devices. Also change the value returned by the ACPI thermal zone driver when the device's ACPI companion is not present to -ENODEV for consistency with the other drivers. Signed-off-by: Rafael J. Wysocki Reviewed-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/4516068.ejJDZkT8p0@rafael.j.wysocki Cc: 7.0+ # 7.0+ --- drivers/acpi/ac.c | 6 +++++- drivers/acpi/acpi_pad.c | 6 +++++- drivers/acpi/acpi_tad.c | 6 +++++- drivers/acpi/battery.c | 6 +++++- drivers/acpi/button.c | 9 +++++++-- drivers/acpi/ec.c | 6 +++++- drivers/acpi/hed.c | 6 +++++- drivers/acpi/nfit/core.c | 6 +++++- drivers/acpi/pfr_telemetry.c | 6 +++++- drivers/acpi/pfr_update.c | 6 +++++- drivers/acpi/sbs.c | 6 +++++- drivers/acpi/sbshc.c | 6 +++++- drivers/acpi/thermal.c | 2 +- drivers/acpi/tiny-power-button.c | 6 +++++- 14 files changed, 68 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index e9e970fd8f33..27f31744f29e 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -192,11 +192,15 @@ static const struct dmi_system_id ac_dmi_table[] __initconst = { static int acpi_ac_probe(struct platform_device *pdev) { - struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); struct power_supply_config psy_cfg = {}; + struct acpi_device *adev; struct acpi_ac *ac; int result; + adev = ACPI_COMPANION(&pdev->dev); + if (!adev) + return -ENODEV; + ac = kzalloc_obj(struct acpi_ac); if (!ac) return -ENOMEM; diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 0a8e02bc8c8b..ec94b09bb747 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -423,7 +423,11 @@ static void acpi_pad_notify(acpi_handle handle, u32 event, void *data) static int acpi_pad_probe(struct platform_device *pdev) { - struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); + struct acpi_device *adev; + + adev = ACPI_COMPANION(&pdev->dev); + if (!adev) + return -ENODEV; return acpi_dev_install_notify_handler(adev, ACPI_DEVICE_NOTIFY, acpi_pad_notify, adev); diff --git a/drivers/acpi/acpi_tad.c b/drivers/acpi/acpi_tad.c index cac07e997028..386fc1abcbdc 100644 --- a/drivers/acpi/acpi_tad.c +++ b/drivers/acpi/acpi_tad.c @@ -815,12 +815,16 @@ static void acpi_tad_remove(void *data) static int acpi_tad_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - acpi_handle handle = ACPI_HANDLE(dev); struct acpi_tad_driver_data *dd; + acpi_handle handle; acpi_status status; unsigned long long caps; int ret; + handle = ACPI_HANDLE(dev); + if (!handle) + return -ENODEV; + /* * Initialization failure messages are mostly about firmware issues, so * print them at the "info" level. diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index b4c25474f42f..d4ae21a71007 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -1214,10 +1214,14 @@ static void sysfs_battery_cleanup(struct acpi_battery *battery) static int acpi_battery_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); struct acpi_battery *battery; + struct acpi_device *device; int result; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + if (device->dep_unmet) return -EPROBE_DEFER; diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index dc064a388c23..b47301ee4c8a 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -531,15 +531,20 @@ static int acpi_lid_input_open(struct input_dev *input) static int acpi_button_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); acpi_notify_handler handler; + struct acpi_device *device; struct acpi_button *button; struct input_dev *input; - const char *hid = acpi_device_hid(device); acpi_status status; char *name, *class; + const char *hid; int error = 0; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + + hid = acpi_device_hid(device); if (!strcmp(hid, ACPI_BUTTON_HID_LID) && lid_init_state == ACPI_BUTTON_LID_INIT_DISABLED) return -ENODEV; diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 45204538ed87..64ad4cfa6208 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1676,10 +1676,14 @@ static int acpi_ec_setup(struct acpi_ec *ec, struct acpi_device *device, bool ca static int acpi_ec_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_device *device; struct acpi_ec *ec; int ret; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + if (boot_ec && (boot_ec->handle == device->handle || !strcmp(acpi_device_hid(device), ACPI_ECDT_HID))) { /* Fast path: this device corresponds to the boot EC. */ diff --git a/drivers/acpi/hed.c b/drivers/acpi/hed.c index 4d5e12ed6f3c..060e8d670f5d 100644 --- a/drivers/acpi/hed.c +++ b/drivers/acpi/hed.c @@ -50,9 +50,13 @@ static void acpi_hed_notify(acpi_handle handle, u32 event, void *data) static int acpi_hed_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_device *device; int err; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + /* Only one hardware error device */ if (hed_handle) return -EINVAL; diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index d13264fb9e02..9304ac996d41 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -3341,12 +3341,16 @@ static int acpi_nfit_probe(struct platform_device *pdev) struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_nfit_desc *acpi_desc; struct device *dev = &pdev->dev; - struct acpi_device *adev = ACPI_COMPANION(dev); struct acpi_table_header *tbl; + struct acpi_device *adev; acpi_status status = AE_OK; acpi_size sz; int rc = 0; + adev = ACPI_COMPANION(&pdev->dev); + if (!adev) + return -ENODEV; + rc = acpi_dev_install_notify_handler(adev, ACPI_DEVICE_NOTIFY, acpi_nfit_notify, dev); if (rc) diff --git a/drivers/acpi/pfr_telemetry.c b/drivers/acpi/pfr_telemetry.c index 32bdf8cbe8f2..2387376832a1 100644 --- a/drivers/acpi/pfr_telemetry.c +++ b/drivers/acpi/pfr_telemetry.c @@ -360,10 +360,14 @@ static void pfrt_log_put_idx(void *data) static int acpi_pfrt_log_probe(struct platform_device *pdev) { - acpi_handle handle = ACPI_HANDLE(&pdev->dev); struct pfrt_log_device *pfrt_log_dev; + acpi_handle handle; int ret; + handle = ACPI_HANDLE(&pdev->dev); + if (!handle) + return -ENODEV; + if (!acpi_has_method(handle, "_DSM")) { dev_dbg(&pdev->dev, "Missing _DSM\n"); return -ENODEV; diff --git a/drivers/acpi/pfr_update.c b/drivers/acpi/pfr_update.c index 11b1c2828005..6283105bb0e8 100644 --- a/drivers/acpi/pfr_update.c +++ b/drivers/acpi/pfr_update.c @@ -538,10 +538,14 @@ static void pfru_put_idx(void *data) static int acpi_pfru_probe(struct platform_device *pdev) { - acpi_handle handle = ACPI_HANDLE(&pdev->dev); struct pfru_device *pfru_dev; + acpi_handle handle; int ret; + handle = ACPI_HANDLE(&pdev->dev); + if (!handle) + return -ENODEV; + if (!acpi_has_method(handle, "_DSM")) { dev_dbg(&pdev->dev, "Missing _DSM\n"); return -ENODEV; diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index 440f1d69aca8..86b7c7975852 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -629,11 +629,15 @@ static void acpi_sbs_callback(void *context) static int acpi_sbs_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_device *device; struct acpi_sbs *sbs; int result = 0; int id; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + sbs = kzalloc_obj(struct acpi_sbs); if (!sbs) { result = -ENOMEM; diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index f413270415b6..c0ffa267f96c 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -237,11 +237,15 @@ static int smbus_alarm(void *context) static int acpi_smbus_hc_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_device *device; int status; unsigned long long val; struct acpi_smb_hc *hc; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + status = acpi_evaluate_integer(device->handle, "_EC", NULL, &val); if (ACPI_FAILURE(status)) { pr_err("error obtaining _EC.\n"); diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index b8b487d89d25..dfc7daa809b5 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -789,7 +789,7 @@ static int acpi_thermal_probe(struct platform_device *pdev) int i; if (!device) - return -EINVAL; + return -ENODEV; tz = kzalloc_obj(struct acpi_thermal); if (!tz) diff --git a/drivers/acpi/tiny-power-button.c b/drivers/acpi/tiny-power-button.c index 531e65b01bcb..92516ef84b02 100644 --- a/drivers/acpi/tiny-power-button.c +++ b/drivers/acpi/tiny-power-button.c @@ -38,9 +38,13 @@ static u32 acpi_tiny_power_button_event(void *not_used) static int acpi_tiny_power_button_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_device *device; acpi_status status; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) { status = acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, acpi_tiny_power_button_event, -- cgit v1.2.3 From 37953cec775ed34e59cf9a7d7bb9b0610daa3f3e Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 8 May 2026 15:33:29 +0200 Subject: nvme: fix race condition between connected uevent and STARTED_ONCE flag When a controller connects, nvme_start_ctrl() emits the "NVME_EVENT=connected" uevent and sets the NVME_CTRL_STARTED_ONCE flag. Currently, the uevent is emitted before the flag is set. This creates a race condition for userspace tools (like udev rules) that might rely on the "connected" event to configure other attributes. Swap the order of operations in nvme_start_ctrl() so that the NVME_CTRL_STARTED_ONCE flag is set before the uevent is sent. This guarantees that the admin_timeout can already be changed when userspace is notified. Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Daniel Wagner Signed-off-by: Maurizio Lombardi Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1e7e42b43aa3..c3032d6ad6b1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -5045,8 +5045,8 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) nvme_mpath_update(ctrl); } - nvme_change_uevent(ctrl, "NVME_EVENT=connected"); set_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags); + nvme_change_uevent(ctrl, "NVME_EVENT=connected"); } EXPORT_SYMBOL_GPL(nvme_start_ctrl); -- cgit v1.2.3 From 20c39819a27646573dfa0ac0d01c38895298a6f6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 11 May 2026 10:58:38 -0600 Subject: io_uring: hold uring_lock when walking link chain in io_wq_free_work() io_wq_free_work() calls io_req_find_next() from io-wq worker context, which reads and clears req->link without holding any lock. This can potentially race with other paths that mutate the same chain under ctx->uring_lock. Take ctx->uring_lock around the io_req_find_next() call. Only requests with IO_REQ_LINK_FLAGS reach this path, which is not the hot path. Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 4ed998d60c09..2ebb0ba37c4f 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1452,8 +1452,13 @@ struct io_wq_work *io_wq_free_work(struct io_wq_work *work) struct io_kiocb *nxt = NULL; if (req_ref_put_and_test_atomic(req)) { - if (req->flags & IO_REQ_LINK_FLAGS) + if (req->flags & IO_REQ_LINK_FLAGS) { + struct io_ring_ctx *ctx = req->ctx; + + mutex_lock(&ctx->uring_lock); nxt = io_req_find_next(req); + mutex_unlock(&ctx->uring_lock); + } io_free_req(req); } return nxt ? &nxt->work : NULL; -- cgit v1.2.3 From 49ae66eb8c27375075ffa308cfd4bf25af335d41 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 11 May 2026 10:58:50 -0600 Subject: io_uring: defer linked-timeout chain splice out of hrtimer context io_link_timeout_fn() is the hrtimer callback that fires when a linked timeout expires. It currently calls io_remove_next_linked(prev) under ctx->timeout_lock to splice the timeout request out of the link chain. This is the only chain-mutation site that runs without ctx->uring_lock, because hrtimer callbacks cannot take a mutex. Defer the splicing until the task_work callback. Signed-off-by: Jens Axboe --- io_uring/timeout.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/io_uring/timeout.c b/io_uring/timeout.c index e2595cae2b07..6353a4d979dc 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -284,6 +284,10 @@ static struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req, struct io_timeout *timeout = io_kiocb_to_cmd(link, struct io_timeout); io_remove_next_linked(req); + + /* If this is NULL, then timer already claimed it and will complete it */ + if (!timeout->head) + return NULL; timeout->head = NULL; if (hrtimer_try_to_cancel(&io->timer) != -1) { list_del(&timeout->list); @@ -367,6 +371,14 @@ static void io_req_task_link_timeout(struct io_tw_req tw_req, io_tw_token_t tw) int ret; if (prev) { + /* + * splice the linked timeout out of prev's chain if the regular + * completion path didn't already do it. + */ + if (prev->link == req) + prev->link = req->link; + req->link = NULL; + if (!tw.cancel) { struct io_cancel_data cd = { .ctx = req->ctx, @@ -401,10 +413,10 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) /* * We don't expect the list to be empty, that will only happen if we - * race with the completion of the linked work. + * race with the completion of the linked work. Splice of prev is + * done in io_req_task_link_timeout(), if needed. */ if (prev) { - io_remove_next_linked(prev); if (!req_ref_inc_not_zero(prev)) prev = NULL; } -- cgit v1.2.3 From a65855ec34aed84e1e5b4aea0323cc1745f83a5c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 11 May 2026 10:58:56 -0600 Subject: io_uring: hold uring_lock across io_kill_timeouts() in cancel path io_uring_try_cancel_requests() dropped ctx->uring_lock before calling io_kill_timeouts(), which walks each timeout's link chain via io_match_task() to test REQ_F_INFLIGHT. With chain mutation now serialized by ctx->uring_lock, that walk needs the lock too. Signed-off-by: Jens Axboe --- io_uring/cancel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 5e5eb9cfc7cd..4aa3103ba9c3 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -561,8 +561,8 @@ __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx, ret |= io_waitid_remove_all(ctx, tctx, cancel_all); ret |= io_futex_remove_all(ctx, tctx, cancel_all); ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all); - mutex_unlock(&ctx->uring_lock); ret |= io_kill_timeouts(ctx, tctx, cancel_all); + mutex_unlock(&ctx->uring_lock); if (tctx) ret |= io_run_task_work() > 0; else -- cgit v1.2.3 From 94f3b133168d1c49895e7cc6afbcf1cc0b354602 Mon Sep 17 00:00:00 2001 From: Luxiao Xu Date: Mon, 11 May 2026 18:52:09 +0200 Subject: batman-adv: fix tp_meter counter underflow during shutdown batadv_tp_sender_shutdown() unconditionally decrements the "sending" atomic counter. If multiple paths (e.g. timeout, user cancel, and normal finish) call this function, the counter can underflow to -1. Since the sender logic treats any non-zero value as "still sending", a negative value causes the sender kthread to loop indefinitely. This leads to a use-after-free when the interface is removed while the zombie thread is still active. Fix this by using atomic_xchg() to ensure the counter only transitions from 1 to 0 once. Fixes: 33a3bb4a3345 ("batman-adv: throughput meter implementation") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Luxiao Xu Signed-off-by: Ren Wei [sven: added missing change in batadv_tp_send] Signed-off-by: Sven Eckelmann --- net/batman-adv/tp_meter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 066c76113fc4..a4397aa881dd 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -451,7 +451,7 @@ static void batadv_tp_sender_end(struct batadv_priv *bat_priv, static void batadv_tp_sender_shutdown(struct batadv_tp_vars *tp_vars, enum batadv_tp_meter_reason reason) { - if (!atomic_dec_and_test(&tp_vars->sending)) + if (atomic_xchg(&tp_vars->sending, 0) != 1) return; tp_vars->reason = reason; @@ -885,7 +885,7 @@ static int batadv_tp_send(void *arg) "Meter: %s() cannot send packets (%d)\n", __func__, err); /* ensure nobody else tries to stop the thread now */ - if (atomic_dec_and_test(&tp_vars->sending)) + if (atomic_xchg(&tp_vars->sending, 0) == 1) tp_vars->reason = err; break; } -- cgit v1.2.3 From 77098e4bea37af51d3962efa88a5af2ea5e1ac57 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 10 May 2026 11:31:03 +0200 Subject: batman-adv: tp_meter: fix tp_vars reference leak in receiver shutdown The receiver shutdown timer handler, batadv_tp_receiver_shutdown(), is responsible for releasing the tp_vars reference it holds. However, the existing logic for coordinating this release with batadv_tp_stop_all() was flawed. timer_shutdown_sync() guarantees the timer will not fire again after it returns, but it returns non-zero only when the timer was pending at the time of the call. If the timer had already expired (and batadv_tp_stop_all() would unsucessfully try to rearm itself), batadv_tp_stop_all() skips its batadv_tp_vars_put(), and batadv_tp_receiver_shutdown() fails to put its own reference as well. Fix this by introducing a new atomic variable receiving that is set to 1 when the receiver is initialized and cleared atomically with atomic_xchg() by whichever side claims it first. Only the side that observes the transition from 1 to 0 is responsible for releasing the tp_vars timer reference, eliminating the uncertainty. Cc: stable@kernel.org Fixes: 3d3cf6a7314a ("batman-adv: stop tp_meter sessions during mesh teardown") Signed-off-by: Sven Eckelmann --- net/batman-adv/tp_meter.c | 13 +++++++++++-- net/batman-adv/types.h | 3 +++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index a4397aa881dd..ca6c3f6374bc 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -8,6 +8,7 @@ #include "main.h" #include +#include #include #include #include @@ -1156,6 +1157,9 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t) spin_unlock_bh(&tp_vars->unacked_lock); /* drop reference of timer */ + if (WARN_ON(atomic_xchg(&tp_vars->receiving, 0) != 1)) + return; + batadv_tp_vars_put(tp_vars); } @@ -1374,6 +1378,7 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv, ether_addr_copy(tp_vars->other_end, icmp->orig); tp_vars->role = BATADV_TP_RECEIVER; + atomic_set(&tp_vars->receiving, 1); memcpy(tp_vars->session, icmp->session, sizeof(tp_vars->session)); tp_vars->last_recv = BATADV_TP_FIRST_SEQ; tp_vars->bat_priv = bat_priv; @@ -1546,8 +1551,12 @@ void batadv_tp_stop_all(struct batadv_priv *bat_priv) break; case BATADV_TP_RECEIVER: batadv_tp_list_detach(tp_var); - if (timer_shutdown_sync(&tp_var->timer)) - batadv_tp_vars_put(tp_var); + timer_shutdown_sync(&tp_var->timer); + + if (atomic_xchg(&tp_var->receiving, 0) != 1) + break; + + batadv_tp_vars_put(tp_var); break; } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index daa06f421154..b9c0b7779122 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1323,6 +1323,9 @@ struct batadv_tp_vars { /** @sending: sending binary semaphore: 1 if sending, 0 is not */ atomic_t sending; + /** @receiving: receiving binary semaphore: 1 if receiving, 0 is not */ + atomic_t receiving; + /** @reason: reason for a stopped session */ enum batadv_tp_meter_reason reason; -- cgit v1.2.3 From 35d0ed82d03e5ee77ea4f31f20e29562a7721649 Mon Sep 17 00:00:00 2001 From: Raphael Zimmer Date: Tue, 5 May 2026 11:08:12 +0200 Subject: libceph: Fix potential out-of-bounds access in osdmap_decode() When decoding osd_state and osd_weight from an incoming osdmap in osdmap_decode(), both are decoded for each osd, i.e., map->max_osd times. The ceph_decode_need() check only accounts for sizeof(*map->osd_weight) once. This can potentially result in an out-of-bounds memory access if the incoming message is corrupted such that the max_osd value exceeds the actual content of the osdmap message. This patch fixes the issue by changing the corresponding part in the ceph_decode_need() check to account for map->max_osd*sizeof(*map->osd_weight). Cc: stable@vger.kernel.org Fixes: dcbc919a5dc8 ("libceph: switch osdmap decoding to use ceph_decode_entity_addr") Signed-off-by: Raphael Zimmer Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- net/ceph/osdmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 669348d883f0..2095e73ccf6c 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1705,7 +1705,7 @@ static int osdmap_decode(void **p, void *end, bool msgr2, ceph_decode_need(p, end, 3*sizeof(u32) + map->max_osd*(struct_v >= 5 ? sizeof(u32) : sizeof(u8)) + - sizeof(*map->osd_weight), e_inval); + map->max_osd*sizeof(*map->osd_weight), e_inval); if (ceph_decode_32(p) != map->max_osd) goto e_inval; -- cgit v1.2.3 From 63d2059cd665c4e5e4ca6cfbc58c6f171e7939b1 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Tue, 5 May 2026 12:09:02 -0400 Subject: pinctrl: imx1: Allow parsing DT without function nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old format to define pinctrl settings for imx in DT has two hierarchy levels. The first level are function device nodes. The second level are pingroups which contain a property fsl,pins. The original ntention was to define all pin functions in a single dtsi file and just reference the correct ones in the board files. The commit ("5fcdf6a7ed95e pinctrl: imx: Allow parsing DT without function nodes") already make moden i.MX chip support flatten layout. Make legacy chipes (more than 15 years) support this flatten layout also. Fixes: e948cbdc41d6f ("ARM: dts: imx: remove redundant intermediate node in pinmux hierarchy") Tested-by: Sébastien Szymanski Signed-off-by: Frank Li Signed-off-by: Linus Walleij --- drivers/pinctrl/freescale/pinctrl-imx1-core.c | 48 +++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/pinctrl/freescale/pinctrl-imx1-core.c b/drivers/pinctrl/freescale/pinctrl-imx1-core.c index b36c8a1461b7..b7bd4ef9c0db 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx1-core.c +++ b/drivers/pinctrl/freescale/pinctrl-imx1-core.c @@ -540,10 +540,34 @@ static int imx1_pinctrl_parse_functions(struct device_node *np, return 0; } +/* + * Check if the DT contains pins in the direct child nodes. This indicates the + * newer DT format to store pins. This function returns true if the first found + * fsl,pins property is in a child of np. Otherwise false is returned. + */ +static bool imx1_pinctrl_dt_is_flat_functions(struct device_node *np) +{ + struct device_node *function_np; + struct device_node *pinctrl_np; + + for_each_child_of_node(np, function_np) { + if (of_property_present(function_np, "fsl,pins")) + return true; + + for_each_child_of_node(function_np, pinctrl_np) { + if (of_property_present(pinctrl_np, "fsl,pins")) + return false; + } + } + + return true; +} + static int imx1_pinctrl_parse_dt(struct platform_device *pdev, struct imx1_pinctrl *pctl, struct imx1_pinctrl_soc_info *info) { struct device_node *np = pdev->dev.of_node; + bool flat_funcs; int ret; u32 nfuncs = 0; u32 ngroups = 0; @@ -552,9 +576,15 @@ static int imx1_pinctrl_parse_dt(struct platform_device *pdev, if (!np) return -ENODEV; - for_each_child_of_node_scoped(np, child) { - ++nfuncs; - ngroups += of_get_child_count(child); + flat_funcs = imx1_pinctrl_dt_is_flat_functions(np); + if (flat_funcs) { + nfuncs = 1; + ngroups = of_get_child_count(np); + } else { + for_each_child_of_node_scoped(np, child) { + ++nfuncs; + ngroups += of_get_child_count(child); + } } if (!nfuncs) { @@ -574,10 +604,14 @@ static int imx1_pinctrl_parse_dt(struct platform_device *pdev, if (!info->functions || !info->groups) return -ENOMEM; - for_each_child_of_node_scoped(np, child) { - ret = imx1_pinctrl_parse_functions(child, info, ifunc++); - if (ret == -ENOMEM) - return -ENOMEM; + if (flat_funcs) { + imx1_pinctrl_parse_functions(np, info, 0); + } else { + for_each_child_of_node_scoped(np, child) { + ret = imx1_pinctrl_parse_functions(child, info, ifunc++); + if (ret == -ENOMEM) + return -ENOMEM; + } } return 0; -- cgit v1.2.3 From 5dd74441cbf42c22e874450eb6a6bbb19390a216 Mon Sep 17 00:00:00 2001 From: Guopeng Zhang Date: Sat, 9 May 2026 18:20:31 +0800 Subject: cgroup/cpuset: Reserve DL bandwidth only for root-domain moves cpuset_can_attach() currently adds the bandwidth of all migrating SCHED_DEADLINE tasks to sum_migrate_dl_bw. If the source and destination cpuset effective CPU masks do not overlap, the whole sum is then reserved in the destination root domain. set_cpus_allowed_dl(), however, subtracts bandwidth from the source root domain only when the affinity change really moves the task between root domains. A DL task can move between cpusets that are still in the same root domain, so including that task in sum_migrate_dl_bw can reserve destination bandwidth without a matching source-side subtraction. Share the root-domain move test with set_cpus_allowed_dl(). Keep nr_migrate_dl_tasks counting all migrating deadline tasks for cpuset DL task accounting, but add to sum_migrate_dl_bw only for tasks that need a root-domain bandwidth move. Keep using the destination cpuset effective CPU mask and leave the broader can_attach()/attach() transaction model unchanged. Fixes: 2ef269ef1ac0 ("cgroup/cpuset: Free DL BW in case can_attach() fails") Cc: stable@vger.kernel.org # v6.10+ Signed-off-by: Guopeng Zhang Reviewed-by: Waiman Long Acked-by: Juri Lelli Tested-by: Juri Lelli Signed-off-by: Tejun Heo --- include/linux/sched/deadline.h | 9 +++++++++ kernel/cgroup/cpuset-internal.h | 1 + kernel/cgroup/cpuset.c | 33 ++++++++++++++++++--------------- kernel/sched/deadline.c | 13 ++++++++++--- 4 files changed, 38 insertions(+), 18 deletions(-) diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 1198138cb839..273538200a44 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -33,6 +33,15 @@ struct root_domain; extern void dl_add_task_root_domain(struct task_struct *p); extern void dl_clear_root_domain(struct root_domain *rd); extern void dl_clear_root_domain_cpu(int cpu); +/* + * Return whether moving DL task @p to @new_mask requires moving DL + * bandwidth accounting between root domains. This helper is specific to + * DL bandwidth move accounting semantics and is shared by + * cpuset_can_attach() and set_cpus_allowed_dl() so both paths use the + * same source root-domain test. + */ +extern bool dl_task_needs_bw_move(struct task_struct *p, + const struct cpumask *new_mask); extern u64 dl_cookie; extern bool dl_bw_visited(int cpu, u64 cookie); diff --git a/kernel/cgroup/cpuset-internal.h b/kernel/cgroup/cpuset-internal.h index bb4e692bea30..f7aaf01f7cd5 100644 --- a/kernel/cgroup/cpuset-internal.h +++ b/kernel/cgroup/cpuset-internal.h @@ -167,6 +167,7 @@ struct cpuset { */ int nr_deadline_tasks; int nr_migrate_dl_tasks; + /* DL bandwidth that needs destination reservation for this attach. */ u64 sum_migrate_dl_bw; /* * CPU used for temporary DL bandwidth allocation during attach; diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 3fbf6e7f68c3..e84e801e22cf 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2993,7 +2993,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) struct cpuset *cs, *oldcs; struct task_struct *task; bool setsched_check; - int ret; + int cpu, ret; /* used later by cpuset_attach() */ cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css)); @@ -3038,28 +3038,31 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) } if (dl_task(task)) { + /* + * Count all migrating DL tasks for cpuset task accounting. + * Only tasks that need a root-domain bandwidth move + * contribute to sum_migrate_dl_bw. + */ cs->nr_migrate_dl_tasks++; - cs->sum_migrate_dl_bw += task->dl.dl_bw; + if (dl_task_needs_bw_move(task, cs->effective_cpus)) + cs->sum_migrate_dl_bw += task->dl.dl_bw; } } - if (!cs->nr_migrate_dl_tasks) + if (!cs->sum_migrate_dl_bw) goto out_success; - if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) { - int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus); - - if (unlikely(cpu >= nr_cpu_ids)) { - ret = -EINVAL; - goto out_unlock; - } + cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus); + if (unlikely(cpu >= nr_cpu_ids)) { + ret = -EINVAL; + goto out_unlock; + } - ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw); - if (ret) - goto out_unlock; + ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw); + if (ret) + goto out_unlock; - cs->dl_bw_cpu = cpu; - } + cs->dl_bw_cpu = cpu; out_success: /* diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index edca7849b165..7db4c87df83b 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -3107,20 +3107,18 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p) static void set_cpus_allowed_dl(struct task_struct *p, struct affinity_context *ctx) { - struct root_domain *src_rd; struct rq *rq; WARN_ON_ONCE(!dl_task(p)); rq = task_rq(p); - src_rd = rq->rd; /* * Migrating a SCHED_DEADLINE task between exclusive * cpusets (different root_domains) entails a bandwidth * update. We already made space for us in the destination * domain (see cpuset_can_attach()). */ - if (!cpumask_intersects(src_rd->span, ctx->new_mask)) { + if (dl_task_needs_bw_move(p, ctx->new_mask)) { struct dl_bw *src_dl_b; src_dl_b = dl_bw_of(cpu_of(rq)); @@ -3137,6 +3135,15 @@ static void set_cpus_allowed_dl(struct task_struct *p, set_cpus_allowed_common(p, ctx); } +bool dl_task_needs_bw_move(struct task_struct *p, + const struct cpumask *new_mask) +{ + if (!dl_task(p)) + return false; + + return !cpumask_intersects(task_rq(p)->rd->span, new_mask); +} + /* Assumes rq->lock is held */ static void rq_online_dl(struct rq *rq) { -- cgit v1.2.3 From 92f3403a7ca5d186b2bad342603410cc6adc95a3 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 6 May 2026 18:50:27 +0530 Subject: drm/xe/madvise: Track purgeability with BO-local counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xe_bo_recompute_purgeable_state() walks all VMAs of a BO to determine whether the BO can be made purgeable. This makes VMA create/destroy and madvise updates O(n) in the number of mappings. Replace the walk with BO-local counters protected by the BO dma-resv lock: - vma_count tracks the number of VMAs mapping the BO. - willneed_count tracks active WILLNEED holders, including WILLNEED VMAs and active dma-buf exports for non-imported BOs. A DONTNEED BO is promoted back to WILLNEED on a 0->1 transition of willneed_count. A BO is demoted to DONTNEED on a 1->0 transition only when it still has VMAs, preserving the previous behaviour where a BO with no mappings keeps its current madvise state. PURGED remains terminal, preserving the existing "once purged, always purged" rule. Fixes: 4f44961eab84 ("drm/xe/vm: Prevent binding of purged buffer objects") v2: - Use early return for imported BOs in all four helpers to avoid nesting (Matt B). - Group purgeability state into a purgeable sub-struct on struct xe_bo (Matt B). - Reword xe_bo_willneed_put_locked() kernel-doc to explain that a 1->0 transition means all remaining active VMAs are DONTNEED (Matt B). v3: - Move DONTNEED/PURGED reject from vma_lock_and_validate() into xe_vma_create(), gated on attr->purgeable_state == WILLNEED. Fixes vm_bind bypass and partial-unbind rejection on DONTNEED BOs (Matt B). - Drop .check_purged from MAP and REMAP; keep it for PREFETCH and add a comment why (Matt B). - Skip BO validation in vma_lock_and_validate() for non-WILLNEED VMA remnants so cleanup/remap paths do not repopulate DONTNEED/PURGED BOs. Suggested-by: Thomas Hellström Cc: Matthew Brost Cc: Thomas Hellström Cc: Himal Prasad Ghimiray Signed-off-by: Arvind Yadav Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260506132027.2556046-1-arvind.yadav@intel.com Signed-off-by: Himal Prasad Ghimiray (cherry picked from commit 23fb2ea56cb4fa2587bc072b04e4e698687a48e4) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_bo.c | 6 +- drivers/gpu/drm/xe/xe_bo.h | 88 +++++++++++++++++++- drivers/gpu/drm/xe/xe_bo_types.h | 28 ++++++- drivers/gpu/drm/xe/xe_dma_buf.c | 28 ++++++- drivers/gpu/drm/xe/xe_vm.c | 51 +++++++++--- drivers/gpu/drm/xe/xe_vm_madvise.c | 162 +++---------------------------------- drivers/gpu/drm/xe/xe_vm_madvise.h | 2 - 7 files changed, 190 insertions(+), 175 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 4075edf97421..6b518858538f 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -897,10 +897,10 @@ void xe_bo_set_purgeable_state(struct xe_bo *bo, new_state == XE_MADV_PURGEABLE_PURGED); /* Once purged, always purged - cannot transition out */ - xe_assert(xe, !(bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED && + xe_assert(xe, !(bo->purgeable.state == XE_MADV_PURGEABLE_PURGED && new_state != XE_MADV_PURGEABLE_PURGED)); - bo->madv_purgeable = new_state; + bo->purgeable.state = new_state; xe_bo_set_purgeable_shrinker(bo, new_state); } @@ -2368,7 +2368,7 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, INIT_LIST_HEAD(&bo->vram_userfault_link); /* Initialize purge advisory state */ - bo->madv_purgeable = XE_MADV_PURGEABLE_WILLNEED; + bo->purgeable.state = XE_MADV_PURGEABLE_WILLNEED; drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 68dea7d25a6b..6340317f7d2e 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -251,7 +251,7 @@ static inline bool xe_bo_is_protected(const struct xe_bo *bo) static inline bool xe_bo_is_purged(struct xe_bo *bo) { xe_bo_assert_held(bo); - return bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED; + return bo->purgeable.state == XE_MADV_PURGEABLE_PURGED; } /** @@ -268,11 +268,95 @@ static inline bool xe_bo_is_purged(struct xe_bo *bo) static inline bool xe_bo_madv_is_dontneed(struct xe_bo *bo) { xe_bo_assert_held(bo); - return bo->madv_purgeable == XE_MADV_PURGEABLE_DONTNEED; + return bo->purgeable.state == XE_MADV_PURGEABLE_DONTNEED; } void xe_bo_set_purgeable_state(struct xe_bo *bo, enum xe_madv_purgeable_state new_state); +/** + * xe_bo_willneed_get_locked() - Acquire a WILLNEED holder on a BO + * @bo: Buffer object + * + * Increments willneed_count and, on a 0->1 transition, promotes the BO + * from DONTNEED to WILLNEED. PURGED is terminal and is never modified. + * + * Caller must hold the BO's dma-resv lock. + */ +static inline void xe_bo_willneed_get_locked(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + + /* Imported BOs are owned externally; do not track purgeability. */ + if (drm_gem_is_imported(&bo->ttm.base)) + return; + + if (bo->purgeable.willneed_count++ == 0 && xe_bo_madv_is_dontneed(bo)) + xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_WILLNEED); +} + +/** + * xe_bo_willneed_put_locked() - Release a WILLNEED holder on a BO + * @bo: Buffer object + * + * Decrements willneed_count and, on a 1->0 transition, marks the BO + * DONTNEED only if it still has VMAs (implying all active VMAs are + * DONTNEED). If the last VMA is being removed, preserve the current BO + * state to match the previous VMA-walk semantics. + * + * PURGED is terminal and the BO state is never modified. + * + * Caller must hold the BO's dma-resv lock. + */ +static inline void xe_bo_willneed_put_locked(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + + if (drm_gem_is_imported(&bo->ttm.base)) + return; + + xe_assert(xe_bo_device(bo), bo->purgeable.willneed_count > 0); + if (--bo->purgeable.willneed_count == 0 && bo->purgeable.vma_count > 0 && + !xe_bo_is_purged(bo)) + xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_DONTNEED); +} + +/** + * xe_bo_vma_count_inc_locked() - Account a new VMA on a BO + * @bo: Buffer object + * + * Increments vma_count. + * + * Caller must hold the BO's dma-resv lock. + */ +static inline void xe_bo_vma_count_inc_locked(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + + if (drm_gem_is_imported(&bo->ttm.base)) + return; + + bo->purgeable.vma_count++; +} + +/** + * xe_bo_vma_count_dec_locked() - Account a VMA removal on a BO + * @bo: Buffer object + * + * Decrements vma_count. + * + * Caller must hold the BO's dma-resv lock. + */ +static inline void xe_bo_vma_count_dec_locked(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + + if (drm_gem_is_imported(&bo->ttm.base)) + return; + + xe_assert(xe_bo_device(bo), bo->purgeable.vma_count > 0); + bo->purgeable.vma_count--; +} + static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo) { if (likely(bo)) { diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 9d19940b8fc0..077e35b4cdce 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -111,10 +111,32 @@ struct xe_bo { u64 min_align; /** - * @madv_purgeable: user space advise on BO purgeability, protected - * by BO's dma-resv lock. + * @purgeable: Purgeability state and accounting. + * + * All fields are protected by the BO's dma-resv lock. */ - u32 madv_purgeable; + struct { + /** + * @purgeable.state: BO purgeability state + * (WILLNEED/DONTNEED/PURGED). + */ + u32 state; + + /** + * @purgeable.vma_count: Number of VMAs currently mapping this BO. + */ + u32 vma_count; + + /** + * @purgeable.willneed_count: Number of active WILLNEED holders. + * + * Counts WILLNEED VMAs plus active dma-buf exports for + * non-imported BOs. The BO flips to DONTNEED on a 1->0 + * transition only when VMAs still exist; if the last VMA is + * removed, the previous BO state is preserved. + */ + u32 willneed_count; + } purgeable; }; #endif diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index b9828da15897..855d32ba314d 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -193,6 +193,18 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, return 0; } +static void xe_dma_buf_release(struct dma_buf *dmabuf) +{ + struct drm_gem_object *obj = dmabuf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + + xe_bo_lock(bo, false); + xe_bo_willneed_put_locked(bo); + xe_bo_unlock(bo); + + drm_gem_dmabuf_release(dmabuf); +} + static const struct dma_buf_ops xe_dmabuf_ops = { .attach = xe_dma_buf_attach, .detach = xe_dma_buf_detach, @@ -200,7 +212,7 @@ static const struct dma_buf_ops xe_dmabuf_ops = { .unpin = xe_dma_buf_unpin, .map_dma_buf = xe_dma_buf_map, .unmap_dma_buf = xe_dma_buf_unmap, - .release = drm_gem_dmabuf_release, + .release = xe_dma_buf_release, .begin_cpu_access = xe_dma_buf_begin_cpu_access, .mmap = drm_gem_dmabuf_mmap, .vmap = drm_gem_dmabuf_vmap, @@ -241,18 +253,26 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags) ret = -EINVAL; goto out_unlock; } + + xe_bo_willneed_get_locked(bo); xe_bo_unlock(bo); ret = ttm_bo_setup_export(&bo->ttm, &ctx); if (ret) - return ERR_PTR(ret); + goto out_put; buf = drm_gem_prime_export(obj, flags); - if (!IS_ERR(buf)) - buf->ops = &xe_dmabuf_ops; + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto out_put; + } + buf->ops = &xe_dmabuf_ops; return buf; +out_put: + xe_bo_lock(bo, false); + xe_bo_willneed_put_locked(bo); out_unlock: xe_bo_unlock(bo); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a717a2b8dea3..ab6cc1f0a789 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1120,6 +1120,25 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, xe_bo_assert_held(bo); + /* + * Reject only WILLNEED mappings on DONTNEED/PURGED BOs. This + * gates new vm_bind ioctls (user supplies WILLNEED) while + * still allowing partial-unbind / remap splits whose new VMAs + * inherit the parent's DONTNEED attr. It must also run before + * xe_bo_willneed_get_locked() below so a 0->1 holder bump + * cannot silently promote DONTNEED back to WILLNEED. + */ + if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) { + if (xe_bo_madv_is_dontneed(bo)) { + xe_vma_free(vma); + return ERR_PTR(-EBUSY); + } + if (xe_bo_is_purged(bo)) { + xe_vma_free(vma); + return ERR_PTR(-EINVAL); + } + } + vm_bo = drm_gpuvm_bo_obtain_locked(vma->gpuva.vm, &bo->ttm.base); if (IS_ERR(vm_bo)) { xe_vma_free(vma); @@ -1131,6 +1150,10 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->gpuva.gem.offset = bo_offset_or_userptr; drm_gpuva_link(&vma->gpuva, vm_bo); drm_gpuvm_bo_put(vm_bo); + + xe_bo_vma_count_inc_locked(bo); + if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) + xe_bo_willneed_get_locked(bo); } else /* userptr or null */ { if (!is_null && !is_cpu_addr_mirror) { struct xe_userptr_vma *uvma = to_userptr_vma(vma); @@ -1208,7 +1231,10 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) xe_bo_assert_held(bo); drm_gpuva_unlink(&vma->gpuva); - xe_bo_recompute_purgeable_state(bo); + + xe_bo_vma_count_dec_locked(bo); + if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) + xe_bo_willneed_put_locked(bo); } xe_vm_assert_held(vm); @@ -3016,7 +3042,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, * @res_evict: Allow evicting resources during validation * @validate: Perform BO validation * @request_decompress: Request BO decompression - * @check_purged: Reject operation if BO is purged + * @check_purged: Reject operation if BO is DONTNEED or PURGED */ struct xe_vma_lock_and_validate_flags { u32 res_evict : 1; @@ -3030,6 +3056,7 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); + bool validate_bo = flags.validate; int err = 0; if (bo) { @@ -3044,7 +3071,11 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, err = -EINVAL; /* BO already purged */ } - if (!err && flags.validate) + /* Don't validate the BO for DONTNEED/PURGED remap remnants. */ + if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_WILLNEED) + validate_bo = false; + + if (!err && validate_bo) err = xe_bo_validate(bo, vm, xe_vm_allow_vm_eviction(vm) && flags.res_evict, exec); @@ -3152,7 +3183,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, op->map.immediate, .request_decompress = op->map.request_decompress, - .check_purged = true, + .check_purged = false, }); break; case DRM_GPUVA_OP_REMAP: @@ -3174,7 +3205,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, .res_evict = res_evict, .validate = true, .request_decompress = false, - .check_purged = true, + .check_purged = false, }); if (!err && op->remap.next) err = vma_lock_and_validate(exec, op->remap.next, @@ -3182,7 +3213,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, .res_evict = res_evict, .validate = true, .request_decompress = false, - .check_purged = true, + .check_purged = false, }); break; case DRM_GPUVA_OP_UNMAP: @@ -3211,9 +3242,11 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, } /* - * Prefetch attempts to migrate BO's backing store without - * repopulating it first. Purged BOs have no backing store - * to migrate, so reject the operation. + * PREFETCH is the only op that still gates on BO purge state. + * MAP/REMAP handle this inside xe_vma_create() so partial + * unbind on a DONTNEED BO still works. PREFETCH skips + * xe_vma_create() and would migrate a BO with no backing + * store, so reject DONTNEED/PURGED here. */ err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.prefetch.va), diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index c78906dea82b..c4fb29004195 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -185,147 +185,6 @@ static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm, } } -/** - * xe_bo_is_dmabuf_shared() - Check if BO is shared via dma-buf - * @bo: Buffer object - * - * Prevent marking imported or exported dma-bufs as purgeable. - * For imported BOs, Xe doesn't own the backing store and cannot - * safely reclaim pages (exporter or other devices may still be - * using them). For exported BOs, external devices may have active - * mappings we cannot track. - * - * Return: true if BO is imported or exported, false otherwise - */ -static bool xe_bo_is_dmabuf_shared(struct xe_bo *bo) -{ - struct drm_gem_object *obj = &bo->ttm.base; - - /* Imported: exporter owns backing store */ - if (drm_gem_is_imported(obj)) - return true; - - /* Exported: external devices may be accessing */ - if (obj->dma_buf) - return true; - - return false; -} - -/** - * enum xe_bo_vmas_purge_state - VMA purgeable state aggregation - * - * Distinguishes whether a BO's VMAs are all DONTNEED, have at least - * one WILLNEED, or have no VMAs at all. - * - * Enum values align with XE_MADV_PURGEABLE_* states for consistency. - */ -enum xe_bo_vmas_purge_state { - /** @XE_BO_VMAS_STATE_WILLNEED: At least one VMA is WILLNEED */ - XE_BO_VMAS_STATE_WILLNEED = 0, - /** @XE_BO_VMAS_STATE_DONTNEED: All VMAs are DONTNEED */ - XE_BO_VMAS_STATE_DONTNEED = 1, - /** @XE_BO_VMAS_STATE_NO_VMAS: BO has no VMAs */ - XE_BO_VMAS_STATE_NO_VMAS = 2, -}; - -/* - * xe_bo_recompute_purgeable_state() casts between xe_bo_vmas_purge_state and - * xe_madv_purgeable_state. Enforce that WILLNEED=0 and DONTNEED=1 match across - * both enums so the single-line cast is always valid. - */ -static_assert(XE_BO_VMAS_STATE_WILLNEED == (int)XE_MADV_PURGEABLE_WILLNEED, - "VMA purge state WILLNEED must equal madv purgeable WILLNEED"); -static_assert(XE_BO_VMAS_STATE_DONTNEED == (int)XE_MADV_PURGEABLE_DONTNEED, - "VMA purge state DONTNEED must equal madv purgeable DONTNEED"); - -/** - * xe_bo_all_vmas_dontneed() - Determine BO VMA purgeable state - * @bo: Buffer object - * - * Check all VMAs across all VMs to determine aggregate purgeable state. - * Shared BOs require unanimous DONTNEED state from all mappings. - * - * Caller must hold BO dma-resv lock. - * - * Return: XE_BO_VMAS_STATE_DONTNEED if all VMAs are DONTNEED, - * XE_BO_VMAS_STATE_WILLNEED if at least one VMA is not DONTNEED, - * XE_BO_VMAS_STATE_NO_VMAS if BO has no VMAs - */ -static enum xe_bo_vmas_purge_state xe_bo_all_vmas_dontneed(struct xe_bo *bo) -{ - struct drm_gpuvm_bo *vm_bo; - struct drm_gpuva *gpuva; - struct drm_gem_object *obj = &bo->ttm.base; - bool has_vmas = false; - - xe_bo_assert_held(bo); - - /* Shared dma-bufs cannot be purgeable */ - if (xe_bo_is_dmabuf_shared(bo)) - return XE_BO_VMAS_STATE_WILLNEED; - - drm_gem_for_each_gpuvm_bo(vm_bo, obj) { - drm_gpuvm_bo_for_each_va(gpuva, vm_bo) { - struct xe_vma *vma = gpuva_to_vma(gpuva); - - has_vmas = true; - - /* Any non-DONTNEED VMA prevents purging */ - if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_DONTNEED) - return XE_BO_VMAS_STATE_WILLNEED; - } - } - - /* - * No VMAs => preserve existing BO purgeable state. - * Avoids incorrectly flipping DONTNEED -> WILLNEED when last VMA unmapped. - */ - if (!has_vmas) - return XE_BO_VMAS_STATE_NO_VMAS; - - return XE_BO_VMAS_STATE_DONTNEED; -} - -/** - * xe_bo_recompute_purgeable_state() - Recompute BO purgeable state from VMAs - * @bo: Buffer object - * - * Walk all VMAs to determine if BO should be purgeable or not. - * Shared BOs require unanimous DONTNEED state from all mappings. - * If the BO has no VMAs the existing state is preserved. - * - * Locking: Caller must hold BO dma-resv lock. When iterating GPUVM lists, - * VM lock must also be held (write) to prevent concurrent VMA modifications. - * This is satisfied at both call sites: - * - xe_vma_destroy(): holds vm->lock write - * - madvise_purgeable(): holds vm->lock write (from madvise ioctl path) - * - * Return: nothing - */ -void xe_bo_recompute_purgeable_state(struct xe_bo *bo) -{ - enum xe_bo_vmas_purge_state vma_state; - - if (!bo) - return; - - xe_bo_assert_held(bo); - - /* - * Once purged, always purged. Cannot transition back to WILLNEED. - * This matches i915 semantics where purged BOs are permanently invalid. - */ - if (bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED) - return; - - vma_state = xe_bo_all_vmas_dontneed(bo); - - if (vma_state != (enum xe_bo_vmas_purge_state)bo->madv_purgeable && - vma_state != XE_BO_VMAS_STATE_NO_VMAS) - xe_bo_set_purgeable_state(bo, (enum xe_madv_purgeable_state)vma_state); -} - /** * madvise_purgeable - Handle purgeable buffer object advice * @xe: XE device @@ -359,12 +218,6 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm, /* BO must be locked before modifying madv state */ xe_bo_assert_held(bo); - /* Skip shared dma-bufs - no PTEs to zap */ - if (xe_bo_is_dmabuf_shared(bo)) { - vmas[i]->skip_invalidation = true; - continue; - } - /* * Once purged, always purged. Cannot transition back to WILLNEED. * This matches i915 semantics where purged BOs are permanently invalid. @@ -377,13 +230,14 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm, switch (op->purge_state_val.val) { case DRM_XE_VMA_PURGEABLE_STATE_WILLNEED: - vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED; vmas[i]->skip_invalidation = true; - - xe_bo_recompute_purgeable_state(bo); + /* Only act on a real DONTNEED -> WILLNEED transition. */ + if (vmas[i]->attr.purgeable_state == XE_MADV_PURGEABLE_DONTNEED) { + vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED; + xe_bo_willneed_get_locked(bo); + } break; case DRM_XE_VMA_PURGEABLE_STATE_DONTNEED: - vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED; /* * Don't zap PTEs at DONTNEED time -- pages are still * alive. The zap happens in xe_bo_move_notify() right @@ -391,7 +245,11 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm, */ vmas[i]->skip_invalidation = true; - xe_bo_recompute_purgeable_state(bo); + /* Only act on a real WILLNEED -> DONTNEED transition. */ + if (vmas[i]->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) { + vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED; + xe_bo_willneed_put_locked(bo); + } break; default: /* Should never hit - values validated in madvise_args_are_sane() */ diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h index 39acd2689ca0..a3078f634c7e 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.h +++ b/drivers/gpu/drm/xe/xe_vm_madvise.h @@ -13,6 +13,4 @@ struct xe_bo; int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -void xe_bo_recompute_purgeable_state(struct xe_bo *bo); - #endif -- cgit v1.2.3 From 4568dfbb8363a153e7cef384d23cc6d6563ff316 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 7 May 2026 14:00:15 -0700 Subject: drm/xe: Make decision to use Xe2-style blitter instructions a feature flag The blitter engines' MEM_COPY and MEM_SET instructions were added as part of the same hardware change that introduced service copy engines (i.e., BCS1-BCS8) which is why the driver checks for service copy engine presence when deciding whether to use these instructions or the older XY_* instructions. However when making this decision the driver should consider which engines are part of the hardware architecture, not which engines are present/usable on the current device. For graphics IP versions that architecturally include service copy engines (i.e., everything Xe2 and later, plus PVC's Xe_HPC) we should use MEM_SET and MEM_COPY even in if all of the service copy engines wind up getting fused off. I.e., we need to decide based on whether the platform's graphics descriptor contains these engines, rather than whether the usable engine mask contains them. This logic got broken when gt->info.__engine_mask was removed, although in practice that mistake has been harmless so far because there haven't been any hardware SKUs that fuse off all of the service copy engines yet. Replace the incorrect has_service_copy_support() function with a GT feature flag that tracks more accurately whether the new blitter instructions are usable. In addition to fixing incorrect logic if all service copies are fused off, the flag also makes it more obvious what the calling code is trying to do; previously it wasn't terribly obvious why "has service copy engines" was being used as the condition for using different instructions on all copy engine types. The new feature flag is named 'has_xe2_blt_instructions' because we expect this flag to be set for all Xe2 and later platforms (i.e., everything officially supported by the Xe driver). Technically there's also one Xe1-era platform (PVC) that supports these engines/instructions and will set this flag, but this still seems to be the most clear and understandable name for the flag. Fixes: 61549a2ee594 ("drm/xe: Drop __engine_mask") Cc: Balasubramani Vivekanandan Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260507-xe2_copy-v1-1-26506381b821@intel.com Signed-off-by: Matt Roper (cherry picked from commit 09b399842907565a64e351fb22da790b4c673ffb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_types.h | 7 +++++++ drivers/gpu/drm/xe/xe_migrate.c | 18 ++---------------- drivers/gpu/drm/xe/xe_pci.c | 9 +++++++++ 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 8b55cf25a75f..fffb5d631b69 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -144,6 +144,13 @@ struct xe_gt { u8 id; /** @info.has_indirect_ring_state: GT has indirect ring state support */ u8 has_indirect_ring_state:1; + /** + * @info.has_xe2_blt_instructions: GT supports Xe2-style MEM_SET + * and MEM_COPY blitter functionality. Note that despite the + * name, some Xe1 platforms may also support this "Xe2-style" + * feature. + */ + u8 has_xe2_blt_instructions:1; /** * @info.num_geometry_xecore_fuse_regs: Number of 32b-bit fuse * registers the geometry XeCore mask spans. diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 5fdc89ed5256..a22413f892a0 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1524,23 +1524,9 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb, bb->len += len; } -static bool has_service_copy_support(struct xe_gt *gt) -{ - /* - * What we care about is whether the architecture was designed with - * service copy functionality (specifically the new MEM_SET / MEM_COPY - * instructions) so check the architectural engine list rather than the - * actual list since these instructions are usable on BCS0 even if - * all of the actual service copy engines (BCS1-BCS8) have been fused - * off. - */ - return gt->info.engine_mask & GENMASK(XE_HW_ENGINE_BCS8, - XE_HW_ENGINE_BCS1); -} - static u32 emit_clear_cmd_len(struct xe_gt *gt) { - if (has_service_copy_support(gt)) + if (gt->info.has_xe2_blt_instructions) return PVC_MEM_SET_CMD_LEN_DW; else return XY_FAST_COLOR_BLT_DW; @@ -1549,7 +1535,7 @@ static u32 emit_clear_cmd_len(struct xe_gt *gt) static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u32 size, u32 pitch, bool is_vram) { - if (has_service_copy_support(gt)) + if (gt->info.has_xe2_blt_instructions) emit_clear_link_copy(gt, bb, src_ofs, size, pitch); else emit_clear_main_copy(gt, bb, src_ofs, size, pitch, diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 9f98d0334164..c2ecd27ec770 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -851,6 +851,15 @@ static struct xe_gt *alloc_primary_gt(struct xe_tile *tile, gt->info.num_geometry_xecore_fuse_regs = graphics_desc->num_geometry_xecore_fuse_regs; gt->info.num_compute_xecore_fuse_regs = graphics_desc->num_compute_xecore_fuse_regs; + /* + * Even if the service copy engines wind up being fused off, their + * presence in the IP descriptor indicates that the platform supports + * Xe2-style MEM_SET and MEM_COPY functionality. + */ + if (graphics_desc->hw_engine_mask & GENMASK(XE_HW_ENGINE_BCS8, + XE_HW_ENGINE_BCS1)) + gt->info.has_xe2_blt_instructions = true; + /* * Before media version 13, the media IP was part of the primary GT * so we need to add the media engines to the primary GT's engine list. -- cgit v1.2.3 From 981bedbbe61364fcc3a3b87ebaf648a66cd07108 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 8 May 2026 11:26:36 +0100 Subject: drm/xe/dma-buf: handle empty bo and UAF races MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There look to be some nasty races here when triggering the invalidate_mappings hook: 1) We do xe_bo_alloc() followed by the attach, before the actual full bo init step in xe_dma_buf_init_obj(). However the bo is visible on the attachments list after the attach. This is bad since exporter driver, say amdgpu, can at any time call back into our invalidate_mappings hook, with an empty/bogus bo, leading to potential bugs/crashes. 2) Similar to 1) but here we get a UAF, when the invalidate_mappings hook is triggered. For example, we get as far as xe_bo_init_locked() but this fails in some way. But here the bo will be freed on error, but we still have it attached from dma-buf pov, so if the invalidate_mappings is now triggered then the bo we access is gone and we trigger UAF and more bugs/crashes. To fix this, move the attach step until after we actually have a fully set up buffer object. Note that the bo is not published to userspace until later, so not sure what the comment "Don't publish the bo until we have a valid attachment", is referring to. We have at least two different customers reporting hitting a NULL ptr deref in evict_flags when importing something from amdgpu, followed by triggering the evict flow. Hit rate is also pretty low, which would hint at some kind of race, so something like 1) or 2) might explain this. v2: - Shuffle the order of the ops slightly (no functional change) - Improve the comment to better explain the ordering (Matt B) Assisted-by: Gemini:gemini-3 #debug Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/work_items/7903 Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/work_items/4055 Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Matthew Brost Acked-by: Thomas Hellström Link: https://patch.msgid.link/20260508102635.149172-3-matthew.auld@intel.com (cherry picked from commit af1f2ad0c59fe4e2f924c526f66e968289d77971) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_dma_buf.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 855d32ba314d..f7b47e9d1a4c 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -377,15 +377,25 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, } } - /* - * Don't publish the bo until we have a valid attachment, and a - * valid attachment needs the bo address. So pre-create a bo before - * creating the attachment and publish. - */ bo = xe_bo_alloc(); if (IS_ERR(bo)) return ERR_CAST(bo); + /* + * xe_dma_buf_init_obj() takes ownership of the raw bo, so do not touch + * on fail, since it will already take care of cleanup. On success we + * still need to drop the ref, if something later fails. + * + * In addition this needs to happen before the attach, since + * it will create a new attachment for this, and add it to the list of + * attachments, at which point it is globally visible, and at any point + * the export side can call into on invalidate_mappings callback, which + * require a working object. + */ + obj = xe_dma_buf_init_obj(dev, bo, dma_buf); + if (IS_ERR(obj)) + return obj; + attach_ops = &xe_dma_buf_attach_ops; #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) if (test) @@ -398,21 +408,12 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, goto out_err; } - /* - * xe_dma_buf_init_obj() takes ownership of bo on both success - * and failure, so we must not touch bo after this call. - */ - obj = xe_dma_buf_init_obj(dev, bo, dma_buf); - if (IS_ERR(obj)) { - dma_buf_detach(dma_buf, attach); - return obj; - } get_dma_buf(dma_buf); obj->import_attach = attach; return obj; out_err: - xe_bo_free(bo); + xe_bo_put(bo); return obj; } -- cgit v1.2.3 From 155a372a1cc50fa93387c5d3cdfd614a61e1afd1 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 8 May 2026 11:26:37 +0100 Subject: drm/xe/dma-buf: fix UAF with retry loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Retry doesn't work here, since bo will be freed on error, leading to UAF. However, now that we do the alloc & init before the attach, we can now combine this as one unit and have the init do the alloc for us. This should make the retry safe. Reported by Sashiko. v2: Fix up the error unwind (CI) Closes: https://sashiko.dev/#/patchset/20260506184332.86743-2-matthew.auld%40intel.com Fixes: eb289a5f6cc6 ("drm/xe: Convert xe_dma_buf.c for exhaustive eviction") Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Cc: # v6.18+ Reviewed-by: Thomas Hellström Link: https://patch.msgid.link/20260508102635.149172-4-matthew.auld@intel.com (cherry picked from commit 479669418253e0f27f8cf5db01a731352ea592e7) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_dma_buf.c | 49 ++++++++++------------------------------- 1 file changed, 12 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index f7b47e9d1a4c..8a920e58245c 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -278,16 +278,8 @@ out_unlock: return ERR_PTR(ret); } -/* - * Takes ownership of @storage: on success it is transferred to the returned - * drm_gem_object; on failure it is freed before returning the error. - * This matches the contract of xe_bo_init_locked() which frees @storage on - * its error paths, so callers need not (and must not) free @storage after - * this call. - */ static struct drm_gem_object * -xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, - struct dma_buf *dma_buf) +xe_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) { struct dma_resv *resv = dma_buf->resv; struct xe_device *xe = to_xe_device(dev); @@ -298,10 +290,8 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, int ret = 0; dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm); - if (!dummy_obj) { - xe_bo_free(storage); + if (!dummy_obj) return ERR_PTR(-ENOMEM); - } dummy_obj->resv = resv; xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) { @@ -310,8 +300,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, if (ret) break; - /* xe_bo_init_locked() frees storage on error */ - bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size, + bo = xe_bo_init_locked(xe, NULL, NULL, resv, NULL, dma_buf->size, 0, /* Will require 1way or 2way for vm_bind */ ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec); drm_exec_retry_on_contention(&exec); @@ -362,7 +351,6 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, const struct dma_buf_attach_ops *attach_ops; struct dma_buf_attachment *attach; struct drm_gem_object *obj; - struct xe_bo *bo; if (dma_buf->ops == &xe_dmabuf_ops) { obj = dma_buf->priv; @@ -377,22 +365,14 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, } } - bo = xe_bo_alloc(); - if (IS_ERR(bo)) - return ERR_CAST(bo); - /* - * xe_dma_buf_init_obj() takes ownership of the raw bo, so do not touch - * on fail, since it will already take care of cleanup. On success we - * still need to drop the ref, if something later fails. - * - * In addition this needs to happen before the attach, since - * it will create a new attachment for this, and add it to the list of - * attachments, at which point it is globally visible, and at any point - * the export side can call into on invalidate_mappings callback, which - * require a working object. + * This needs to happen before the attach, since it will create a new + * attachment for this, and add it to the list of attachments, at which + * point it is globally visible, and at any point the export side can + * call into on invalidate_mappings callback, which require a working + * object. */ - obj = xe_dma_buf_init_obj(dev, bo, dma_buf); + obj = xe_dma_buf_create_obj(dev, dma_buf); if (IS_ERR(obj)) return obj; @@ -402,20 +382,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, attach_ops = test->attach_ops; #endif - attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base); + attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, obj); if (IS_ERR(attach)) { - obj = ERR_CAST(attach); - goto out_err; + xe_bo_put(gem_to_xe_bo(obj)); + return ERR_CAST(attach); } get_dma_buf(dma_buf); obj->import_attach = attach; return obj; - -out_err: - xe_bo_put(bo); - - return obj; } #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) -- cgit v1.2.3 From d5971c5c34303a00bf841a902ca00a703602c500 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 20 Apr 2026 20:18:43 +0200 Subject: drm/amdgpu: remove deadlocks from amdgpu_userq_pre_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The purpose of a GPU reset is to make sure that fence can be signaled again and the signal and resume workers can make progress again. So waiting for the resume worker or any fence in the GPU reset path is just utterly nonsense. Signed-off-by: Christian König Reviewed-by: Prike Liang Signed-off-by: Alex Deucher (cherry picked from commit fcd5f065eab46993af43442fd77ee8d9eb9c5bdf) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index de140a8ed135..692f7e3513df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -1504,23 +1504,21 @@ void amdgpu_userq_pre_reset(struct amdgpu_device *adev) { const struct amdgpu_userq_funcs *userq_funcs; struct amdgpu_usermode_queue *queue; - struct amdgpu_userq_mgr *uqm; unsigned long queue_id; + /* TODO: We probably need a new lock for the queue state */ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) { - uqm = queue->userq_mgr; - cancel_delayed_work_sync(&uqm->resume_work); - if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { - amdgpu_userq_wait_for_last_fence(queue); - userq_funcs = adev->userq_funcs[queue->queue_type]; - userq_funcs->unmap(queue); - /* just mark all queues as hung at this point. - * if unmap succeeds, we could map again - * in amdgpu_userq_post_reset() if vram is not lost - */ - queue->state = AMDGPU_USERQ_STATE_HUNG; - amdgpu_userq_fence_driver_force_completion(queue); - } + if (queue->state != AMDGPU_USERQ_STATE_MAPPED) + continue; + + userq_funcs = adev->userq_funcs[queue->queue_type]; + userq_funcs->unmap(queue); + /* just mark all queues as hung at this point. + * if unmap succeeds, we could map again + * in amdgpu_userq_post_reset() if vram is not lost + */ + queue->state = AMDGPU_USERQ_STATE_HUNG; + amdgpu_userq_fence_driver_force_completion(queue); } } -- cgit v1.2.3 From 44e5bc73bdcbec115cfe1c4b4394d25eb3deaff2 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 16 Apr 2026 15:32:11 +0200 Subject: drm/amdgpu: rework amdgpu_userq_signal_ioctl v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This one was fortunately not looking so bad as the wait ioctl path, but there were still a few things which could be fixed/improved: 1. Allocating with GFP_ATOMIC was quite unnecessary, we can do that before taking the userq_lock. 2. Use a new mutex as protection for the fence_drv_xa so that we can do memory allocations while holding it. 3. Starting the reset timer is unnecessary when the fence is already signaled when we create it. 4. Cleanup error handling, avoid trying to free the queue when we don't even got one. v2: fix incorrect usage of xa_find, destroy the new mutex on error v3: cleanup ref ordering Signed-off-by: Christian König Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher (cherry picked from commit 1609eb0f81a609d350169839128cecf298c84e7a) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 12 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 226 +++++++++++------------- 3 files changed, 119 insertions(+), 121 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 692f7e3513df..2d4f159f3362 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -800,6 +800,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) } queue->doorbell_index = index; + mutex_init(&queue->fence_drv_lock); xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv); if (r) { @@ -873,6 +874,7 @@ clean_mapping: amdgpu_bo_reserve(fpriv->vm.root.bo, true); amdgpu_userq_buffer_vas_list_cleanup(adev, queue); amdgpu_bo_unreserve(fpriv->vm.root.bo); + mutex_destroy(&queue->fence_drv_lock); free_queue: kfree(queue); err_pm_runtime: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 8b8f345b60b6..843ea8ecc5d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -66,6 +66,18 @@ struct amdgpu_usermode_queue { struct amdgpu_userq_obj db_obj; struct amdgpu_userq_obj fw_obj; struct amdgpu_userq_obj wptr_obj; + + /** + * @fence_drv_lock: Protecting @fence_drv_xa. + */ + struct mutex fence_drv_lock; + + /** + * @fence_drv_xa: + * + * References to the external fence drivers returned by wait_ioctl. + * Dropped on the next signaled dma_fence or queue destruction. + */ struct xarray fence_drv_xa; struct amdgpu_userq_fence_driver *fence_drv; struct dma_fence *last_fence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index e2d5f04296e1..c9dbf03c4eea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -121,6 +121,7 @@ amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq) userq->last_fence = NULL; amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa); xa_destroy(&userq->fence_drv_xa); + mutex_destroy(&userq->fence_drv_lock); /* Drop the queue's ownership reference to fence_drv explicitly */ amdgpu_userq_fence_driver_put(userq->fence_drv); } @@ -209,80 +210,84 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy); } -static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) +static int amdgpu_userq_fence_alloc(struct amdgpu_usermode_queue *userq, + struct amdgpu_userq_fence **pfence) { - *userq_fence = kmalloc(sizeof(**userq_fence), GFP_KERNEL); - return *userq_fence ? 0 : -ENOMEM; + struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv; + struct amdgpu_userq_fence *userq_fence; + void *entry; + + userq_fence = kmalloc(sizeof(*userq_fence), GFP_KERNEL); + if (!userq_fence) + return -ENOMEM; + + /* + * Get the next unused entry, since we fill from the start this can be + * used as size to allocate the array. + */ + mutex_lock(&userq->fence_drv_lock); + XA_STATE(xas, &userq->fence_drv_xa, 0); + + rcu_read_lock(); + do { + entry = xas_find_marked(&xas, ULONG_MAX, XA_FREE_MARK); + } while (xas_retry(&xas, entry)); + rcu_read_unlock(); + + userq_fence->fence_drv_array = kvmalloc_array(xas.xa_index, + sizeof(fence_drv), + GFP_KERNEL); + if (!userq_fence->fence_drv_array) { + mutex_unlock(&userq->fence_drv_lock); + kfree(userq_fence); + return -ENOMEM; + } + + userq_fence->fence_drv_array_count = xas.xa_index; + xa_extract(&userq->fence_drv_xa, (void **)userq_fence->fence_drv_array, + 0, ULONG_MAX, xas.xa_index, XA_PRESENT); + xa_destroy(&userq->fence_drv_xa); + + mutex_unlock(&userq->fence_drv_lock); + + amdgpu_userq_fence_driver_get(fence_drv); + userq_fence->fence_drv = fence_drv; + + *pfence = userq_fence; + return 0; } -static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, - struct amdgpu_userq_fence *userq_fence, - u64 seq, struct dma_fence **f) +static void amdgpu_userq_fence_init(struct amdgpu_usermode_queue *userq, + struct amdgpu_userq_fence *fence, + u64 seq) { - struct amdgpu_userq_fence_driver *fence_drv; - struct dma_fence *fence; + struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv; unsigned long flags; bool signaled = false; - fence_drv = userq->fence_drv; - if (!fence_drv) - return -EINVAL; - - spin_lock_init(&userq_fence->lock); - INIT_LIST_HEAD(&userq_fence->link); - fence = &userq_fence->base; - userq_fence->fence_drv = fence_drv; - - dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock, + spin_lock_init(&fence->lock); + dma_fence_init64(&fence->base, &amdgpu_userq_fence_ops, &fence->lock, fence_drv->context, seq); - amdgpu_userq_fence_driver_get(fence_drv); - dma_fence_get(fence); - - if (!xa_empty(&userq->fence_drv_xa)) { - struct amdgpu_userq_fence_driver *stored_fence_drv; - unsigned long index, count = 0; - int i = 0; - - xa_lock(&userq->fence_drv_xa); - xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) - count++; - - userq_fence->fence_drv_array = - kvmalloc_objs(struct amdgpu_userq_fence_driver *, count, - GFP_ATOMIC); - - if (userq_fence->fence_drv_array) { - xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) { - userq_fence->fence_drv_array[i] = stored_fence_drv; - __xa_erase(&userq->fence_drv_xa, index); - i++; - } - } - - userq_fence->fence_drv_array_count = i; - xa_unlock(&userq->fence_drv_xa); - } else { - userq_fence->fence_drv_array = NULL; - userq_fence->fence_drv_array_count = 0; - } + /* Make sure the fence is visible to the hang detect worker */ + dma_fence_put(userq->last_fence); + userq->last_fence = dma_fence_get(&fence->base); - /* Check if hardware has already processed the job */ + /* Check if hardware has already processed the fence */ spin_lock_irqsave(&fence_drv->fence_list_lock, flags); - if (!dma_fence_is_signaled(fence)) { - list_add_tail(&userq_fence->link, &fence_drv->fences); + if (!dma_fence_is_signaled(&fence->base)) { + dma_fence_get(&fence->base); + list_add_tail(&fence->link, &fence_drv->fences); } else { + INIT_LIST_HEAD(&fence->link); signaled = true; - dma_fence_put(fence); } spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); if (signaled) - amdgpu_userq_fence_put_fence_drv_array(userq_fence); - - *f = fence; - - return 0; + amdgpu_userq_fence_put_fence_drv_array(fence); + else + amdgpu_userq_start_hang_detect_work(userq); } static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f) @@ -403,11 +408,6 @@ map_error: return r; } -static void amdgpu_userq_fence_cleanup(struct dma_fence *fence) -{ - dma_fence_put(fence); -} - static void amdgpu_userq_fence_driver_set_error(struct amdgpu_userq_fence *fence, int error) @@ -451,13 +451,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, const unsigned int num_read_bo_handles = args->num_bo_read_handles; struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; + struct drm_gem_object **gobj_write, **gobj_read; u32 *syncobj_handles, num_syncobj_handles; - struct amdgpu_userq_fence *userq_fence; - struct amdgpu_usermode_queue *queue = NULL; - struct drm_syncobj **syncobj = NULL; - struct dma_fence *fence; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_fence *fence; + struct drm_syncobj **syncobj; struct drm_exec exec; + void __user *ptr; int r, i, entry; u64 wptr; @@ -469,13 +470,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, return -EINVAL; num_syncobj_handles = args->num_syncobj_handles; - syncobj_handles = memdup_array_user(u64_to_user_ptr(args->syncobj_handles), - num_syncobj_handles, sizeof(u32)); + ptr = u64_to_user_ptr(args->syncobj_handles); + syncobj_handles = memdup_array_user(ptr, num_syncobj_handles, + sizeof(u32)); if (IS_ERR(syncobj_handles)) return PTR_ERR(syncobj_handles); - /* Array of pointers to the looked up syncobjs */ - syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL); + syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), + GFP_KERNEL); if (!syncobj) { r = -ENOMEM; goto free_syncobj_handles; @@ -489,21 +491,17 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } } - r = drm_gem_objects_lookup(filp, - u64_to_user_ptr(args->bo_read_handles), - num_read_bo_handles, - &gobj_read); + ptr = u64_to_user_ptr(args->bo_read_handles); + r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read); if (r) goto free_syncobj; - r = drm_gem_objects_lookup(filp, - u64_to_user_ptr(args->bo_write_handles), - num_write_bo_handles, + ptr = u64_to_user_ptr(args->bo_write_handles); + r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles, &gobj_write); if (r) goto put_gobj_read; - /* Retrieve the user queue */ queue = amdgpu_userq_get(userq_mgr, args->queue_id); if (!queue) { r = -ENOENT; @@ -512,73 +510,61 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, r = amdgpu_userq_fence_read_wptr(adev, queue, &wptr); if (r) - goto put_gobj_write; + goto put_queue; - r = amdgpu_userq_fence_alloc(&userq_fence); + r = amdgpu_userq_fence_alloc(queue, &fence); if (r) - goto put_gobj_write; + goto put_queue; /* We are here means UQ is active, make sure the eviction fence is valid */ amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); - /* Create a new fence */ - r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence); - if (r) { - mutex_unlock(&userq_mgr->userq_mutex); - kfree(userq_fence); - goto put_gobj_write; - } + /* Create the new fence */ + amdgpu_userq_fence_init(queue, fence, wptr); - dma_fence_put(queue->last_fence); - queue->last_fence = dma_fence_get(fence); - amdgpu_userq_start_hang_detect_work(queue); mutex_unlock(&userq_mgr->userq_mutex); + /* + * This needs to come after the fence is created since + * amdgpu_userq_ensure_ev_fence() can't be called while holding the resv + * locks. + */ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, (num_read_bo_handles + num_write_bo_handles)); - /* Lock all BOs with retry handling */ drm_exec_until_all_locked(&exec) { - r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); + r = drm_exec_prepare_array(&exec, gobj_read, + num_read_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) { - amdgpu_userq_fence_cleanup(fence); + if (r) goto exec_fini; - } - r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); + r = drm_exec_prepare_array(&exec, gobj_write, + num_write_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) { - amdgpu_userq_fence_cleanup(fence); + if (r) goto exec_fini; - } } - for (i = 0; i < num_read_bo_handles; i++) { - if (!gobj_read || !gobj_read[i]->resv) - continue; - - dma_resv_add_fence(gobj_read[i]->resv, fence, + /* And publish the new fence in the BOs and syncobj */ + for (i = 0; i < num_read_bo_handles; i++) + dma_resv_add_fence(gobj_read[i]->resv, &fence->base, DMA_RESV_USAGE_READ); - } - for (i = 0; i < num_write_bo_handles; i++) { - if (!gobj_write || !gobj_write[i]->resv) - continue; - - dma_resv_add_fence(gobj_write[i]->resv, fence, + for (i = 0; i < num_write_bo_handles; i++) + dma_resv_add_fence(gobj_write[i]->resv, &fence->base, DMA_RESV_USAGE_WRITE); - } - /* Add the created fence to syncobj/BO's */ for (i = 0; i < num_syncobj_handles; i++) - drm_syncobj_replace_fence(syncobj[i], fence); + drm_syncobj_replace_fence(syncobj[i], &fence->base); +exec_fini: /* drop the reference acquired in fence creation function */ - dma_fence_put(fence); + dma_fence_put(&fence->base); -exec_fini: drm_exec_fini(&exec); +put_queue: + amdgpu_userq_put(queue); put_gobj_write: for (i = 0; i < num_write_bo_handles; i++) drm_gem_object_put(gobj_write[i]); @@ -589,15 +575,11 @@ put_gobj_read: kvfree(gobj_read); free_syncobj: while (entry-- > 0) - if (syncobj[entry]) - drm_syncobj_put(syncobj[entry]); + drm_syncobj_put(syncobj[entry]); kfree(syncobj); free_syncobj_handles: kfree(syncobj_handles); - if (queue) - amdgpu_userq_put(queue); - return r; } @@ -872,8 +854,10 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp, * Otherwise, we would gather those references until we don't * have any more space left and crash. */ + mutex_lock(&waitq->fence_drv_lock); r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv, xa_limit_32b, GFP_KERNEL); + mutex_unlock(&waitq->fence_drv_lock); if (r) goto put_waitq; -- cgit v1.2.3 From d0053441ad7eaf7920b71e2263097ece53c5af34 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 20 Apr 2026 15:13:57 +0200 Subject: drm/amdgpu: remove almost all calls to amdgpu_userq_detect_and_reset_queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Well the reset handling seems broken on multiple levels. As first step of fixing this remove most calls to the hang detection. That function should only be called after we run into a timeout! And *NOT* as random check spread over the code in multiple places. Signed-off-by: Christian König Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher (cherry picked from commit 71bea36b54ccfb14cbc90f94267af6369af4e702) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 38 ++++++++++++------------------- 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 2d4f159f3362..ba03d2a42e1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -345,23 +345,18 @@ static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; - bool found_hung_queue = false; - int r = 0; + int r; if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { r = userq_funcs->preempt(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; - found_hung_queue = true; + return r; } else { queue->state = AMDGPU_USERQ_STATE_PREEMPTED; } } - - if (found_hung_queue) - amdgpu_userq_detect_and_reset_queues(uq_mgr); - - return r; + return 0; } static int amdgpu_userq_restore_helper(struct amdgpu_usermode_queue *queue) @@ -390,24 +385,21 @@ static int amdgpu_userq_unmap_helper(struct amdgpu_usermode_queue *queue) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; - bool found_hung_queue = false; - int r = 0; + int r; if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) || - (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) { + (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) { + r = userq_funcs->unmap(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; - found_hung_queue = true; + return r; } else { queue->state = AMDGPU_USERQ_STATE_UNMAPPED; } } - if (found_hung_queue) - amdgpu_userq_detect_and_reset_queues(uq_mgr); - - return r; + return 0; } static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue) @@ -416,19 +408,19 @@ static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; - int r = 0; + int r; if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) { r = userq_funcs->map(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; - amdgpu_userq_detect_and_reset_queues(uq_mgr); + return r; } else { queue->state = AMDGPU_USERQ_STATE_MAPPED; } } - return r; + return 0; } static void amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue) @@ -654,7 +646,6 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que #if defined(CONFIG_DEBUG_FS) debugfs_remove_recursive(queue->debugfs_queue); #endif - amdgpu_userq_detect_and_reset_queues(uq_mgr); r = amdgpu_userq_unmap_helper(queue); atomic_dec(&uq_mgr->userq_count[queue->queue_type]); amdgpu_userq_cleanup(queue); @@ -1264,7 +1255,6 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) unsigned long queue_id; int ret = 0, r; - amdgpu_userq_detect_and_reset_queues(uq_mgr); /* Try to unmap all the queues in this process ctx */ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { r = amdgpu_userq_preempt_helper(queue); @@ -1272,9 +1262,11 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) ret = r; } - if (ret) + if (ret) { drm_file_err(uq_mgr->file, "Couldn't unmap all the queues, eviction failed ret=%d\n", ret); + amdgpu_userq_detect_and_reset_queues(uq_mgr); + } return ret; } @@ -1374,7 +1366,6 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev) uqm = queue->userq_mgr; cancel_delayed_work_sync(&uqm->resume_work); guard(mutex)(&uqm->userq_mutex); - amdgpu_userq_detect_and_reset_queues(uqm); if (adev->in_s0ix) r = amdgpu_userq_preempt_helper(queue); else @@ -1433,7 +1424,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, if (((queue->queue_type == AMDGPU_HW_IP_GFX) || (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && (queue->xcp_id == idx)) { - amdgpu_userq_detect_and_reset_queues(uqm); r = amdgpu_userq_preempt_helper(queue); if (r) ret = r; -- cgit v1.2.3 From 0071e01c61aa73f22edd5252f0a3e2c2eff744d6 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 20 Apr 2026 16:08:35 +0200 Subject: drm/amdgpu: fix userq hang detection and reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix lock inversions pointed out by Prike and Sunil. The hang detection timeout *CAN'T* grab locks under which we wait for fences, especially not the userq_mutex lock. Then instead of this completely broken handling with the hang_detect_fence just cancel the work when fences are processed and re-start if necessary. Signed-off-by: Christian König Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher (cherry picked from commit 1b62077f045ac6ffde7c97005c6659569ac5c1ec) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 65 ++++++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 17 +++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 2 +- 4 files changed, 40 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index ba03d2a42e1e..70d74f04d2dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -106,9 +106,6 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) int r = 0; int i; - /* Warning if current process mutex is not held */ - WARN_ON(!mutex_is_locked(&uq_mgr->userq_mutex)); - if (unlikely(adev->debug_disable_gpu_ring_reset)) { dev_err(adev->dev, "userq reset disabled by debug mask\n"); return 0; @@ -127,9 +124,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) */ for (i = 0; i < num_queue_types; i++) { int ring_type = queue_types[i]; - const struct amdgpu_userq_funcs *funcs = adev->userq_funcs[ring_type]; + const struct amdgpu_userq_funcs *funcs = + adev->userq_funcs[ring_type]; - if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, AMDGPU_RESET_TYPE_PER_QUEUE)) + if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, + AMDGPU_RESET_TYPE_PER_QUEUE)) continue; if (atomic_read(&uq_mgr->userq_count[ring_type]) > 0 && @@ -150,38 +149,22 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) static void amdgpu_userq_hang_detect_work(struct work_struct *work) { - struct amdgpu_usermode_queue *queue = container_of(work, - struct amdgpu_usermode_queue, - hang_detect_work.work); - struct dma_fence *fence; - struct amdgpu_userq_mgr *uq_mgr; - - if (!queue->userq_mgr) - return; - - uq_mgr = queue->userq_mgr; - fence = READ_ONCE(queue->hang_detect_fence); - /* Fence already signaled – no action needed */ - if (!fence || dma_fence_is_signaled(fence)) - return; + struct amdgpu_usermode_queue *queue = + container_of(work, struct amdgpu_usermode_queue, + hang_detect_work.work); - mutex_lock(&uq_mgr->userq_mutex); - amdgpu_userq_detect_and_reset_queues(uq_mgr); - mutex_unlock(&uq_mgr->userq_mutex); + amdgpu_userq_detect_and_reset_queues(queue->userq_mgr); } /* * Start hang detection for a user queue fence. A delayed work will be scheduled - * to check if the fence is still pending after the timeout period. -*/ + * to reset the queues when the fence doesn't signal in time. + */ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue) { struct amdgpu_device *adev; unsigned long timeout_ms; - if (!queue || !queue->userq_mgr || !queue->userq_mgr->adev) - return; - adev = queue->userq_mgr->adev; /* Determine timeout based on queue type */ switch (queue->queue_type) { @@ -199,8 +182,6 @@ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue) break; } - /* Store the fence to monitor and schedule hang detection */ - WRITE_ONCE(queue->hang_detect_fence, queue->last_fence); schedule_delayed_work(&queue->hang_detect_work, msecs_to_jiffies(timeout_ms)); } @@ -210,18 +191,24 @@ void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell) struct xarray *xa = &adev->userq_doorbell_xa; struct amdgpu_usermode_queue *queue; unsigned long flags; + int r; xa_lock_irqsave(xa, flags); queue = xa_load(xa, doorbell); - if (queue) - amdgpu_userq_fence_driver_process(queue->fence_drv); - xa_unlock_irqrestore(xa, flags); -} + if (queue) { + r = amdgpu_userq_fence_driver_process(queue->fence_drv); + /* + * We are in interrupt context here, this *can't* wait for + * reset work to finish. + */ + if (r >= 0) + cancel_delayed_work(&queue->hang_detect_work); -static void amdgpu_userq_init_hang_detect_work(struct amdgpu_usermode_queue *queue) -{ - INIT_DELAYED_WORK(&queue->hang_detect_work, amdgpu_userq_hang_detect_work); - queue->hang_detect_fence = NULL; + /* Restart the timer when there are still fences pending */ + if (r == 1) + amdgpu_userq_start_hang_detect_work(queue); + } + xa_unlock_irqrestore(xa, flags); } static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue, @@ -640,7 +627,6 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que amdgpu_bo_unreserve(vm->root.bo); mutex_lock(&uq_mgr->userq_mutex); - queue->hang_detect_fence = NULL; amdgpu_userq_wait_for_last_fence(queue); #if defined(CONFIG_DEBUG_FS) @@ -847,7 +833,8 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) up_read(&adev->reset_domain->sem); amdgpu_debugfs_userq_init(filp, queue, qid); - amdgpu_userq_init_hang_detect_work(queue); + INIT_DELAYED_WORK(&queue->hang_detect_work, + amdgpu_userq_hang_detect_work); args->out.queue_id = qid; atomic_inc(&uq_mgr->userq_count[queue->queue_type]); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 843ea8ecc5d7..85f460e7c31b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -85,7 +85,6 @@ struct amdgpu_usermode_queue { int priority; struct dentry *debugfs_queue; struct delayed_work hang_detect_work; - struct dma_fence *hang_detect_fence; struct kref refcount; struct list_head userq_va_list; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index c9dbf03c4eea..53a8944bab05 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -135,7 +135,14 @@ amdgpu_userq_fence_put_fence_drv_array(struct amdgpu_userq_fence *userq_fence) userq_fence->fence_drv_array_count = 0; } -void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) +/* + * Returns: + * -ENOENT when no fences were processes + * 1 when more fences are pending + * 0 when no fences are pending any more + */ +int +amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) { struct amdgpu_userq_fence *userq_fence, *tmp; LIST_HEAD(to_be_signaled); @@ -143,9 +150,6 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d unsigned long flags; u64 rptr; - if (!fence_drv) - return; - spin_lock_irqsave(&fence_drv->fence_list_lock, flags); rptr = amdgpu_userq_fence_read(fence_drv); @@ -158,6 +162,9 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d &userq_fence->link); spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); + if (list_empty(&to_be_signaled)) + return -ENOENT; + list_for_each_entry_safe(userq_fence, tmp, &to_be_signaled, link) { fence = &userq_fence->base; list_del_init(&userq_fence->link); @@ -169,6 +176,8 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d dma_fence_put(fence); } + /* That doesn't need to be accurate so no locking */ + return list_empty(&fence_drv->fences) ? 0 : 1; } void amdgpu_userq_fence_driver_destroy(struct kref *ref) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index d355a0eecc07..0bd51616cef1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -63,7 +63,7 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, struct amdgpu_userq_fence_driver **fence_drv_req); void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq); -void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); +int amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq); void amdgpu_userq_fence_driver_destroy(struct kref *ref); int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, -- cgit v1.2.3 From 183182235f6d53bac62c6c39014738a54a68dfa6 Mon Sep 17 00:00:00 2001 From: Mikhail Gavrilov Date: Tue, 5 May 2026 09:05:37 +0800 Subject: drm/amd/display: Wrap DCN32 phantom-plane allocation in DC_RUN_WITH_PREEMPTION_ENABLED [Why] dcn32_validate_bandwidth() wraps dcn32_internal_validate_bw() with DC_FP_START()/DC_FP_END(). In x86 non-RT, DC_FP_START takes fpregs_lock(), which disables local softirqs. The DML1 path through dcn32_enable_phantom_plane() calls kvzalloc() to allocate ~335 KiB for dc_plane_state. This triggers the vmalloc path, which calls BUG_ON(in_interrupt()) because it's invoked within the FPU-enabled (softirq disabled) region, leading to a kernel crash. [How] Wrap the dc_state_create_phantom_plane() call with the DC_RUN_WITH_PREEMPTION_ENABLED() macro to allow preemption during this memory allocation. Fixes: 235c67634230 ("drm/amd/display: add DCN32/321 specific files for Display Core") Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/4470 Reviewed-by: Aurabindo Pillai Signed-off-by: Mikhail Gavrilov Signed-off-by: James Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 885ccbef7b94a8b38f69c4211c679021aa27ad11) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 82f81b586986..3751f7a94a05 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -92,9 +92,14 @@ #include "dml/dcn32/dcn32_fpu.h" #include "dc_state_priv.h" +#include "dc_fpu.h" #include "dml2_0/dml2_wrapper.h" +#if !defined(DC_RUN_WITH_PREEMPTION_ENABLED) +#define DC_RUN_WITH_PREEMPTION_ENABLED(code) code +#endif + #define DC_LOGGER_INIT(logger) enum dcn32_clk_src_array_id { @@ -1684,7 +1689,8 @@ static void dcn32_enable_phantom_plane(struct dc *dc, if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) phantom_plane = prev_phantom_plane; else - phantom_plane = dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state); + DC_RUN_WITH_PREEMPTION_ENABLED(phantom_plane = + dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state)); if (!phantom_plane) continue; -- cgit v1.2.3 From 6bbede02dc62a1021aeeae87ab243bd7a93c61d2 Mon Sep 17 00:00:00 2001 From: Xiang Liu Date: Thu, 7 May 2026 20:56:15 +0800 Subject: drm/amd/ras: Fix CPER ring debugfs read overflow The legacy CPER debugfs reader can reach the payload path without a valid pointer snapshot. The remaining user byte count is also treated as the ring occupancy in dwords, so reads past the header can copy more than requested. Take the CPER lock before sampling pointers. Resample rptr/wptr for payload reads, bound the payload copy by available dwords and the remaining user size, and advance the file position for each dword copied. Signed-off-by: Xiang Liu Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher (cherry picked from commit 1e40ef87ffdc291e05ccdade8b9170cc9c1c4249) --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 66e8a2f7afcf..d6bee5c30073 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -552,8 +552,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { struct amdgpu_ring *ring = file_inode(f)->i_private; - uint32_t value, result, early[3]; + u32 value, result, early[3] = { 0 }; uint64_t p; + u32 avail_dw, start_dw, read_dw; loff_t i; int r; @@ -565,10 +566,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, result = 0; - if (*pos < 12) { - if (ring->funcs->type == AMDGPU_RING_TYPE_CPER) - mutex_lock(&ring->adev->cper.ring_lock); + if (ring->funcs->type == AMDGPU_RING_TYPE_CPER) + mutex_lock(&ring->adev->cper.ring_lock); + if (*pos < 12) { early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask; early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask; early[2] = ring->wptr & ring->buf_mask; @@ -600,13 +601,24 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, *pos += 4; } } else { + early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask; + early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask; + p = early[0]; if (early[0] <= early[1]) - size = (early[1] - early[0]); + avail_dw = early[1] - early[0]; else - size = ring->ring_size - (early[0] - early[1]); + avail_dw = ring->buf_mask + 1 - (early[0] - early[1]); - while (size) { + start_dw = (*pos > 12) ? ((*pos - 12) >> 2) : 0; + if (start_dw >= avail_dw) + goto out; + + p = (p + start_dw) & ring->ptr_mask; + avail_dw -= start_dw; + read_dw = min_t(u32, avail_dw, size >> 2); + + while (read_dw) { if (p == early[1]) goto out; @@ -619,9 +631,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, buf += 4; result += 4; - size--; + read_dw--; p++; p &= ring->ptr_mask; + *pos += 4; } } -- cgit v1.2.3 From 5d08559c910cc37673b965a0d4e8d004444d0332 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Fri, 3 Apr 2026 15:58:31 +0800 Subject: drm/amdgpu/gfx_v12_0: set gfx.rs64_enable from PFP header on GFX12 gfx_v12_0_init_microcode() always loads RS64 CP ucode but never set adev->gfx.rs64_enable, so it stayed false and code that branches on it (e.g. MEC pipe reset) used the legacy CP_MEC_CNTL path incorrectly. Match GFX11: derive RS64 mode from the PFP firmware header (v2.0) via amdgpu_ucode_hdr_version(). Log at debug when RS64 is enabled. Reviewed-by: Alex Deucher Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit b03d53598b0d2048e8fa7303b8d0784768ec4fa6) --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 0e0b1e5b88fc..c35372e21261 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -602,6 +602,13 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) "amdgpu/%s_pfp.bin", ucode_prefix); if (err) goto out; + + adev->gfx.rs64_enable = amdgpu_ucode_hdr_version( + (union amdgpu_firmware_header *) + adev->gfx.pfp_fw->data, 2, 0); + if (adev->gfx.rs64_enable) + dev_dbg(adev->dev, "CP RS64 enable\n"); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); -- cgit v1.2.3 From 9a415cc53711f2238e0f0ca8a6bcc796c003b127 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 11 May 2026 12:05:48 -1000 Subject: sched_ext: Avoid UAF in scx_root_enable_workfn() init failure path In scx_root_enable_workfn(), put_task_struct(p) is called before scx_error() dereferences p->comm and p->pid. If the iterator's reference is the last drop, the task is freed synchronously and the deref becomes a UAF. Move put_task_struct() past scx_error(). Reported-by: Sashiko Closes: https://lore.kernel.org/all/20260511214031.AF5E9C2BCB0@smtp.kernel.org/ Fixes: f0e1a0643a59 ("sched_ext: Implement BPF extensible scheduler class") Cc: stable@vger.kernel.org # v6.12+ Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 1efd5d82b08b..9354da79e162 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6973,10 +6973,10 @@ static void scx_root_enable_workfn(struct kthread_work *work) if (scx_get_task_state(p) != SCX_TASK_DEAD) scx_set_task_state(p, SCX_TASK_NONE); task_rq_unlock(rq, p, &rf); - put_task_struct(p); scx_task_iter_stop(&sti); scx_error(sch, "ops.init_task() failed (%d) for %s[%d]", ret, p->comm, p->pid); + put_task_struct(p); goto err_disable_unlock_all; } -- cgit v1.2.3 From 3a8389d42bdf4213730f4067f8bfa78bae6564ef Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 29 Apr 2026 22:58:15 +0200 Subject: zonefs: handle integer overflow in zonefs_fname_to_fno In zonefs the file name in one of the two directories corresponds to the zone number. Here Alexey reported a possible integer overflow in zonefs_fname_to_fno(), where the parsing of the zone number from the file name can overflow the 'long' data type. Add a check for integer overflows and if the fno 'long' did overflow return -ENOENT. Reported-by: Alexey Dobriyan Fixes: d207794ababe ("zonefs: Dynamically create file inodes when needed") Signed-off-by: Johannes Thumshirn Signed-off-by: Damien Le Moal --- fs/zonefs/super.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 9b646cb5335d..ff43d6d1ea30 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -610,10 +610,14 @@ static long zonefs_fname_to_fno(const struct qstr *fname) return c - '0'; for (i = 0, rname = name + len - 1; i < len; i++, rname--) { + long digit; + c = *rname; if (!isdigit(c)) return -ENOENT; - fno += (c - '0') * shift; + digit = (c - '0') * shift; + if (check_add_overflow(fno, digit, &fno)) + return -ENOENT; shift *= 10; } -- cgit v1.2.3 From 83dd9effefa2e9da58ccd37059a11820fc05caf4 Mon Sep 17 00:00:00 2001 From: Florian Eckert Date: Fri, 17 Apr 2026 10:35:45 +0200 Subject: MAINTAINERS: Remove Chuanhua Lei as PCIe intel-gw maintainer Chuanhua Lei's email address has been bouncing for months. Remove the entry and mark the PCI intel-gw driver as orphaned. Signed-off-by: Florian Eckert Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20260417-pcie-intel-gw-v5-1-0a2b933fe04f@dev.tdt.de --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2fb1c75afd16..ab2f91f62c54 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20693,9 +20693,8 @@ F: Documentation/devicetree/bindings/pci/intel,keembay-pcie* F: drivers/pci/controller/dwc/pcie-keembay.c PCIE DRIVER FOR INTEL LGM GW SOC -M: Chuanhua Lei L: linux-pci@vger.kernel.org -S: Maintained +S: Orphan F: Documentation/devicetree/bindings/pci/intel-gw-pcie.yaml F: drivers/pci/controller/dwc/pcie-intel-gw.c -- cgit v1.2.3 From e174929793195e0cd6a4adb0cad731b39f9019b4 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Tue, 5 May 2026 16:43:36 -0700 Subject: net/rds: reset op_nents when zerocopy page pin fails When iov_iter_get_pages2() fails in rds_message_zcopy_from_user(), the pinned pages are released with put_page(), and rm->data.op_mmp_znotifier is cleared. But we fail to properly clear rm->data.op_nents. Later when rds_message_purge() is called from rds_sendmsg() the cleanup loop iterates over the incorrectly non zero number of op_nents and frees them again. Fix this by properly resetting op_nents when it should be in rds_message_zcopy_from_user(). Fixes: 0cebaccef3ac ("rds: zerocopy Tx support.") Signed-off-by: Allison Henderson Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260505234336.2132721-1-achender@kernel.org Signed-off-by: Jakub Kicinski --- net/rds/message.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rds/message.c b/net/rds/message.c index 25fedcb3cd00..7feb0eb6537d 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -448,6 +448,7 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter * for (i = 0; i < rm->data.op_nents; i++) put_page(sg_page(&rm->data.op_sg[i])); + rm->data.op_nents = 0; mmp = &rm->data.op_mmp_znotifier->z_mmp; mm_unaccount_pinned_pages(mmp); ret = -EFAULT; -- cgit v1.2.3 From 24a08d7d6218d60c033015cf4870b6096446e734 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Thu, 7 May 2026 00:35:15 +0000 Subject: net: ena: PHC: Check return code before setting timestamp output ena_phc_gettimex64() is setting the output parameter regardless of whether ena_com_phc_get_timestamp() succeeded or failed. When ena_com_phc_get_timestamp() returns an error, the timestamp parameter may contain uninitialized stack memory (e.g., when PHC is disabled or in blocked state) or invalid hardware values. Passing these to userspace via the PTP ioctl is both a security issue (information leak) and a correctness bug. Fix by checking the return code after releasing the lock and only setting the output timestamp on success. Fixes: e0ea34158ee8 ("net: ena: Add PHC support in the ENA driver") Cc: stable@vger.kernel.org Signed-off-by: Arthur Kiyanovski Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20260507003518.22554-1-akiyano@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_phc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_phc.c b/drivers/net/ethernet/amazon/ena/ena_phc.c index 7867e893fd15..c2a3ff1ef645 100644 --- a/drivers/net/ethernet/amazon/ena/ena_phc.c +++ b/drivers/net/ethernet/amazon/ena/ena_phc.c @@ -46,9 +46,12 @@ static int ena_phc_gettimex64(struct ptp_clock_info *clock_info, spin_unlock_irqrestore(&phc_info->lock, flags); + if (rc) + return rc; + *ts = ns_to_timespec64(timestamp_nsec); - return rc; + return 0; } static int ena_phc_settime64(struct ptp_clock_info *clock_info, -- cgit v1.2.3 From 03cb001ef87b3f8d859cf7f96329acf3d6235d29 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 8 May 2026 12:08:46 +0000 Subject: tcp: Fix out-of-bounds access for twsk in tcp_ao_established_key(). lockdep_sock_is_held() was added in tcp_ao_established_key() by the cited commit. It can be called from tcp_v[46]_timewait_ack() with twsk. Since it does not have sk->sk_lock, the lockdep annotation results in out-of-bound access. $ pahole -C tcp_timewait_sock vmlinux | grep size /* size: 288, cachelines: 5, members: 8 */ $ pahole -C sock vmlinux | grep sk_lock socket_lock_t sk_lock; /* 440 192 */ Let's not use lockdep_sock_is_held() for TCP_TIME_WAIT. Fixes: 6b2d11e2d8fc ("net/tcp: Add missing lockdep annotations for TCP-AO hlist traversals") Reported-by: Damiano Melotti Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260508120853.4098365-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ao.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index a97cdf3e6af4..0a4b38b315fe 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -116,7 +116,8 @@ struct tcp_ao_key *tcp_ao_established_key(const struct sock *sk, { struct tcp_ao_key *key; - hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) { + hlist_for_each_entry_rcu(key, &ao->head, node, + sk_fullsock(sk) && lockdep_sock_is_held(sk)) { if ((sndid >= 0 && key->sndid != sndid) || (rcvid >= 0 && key->rcvid != rcvid)) continue; -- cgit v1.2.3 From f8e64e956a635b054227f56e9586a1997add0646 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 8 May 2026 19:05:10 +0200 Subject: net/sched: dualpi2: initialize timer earlier in dualpi2_init() 'pi2_timer' needs to be initialized in all error paths of dualpi2_init(): otherwise, a failure in qdisc_create_dflt() causes the following crash in dualpi2_destroy(): # tc qdisc add dev crash0 handle 1: root dualpi2 BUG: kernel NULL pointer dereference, address: 0000000000000010 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP PTI CPU: 4 UID: 0 PID: 471 Comm: tc Tainted: G E 7.1.0-rc1-virtme #2 PREEMPT(full) Tainted: [E]=UNSIGNED_MODULE Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 RIP: 0010:hrtimer_active+0x39/0x60 Code: f9 eb 23 0f b6 41 38 3c 01 0f 87 87 64 c0 ff 83 e0 01 75 33 48 39 4a 50 74 28 44 3b 42 10 75 06 48 3b 51 30 74 21 48 8b 51 30 <44> 8b 42 10 41 f6 c0 01 74 cf f3 90 44 8b 42 10 41 f6 c0 01 74 c3 RSP: 0018:ffffd0db80b93620 EFLAGS: 00010282 RAX: ffffffffc0400320 RBX: ffff8cf24a4c86b8 RCX: ffff8cf24a4c86b8 RDX: 0000000000000000 RSI: ffff8cf2429c2ab0 RDI: ffff8cf24a4c86b8 RBP: 00000000fffffff4 R08: 0000000000000003 R09: 0000000000000000 R10: 0000000000000001 R11: ffff8cf24a39c500 R12: ffff8cf24822c000 R13: ffffd0db80b936c0 R14: ffffffffc02cf360 R15: 00000000ffffffff FS: 00007fbc01706580(0000) GS:ffff8cf2dc759000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 0000000008e02003 CR4: 0000000000172ef0 Call Trace: hrtimer_cancel+0x15/0x40 dualpi2_destroy+0x20/0x40 [sch_dualpi2] qdisc_create+0x230/0x570 tc_modify_qdisc+0x716/0xc10 rtnetlink_rcv_msg+0x188/0x780 netlink_rcv_skb+0xcd/0x150 netlink_unicast+0x1ba/0x290 netlink_sendmsg+0x242/0x4d0 ____sys_sendmsg+0x39e/0x3e0 ___sys_sendmsg+0xe1/0x130 __sys_sendmsg+0xad/0x110 do_syscall_64+0x14f/0xf80 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fbc0188b08e Code: 4d 89 d8 e8 94 bd 00 00 4c 8b 5d f8 41 8b 93 08 03 00 00 59 5e 48 83 f8 fc 74 11 c9 c3 0f 1f 80 00 00 00 00 48 8b 45 10 0f 05 c3 83 e2 39 83 fa 08 75 e7 e8 03 ff ff ff 0f 1f 00 f3 0f 1e fa RSP: 002b:00007fff593260e0 EFLAGS: 00000202 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fbc0188b08e RDX: 0000000000000000 RSI: 00007fff59326190 RDI: 0000000000000003 RBP: 00007fff593260f0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000202 R12: 000055f06124f260 R13: 0000000069fca043 R14: 000055f061255640 R15: 000055f06124d3f8 Modules linked in: sch_dualpi2(E) CR2: 0000000000000010 [1] https://lore.kernel.org/netdev/2e78e01c504c633ebdff18d041833cf2e079a3a4.1607020450.git.dcaratti@redhat.com/ [2] https://lore.kernel.org/netdev/20200725201707.16909-1-xiyou.wangcong@gmail.com/ v2: - rebased on top of latest net.git Fixes: 320d031ad6e4 ("sched: Struct definition and parsing of dualpi2 qdisc") Signed-off-by: Davide Caratti Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/1faca91179702b31da5d87653e1e036543e32722.1778259798.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/sched/sch_dualpi2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_dualpi2.c b/net/sched/sch_dualpi2.c index 241e6a46bd00..a22489c14458 100644 --- a/net/sched/sch_dualpi2.c +++ b/net/sched/sch_dualpi2.c @@ -938,6 +938,8 @@ static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt, int err; sch->flags |= TCQ_F_DEQUEUE_DROPS; + hrtimer_setup(&q->pi2_timer, dualpi2_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS_PINNED_SOFT); q->l_queue = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1), extack); @@ -950,8 +952,6 @@ static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt, q->sch = sch; dualpi2_reset_default(sch); - hrtimer_setup(&q->pi2_timer, dualpi2_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS_PINNED_SOFT); if (opt && nla_len(opt)) { err = dualpi2_change(sch, opt, extack); -- cgit v1.2.3 From be48e5fe51a5864566307998286a699d6b986934 Mon Sep 17 00:00:00 2001 From: Evgenii Burenchev Date: Thu, 7 May 2026 17:55:17 +0300 Subject: qed: fix division by zero in qed_init_wfq_param when all vports are configured In qed_init_wfq_param(), variable non_requested_count can become zero when the number of vports with the configured flag set (including the current vport being configured) equals total num_vports. This happens when configuring the last unconfigured vport or when re-configuring an already configured vport. The function then calculates left_rate_per_vp = total_left_rate / non_requested_count, which causes division by zero. Fix this by skipping the division when non_requested_count is zero. In that case, there is no remaining bandwidth to distribute, so just record the configuration for the current vport and return success. Fixes: bcd197c81f63 ("qed: Add vport WFQ configuration APIs") Signed-off-by: Evgenii Burenchev Link: https://patch.msgid.link/20260507145520.23106-1-evg28bur@yandex.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 42c6dcfb1f0f..dd75c47758e1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -5103,6 +5103,13 @@ static int qed_init_wfq_param(struct qed_hwfn *p_hwfn, return -EINVAL; } + /* All vports are already or become configured, nothing to distribute */ + if (non_requested_count == 0) { + p_hwfn->qm_info.wfq_data[vport_id].min_speed = req_rate; + p_hwfn->qm_info.wfq_data[vport_id].configured = true; + return 0; + } + total_left_rate = min_pf_rate - total_req_min_rate; left_rate_per_vp = total_left_rate / non_requested_count; -- cgit v1.2.3 From 2c7b1227e582e88db7917412dca4e752c1aff691 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 11 May 2026 10:36:36 -0500 Subject: ASoC: SOF: amd: Fix error code handling in psp_send_cmd() The smn_read_register() helper returns negative error codes on failure or the register value on success. When used with read_poll_timeout(), the return value is stored in the 'data' variable. Currently 'data' is declared as u32, which causes negative error codes to be cast to large positive values. This makes the condition 'data > 0' incorrectly treat errors as success. Fix by changing 'data' from u32 to int, matching the pattern used in psp_mbox_ready() which correctly handles the same helper function. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-sound/agGES8vWrLOrBu28@stanley.mountain/ Fixes: f120cf33d232 ("ASoC: SOF: amd: Use AMD_NODE") Signed-off-by: Mario Limonciello Link: https://patch.msgid.link/20260511153638.724810-1-mario.limonciello@amd.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/acp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index 71a18f156de2..f615b8d1c802 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -223,7 +223,7 @@ static int psp_send_cmd(struct acp_dev_data *adata, int cmd) { struct snd_sof_dev *sdev = adata->dev; int ret; - u32 data; + int data; if (!cmd) return -EINVAL; -- cgit v1.2.3 From 8b7c5cc7f6e655087164eb902131de356f6d5984 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 11 May 2026 12:42:39 +0100 Subject: ASoC: cs35l56: Check for successful runtime-resume in cs35l56_dsp_work() In cs35l56_dsp_work() check that the request for runtime-resume was successful instead of assuming that it can't fail. We may as well do this using the new PM_RUNTIME_ACQUIRE*() macros and remove the manual pm_runtime_put_autosuspend() and associated gotos. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260511114239.44970-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 849d70ca23d6..4fbbdcc87151 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -867,11 +867,16 @@ static void cs35l56_dsp_work(struct work_struct *work) if (!cs35l56->base.init_done) return; - pm_runtime_get_sync(cs35l56->base.dev); + PM_RUNTIME_ACQUIRE(cs35l56->base.dev, pm); + ret = PM_RUNTIME_ACQUIRE_ERR(&pm); + if (ret) { + dev_err(cs35l56->base.dev, "dsp_work failed to runtime-resume: %d\n", ret); + return; + } ret = cs35l56_read_prot_status(&cs35l56->base, &firmware_missing, &firmware_version); if (ret) - goto err; + return; /* Populate fw file qualifier with the revision and security state */ kfree(cs35l56->dsp.fwf_name); @@ -887,7 +892,7 @@ static void cs35l56_dsp_work(struct work_struct *work) } if (!cs35l56->dsp.fwf_name) - goto err; + return; dev_dbg(cs35l56->base.dev, "DSP fwf name: '%s' system name: '%s'\n", cs35l56->dsp.fwf_name, cs35l56->dsp.system_name); @@ -905,8 +910,6 @@ static void cs35l56_dsp_work(struct work_struct *work) cs35l56_patch(cs35l56, firmware_missing); cs35l56_log_tuning(&cs35l56->base, &cs35l56->dsp.cs_dsp); -err: - pm_runtime_put_autosuspend(cs35l56->base.dev); } static struct snd_soc_dapm_context *cs35l56_power_up_for_cal(struct cs35l56_private *cs35l56) -- cgit v1.2.3 From 53597deca0e38c30e6cd4ba2114fa42d2bcd85bb Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Thu, 7 May 2026 18:06:03 +0800 Subject: drm/bridge: imx8qxp-pxl2dpi: avoid ERR_PTR with device_node cleanup imx8qxp_pxl2dpi_get_available_ep_from_port() returns ERR_PTR() on errors. imx8qxp_pxl2dpi_find_next_bridge() stores its return value in a __free(device_node) variable before checking IS_ERR(). When the function returns on the error path, the cleanup action calls of_node_put() on the ERR_PTR() value. Do not let a device_node cleanup variable hold error pointers. Change imx8qxp_pxl2dpi_get_available_ep_from_port() to return an int and pass the endpoint node through an output argument. Initialize the output argument to NULL so callers hold either NULL on error paths or a valid device_node pointer on successful path. Fixes: ceea3f7806a10 ("drm/bridge: imx8qxp-pxl2dpi: simplify put of device_node pointers") Cc: stable@vger.kernel.org Reviewed-by: Liu Ying Signed-off-by: Guangshuo Li Link: https://patch.msgid.link/20260507100604.667731-1-lgs201920130244@gmail.com Signed-off-by: Liu Ying --- drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c | 40 ++++++++++++++++------------ 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c b/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c index 441fd32dc91c..d64e328bf542 100644 --- a/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c +++ b/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c @@ -222,52 +222,58 @@ static const struct drm_bridge_funcs imx8qxp_pxl2dpi_bridge_funcs = { imx8qxp_pxl2dpi_bridge_atomic_get_output_bus_fmts, }; -static struct device_node * +static int imx8qxp_pxl2dpi_get_available_ep_from_port(struct imx8qxp_pxl2dpi *p2d, - u32 port_id) + u32 port_id, + struct device_node **ep) { - struct device_node *port, *ep; + struct device_node *port; + int ret = 0; int ep_cnt; + *ep = NULL; + port = of_graph_get_port_by_id(p2d->dev->of_node, port_id); if (!port) { DRM_DEV_ERROR(p2d->dev, "failed to get port@%u\n", port_id); - return ERR_PTR(-ENODEV); + return -ENODEV; } ep_cnt = of_get_available_child_count(port); if (ep_cnt == 0) { DRM_DEV_ERROR(p2d->dev, "no available endpoints of port@%u\n", port_id); - ep = ERR_PTR(-ENODEV); + ret = -ENODEV; goto out; } else if (ep_cnt > 1) { DRM_DEV_ERROR(p2d->dev, "invalid available endpoints of port@%u\n", port_id); - ep = ERR_PTR(-EINVAL); + ret = -EINVAL; goto out; } - ep = of_get_next_available_child(port, NULL); - if (!ep) { + *ep = of_get_next_available_child(port, NULL); + if (!*ep) { DRM_DEV_ERROR(p2d->dev, "failed to get available endpoint of port@%u\n", port_id); - ep = ERR_PTR(-ENODEV); + ret = -ENODEV; goto out; } out: of_node_put(port); - return ep; + return ret; } static int imx8qxp_pxl2dpi_find_next_bridge(struct imx8qxp_pxl2dpi *p2d) { - struct device_node *ep __free(device_node) = - imx8qxp_pxl2dpi_get_available_ep_from_port(p2d, 1); - if (IS_ERR(ep)) - return PTR_ERR(ep); + struct device_node *ep __free(device_node) = NULL; + int ret; + + ret = imx8qxp_pxl2dpi_get_available_ep_from_port(p2d, 1, &ep); + if (ret) + return ret; struct device_node *remote __free(device_node) = of_graph_get_remote_port_parent(ep); if (!remote || !of_device_is_available(remote)) { @@ -291,9 +297,9 @@ static int imx8qxp_pxl2dpi_set_pixel_link_sel(struct imx8qxp_pxl2dpi *p2d) struct of_endpoint endpoint; int ret; - ep = imx8qxp_pxl2dpi_get_available_ep_from_port(p2d, 0); - if (IS_ERR(ep)) - return PTR_ERR(ep); + ret = imx8qxp_pxl2dpi_get_available_ep_from_port(p2d, 0, &ep); + if (ret) + return ret; ret = of_graph_parse_endpoint(ep, &endpoint); if (ret) { -- cgit v1.2.3 From c21b90f77687075115d989e53a8ec5e2bb427ab1 Mon Sep 17 00:00:00 2001 From: Prathyushi Nangia Date: Tue, 9 Dec 2025 10:01:33 -0600 Subject: x86/CPU/AMD: Prevent improper isolation of shared resources in Zen2's op cache Make sure resources are not improperly shared in the op cache and cause instruction corruption this way. Signed-off-by: Prathyushi Nangia Co-developed-by: Borislav Petkov (AMD) Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/x86/include/asm/msr-index.h | 3 ++- arch/x86/kernel/cpu/amd.c | 3 +++ tools/arch/x86/include/asm/msr-index.h | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a14a0f43e04a..86554de9a3f5 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -803,9 +803,10 @@ #define MSR_AMD64_LBR_SELECT 0xc000010e /* Zen4 */ -#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG 0xc001102e #define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 +#define MSR_ZEN2_BP_CFG_BUG_FIX_BIT 33 /* Fam 19h MSRs */ #define MSR_F19H_UMC_PERF_CTL 0xc0010800 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2d9ae6ab1701..2f8e8ff2d000 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -989,6 +989,9 @@ static void init_amd_zen2(struct cpuinfo_x86 *c) /* Correct misconfigured CPUID on some clients. */ clear_cpu_cap(c, X86_FEATURE_INVLPGB); + + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN2_BP_CFG_BUG_FIX_BIT); } static void init_amd_zen3(struct cpuinfo_x86 *c) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 6673601246b3..eff29645719b 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -793,9 +793,10 @@ #define MSR_AMD64_LBR_SELECT 0xc000010e /* Zen4 */ -#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG 0xc001102e #define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 +#define MSR_ZEN2_BP_CFG_BUG_FIX_BIT 33 /* Fam 19h MSRs */ #define MSR_F19H_UMC_PERF_CTL 0xc0010800 -- cgit v1.2.3 From 8d57bb61734b23f6342e9de781173f1d83f90d3a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 5 May 2026 20:47:56 +0200 Subject: powerpc/g5: Enable all windfarms by default The G5 defconfig is clearly intended for the G5 Powermac series, and that should enable all the available windfarm drivers, or the machine will overheat a short while after booting and shut itself down, which is annoying. Signed-off-by: Linus Walleij Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260505-powermac-g5-config-v3-1-7747bf72f874@kernel.org --- arch/powerpc/configs/g5_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index e9996711b362..5ca1676e6058 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -85,6 +85,8 @@ CONFIG_PMAC_SMU=y CONFIG_MAC_EMUMOUSEBTN=y CONFIG_WINDFARM=y CONFIG_WINDFARM_PM81=y +CONFIG_WINDFARM_PM72=y +CONFIG_WINDFARM_RM31=y CONFIG_WINDFARM_PM91=y CONFIG_WINDFARM_PM112=y CONFIG_WINDFARM_PM121=y -- cgit v1.2.3 From acd1e47db03d4b528fd5efb8565dd0de1c79f62a Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Sun, 16 Nov 2025 19:55:44 +0530 Subject: powerpc: 82xx: fix uninitialized pointers with free attribute Uninitialized pointers with `__free` attribute can cause undefined behavior as the memory allocated to the pointer is freed automatically when the pointer goes out of scope. powerpc/km82xx doesn't have any bugs related to this as of now, but, it is better to initialize and assign pointers with `__free` attribute in one statement to ensure proper scope-based cleanup Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/aPiG_F5EBQUjZqsl@stanley.mountain/ Signed-off-by: Ally Heev Fixes: 4aa5cc1e0012 ("powerpc-km82xx.c: replace of_node_put() with __free") Reviewed-by: Christophe Leroy (CS GROUP) Reviewed-by: Krzysztof Kozlowski Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20251116-aheev-uninitialized-free-attr-km82xx-v2-1-4307e2b5300d@gmail.com --- arch/powerpc/platforms/82xx/km82xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c index 99f0f0f41876..4ad223525e89 100644 --- a/arch/powerpc/platforms/82xx/km82xx.c +++ b/arch/powerpc/platforms/82xx/km82xx.c @@ -27,8 +27,8 @@ static void __init km82xx_pic_init(void) { - struct device_node *np __free(device_node); - np = of_find_compatible_node(NULL, NULL, "fsl,pq2-pic"); + struct device_node *np __free(device_node) = of_find_compatible_node(NULL, + NULL, "fsl,pq2-pic"); if (!np) { pr_err("PIC init: can not find cpm-pic node\n"); -- cgit v1.2.3 From 108d7f951271cbd36ca36efc5e5d106966f5180c Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Sun, 16 Nov 2025 10:44:11 +0800 Subject: powerpc/warp: Fix error handling in pika_dtm_thread pika_dtm_thread() acquires client through of_find_i2c_device_by_node() but fails to release it in error handling path. This could result in a reference count leak, preventing proper cleanup and potentially leading to resource exhaustion. Add put_device() to release the reference in the error handling path. Found by code review. Cc: stable@vger.kernel.org Fixes: 3984114f0562 ("powerpc/warp: Platform fix for i2c change") Signed-off-by: Ma Ke Reviewed-by: Christophe Leroy Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20251116024411.21968-1-make24@iscas.ac.cn --- arch/powerpc/platforms/44x/warp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index a5001d32f978..6f674f86dc85 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -293,6 +293,8 @@ static int pika_dtm_thread(void __iomem *fpga) schedule_timeout(HZ); } + put_device(&client->dev); + return 0; } -- cgit v1.2.3 From f98020c22ad9be07d6feefd0496e70f9f36a2f63 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 16 Mar 2026 10:47:42 -0700 Subject: powerpc/powermac: Remove pmac_low_i2c_{lock,unlock}() Commit a28d3af2a26c ("[PATCH] 2/5 powerpc: Rework PowerMac i2c part 2") removed the last calls to the pmac_low_i2c_{lock,unlock}() functions. Hence, remove these two functions. Reviewed-by: Christophe Leroy (CS GROUP) Signed-off-by: Bart Van Assche Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260316174747.3871924-1-bvanassche@acm.org --- arch/powerpc/include/asm/pmac_low_i2c.h | 4 ---- arch/powerpc/platforms/powermac/low_i2c.c | 34 ------------------------------- 2 files changed, 38 deletions(-) diff --git a/arch/powerpc/include/asm/pmac_low_i2c.h b/arch/powerpc/include/asm/pmac_low_i2c.h index 21bd7297c87f..fead8fae08ab 100644 --- a/arch/powerpc/include/asm/pmac_low_i2c.h +++ b/arch/powerpc/include/asm/pmac_low_i2c.h @@ -79,10 +79,6 @@ extern int pmac_i2c_match_adapter(struct device_node *dev, struct i2c_adapter *adapter); -/* (legacy) Locking functions exposed to i2c-keywest */ -extern int pmac_low_i2c_lock(struct device_node *np); -extern int pmac_low_i2c_unlock(struct device_node *np); - /* Access functions for platform code */ extern int pmac_i2c_open(struct pmac_i2c_bus *bus, int polled); extern void pmac_i2c_close(struct pmac_i2c_bus *bus); diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index 73b7f4e8c047..da72a30ab865 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -1058,40 +1058,6 @@ int pmac_i2c_match_adapter(struct device_node *dev, struct i2c_adapter *adapter) } EXPORT_SYMBOL_GPL(pmac_i2c_match_adapter); -int pmac_low_i2c_lock(struct device_node *np) -{ - struct pmac_i2c_bus *bus, *found = NULL; - - list_for_each_entry(bus, &pmac_i2c_busses, link) { - if (np == bus->controller) { - found = bus; - break; - } - } - if (!found) - return -ENODEV; - return pmac_i2c_open(bus, 0); -} -EXPORT_SYMBOL_GPL(pmac_low_i2c_lock); - -int pmac_low_i2c_unlock(struct device_node *np) -{ - struct pmac_i2c_bus *bus, *found = NULL; - - list_for_each_entry(bus, &pmac_i2c_busses, link) { - if (np == bus->controller) { - found = bus; - break; - } - } - if (!found) - return -ENODEV; - pmac_i2c_close(bus); - return 0; -} -EXPORT_SYMBOL_GPL(pmac_low_i2c_unlock); - - int pmac_i2c_open(struct pmac_i2c_bus *bus, int polled) { int rc; -- cgit v1.2.3 From aef656a0e6c01796190bb5bd2bdba1c644ed7811 Mon Sep 17 00:00:00 2001 From: Julian Braha Date: Sun, 5 Apr 2026 17:15:45 +0100 Subject: powerpc: fix dead default for GUEST_STATE_BUFFER_TEST The GUEST_STATE_BUFFER_TEST config option should default to KUNIT_ALL_TESTS so that if all tests are enabled then it is included, but currently the 'default KUNIT_ALL_TESTS' statement is shadowed by 'def_tristate n', meaning that this second default statement is currently dead code. It looks to me like the commit 6ccbbc33f06a ("KVM: PPC: Add helper library for Guest State Buffers") intended to set the default to KUNIT_ALL_TESTS, but mistakenly missed the def_tristate. This dead code was found by kconfirm, a static analysis tool for Kconfig. Fixes: 6ccbbc33f06a ("KVM: PPC: Add helper library for Guest State Buffers") Signed-off-by: Julian Braha Tested-by: Gautam Menghani Reviewed-by: Amit Machhiwal Reviewed-by: Harsh Prateek Bora Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260405161545.161006-1-julianbraha@gmail.com --- arch/powerpc/Kconfig.debug | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index f15e5920080b..e8718bc13eeb 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -83,11 +83,10 @@ config MSI_BITMAP_SELFTEST depends on DEBUG_KERNEL config GUEST_STATE_BUFFER_TEST - def_tristate n + def_tristate KUNIT_ALL_TESTS prompt "Enable Guest State Buffer unit tests" depends on KUNIT depends on KVM_BOOK3S_HV_POSSIBLE - default KUNIT_ALL_TESTS help The Guest State Buffer is a data format specified in the PAPR. It is by hcalls to communicate the state of L2 guests between -- cgit v1.2.3 From dbc30a57bd8e026995e9fa8e8c31cffd18542c01 Mon Sep 17 00:00:00 2001 From: Aboorva Devarajan Date: Fri, 8 May 2026 09:42:56 +0530 Subject: powerpc/hv-gpci: fix preempt count leak in sysfs show paths Four sysfs show() callbacks in hv-gpci take get_cpu_var(hv_gpci_reqb) (which calls preempt_disable()) but only call the matching put_cpu_var() on the error path under the 'out:' label. Every successful read leaks one preempt_disable(): processor_bus_topology_show() processor_config_show() affinity_domain_via_virtual_processor_show() affinity_domain_via_domain_show() (affinity_domain_via_partition_show() was already correct.) On a CONFIG_PREEMPT=y kernel, repeated reads raise preempt_count and eventually return to userspace with preemption still disabled. The next user-mode page fault then hits faulthandler_disabled() == 1, gets forced to SIGSEGV, and the resulting coredump trips 'BUG: scheduling while atomic' in call_usermodehelper_exec -> wait_for_completion_state -> schedule: BUG: scheduling while atomic: //0x00000004 ... __schedule_bug+0x6c/0x90 __schedule+0x58c/0x13a0 schedule+0x48/0x1a0 schedule_timeout+0x104/0x170 wait_for_completion_state+0x16c/0x330 call_usermodehelper_exec+0x254/0x2d0 vfs_coredump+0x1050/0x2590 get_signal+0xb9c/0xc80 do_notify_resume+0xf8/0x470 Add an out_success label that calls put_cpu_var() before returning the byte count, mirroring affinity_domain_via_partition_show(). Fixes: 71f1c39647d8 ("powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show processor bus topology information") Fixes: 1a160c2a13c6 ("powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show processor config information") Fixes: 71a7ccb478fc ("powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show affinity domain via virtual processor information") Fixes: a69a57cac1ec ("powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show affinity domain via domain information") Signed-off-by: Aboorva Devarajan Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260508041256.3447113-1-aboorvad@linux.ibm.com --- arch/powerpc/perf/hv-gpci.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 5cac2cf3bd1e..10c82cf8f5b3 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -210,7 +210,7 @@ static ssize_t processor_bus_topology_show(struct device *dev, struct device_att 0, 0, buf, &n, arg); if (!ret) - return n; + goto out_success; if (ret != H_PARAMETER) goto out; @@ -244,12 +244,14 @@ static ssize_t processor_bus_topology_show(struct device *dev, struct device_att starting_index, 0, buf, &n, arg); if (!ret) - return n; + goto out_success; if (ret != H_PARAMETER) goto out; } +out_success: + put_cpu_var(hv_gpci_reqb); return n; out: @@ -278,7 +280,7 @@ static ssize_t processor_config_show(struct device *dev, struct device_attribute 0, 0, buf, &n, arg); if (!ret) - return n; + goto out_success; if (ret != H_PARAMETER) goto out; @@ -312,12 +314,14 @@ static ssize_t processor_config_show(struct device *dev, struct device_attribute starting_index, 0, buf, &n, arg); if (!ret) - return n; + goto out_success; if (ret != H_PARAMETER) goto out; } +out_success: + put_cpu_var(hv_gpci_reqb); return n; out: @@ -346,7 +350,7 @@ static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev, 0, 0, buf, &n, arg); if (!ret) - return n; + goto out_success; if (ret != H_PARAMETER) goto out; @@ -382,12 +386,14 @@ static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev, starting_index, secondary_index, buf, &n, arg); if (!ret) - return n; + goto out_success; if (ret != H_PARAMETER) goto out; } +out_success: + put_cpu_var(hv_gpci_reqb); return n; out: @@ -416,7 +422,7 @@ static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device 0, 0, buf, &n, arg); if (!ret) - return n; + goto out_success; if (ret != H_PARAMETER) goto out; @@ -448,12 +454,14 @@ static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device starting_index, 0, buf, &n, arg); if (!ret) - return n; + goto out_success; if (ret != H_PARAMETER) goto out; } +out_success: + put_cpu_var(hv_gpci_reqb); return n; out: -- cgit v1.2.3 From 1e9fab756f8395096d5bba7be0c373c4c8f5d165 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 2 May 2026 19:08:37 +0200 Subject: batman-adv: tt: reject oversized local TVLV buffers The commit 3a359bf5c61d ("batman-adv: reject oversized global TT response buffers") added a check to ensure that a global return buffer size can be stored in an u16. The same buffer handling also exists for the local data buffer but was not touched. A similar check should be also be in place for the local TVLV buffer. It doesn't have the similar attack surface because it is only generated from locally discovered MAC addresses but the dynamic nature could still cause temporarily to large buffers. Cc: stable@kernel.org Fixes: 7ea7b4a14275 ("batman-adv: make the TT CRC logic VLAN specific") Signed-off-by: Sven Eckelmann --- net/batman-adv/translation-table.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 05cddcf994f6..06548dae1039 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -877,12 +877,12 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, { struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_meshif_vlan *vlan; + size_t change_offset; u16 num_vlan = 0; u16 vlan_entries = 0; u16 total_entries = 0; u16 tvlv_len; u8 *tt_change_ptr; - int change_offset; spin_lock_bh(&bat_priv->meshif_vlan_list_lock); hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) { @@ -900,8 +900,10 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, if (*tt_len < 0) *tt_len = batadv_tt_len(total_entries); - tvlv_len = *tt_len; - tvlv_len += change_offset; + if (check_add_overflow(*tt_len, change_offset, &tvlv_len)) { + tvlv_len = 0; + goto out; + } *tt_data = kmalloc(tvlv_len, GFP_ATOMIC); if (!*tt_data) { -- cgit v1.2.3 From b64963a2ceeb7529310b6cf253a1e540784422f4 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 2 May 2026 19:53:21 +0200 Subject: batman-adv: tt: fix negative tt_buff_len batadv_orig_node::tt_buff_len was declared as s16, but the field is never intended to hold a negative value. When a value greater than 32767 is assigned, it wraps to a negative signed integer. In batadv_send_other_tt_response(), tt_buff_len is temporarily widened to s32. The incorrectly negative s16 value propagates into the s32, causing batadv_tt_prepare_tvlv_global_data() to allocate a full sized buffer but populates only a small portion of it with the collected changeset. All remaining bits are kept uninitialized. Using an u16 avoids this type confusion and ensures that no (negative) sign extension is performed in batadv_send_other_tt_response(). Cc: stable@kernel.org Fixes: a73105b8d4c7 ("batman-adv: improved client announcement mechanism") Signed-off-by: Sven Eckelmann --- net/batman-adv/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index b9c0b7779122..888f337a194b 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -452,7 +452,7 @@ struct batadv_orig_node { * @tt_buff_len: length of the last tt changeset this node received * from the orig node */ - s16 tt_buff_len; + u16 tt_buff_len; /** @tt_buff_lock: lock that protects tt_buff and tt_buff_len */ spinlock_t tt_buff_lock; -- cgit v1.2.3 From fc92cdfcb295cefa4344d71a527d61b638b7bfc4 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 2 May 2026 19:53:21 +0200 Subject: batman-adv: tt: fix negative last_changeset_len batadv_piv_tt::last_changeset_len len was declared as s16, but the field is never intended to hold a negative value. When a value greater than 32767 is assigned, it wraps to a negative signed integer. In batadv_send_my_tt_response(), last_changeset_len is temporarily widened to s32. The incorrectly negative s16 value propagates into the s32, causing batadv_tt_prepare_tvlv_local_data() to allocate a full sized buffer but populates only a small portion of it with the collected changeset. All remaining bits are kept uninitialized. Using an u16 avoids this type confusion and ensures that no (negative) sign extension is performed in batadv_send_my_tt_response(). Cc: stable@kernel.org Fixes: a73105b8d4c7 ("batman-adv: improved client announcement mechanism") Signed-off-by: Sven Eckelmann --- net/batman-adv/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 888f337a194b..739439e2b235 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -993,7 +993,7 @@ struct batadv_priv_tt { * @last_changeset_len: length of last tt changeset this host has * generated */ - s16 last_changeset_len; + u16 last_changeset_len; /** * @last_changeset_lock: lock protecting last_changeset & -- cgit v1.2.3 From 94d27005016be15ffc638b2ecbc4d58805ad7b48 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 2 May 2026 19:47:11 +0200 Subject: batman-adv: tt: fix TOCTOU race for reported vlans The local TT based TVLV is generated by first checking the number of VLANs which have at least one TT entry. A new buffer with the correct size for the VLANs is then allocated. Only then, the list of VLANs s used to fill the VLAN entries in the buffer. During this time, the meshif_vlan_list_lock is held. But the actual number of TT entries of each VLAN can still increase during this time - just not the number of VLANs in the list. But the prefilter used in the buffer size calculation might still cause an increase of the number of VLANs which need to be stored. Simply because a VLAN might now suddenly have at least one entry when it had none in the pre-alloc check - and then needs to occupy space which was not allocated. It is better to overestimate the buffer size at the beginning and then fill the buffer only with the VLANs which are not empty. Cc: stable@kernel.org Fixes: 16116dac2339 ("batman-adv: prevent TT request storms by not sending inconsistent TT TLVLs") Signed-off-by: Sven Eckelmann --- net/batman-adv/translation-table.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 06548dae1039..f009cbf8a276 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -887,11 +887,8 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->meshif_vlan_list_lock); hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); - if (vlan_entries < 1) - continue; - - num_vlan++; total_entries += vlan_entries; + num_vlan++; } change_offset = struct_size(*tt_data, vlan_data, num_vlan); @@ -916,6 +913,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, (*tt_data)->num_vlan = htons(num_vlan); tt_vlan = (*tt_data)->vlan_data; + num_vlan = 0; hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); if (vlan_entries < 1) @@ -926,8 +924,15 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, tt_vlan->reserved = 0; tt_vlan++; + num_vlan++; } + /* recalculate in case number of VLANs reduced */ + change_offset = struct_size(*tt_data, vlan_data, num_vlan); + tvlv_len = *tt_len + change_offset; + + (*tt_data)->num_vlan = htons(num_vlan); + tt_change_ptr = (u8 *)*tt_data + change_offset; *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; -- cgit v1.2.3 From fa1bd704940b5bcbc32c0b28db9167405c8ee5e0 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 2 May 2026 20:47:34 +0200 Subject: batman-adv: tt: avoid empty VLAN responses The commit 16116dac2339 ("batman-adv: prevent TT request storms by not sending inconsistent TT TLVLs") added checks to the local (direct) TT response code. But the response can also be done indirectly by another node using the global TT state. To avoid such inconsistency states reported in the original fix, also avoid sending empty VLANs for replies from the global TT state. Cc: stable@kernel.org Fixes: 7ea7b4a14275 ("batman-adv: make the TT CRC logic VLAN specific") Signed-off-by: Sven Eckelmann --- net/batman-adv/translation-table.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index f009cbf8a276..2259b241e0b5 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -797,24 +797,26 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, s32 *tt_len) { u16 num_vlan = 0; - u16 num_entries = 0; u16 tvlv_len = 0; unsigned int change_offset; struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_orig_node_vlan *vlan; + u16 total_entries = 0; u8 *tt_change_ptr; + int vlan_entries; spin_lock_bh(&orig_node->vlan_list_lock); hlist_for_each_entry(vlan, &orig_node->vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + total_entries += vlan_entries; num_vlan++; - num_entries += atomic_read(&vlan->tt.num_entries); } change_offset = struct_size(*tt_data, vlan_data, num_vlan); /* if tt_len is negative, allocate the space needed by the full table */ if (*tt_len < 0) - *tt_len = batadv_tt_len(num_entries); + *tt_len = batadv_tt_len(total_entries); if (change_offset > U16_MAX || *tt_len > U16_MAX - change_offset) { *tt_len = 0; @@ -835,14 +837,26 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, (*tt_data)->num_vlan = htons(num_vlan); tt_vlan = (*tt_data)->vlan_data; + num_vlan = 0; hlist_for_each_entry(vlan, &orig_node->vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); tt_vlan->reserved = 0; tt_vlan++; + num_vlan++; } + /* recalculate in case number of VLANs reduced */ + change_offset = struct_size(*tt_data, vlan_data, num_vlan); + tvlv_len = *tt_len + change_offset; + + (*tt_data)->num_vlan = htons(num_vlan); + tt_change_ptr = (u8 *)*tt_data + change_offset; *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; -- cgit v1.2.3 From 99d9958fa10fb684b2a8e2c48a8d704122721420 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 2 May 2026 21:25:19 +0200 Subject: batman-adv: tt: prevent TVLV entry number overflow The helpers to prepare the buffers for the local and global TT based replies are trying to sum up all TT entries which can be found for each VLAN. In theory, this sum can be too big for an u16 and therefore overflow. A too small buffer would then be allocated for the TVLV. The too small buffer will be handled gracefully by batadv_tt_tvlv_generate() and is not causing a buffer overflow - just a truncated reply. But this overflow shouldn't have happened in the first and the too small buffer should never have been allocated when an overflow was detected. Cc: stable@kernel.org Fixes: 7ea7b4a14275 ("batman-adv: make the TT CRC logic VLAN specific") Signed-off-by: Sven Eckelmann --- net/batman-adv/translation-table.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 2259b241e0b5..9f6e67771ffa 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -804,11 +804,18 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, u16 total_entries = 0; u8 *tt_change_ptr; int vlan_entries; + u16 sum_entries; spin_lock_bh(&orig_node->vlan_list_lock); hlist_for_each_entry(vlan, &orig_node->vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); - total_entries += vlan_entries; + + if (check_add_overflow(vlan_entries, total_entries, &sum_entries)) { + *tt_len = 0; + goto out; + } + + total_entries = sum_entries; num_vlan++; } @@ -893,15 +900,22 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, struct batadv_meshif_vlan *vlan; size_t change_offset; u16 num_vlan = 0; - u16 vlan_entries = 0; u16 total_entries = 0; u16 tvlv_len; u8 *tt_change_ptr; + int vlan_entries; + u16 sum_entries; spin_lock_bh(&bat_priv->meshif_vlan_list_lock); hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); - total_entries += vlan_entries; + + if (check_add_overflow(vlan_entries, total_entries, &sum_entries)) { + tvlv_len = 0; + goto out; + } + + total_entries = sum_entries; num_vlan++; } -- cgit v1.2.3 From 4cfe4c0efbdcde742a47813180cc69b132d7598e Mon Sep 17 00:00:00 2001 From: Sebastian Brzezinka Date: Thu, 16 Apr 2026 13:31:18 +0200 Subject: drm/i915: skip __i915_request_skip() for already signaled requests After a GPU reset the HWSP is zeroed, so previously completed requests appear incomplete. If such a request is picked up during reset_rewind() and marked guilty, i915_request_set_error_once() returns early (fence already signaled), leaving fence.error without a fatal error code. The subsequent __i915_request_skip() then hits: ``` GEM_BUG_ON(!fatal_error(rq->fence.error)) ``` Fixes a kernel BUG observed on Sandy Bridge (Gen6) during heartbeat-triggered engine resets. ``` kernel BUG at drivers/gpu/drm/i915/i915_request.c:556! RIP: __i915_request_skip+0x15e/0x1d0 [i915] ... __i915_request_reset+0x212/0xa70 [i915] reset_rewind+0xe4/0x280 [i915] intel_gt_reset+0x30d/0x5b0 [i915] heartbeat+0x516/0x530 [i915] ``` Guard __i915_request_skip() with i915_request_signaled(), if the fence is already signaled, the ring content is committed and there is nothing left to skip. Fixes: 36e191f0644b ("drm/i915: Apply i915_request_skip() on submission") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/work_items/13729 Signed-off-by: Sebastian Brzezinka Cc: stable@vger.kernel.org # v5.7+ Reviewed-by: Krzysztof Karas Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/fe76921d35b6ae85aa651822726d0d9815aa5362.1776339012.git.sebastian.brzezinka@intel.com (cherry picked from commit 5ba54393dcd7adf75a9f39f5a933b1538349cad5) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/intel_reset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 984d0056c01c..adff482a6c9c 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -132,7 +132,8 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) rcu_read_lock(); /* protect the GEM context */ if (guilty) { i915_request_set_error_once(rq, -EIO); - __i915_request_skip(rq); + if (!i915_request_signaled(rq)) + __i915_request_skip(rq); banned = mark_guilty(rq); } else { i915_request_set_error_once(rq, -EAGAIN); -- cgit v1.2.3 From 1ae15b6c7965d137eef21f2cc7d367b29cb88369 Mon Sep 17 00:00:00 2001 From: Chaitanya Kumar Borah Date: Tue, 5 May 2026 14:39:20 +0530 Subject: drm/i915/dp: Fix VSC dynamic range signaling for RGB formats For RGB, set dynamic_range to CTA or VESA based on crtc_state->limited_color_range so sinks apply correct quantization. YCbCr remains limited (CTA) range. (DP v1.4, Table 5-1) v2: - Added Reported-by and Tested-by tags v3: - Add back YCbCr comment(Suraj) Cc: stable@vger.kernel.org #v5.8+ Reported-by: DeepChirp Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/work_items/15874 Tested-by: DeepChirp Fixes: 9799c4c3b76e ("drm/i915/dp: Add compute routine for DP VSC SDP") Assisted-by: GitHub-Copilot:GPT-5.4 Signed-off-by: Chaitanya Kumar Borah Reviewed-by: Suraj Kandpal Signed-off-by: Suraj Kandpal Link: https://patch.msgid.link/20260505090920.2479112-1-chaitanya.kumar.borah@intel.com (cherry picked from commit 38e10ddae6f8d42a2e8437fcd25a1cac51106c64) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_dp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 4955bd8b11d7..50d34b4fdf82 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -3119,8 +3119,13 @@ static void intel_dp_compute_vsc_colorimetry(const struct intel_crtc_state *crtc drm_WARN_ON(display->drm, vsc->bpc == 6 && vsc->pixelformat != DP_PIXELFORMAT_RGB); - /* all YCbCr are always limited range */ - vsc->dynamic_range = DP_DYNAMIC_RANGE_CTA; + /* All YCbCr formats are always limited range. */ + if (vsc->pixelformat == DP_PIXELFORMAT_RGB) + vsc->dynamic_range = crtc_state->limited_color_range ? + DP_DYNAMIC_RANGE_CTA : DP_DYNAMIC_RANGE_VESA; + else + vsc->dynamic_range = DP_DYNAMIC_RANGE_CTA; + vsc->content_type = DP_CONTENT_TYPE_NOT_DEFINED; } -- cgit v1.2.3 From 911f54771ca97947cfdca360e9e9b4147a330740 Mon Sep 17 00:00:00 2001 From: Quan Sun <2022090917019@std.uestc.edu.cn> Date: Fri, 8 May 2026 20:46:36 +0800 Subject: net: hsr: fix NULL pointer dereference in hsr_get_node_data() In the HSR (High-availability Seamless Redundancy) protocol, node information is maintained in the node_db. When a supervision frame is received, node->addr_B_port is updated to track the receiving port type (e.g., HSR_PT_SLAVE_B). If the underlying physical interface associated with this slave port is removed (e.g., via `ip link del`), hsr_del_port() frees the hsr_port object. However, the stale node->addr_B_port reference is kept in the node_db until the node ages out. Subsequently, if userspace queries the node status via the Netlink command HSR_C_GET_NODE_STATUS, the kernel calls hsr_get_node_data(). This function unconditionally dereferences the pointer returned by hsr_port_get_hsr(): if (node->addr_B_port != HSR_PT_NONE) { port = hsr_port_get_hsr(hsr, node->addr_B_port); *addr_b_ifindex = port->dev->ifindex; // <-- NULL deref } If the slave port has been deleted, hsr_port_get_hsr() returns NULL, resulting in a kernel panic. Oops: general protection fault, probably for non-canonical address KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] RIP: 0010:hsr_get_node_data+0x7b6/0x9e0 Call Trace: hsr_get_node_status+0x445/0xa40 Fix this by adding a proper NULL pointer check. If the port lookup fails due to a stale port type, gracefully treat it as if no valid port exists and assign -1 to the interface index. Steps to reproduce: 1. Create an HSR interface with two slave devices. 2. Receive a supervision frame to populate node_db with addr_B_port assigned to SLAVE_B. 3. Delete the underlying slave device B. 4. Send an HSR_C_GET_NODE_STATUS Netlink message. Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.") Signed-off-by: Quan Sun <2022090917019@std.uestc.edu.cn> Link: https://patch.msgid.link/20260508124636.1462346-1-2022090917019@std.uestc.edu.cn Signed-off-by: Paolo Abeni --- net/hsr/hsr_framereg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index d09875b33588..124619920d38 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -889,7 +889,10 @@ int hsr_get_node_data(struct hsr_priv *hsr, if (node->addr_B_port != HSR_PT_NONE) { port = hsr_port_get_hsr(hsr, node->addr_B_port); - *addr_b_ifindex = port->dev->ifindex; + if (port) + *addr_b_ifindex = port->dev->ifindex; + else + *addr_b_ifindex = -1; } else { *addr_b_ifindex = -1; } -- cgit v1.2.3 From 5f344d809e015fba3709e5219428c00b8ac5d7df Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 8 May 2026 18:44:10 +0200 Subject: vsock/virtio: fix length and offset in tap skb for split packets virtio_transport_build_skb() builds a new skb to be delivered to the vsockmon tap device. To build the new skb, it uses the original skb data length as payload length, but as the comment notes, the original packet stored in the skb may have been split in multiple packets, so we need to use the length in the header, which is correctly updated before the packet is delivered to the tap, and the offset for the data. This was also similar to what we did before commit 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff") where we probably missed something during the skb conversion. Also update the comment above, which was left stale by the skb conversion and still mentioned a buffer pointer that no longer exists. Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff") Signed-off-by: Stefano Garzarella Reviewed-by: Bobby Eshleman Reviewed-by: Arseniy Krasnov Link: https://patch.msgid.link/20260508164411.261440-2-sgarzare@redhat.com Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- net/vmw_vsock/virtio_transport_common.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 9b8014516f4f..a678d5d75704 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -166,12 +166,12 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) struct sk_buff *skb; size_t payload_len; - /* A packet could be split to fit the RX buffer, so we can retrieve - * the payload length from the header and the buffer pointer taking - * care of the offset in the original packet. + /* A packet could be split to fit the RX buffer, so we use + * the payload length from the header, which has been updated + * by the sender to reflect the fragment size. */ pkt_hdr = virtio_vsock_hdr(pkt); - payload_len = pkt->len; + payload_len = le32_to_cpu(pkt_hdr->len); skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len, GFP_ATOMIC); @@ -219,7 +219,8 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) virtio_transport_copy_nonlinear_skb(pkt, data, payload_len); } else { - skb_put_data(skb, pkt->data, payload_len); + skb_put_data(skb, pkt->data + VIRTIO_VSOCK_SKB_CB(pkt)->offset, + payload_len); } } -- cgit v1.2.3 From 3a3e3d90cbc79600544536723911657730759af3 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 8 May 2026 18:44:11 +0200 Subject: vsock/virtio: fix empty payload in tap skb for non-linear buffers For non-linear skbs, virtio_transport_build_skb() goes through virtio_transport_copy_nonlinear_skb() to copy the original payload in the new skb to be delivered to the vsockmon tap device. This manually initializes an iov_iter but does not set iov_iter.count. Since the iov_iter is zero-initialized, the copy length is zero and no payload is actually copied to the monitor interface, leaving data un-initialized. Fix this by removing the linear vs non-linear split and using skb_copy_datagram_iter() with iov_iter_kvec() for all cases, as vhost-vsock already does. This handles both linear and non-linear skbs, properly initializes the iov_iter, and removes the now unused virtio_transport_copy_nonlinear_skb(). While touching this code, let's also check the return value of skb_copy_datagram_iter(), even though it's unlikely to fail. Fixes: 4b0bf10eb077 ("vsock/virtio: non-linear skb handling for tap") Reported-by: Yiqi Sun Signed-off-by: Stefano Garzarella Reviewed-by: Bobby Eshleman Reviewed-by: Arseniy Krasnov Link: https://patch.msgid.link/20260508164411.261440-3-sgarzare@redhat.com Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- net/vmw_vsock/virtio_transport_common.c | 40 ++++++++++----------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index a678d5d75704..989cc252d3d3 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -136,27 +136,6 @@ static void virtio_transport_init_hdr(struct sk_buff *skb, hdr->fwd_cnt = cpu_to_le32(0); } -static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb, - void *dst, - size_t len) -{ - struct iov_iter iov_iter = { 0 }; - struct kvec kvec; - size_t to_copy; - - kvec.iov_base = dst; - kvec.iov_len = len; - - iov_iter.iter_type = ITER_KVEC; - iov_iter.kvec = &kvec; - iov_iter.nr_segs = 1; - - to_copy = min_t(size_t, len, skb->len); - - skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, - &iov_iter, to_copy); -} - /* Packet capture */ static struct sk_buff *virtio_transport_build_skb(void *opaque) { @@ -214,13 +193,18 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr)); if (payload_len) { - if (skb_is_nonlinear(pkt)) { - void *data = skb_put(skb, payload_len); - - virtio_transport_copy_nonlinear_skb(pkt, data, payload_len); - } else { - skb_put_data(skb, pkt->data + VIRTIO_VSOCK_SKB_CB(pkt)->offset, - payload_len); + struct iov_iter iov_iter; + struct kvec kvec; + void *data = skb_put(skb, payload_len); + + kvec.iov_base = data; + kvec.iov_len = payload_len; + iov_iter_kvec(&iov_iter, ITER_DEST, &kvec, 1, payload_len); + + if (skb_copy_datagram_iter(pkt, VIRTIO_VSOCK_SKB_CB(pkt)->offset, + &iov_iter, payload_len)) { + kfree_skb(skb); + return NULL; } } -- cgit v1.2.3 From 859c199bb3a90ec49a678cc0846694b06703bdde Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 29 Apr 2026 06:09:37 -0700 Subject: fs/select: reject negative timeval components in kern_select() kern_select() normalises the user-supplied struct __kernel_old_timeval with tv.tv_sec + (tv.tv_usec / USEC_PER_SEC) (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC before calling poll_select_set_timeout() -> timespec64_valid(). Both operands of the seconds sum are unbounded user-controlled signed long. A crafted pair where tv_usec is a negative multiple of USEC_PER_SEC drives the sum across the wrap boundary - e.g. { .tv_sec = LONG_MIN, .tv_usec = -1000000 } yields sec = LONG_MAX, nsec = 0, which passes timespec64_valid() and then flows through timespec64_add_safe(), which saturates the absolute deadline to TIME64_MAX (clamped further to KTIME_MAX downstream). select(2) therefore blocks effectively forever instead of returning -EINVAL as POSIX requires for a negative timeout. Only the legacy __NR_select syscall takes this path. pselect6, ppoll, poll and epoll_pwait2 all hand the user's two fields directly to poll_select_set_timeout(), which validates *before* doing any arithmetic: /* fs/select.c:271 -- the validator */ int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec) { struct timespec64 ts = {.tv_sec = sec, .tv_nsec = nsec}; if (!timespec64_valid(&ts)) return -EINVAL; ... } /* include/linux/time64.h:97 -- timespec64_valid */ if (ts->tv_sec < 0) return false; if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) return false; /* fs/select.c:744 do_pselect() (pselect6, pselect6_time32) */ if (get_timespec64(&ts, tsp)) return -EFAULT; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; /* fs/select.c:1097 ppoll */ if (get_timespec64(&ts, tsp)) return -EFAULT; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; /* fs/select.c:1065 poll -- timeout_msecs is int; >= 0 gates the math */ if (timeout_msecs >= 0) poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC, NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC)); /* fs/eventpoll.c:2512 epoll_pwait2 */ if (get_timespec64(&ts, timeout)) return -EFAULT; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; In every one of these the wrap-prone arithmetic from kern_select() simply does not exist; the user fields reach timespec64_valid() unmodified. glibc routes the C-library select() through pselect6, so the bug is reachable only via a direct syscall(__NR_select, ...). The pre-validation negative check that used to live here was lost when the syscall was switched to the poll_select_set_timeout() helper. Restore it: reject tv_sec < 0 || tv_usec < 0 up front, mirroring what glibc does in userspace. do_compat_select() has the same arithmetic pattern but is only reachable on 32-bit compat and from a different syscall entry; left for a follow-up so this change stays minimal. Reproducer (returns -1/EINVAL on a fixed kernel; blocks indefinitely on an unfixed one): struct timeval tv = { .tv_sec = LONG_MIN, .tv_usec = -1000000 }; fd_set r; int pfd[2]; pipe(pfd); FD_ZERO(&r); FD_SET(pfd[0], &r); syscall(__NR_select, pfd[0] + 1, &r, NULL, NULL, &tv); Fixes: 4d36a9e65d49 ("select: deal with math overflow from borderline valid userland data") Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20260429-timeval-v1-1-4448e2588bbf@debian.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/select.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/select.c b/fs/select.c index 75978b18f48f..bf71c9838dfe 100644 --- a/fs/select.c +++ b/fs/select.c @@ -708,6 +708,17 @@ static int kern_select(int n, fd_set __user *inp, fd_set __user *outp, if (copy_from_user(&tv, tvp, sizeof(tv))) return -EFAULT; + /* + * Reject negative components before normalisation. The seconds + * sum below is performed in signed long and a crafted negative + * timeval can wrap to a positive value that passes + * timespec64_valid() and turns into an effectively-infinite + * deadline via timespec64_add_safe()'s saturation, instead of + * the -EINVAL POSIX requires for negative timeouts. + */ + if (tv.tv_sec < 0 || tv.tv_usec < 0) + return -EINVAL; + to = &end_time; if (poll_select_set_timeout(to, tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), -- cgit v1.2.3 From 6f0f7ac1915abc0d202f0eb4b003a6548a5ba60d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:38 +0100 Subject: netfs: Fix cancellation of a DIO and single read subrequests When the preparation of a new subrequest for a read fails, if the subrequest has already been added to the stream->subrequests list, it can't simply be put and abandoned as the collector may see it. Also, if it hasn't been queued yet, it has two outstanding refs that both need to be put. Both DIO read and single-read dispatch fail at this; further, both differ in the order they do things to the way buffered read works. Fix cancellation of both DIO-read and single-read subrequests that failed preparation by the following steps: (1) Harmonise all three reads (buffered, dio, single) to queue the subreq before prepping it. (2) Make all three call netfs_queue_read() to do the queuing. (3) Set NETFS_RREQ_ALL_QUEUED independently of the queuing as we don't know the length of the subreq at this point. (4) In all cases, set the error and NETFS_SREQ_FAILED flag on the subreq and then call netfs_read_subreq_terminated() to deal with it. This will pass responsibility off to the collector for dealing with it. Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item") Closes: https://sashiko.dev/#/patchset/20260425125426.3855807-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-2-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_read.c | 34 +++++++++++++--------------------- fs/netfs/direct_read.c | 42 +++++++++++++----------------------------- fs/netfs/internal.h | 3 +++ fs/netfs/read_collect.c | 11 +++++++++++ fs/netfs/read_single.c | 23 ++++++++++------------- 5 files changed, 50 insertions(+), 63 deletions(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index a8c0d86118c5..a27ed501b6d4 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -156,9 +156,8 @@ static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq, netfs_cache_read_terminated, subreq); } -static void netfs_queue_read(struct netfs_io_request *rreq, - struct netfs_io_subrequest *subreq, - bool last_subreq) +void netfs_queue_read(struct netfs_io_request *rreq, + struct netfs_io_subrequest *subreq) { struct netfs_io_stream *stream = &rreq->io_streams[0]; @@ -178,11 +177,6 @@ static void netfs_queue_read(struct netfs_io_request *rreq, } } - if (last_subreq) { - smp_wmb(); /* Write lists before ALL_QUEUED. */ - set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); - } - spin_unlock(&rreq->lock); } @@ -233,6 +227,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq, subreq->start = start; subreq->len = size; + netfs_queue_read(rreq, subreq); + source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size); subreq->source = source; if (source == NETFS_DOWNLOAD_FROM_SERVER) { @@ -253,6 +249,7 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq, rreq->debug_id, subreq->debug_index, subreq->len, size, subreq->start, ictx->zero_point, rreq->i_size); + netfs_cancel_read(subreq, ret); break; } subreq->len = len; @@ -261,12 +258,7 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq, if (rreq->netfs_ops->prepare_read) { ret = rreq->netfs_ops->prepare_read(subreq); if (ret < 0) { - subreq->error = ret; - /* Not queued - release both refs. */ - netfs_put_subrequest(subreq, - netfs_sreq_trace_put_cancel); - netfs_put_subrequest(subreq, - netfs_sreq_trace_put_cancel); + netfs_cancel_read(subreq, ret); break; } trace_netfs_sreq(subreq, netfs_sreq_trace_prepare); @@ -289,23 +281,23 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq, pr_err("Unexpected read source %u\n", source); WARN_ON_ONCE(1); + netfs_cancel_read(subreq, ret); break; issue: slice = netfs_prepare_read_iterator(subreq, ractl); if (slice < 0) { ret = slice; - subreq->error = ret; - trace_netfs_sreq(subreq, netfs_sreq_trace_cancel); - /* Not queued - release both refs. */ - netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); - netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); + netfs_cancel_read(subreq, ret); break; } - size -= slice; start += slice; + size -= slice; + if (size <= 0) { + smp_wmb(); /* Write lists before ALL_QUEUED. */ + set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); + } - netfs_queue_read(rreq, subreq, size <= 0); netfs_issue_read(rreq, subreq); cond_resched(); } while (size > 0); diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index f72e6da88cca..6a8fb0d55e04 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -45,12 +45,11 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq) * Perform a read to a buffer from the server, slicing up the region to be read * according to the network rsize. */ -static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) +static void netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) { - struct netfs_io_stream *stream = &rreq->io_streams[0]; unsigned long long start = rreq->start; ssize_t size = rreq->len; - int ret = 0; + int ret; do { struct netfs_io_subrequest *subreq; @@ -58,7 +57,10 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) subreq = netfs_alloc_subrequest(rreq); if (!subreq) { - ret = -ENOMEM; + /* Stash the error in the request if there's not + * already an error set. + */ + cmpxchg(&rreq->error, 0, -ENOMEM); break; } @@ -66,25 +68,13 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) subreq->start = start; subreq->len = size; - __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); - - spin_lock(&rreq->lock); - list_add_tail(&subreq->rreq_link, &stream->subrequests); - if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { - if (!stream->active) { - stream->collected_to = subreq->start; - /* Store list pointers before active flag */ - smp_store_release(&stream->active, true); - } - } - trace_netfs_sreq(subreq, netfs_sreq_trace_added); - spin_unlock(&rreq->lock); + netfs_queue_read(rreq, subreq); netfs_stat(&netfs_n_rh_download); if (rreq->netfs_ops->prepare_read) { ret = rreq->netfs_ops->prepare_read(subreq); if (ret < 0) { - netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); + netfs_cancel_read(subreq, ret); break; } } @@ -113,8 +103,6 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); netfs_wake_collector(rreq); } - - return ret; } /* @@ -137,21 +125,17 @@ static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync) // TODO: Use bounce buffer if requested inode_dio_begin(rreq->inode); + netfs_dispatch_unbuffered_reads(rreq); - ret = netfs_dispatch_unbuffered_reads(rreq); - - if (!rreq->submitted) { - netfs_put_request(rreq, netfs_rreq_trace_put_no_submit); - inode_dio_end(rreq->inode); - ret = 0; - goto out; - } + /* The collector will get run, even if we don't manage to submit any + * subreqs, so we shouldn't call inode_dio_end() here. + */ if (sync) ret = netfs_wait_for_read(rreq); else ret = -EIOCBQUEUED; -out: + _leave(" = %zd", ret); return ret; } diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index d436e20d3418..645996ecfc80 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -23,6 +23,8 @@ /* * buffered_read.c */ +void netfs_queue_read(struct netfs_io_request *rreq, + struct netfs_io_subrequest *subreq); void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error); int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t offset, size_t len); @@ -108,6 +110,7 @@ static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq, */ bool netfs_read_collection(struct netfs_io_request *rreq); void netfs_read_collection_worker(struct work_struct *work); +void netfs_cancel_read(struct netfs_io_subrequest *subreq, int error); void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error); /* diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index e5f6665b3341..d2d902f46627 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -575,6 +575,17 @@ skip_error_checks: } EXPORT_SYMBOL(netfs_read_subreq_terminated); +/* + * Cancel a read subrequest due to preparation failure. + */ +void netfs_cancel_read(struct netfs_io_subrequest *subreq, int error) +{ + trace_netfs_sreq(subreq, netfs_sreq_trace_cancel); + subreq->error = error; + __set_bit(NETFS_SREQ_FAILED, &subreq->flags); + netfs_read_subreq_terminated(subreq); +} + /* * Handle termination of a read from the cache. */ diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c index d0e23bc42445..8833550d2eb6 100644 --- a/fs/netfs/read_single.c +++ b/fs/netfs/read_single.c @@ -89,7 +89,6 @@ static void netfs_single_read_cache(struct netfs_io_request *rreq, */ static int netfs_single_dispatch_read(struct netfs_io_request *rreq) { - struct netfs_io_stream *stream = &rreq->io_streams[0]; struct netfs_io_subrequest *subreq; int ret = 0; @@ -102,14 +101,7 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq) subreq->len = rreq->len; subreq->io_iter = rreq->buffer.iter; - __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); - - spin_lock(&rreq->lock); - list_add_tail(&subreq->rreq_link, &stream->subrequests); - trace_netfs_sreq(subreq, netfs_sreq_trace_added); - /* Store list pointers before active flag */ - smp_store_release(&stream->active, true); - spin_unlock(&rreq->lock); + netfs_queue_read(rreq, subreq); netfs_single_cache_prepare_read(rreq, subreq); switch (subreq->source) { @@ -121,10 +113,14 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq) goto cancel; } + smp_wmb(); /* Write lists before ALL_QUEUED. */ + set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); rreq->netfs_ops->issue_read(subreq); rreq->submitted += subreq->len; break; case NETFS_READ_FROM_CACHE: + smp_wmb(); /* Write lists before ALL_QUEUED. */ + set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); trace_netfs_sreq(subreq, netfs_sreq_trace_submit); netfs_single_read_cache(rreq, subreq); rreq->submitted += subreq->len; @@ -134,14 +130,15 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq) pr_warn("Unexpected single-read source %u\n", subreq->source); WARN_ON_ONCE(true); ret = -EIO; - break; + goto cancel; } - smp_wmb(); /* Write lists before ALL_QUEUED. */ - set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); return ret; cancel: - netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); + netfs_cancel_read(subreq, ret); + smp_wmb(); /* Write lists before ALL_QUEUED. */ + set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); + netfs_wake_collector(rreq); return ret; } -- cgit v1.2.3 From cce18c263e9623872327ba3c956012f73c1179cc Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:39 +0100 Subject: netfs: Fix missing locking around retry adding new subreqs Fix netfs_retry_read_subrequests() and netfs_retry_write_stream() to take the appropriate lock when adding extra subrequests into stream->subrequests. Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item") Fixes: 288ace2f57c9 ("netfs: New writeback implementation") Closes: https://sashiko.dev/#/patchset/20260425125426.3855807-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-3-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/read_retry.c | 6 +++++- fs/netfs/write_retry.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index cca9ac43c077..5ec548b996d6 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c @@ -175,7 +175,9 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) list_for_each_entry_safe_from(subreq, tmp, &stream->subrequests, rreq_link) { trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous); + spin_lock(&rreq->lock); list_del(&subreq->rreq_link); + spin_unlock(&rreq->lock); netfs_put_subrequest(subreq, netfs_sreq_trace_put_done); if (subreq == to) break; @@ -203,8 +205,10 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) refcount_read(&subreq->ref), netfs_sreq_trace_new); + spin_lock(&rreq->lock); list_add(&subreq->rreq_link, &to->rreq_link); - to = list_next_entry(to, rreq_link); + spin_unlock(&rreq->lock); + to = subreq; trace_netfs_sreq(subreq, netfs_sreq_trace_retry); stream->sreq_max_len = umin(len, rreq->rsize); diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index 29489a23a220..32735abfa03f 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -130,7 +130,9 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, list_for_each_entry_safe_from(subreq, tmp, &stream->subrequests, rreq_link) { trace_netfs_sreq(subreq, netfs_sreq_trace_discard); + spin_lock(&wreq->lock); list_del(&subreq->rreq_link); + spin_unlock(&wreq->lock); netfs_put_subrequest(subreq, netfs_sreq_trace_put_done); if (subreq == to) break; @@ -153,8 +155,10 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, netfs_sreq_trace_new); trace_netfs_sreq(subreq, netfs_sreq_trace_split); + spin_lock(&wreq->lock); list_add(&subreq->rreq_link, &to->rreq_link); - to = list_next_entry(to, rreq_link); + spin_unlock(&wreq->lock); + to = subreq; trace_netfs_sreq(subreq, netfs_sreq_trace_retry); stream->sreq_max_len = len; -- cgit v1.2.3 From b5782e2d462c028096f922abca46318cec890670 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:40 +0100 Subject: netfs: Fix missing barriers when accessing stream->subrequests locklessly The list of subrequests attached to stream->subrequests is accessed without locks by netfs_collect_read_results() and netfs_collect_write_results(), and then they access subreq->flags without taking a barrier after getting the subreq pointer from the list. Relatedly, the functions that build the list don't use any sort of write barrier when constructing the list to make sure that the NETFS_SREQ_IN_PROGRESS flag is perceived to be set first if no lock is taken. Fix this by: (1) Add a new list_add_tail_release() function that uses a release barrier to set the pointer to the new member of the list. (2) Add a new list_first_entry_or_null_acquire() function that uses an acquire barrier to read the pointer to the first member in a list (or return NULL). (3) Use list_add_tail_release() when adding a subreq to ->subrequests. (4) Use list_first_entry_or_null_acquire() when initially accessing the front of the list (when an item is removed, the pointer to the new front iterm is obtained under the same lock). Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item") Fixes: 288ace2f57c9 ("netfs: New writeback implementation") Link: https://sashiko.dev/#/patchset/20260326104544.509518-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-4-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_read.c | 3 ++- fs/netfs/misc.c | 1 + fs/netfs/read_collect.c | 6 ++++-- fs/netfs/write_collect.c | 6 ++++-- fs/netfs/write_issue.c | 3 ++- include/linux/list.h | 37 +++++++++++++++++++++++++++++++++++++ 6 files changed, 50 insertions(+), 6 deletions(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index a27ed501b6d4..15d73026ff64 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -168,7 +168,8 @@ void netfs_queue_read(struct netfs_io_request *rreq, * remove entries off of the front. */ spin_lock(&rreq->lock); - list_add_tail(&subreq->rreq_link, &stream->subrequests); + /* Write IN_PROGRESS before pointer to new subreq */ + list_add_tail_release(&subreq->rreq_link, &stream->subrequests); if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { if (!stream->active) { stream->collected_to = subreq->start; diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 6df89c92b10b..21357907b7ee 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -356,6 +356,7 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, DEFINE_WAIT(myself); list_for_each_entry(subreq, &stream->subrequests, rreq_link) { + smp_rmb(); /* Read ->next before IN_PROGRESS. */ if (!netfs_check_subreq_in_progress(subreq)) continue; diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index d2d902f46627..3c9b847885c2 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -205,8 +205,10 @@ reassess: * in progress. The issuer thread may be adding stuff to the tail * whilst we're doing this. */ - front = list_first_entry_or_null(&stream->subrequests, - struct netfs_io_subrequest, rreq_link); + front = list_first_entry_or_null_acquire(&stream->subrequests, + struct netfs_io_subrequest, rreq_link); + /* Read first subreq pointer before IN_PROGRESS flag. */ + while (front) { size_t transferred; diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index b194447f4b11..7fbf50907a7f 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -228,8 +228,10 @@ reassess_streams: if (!smp_load_acquire(&stream->active)) continue; - front = list_first_entry_or_null(&stream->subrequests, - struct netfs_io_subrequest, rreq_link); + front = list_first_entry_or_null_acquire(&stream->subrequests, + struct netfs_io_subrequest, rreq_link); + /* Read first subreq pointer before IN_PROGRESS flag. */ + while (front) { trace_netfs_collect_sreq(wreq, front); //_debug("sreq [%x] %llx %zx/%zx", diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 2db688f94125..b0e9690bb90c 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -204,7 +204,8 @@ void netfs_prepare_write(struct netfs_io_request *wreq, * remove entries off of the front. */ spin_lock(&wreq->lock); - list_add_tail(&subreq->rreq_link, &stream->subrequests); + /* Write IN_PROGRESS before pointer to new subreq */ + list_add_tail_release(&subreq->rreq_link, &stream->subrequests); if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { if (!stream->active) { stream->collected_to = subreq->start; diff --git a/include/linux/list.h b/include/linux/list.h index 00ea8e5fb88b..09d979976b3b 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -191,6 +191,29 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head) __list_add(new, head->prev, head); } +/** + * list_add_tail_release - add a new entry with release barrier + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head, using a release barrier to set + * the ->next pointer that points to it. This is useful for implementing + * queues, in particular one that the elements will be walked through forwards + * locklessly. + */ +static inline void list_add_tail_release(struct list_head *new, + struct list_head *head) +{ + struct list_head *prev = head->prev; + + if (__list_add_valid(new, prev, head)) { + new->next = head; + new->prev = prev; + head->prev = new; + smp_store_release(&prev->next, new); + } +} + /* * Delete a list entry by making the prev/next entries * point to each other. @@ -644,6 +667,20 @@ static inline void list_splice_tail_init(struct list_head *list, pos__ != head__ ? list_entry(pos__, type, member) : NULL; \ }) +/** + * list_first_entry_or_null_acquire - get the first element from a list with barrier + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * Note that if the list is empty, it returns NULL. + */ +#define list_first_entry_or_null_acquire(ptr, type, member) ({ \ + struct list_head *head__ = (ptr); \ + struct list_head *pos__ = smp_load_acquire(&head__->next); \ + pos__ != head__ ? list_entry(pos__, type, member) : NULL; \ +}) + /** * list_last_entry_or_null - get the last element from a list * @ptr: the list head to take the element from. -- cgit v1.2.3 From 8a8c0cfdf4658fc5b295b7fc87be56e0d76741f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:41 +0100 Subject: netfs: Fix netfs_read_to_pagecache() to pause on subreq failure Fix netfs_read_to_pagecache() so that it pauses the generation of new subrequests if an already-issued subrequest fails. Fixes: ee4cdf7ba857 ("netfs: Speed up buffered reading") Closes: https://sashiko.dev/#/patchset/20260425125426.3855807-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-5-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_read.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 15d73026ff64..fee0aebf5a3d 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -300,6 +300,11 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq, } netfs_issue_read(rreq, subreq); + + if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) + netfs_wait_for_paused_read(rreq); + if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) + break; cond_resched(); } while (size > 0); -- cgit v1.2.3 From 2c8f4742bb76117d735f92a3932d85239b16c494 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:42 +0100 Subject: netfs: Fix potential for tearing in ->remote_i_size and ->zero_point Fix potential tearing in using ->remote_i_size and ->zero_point by copying i_size_read() and i_size_write() and using the same seqcount as for i_size. We need to make sure that netfslib and the filesystems that use it always hold i_lock whilst updating any of the sizes to prevent i_size_seqcount from getting corrupted. Fixes: 4058f742105e ("netfs: Keep track of the actual remote file size") Fixes: 100ccd18bb41 ("netfs: Optimise away reads above the point at which there can be no data") Closes: https://sashiko.dev/#/patchset/20260414082004.3756080-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-6-dhowells@redhat.com cc: Paulo Alcantara cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/9p/v9fs_vfs.h | 13 -- fs/9p/vfs_inode.c | 6 +- fs/9p/vfs_inode_dotl.c | 12 +- fs/afs/file.c | 24 +++- fs/afs/inode.c | 31 +++-- fs/afs/internal.h | 11 +- fs/afs/write.c | 2 +- fs/netfs/buffered_read.c | 6 +- fs/netfs/buffered_write.c | 2 +- fs/netfs/direct_write.c | 6 +- fs/netfs/misc.c | 32 +++-- fs/netfs/write_collect.c | 9 +- fs/smb/client/cifsfs.c | 38 ++++-- fs/smb/client/cifssmb.c | 3 +- fs/smb/client/file.c | 13 +- fs/smb/client/inode.c | 14 ++- fs/smb/client/readdir.c | 3 +- fs/smb/client/smb2ops.c | 42 ++++--- fs/smb/client/smb2pdu.c | 3 +- include/linux/netfs.h | 293 ++++++++++++++++++++++++++++++++++++++++++++-- 20 files changed, 450 insertions(+), 113 deletions(-) diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index d3aefbec4de6..34c115d7c250 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -75,17 +75,4 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode) int v9fs_open_to_dotl_flags(int flags); -static inline void v9fs_i_size_write(struct inode *inode, loff_t i_size) -{ - /* - * 32-bit need the lock, concurrent updates could break the - * sequences and make i_size_read() loop forever. - * 64-bit updates are atomic and can skip the locking. - */ - if (sizeof(i_size) > sizeof(long)) - spin_lock(&inode->i_lock); - i_size_write(inode, i_size); - if (sizeof(i_size) > sizeof(long)) - spin_unlock(&inode->i_lock); -} #endif diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index d1508b1fe109..f468acb8ee7d 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1141,11 +1141,13 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; - v9inode->netfs.remote_i_size = stat->length; + spin_lock(&inode->i_lock); + netfs_write_remote_i_size(inode, stat->length); if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) - v9fs_i_size_write(inode, stat->length); + i_size_write(inode, stat->length); /* not real number of blocks, but 512 byte ones ... */ inode->i_blocks = (stat->length + 512 - 1) >> 9; + spin_unlock(&inode->i_lock); v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR; } diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 71796a89bcf4..141fb54db65d 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -634,10 +634,12 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; - v9inode->netfs.remote_i_size = stat->st_size; + spin_lock(&inode->i_lock); + netfs_write_remote_i_size(inode, stat->st_size); if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) - v9fs_i_size_write(inode, stat->st_size); + i_size_write(inode, stat->st_size); inode->i_blocks = stat->st_blocks; + spin_unlock(&inode->i_lock); } else { if (stat->st_result_mask & P9_STATS_ATIME) { inode_set_atime(inode, stat->st_atime_sec, @@ -662,13 +664,15 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; } + spin_lock(&inode->i_lock); if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) && stat->st_result_mask & P9_STATS_SIZE) { - v9inode->netfs.remote_i_size = stat->st_size; - v9fs_i_size_write(inode, stat->st_size); + netfs_write_remote_i_size(inode, stat->st_size); + i_size_write(inode, stat->st_size); } if (stat->st_result_mask & P9_STATS_BLOCKS) inode->i_blocks = stat->st_blocks; + spin_unlock(&inode->i_lock); } if (stat->st_result_mask & P9_STATS_GEN) inode->i_generation = stat->st_gen; diff --git a/fs/afs/file.c b/fs/afs/file.c index 85696ac984cc..0467742bfeee 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -427,21 +427,35 @@ static void afs_free_request(struct netfs_io_request *rreq) afs_put_wb_key(rreq->netfs_priv2); } -static void afs_update_i_size(struct inode *inode, loff_t new_i_size) +/* + * Set the file size and block count, taking ->cb_lock and ->i_lock to maintain + * coherency and prevent 64-bit tearing on 32-bit arches. + * + * Also, estimate the number of 512 bytes blocks used, rounded up to nearest 1K + * for consistency with other AFS clients. + */ +void afs_set_i_size(struct afs_vnode *vnode, loff_t new_i_size) { - struct afs_vnode *vnode = AFS_FS_I(inode); + struct inode *inode = &vnode->netfs.inode; loff_t i_size; write_seqlock(&vnode->cb_lock); - i_size = i_size_read(&vnode->netfs.inode); + spin_lock(&inode->i_lock); + i_size = i_size_read(inode); if (new_i_size > i_size) { - i_size_write(&vnode->netfs.inode, new_i_size); - inode_set_bytes(&vnode->netfs.inode, new_i_size); + i_size_write(inode, new_i_size); + inode_set_bytes(inode, round_up(new_i_size, 1024)); } + spin_unlock(&inode->i_lock); write_sequnlock(&vnode->cb_lock); fscache_update_cookie(afs_vnode_cache(vnode), NULL, &new_i_size); } +static void afs_update_i_size(struct inode *inode, loff_t new_i_size) +{ + afs_set_i_size(AFS_FS_I(inode), new_i_size); +} + static void afs_netfs_invalidate_cache(struct netfs_io_request *wreq) { struct afs_vnode *vnode = AFS_FS_I(wreq->inode); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index a5173434f786..19fe2e392885 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -224,7 +224,8 @@ static int afs_inode_init_from_status(struct afs_operation *op, return afs_protocol_error(NULL, afs_eproto_file_type); } - afs_set_i_size(vnode, status->size); + i_size_write(inode, status->size); + inode_set_bytes(inode, status->size); afs_set_netfs_context(vnode); vnode->invalid_before = status->data_version; @@ -253,7 +254,8 @@ static void afs_apply_status(struct afs_operation *op, { struct afs_file_status *status = &vp->scb.status; struct afs_vnode *vnode = vp->vnode; - struct inode *inode = &vnode->netfs.inode; + struct netfs_inode *ictx = &vnode->netfs; + struct inode *inode = &ictx->inode; struct timespec64 t; umode_t mode; bool unexpected_jump = false; @@ -336,6 +338,8 @@ static void afs_apply_status(struct afs_operation *op, } if (data_changed) { + unsigned long long zero_point, size = status->size; + inode_set_iversion_raw(inode, status->data_version); /* Only update the size if the data version jumped. If the @@ -343,16 +347,25 @@ static void afs_apply_status(struct afs_operation *op, * idea of what the size should be that's not the same as * what's on the server. */ - vnode->netfs.remote_i_size = status->size; - if (change_size || status->size > i_size_read(inode)) { - afs_set_i_size(vnode, status->size); + spin_lock(&inode->i_lock); + + if (change_size || size > i_size_read(inode)) { + /* We can read the sizes directly as we hold i_lock. */ + zero_point = ictx->_zero_point; + if (unexpected_jump) - vnode->netfs.zero_point = status->size; + zero_point = size; + netfs_write_sizes(inode, size, size, zero_point); + inode_set_bytes(inode, size); inode_set_ctime_to_ts(inode, t); inode_set_atime_to_ts(inode, t); + } else { + netfs_write_remote_i_size(inode, size); } + spin_unlock(&inode->i_lock); + if (op->ops == &afs_fetch_data_operation) - op->fetch.subreq->rreq->i_size = status->size; + op->fetch.subreq->rreq->i_size = size; } } @@ -709,7 +722,7 @@ int afs_getattr(struct mnt_idmap *idmap, const struct path *path, * it, but we need to give userspace the server's size. */ if (S_ISDIR(inode->i_mode)) - stat->size = vnode->netfs.remote_i_size; + stat->size = netfs_read_remote_i_size(inode); } while (read_seqretry(&vnode->cb_lock, seq)); return 0; @@ -889,7 +902,7 @@ int afs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, */ if (!(attr->ia_valid & (supported & ~ATTR_SIZE & ~ATTR_MTIME)) && attr->ia_size < i_size && - attr->ia_size > vnode->netfs.remote_i_size) { + attr->ia_size > netfs_read_remote_i_size(inode)) { truncate_setsize(inode, attr->ia_size); netfs_resize_file(&vnode->netfs, size, false); fscache_resize_cookie(afs_vnode_cache(vnode), diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 599353c33337..816dc848ea71 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -1157,6 +1157,7 @@ extern int afs_open(struct inode *, struct file *); extern int afs_release(struct inode *, struct file *); void afs_fetch_data_async_rx(struct work_struct *work); void afs_fetch_data_immediate_cancel(struct afs_call *call); +void afs_set_i_size(struct afs_vnode *vnode, loff_t new_i_size); /* * flock.c @@ -1758,16 +1759,6 @@ static inline void afs_update_dentry_version(struct afs_operation *op, (void *)(unsigned long)dir_vp->scb.status.data_version; } -/* - * Set the file size and block count. Estimate the number of 512 bytes blocks - * used, rounded up to nearest 1K for consistency with other AFS clients. - */ -static inline void afs_set_i_size(struct afs_vnode *vnode, u64 size) -{ - i_size_write(&vnode->netfs.inode, size); - vnode->netfs.inode.i_blocks = ((size + 1023) >> 10) << 1; -} - /* * Check for a conflicting operation on a directory that we just unlinked from. * If someone managed to sneak a link or an unlink in on the file we just diff --git a/fs/afs/write.c b/fs/afs/write.c index fcfed9d24e0a..7f34b939706a 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -142,7 +142,7 @@ static void afs_issue_write_worker(struct work_struct *work) afs_begin_vnode_operation(op); op->store.write_iter = &subreq->io_iter; - op->store.i_size = umax(pos + len, vnode->netfs.remote_i_size); + op->store.i_size = umax(pos + len, netfs_read_remote_i_size(&vnode->netfs.inode)); op->mtime = inode_get_mtime(&vnode->netfs.inode); afs_wait_for_operation(op); diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index fee0aebf5a3d..ebd84a6cc3f0 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -209,7 +209,6 @@ static void netfs_issue_read(struct netfs_io_request *rreq, static void netfs_read_to_pagecache(struct netfs_io_request *rreq, struct readahead_control *ractl) { - struct netfs_inode *ictx = netfs_inode(rreq->inode); unsigned long long start = rreq->start; ssize_t size = rreq->len; int ret = 0; @@ -233,7 +232,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq, source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size); subreq->source = source; if (source == NETFS_DOWNLOAD_FROM_SERVER) { - unsigned long long zp = umin(ictx->zero_point, rreq->i_size); + unsigned long long zero_point = netfs_read_zero_point(rreq->inode); + unsigned long long zp = umin(zero_point, rreq->i_size); size_t len = subreq->len; if (unlikely(rreq->origin == NETFS_READ_SINGLE)) @@ -249,7 +249,7 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq, pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx", rreq->debug_id, subreq->debug_index, subreq->len, size, - subreq->start, ictx->zero_point, rreq->i_size); + subreq->start, zero_point, rreq->i_size); netfs_cancel_read(subreq, ret); break; } diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 05ea5b0cc0e8..b6ecd059dc4f 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -230,7 +230,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, * server would just return a block of zeros or a short read if * we try to read it. */ - if (fpos >= ctx->zero_point) { + if (fpos >= netfs_read_zero_point(inode)) { folio_zero_segment(folio, 0, offset); copied = copy_folio_from_iter_atomic(folio, offset, part, iter); if (unlikely(copied == 0)) diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index f9ab69de3e29..25f8ceb15fad 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -376,8 +376,10 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret < 0) goto out; end = iocb->ki_pos + iov_iter_count(from); - if (end > ictx->zero_point) - ictx->zero_point = end; + spin_lock(&inode->i_lock); + if (end > ictx->_zero_point) + netfs_write_zero_point(inode, end); + spin_unlock(&inode->i_lock); fscache_invalidate(netfs_i_cookie(ictx), NULL, i_size_read(inode), FSCACHE_INVAL_DIO_WRITE); diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 21357907b7ee..bad661ff2bec 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -211,18 +211,25 @@ EXPORT_SYMBOL(netfs_clear_inode_writeback); void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) { struct netfs_folio *finfo; - struct netfs_inode *ctx = netfs_inode(folio_inode(folio)); + struct inode *inode = folio_inode(folio); + struct netfs_inode *ctx = netfs_inode(inode); size_t flen = folio_size(folio); _enter("{%lx},%zx,%zx", folio->index, offset, length); if (offset == 0 && length == flen) { - unsigned long long i_size = i_size_read(&ctx->inode); + unsigned long long i_size, remote_i_size, zero_point; unsigned long long fpos = folio_pos(folio), end; + netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point); end = umin(fpos + flen, i_size); - if (fpos < i_size && end > ctx->zero_point) - ctx->zero_point = end; + if (fpos < i_size && end > zero_point) { + spin_lock(&inode->i_lock); + end = umin(fpos + flen, inode->i_size); + if (fpos < i_size && end > ctx->_zero_point) + netfs_write_zero_point(inode, end); + spin_unlock(&inode->i_lock); + } } folio_wait_private_2(folio); /* [DEPRECATED] */ @@ -292,15 +299,22 @@ EXPORT_SYMBOL(netfs_invalidate_folio); */ bool netfs_release_folio(struct folio *folio, gfp_t gfp) { - struct netfs_inode *ctx = netfs_inode(folio_inode(folio)); - unsigned long long end; + struct inode *inode = folio_inode(folio); + struct netfs_inode *ctx = netfs_inode(inode); + unsigned long long i_size, remote_i_size, zero_point, end; if (folio_test_dirty(folio)) return false; - end = umin(folio_next_pos(folio), i_size_read(&ctx->inode)); - if (end > ctx->zero_point) - ctx->zero_point = end; + netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point); + end = umin(folio_next_pos(folio), i_size); + if (end > zero_point) { + spin_lock(&inode->i_lock); + end = umin(folio_next_pos(folio), inode->i_size); + if (end > ctx->_zero_point) + netfs_write_zero_point(inode, end); + spin_unlock(&inode->i_lock); + } if (folio_test_private(folio)) return false; diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 7fbf50907a7f..24fc2bb2f8a4 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -57,7 +57,8 @@ static void netfs_dump_request(const struct netfs_io_request *rreq) int netfs_folio_written_back(struct folio *folio) { enum netfs_folio_trace why = netfs_folio_trace_clear; - struct netfs_inode *ictx = netfs_inode(folio->mapping->host); + struct inode *inode = folio_inode(folio); + struct netfs_inode *ictx = netfs_inode(inode); struct netfs_folio *finfo; struct netfs_group *group = NULL; int gcount = 0; @@ -69,8 +70,10 @@ int netfs_folio_written_back(struct folio *folio) unsigned long long fend; fend = folio_pos(folio) + finfo->dirty_offset + finfo->dirty_len; - if (fend > ictx->zero_point) - ictx->zero_point = fend; + spin_lock(&ictx->inode.i_lock); + if (fend > ictx->_zero_point) + netfs_write_zero_point(inode, fend); + spin_unlock(&ictx->inode.i_lock); folio_detach_private(folio); group = finfo->netfs_group; diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 9f76b0347fa9..feac491c5070 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -434,7 +434,8 @@ cifs_alloc_inode(struct super_block *sb) spin_lock_init(&cifs_inode->writers_lock); cifs_inode->writers = 0; cifs_inode->netfs.inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ - cifs_inode->netfs.remote_i_size = 0; + cifs_inode->netfs._remote_i_size = 0; + cifs_inode->netfs._zero_point = 0; cifs_inode->uniqueid = 0; cifs_inode->createtime = 0; cifs_inode->epoch = 0; @@ -1303,7 +1304,8 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, struct cifsFileInfo *smb_file_src = src_file->private_data; struct cifsFileInfo *smb_file_target = dst_file->private_data; struct cifs_tcon *target_tcon, *src_tcon; - unsigned long long destend, fstart, fend, old_size, new_size; + unsigned long long i_size, old_size, new_size, zero_point; + unsigned long long destend, fstart, fend; unsigned int xid; int rc; @@ -1347,7 +1349,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, * Advance the EOF marker after the flush above to the end of the range * if it's short of that. */ - if (src_cifsi->netfs.remote_i_size < off + len) { + if (netfs_read_remote_i_size(src_inode) < off + len) { rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len); if (rc < 0) goto unlock; @@ -1368,16 +1370,18 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false); if (rc) goto unlock; - if (fend > target_cifsi->netfs.zero_point) - target_cifsi->netfs.zero_point = fend + 1; - old_size = target_cifsi->netfs.remote_i_size; + + spin_lock(&target_inode->i_lock); + if (fend > zero_point) + netfs_write_zero_point(target_inode, fend + 1); + i_size = target_inode->i_size; + spin_unlock(&target_inode->i_lock); /* Discard all the folios that overlap the destination region. */ cifs_dbg(FYI, "about to discard pages %llx-%llx\n", fstart, fend); truncate_inode_pages_range(&target_inode->i_data, fstart, fend); - fscache_invalidate(cifs_inode_cookie(target_inode), NULL, - i_size_read(target_inode), 0); + fscache_invalidate(cifs_inode_cookie(target_inode), NULL, i_size, 0); rc = -EOPNOTSUPP; if (target_tcon->ses->server->ops->duplicate_extents) { @@ -1402,8 +1406,12 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, rc = -EINVAL; } } - if (rc == 0 && new_size > target_cifsi->netfs.zero_point) - target_cifsi->netfs.zero_point = new_size; + if (rc == 0) { + spin_lock(&target_inode->i_lock); + if (new_size > target_cifsi->netfs._zero_point) + netfs_write_zero_point(target_inode, new_size); + spin_unlock(&target_inode->i_lock); + } } /* force revalidate of size and timestamps of target file now @@ -1474,7 +1482,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, * Advance the EOF marker after the flush above to the end of the range * if it's short of that. */ - if (src_cifsi->netfs.remote_i_size < off + len) { + if (netfs_read_remote_i_size(src_inode) < off + len) { rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len); if (rc < 0) goto unlock; @@ -1502,8 +1510,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, fscache_resize_cookie(cifs_inode_cookie(target_inode), i_size_read(target_inode)); } - if (rc > 0 && destoff + rc > target_cifsi->netfs.zero_point) - target_cifsi->netfs.zero_point = destoff + rc; + if (rc > 0) { + spin_lock(&target_inode->i_lock); + if (destoff + rc > target_cifsi->netfs._zero_point) + netfs_write_zero_point(target_inode, destoff + rc); + spin_unlock(&target_inode->i_lock); + } } file_accessed(src_file); diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 3990a9012264..9e27bfa7376b 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1465,6 +1465,7 @@ cifs_readv_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) struct cifs_io_subrequest *rdata = mid->callback_data; struct netfs_inode *ictx = netfs_inode(rdata->rreq->inode); struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink); + struct inode *inode = &ictx->inode; struct smb_rqst rqst = { .rq_iov = rdata->iov, .rq_nvec = 1, .rq_iter = rdata->subreq.io_iter }; @@ -1538,7 +1539,7 @@ do_retry: } else { size_t trans = rdata->subreq.transferred + rdata->got_bytes; if (trans < rdata->subreq.len && - rdata->subreq.start + trans >= ictx->remote_i_size) { + rdata->subreq.start + trans >= netfs_read_remote_i_size(inode)) { rdata->result = 0; __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags); } else if (rdata->got_bytes > 0) { diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 664a2c223089..b60344125f27 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -2517,18 +2517,23 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result) { struct netfs_io_request *wreq = wdata->rreq; - struct netfs_inode *ictx = netfs_inode(wreq->inode); + struct inode *inode = wreq->inode; + struct netfs_inode *ictx = netfs_inode(inode); loff_t wrend; if (result > 0) { + spin_lock(&inode->i_lock); + wrend = wdata->subreq.start + wdata->subreq.transferred + result; - if (wrend > ictx->zero_point && + if (wrend > ictx->_zero_point && (wdata->rreq->origin == NETFS_UNBUFFERED_WRITE || wdata->rreq->origin == NETFS_DIO_WRITE)) - ictx->zero_point = wrend; - if (wrend > ictx->remote_i_size) + netfs_write_zero_point(inode, wrend); + if (wrend > ictx->_remote_i_size) netfs_resize_file(ictx, wrend, true); + + spin_unlock(&inode->i_lock); } netfs_write_subrequest_terminated(&wdata->subreq, result); diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 16a5310155d5..9472c0a6c187 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -119,7 +119,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) fattr->cf_mtime = timestamp_truncate(fattr->cf_mtime, inode); mtime = inode_get_mtime(inode); if (timespec64_equal(&mtime, &fattr->cf_mtime) && - cifs_i->netfs.remote_i_size == fattr->cf_eof) { + netfs_read_remote_i_size(inode) == fattr->cf_eof) { cifs_dbg(FYI, "%s: inode %llu is unchanged\n", __func__, cifs_i->uniqueid); return; @@ -173,12 +173,12 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr, CIFS_I(inode)->time = 0; /* force reval */ return -ESTALE; } - if (inode_state_read_once(inode) & I_NEW) - CIFS_I(inode)->netfs.zero_point = fattr->cf_eof; - cifs_revalidate_cache(inode, fattr); spin_lock(&inode->i_lock); + if (inode_state_read_once(inode) & I_NEW) + netfs_write_zero_point(inode, fattr->cf_eof); + fattr->cf_mtime = timestamp_truncate(fattr->cf_mtime, inode); fattr->cf_atime = timestamp_truncate(fattr->cf_atime, inode); fattr->cf_ctime = timestamp_truncate(fattr->cf_ctime, inode); @@ -212,7 +212,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr, else clear_bit(CIFS_INO_DELETE_PENDING, &cifs_i->flags); - cifs_i->netfs.remote_i_size = fattr->cf_eof; + netfs_write_remote_i_size(inode, fattr->cf_eof); /* * Can't safely change the file size here if the client is writing to * it due to potential races. @@ -2772,7 +2772,9 @@ cifs_revalidate_mapping(struct inode *inode) if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_RW_CACHE) goto skip_invalidate; - cifs_inode->netfs.zero_point = cifs_inode->netfs.remote_i_size; + spin_lock(&inode->i_lock); + netfs_write_zero_point(inode, netfs_inode(inode)->_remote_i_size); + spin_unlock(&inode->i_lock); rc = filemap_invalidate_inode(inode, true, 0, LLONG_MAX); if (rc) { cifs_dbg(VFS, "%s: invalidate inode %p failed with rc %d\n", diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index be22bbc4a65a..e860fa08b5e3 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -143,7 +143,8 @@ retry: fattr->cf_rdev = inode->i_rdev; fattr->cf_uid = inode->i_uid; fattr->cf_gid = inode->i_gid; - fattr->cf_eof = CIFS_I(inode)->netfs.remote_i_size; + fattr->cf_eof = + netfs_read_remote_i_size(inode); fattr->cf_symlink_target = NULL; } else { CIFS_I(inode)->time = 0; diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index e6cb9b144530..0ea3ce1b94ea 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3402,8 +3402,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, struct inode *inode = file_inode(file); struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifsFileInfo *cfile = file->private_data; - struct netfs_inode *ictx = netfs_inode(inode); - unsigned long long i_size, new_size, remote_size; + unsigned long long i_size, new_size, remote_i_size, zero_point; long rc; unsigned int xid; @@ -3414,9 +3413,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, filemap_invalidate_lock(inode->i_mapping); - i_size = i_size_read(inode); - remote_size = ictx->remote_i_size; - if (offset + len >= remote_size && offset < i_size) { + netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point); + if (offset + len >= remote_i_size && offset < i_size) { unsigned long long top = umin(offset + len, i_size); rc = filemap_write_and_wait_range(inode->i_mapping, offset, top - 1); @@ -3449,9 +3447,11 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, cfile->fid.volatile_fid, cfile->pid, new_size); if (rc >= 0) { truncate_setsize(inode, new_size); + spin_lock(&inode->i_lock); netfs_resize_file(&cifsi->netfs, new_size, true); - if (offset < cifsi->netfs.zero_point) - cifsi->netfs.zero_point = offset; + if (offset < cifsi->netfs._zero_point) + netfs_write_zero_point(inode, offset); + spin_unlock(&inode->i_lock); fscache_resize_cookie(cifs_inode_cookie(inode), new_size); } } @@ -3474,7 +3474,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, struct inode *inode = file_inode(file); struct cifsFileInfo *cfile = file->private_data; struct file_zero_data_information fsctl_buf; - unsigned long long end = offset + len, i_size, remote_i_size; + unsigned long long end = offset + len, i_size, remote_i_size, zero_point; long rc; unsigned int xid; __u8 set_sparse = 1; @@ -3516,14 +3516,17 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, * that we locally hole-punch the tail of the dirty data, the proposed * EOF update will end up in the wrong place. */ - i_size = i_size_read(inode); - remote_i_size = netfs_inode(inode)->remote_i_size; + netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point); + if (end > remote_i_size && i_size > remote_i_size) { unsigned long long extend_to = umin(end, i_size); rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, extend_to); - if (rc >= 0) - netfs_inode(inode)->remote_i_size = extend_to; + if (rc >= 0) { + spin_lock(&inode->i_lock); + netfs_write_remote_i_size(inode, extend_to); + spin_unlock(&inode->i_lock); + } } unlock: @@ -3787,7 +3790,6 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, struct inode *inode = file_inode(file); struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifsFileInfo *cfile = file->private_data; - struct netfs_inode *ictx = &cifsi->netfs; loff_t old_eof, new_eof; xid = get_xid(); @@ -3805,7 +3807,9 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, goto out_2; truncate_pagecache_range(inode, off, old_eof); - ictx->zero_point = old_eof; + spin_lock(&inode->i_lock); + netfs_write_zero_point(inode, old_eof); + spin_unlock(&inode->i_lock); netfs_wait_for_outstanding_io(inode); rc = smb2_copychunk_range(xid, cfile, cfile, off + len, @@ -3822,8 +3826,10 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, rc = 0; truncate_setsize(inode, new_eof); + spin_lock(&inode->i_lock); netfs_resize_file(&cifsi->netfs, new_eof, true); - ictx->zero_point = new_eof; + netfs_write_zero_point(inode, new_eof); + spin_unlock(&inode->i_lock); fscache_resize_cookie(cifs_inode_cookie(inode), new_eof); out_2: filemap_invalidate_unlock(inode->i_mapping); @@ -3866,13 +3872,17 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, goto out_2; truncate_setsize(inode, new_eof); + spin_lock(&inode->i_lock); netfs_resize_file(&cifsi->netfs, i_size_read(inode), true); + spin_unlock(&inode->i_lock); fscache_resize_cookie(cifs_inode_cookie(inode), i_size_read(inode)); rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len); if (rc < 0) goto out_2; - cifsi->netfs.zero_point = new_eof; + spin_lock(&inode->i_lock); + netfs_write_zero_point(inode, new_eof); + spin_unlock(&inode->i_lock); rc = smb3_zero_data(file, tcon, off, len, xid); if (rc < 0) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 995fcdd30681..3bd300347f16 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4608,6 +4608,7 @@ smb2_readv_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) struct netfs_inode *ictx = netfs_inode(rdata->rreq->inode); struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink); struct smb2_hdr *shdr = (struct smb2_hdr *)rdata->iov[0].iov_base; + struct inode *inode = &ictx->inode; struct cifs_credits credits = { .value = 0, .instance = 0, @@ -4721,7 +4722,7 @@ do_retry: } else { size_t trans = rdata->subreq.transferred + rdata->got_bytes; if (trans < rdata->subreq.len && - rdata->subreq.start + trans >= ictx->remote_i_size) { + rdata->subreq.start + trans >= netfs_read_remote_i_size(inode)) { __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags); rdata->result = 0; } diff --git a/include/linux/netfs.h b/include/linux/netfs.h index ba17ac5bf356..4fd1d796ad73 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -62,8 +62,8 @@ struct netfs_inode { struct fscache_cookie *cache; #endif struct mutex wb_lock; /* Writeback serialisation */ - loff_t remote_i_size; /* Size of the remote file */ - loff_t zero_point; /* Size after which we assume there's no data + loff_t _remote_i_size; /* Size of the remote file */ + loff_t _zero_point; /* Size after which we assume there's no data * on the server */ atomic_t io_count; /* Number of outstanding reqs */ unsigned long flags; @@ -474,6 +474,254 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode) return container_of(inode, struct netfs_inode, inode); } +/** + * netfs_read_remote_i_size - Read remote_i_size safely + * @inode: The inode to access + * + * Read remote_i_size safely without the potential for tearing on 32-bit + * arches. + * + * NOTE: in a 32bit arch with a preemptable kernel and an UP compile the + * i_size_read/write must be atomic with respect to the local cpu (unlike with + * preempt disabled), but they don't need to be atomic with respect to other + * cpus like in true SMP (so they need either to either locally disable irq + * around the read or for example on x86 they can be still implemented as a + * cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit + * archs it makes no difference if preempt is enabled or not. + */ +static inline unsigned long long netfs_read_remote_i_size(const struct inode *inode) +{ + const struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode); + unsigned long long remote_i_size; + +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + unsigned int seq; + + do { + seq = read_seqcount_begin(&inode->i_size_seqcount); + remote_i_size = ictx->_remote_i_size; + } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); + remote_i_size = ictx->_remote_i_size; + preempt_enable(); +#else + /* Pairs with smp_store_release() in netfs_write_remote_i_size() */ + remote_i_size = smp_load_acquire(&ictx->_remote_i_size); +#endif + return remote_i_size; +} + +/* + * netfs_write_remote_i_size - Set remote_i_size safely + * @inode: The inode to access + * @remote_i_size: The new value for the size of the file on the server + * + * Set remote_i_size safely without the potential for tearing on 32-bit arches. + * + * Context: The caller must hold inode->i_lock. + * + * NOTE: unlike netfs_read_remote_i_size(), netfs_write_remote_i_size() does + * need locking around it (normally i_rwsem), otherwise on 32bit/SMP an update + * of i_size_seqcount can be lost, resulting in subsequent i_size_read() calls + * spinning forever. + */ +static inline void netfs_write_remote_i_size(struct inode *inode, + unsigned long long remote_i_size) +{ + struct netfs_inode *ictx = netfs_inode(inode); + +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + write_seqcount_begin(&inode->i_size_seqcount); + ictx->_remote_i_size = remote_i_size; + write_seqcount_end(&inode->i_size_seqcount); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); + ictx->_remote_i_size = remote_i_size; + preempt_enable(); +#else + /* + * Pairs with smp_load_acquire() in netfs_read_remote_i_size() to + * ensure changes related to inode size (such as page contents) are + * visible before we see the changed inode size. + */ + smp_store_release(&ictx->_remote_i_size, remote_i_size); +#endif +} + +/** + * netfs_read_zero_point - Read zero_point safely + * @inode: The inode to access + * + * Read zero_point safely without the potential for tearing on 32-bit + * arches. + * + * NOTE: in a 32bit arch with a preemptable kernel and an UP compile the + * i_size_read/write must be atomic with respect to the local cpu (unlike with + * preempt disabled), but they don't need to be atomic with respect to other + * cpus like in true SMP (so they need either to either locally disable irq + * around the read or for example on x86 they can be still implemented as a + * cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit + * archs it makes no difference if preempt is enabled or not. + */ +static inline unsigned long long netfs_read_zero_point(const struct inode *inode) +{ + struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode); + unsigned long long zero_point; + +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + unsigned int seq; + + do { + seq = read_seqcount_begin(&inode->i_size_seqcount); + zero_point = ictx->_zero_point; + } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); + zero_point = ictx->_zero_point; + preempt_enable(); +#else + /* Pairs with smp_store_release() in netfs_write_zero_point() */ + zero_point = smp_load_acquire(&ictx->_zero_point); +#endif + return zero_point; +} + +/* + * netfs_write_zero_point - Set zero_point safely + * @inode: The inode to access + * @zero_point: The new value for the point beyond which the server has no data + * + * Set zero_point safely without the potential for tearing on 32-bit arches. + * + * Context: The caller must hold inode->i_lock. + * + * NOTE: unlike netfs_read_zero_point(), netfs_write_zero_point() does need + * locking around it (normally i_rwsem), otherwise on 32bit/SMP an update of + * i_size_seqcount can be lost, resulting in subsequent read calls spinning + * forever. + */ +static inline void netfs_write_zero_point(struct inode *inode, + unsigned long long zero_point) +{ + struct netfs_inode *ictx = netfs_inode(inode); + +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + write_seqcount_begin(&inode->i_size_seqcount); + ictx->_zero_point = zero_point; + write_seqcount_end(&inode->i_size_seqcount); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); + ictx->_zero_point = zero_point; + preempt_enable(); +#else + /* + * Pairs with smp_load_acquire() in netfs_read_zero_point() to + * ensure changes related to inode size (such as page contents) are + * visible before we see the changed inode size. + */ + smp_store_release(&ictx->_zero_point, zero_point); +#endif +} + +/** + * netfs_read_sizes - Read remote_i_size and zero_point safely + * @inode: The inode to access + * @i_size: Where to return the local file size. + * @remote_i_size: Where to return the size of the file on the server + * @zero_point: Where to return the the point beyond which the server has no data + * + * Read remote_i_size and zero_point safely without the potential for tearing + * on 32-bit arches. + * + * NOTE: in a 32bit arch with a preemptable kernel and an UP compile the + * i_size_read/write must be atomic with respect to the local cpu (unlike with + * preempt disabled), but they don't need to be atomic with respect to other + * cpus like in true SMP (so they need either to either locally disable irq + * around the read or for example on x86 they can be still implemented as a + * cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit + * archs it makes no difference if preempt is enabled or not. + */ +static inline void netfs_read_sizes(const struct inode *inode, + unsigned long long *i_size, + unsigned long long *remote_i_size, + unsigned long long *zero_point) +{ + const struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode); +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + unsigned int seq; + + do { + seq = read_seqcount_begin(&inode->i_size_seqcount); + *i_size = inode->i_size; + *remote_i_size = ictx->_remote_i_size; + *zero_point = ictx->_zero_point; + } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); + *i_size = inode->i_size; + *remote_i_size = ictx->_remote_i_size; + *zero_point = ictx->_zero_point; + preempt_enable(); +#else + /* Pairs with smp_store_release() in i_size_write() */ + *i_size = smp_load_acquire(&inode->i_size); + /* Pairs with smp_store_release() in netfs_write_remote_i_size() */ + *remote_i_size = smp_load_acquire(&ictx->_remote_i_size); + /* Pairs with smp_store_release() in netfs_write_zero_point() */ + *zero_point = smp_load_acquire(&ictx->_zero_point); +#endif +} + +/* + * netfs_write_sizes - Set i_size, remote_i_size and zero_point safely + * @inode: The inode to access + * @i_size: The new value for the local size of the file + * @remote_i_size: The new value for the size of the file on the server + * @zero_point: The new value for the point beyond which the server has no data + * + * Set both remote_i_size and zero_point safely without the potential for + * tearing on 32-bit arches. + * + * Context: The caller must hold inode->i_lock. + * + * NOTE: unlike netfs_read_zero_point(), netfs_write_zero_point() does need + * locking around it (normally i_rwsem), otherwise on 32bit/SMP an update of + * i_size_seqcount can be lost, resulting in subsequent read calls spinning + * forever. + */ +static inline void netfs_write_sizes(struct inode *inode, + unsigned long long i_size, + unsigned long long remote_i_size, + unsigned long long zero_point) +{ + struct netfs_inode *ictx = netfs_inode(inode); + +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + write_seqcount_begin(&inode->i_size_seqcount); + inode->i_size = i_size; + ictx->_remote_i_size = remote_i_size; + ictx->_zero_point = zero_point; + write_seqcount_end(&inode->i_size_seqcount); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); + inode->i_size = i_size; + ictx->_remote_i_size = remote_i_size; + ictx->_zero_point = zero_point; + preempt_enable(); +#else + /* + * Pairs with smp_load_acquire() in i_size_read(), + * netfs_read_remote_i_size() and netfs_read_zero_point() to ensure + * changes related to inode size (such as page contents) are visible + * before we see the changed inode size. + */ + smp_store_release(&inode->i_size, i_size); + smp_store_release(&ictx->_remote_i_size, remote_i_size); + smp_store_release(&ictx->_zero_point, zero_point); +#endif +} + /** * netfs_inode_init - Initialise a netfslib inode context * @ctx: The netfs inode to initialise @@ -488,8 +736,8 @@ static inline void netfs_inode_init(struct netfs_inode *ctx, bool use_zero_point) { ctx->ops = ops; - ctx->remote_i_size = i_size_read(&ctx->inode); - ctx->zero_point = LLONG_MAX; + ctx->_remote_i_size = i_size_read(&ctx->inode); + ctx->_zero_point = LLONG_MAX; ctx->flags = 0; atomic_set(&ctx->io_count, 0); #if IS_ENABLED(CONFIG_FSCACHE) @@ -498,7 +746,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx, mutex_init(&ctx->wb_lock); /* ->releasepage() drives zero_point */ if (use_zero_point) { - ctx->zero_point = ctx->remote_i_size; + ctx->_zero_point = ctx->_remote_i_size; mapping_set_release_always(ctx->inode.i_mapping); } } @@ -511,13 +759,40 @@ static inline void netfs_inode_init(struct netfs_inode *ctx, * * Inform the netfs lib that a file got resized so that it can adjust its state. */ -static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size, +static inline void netfs_resize_file(struct netfs_inode *ictx, + unsigned long long new_i_size, bool changed_on_server) { +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + struct inode *inode = &ictx->inode; + + preempt_disable(); + write_seqcount_begin(&inode->i_size_seqcount); + if (changed_on_server) + ictx->_remote_i_size = new_i_size; + if (new_i_size < ictx->_zero_point) + ictx->_zero_point = new_i_size; + write_seqcount_end(&inode->i_size_seqcount); + preempt_enable(); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); if (changed_on_server) - ctx->remote_i_size = new_i_size; - if (new_i_size < ctx->zero_point) - ctx->zero_point = new_i_size; + ictx->_remote_i_size = new_i_size; + if (new_i_size < ictx->_zero_point) + ictx->_zero_point = new_i_size; + preempt_enable(); +#else + /* + * Pairs with smp_load_acquire() in netfs_read_remote_i_size and + * netfs_read_zero_point() to ensure changes related to inode size + * (such as page contents) are visible before we see the changed inode + * size. + */ + if (changed_on_server) + smp_store_release(&ictx->_remote_i_size, new_i_size); + if (new_i_size < ictx->_zero_point) + smp_store_release(&ictx->_zero_point, new_i_size); +#endif } /** -- cgit v1.2.3 From 4543a4d737944134a1394afe797622546fbcc98a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:43 +0100 Subject: netfs: Fix zeropoint update where i_size > remote_i_size Fix the update of the zero point[*] by netfs_release_folio() when there is uncommitted data in the pagecache beyond the folio being released but the on-server EOF is in this folio (ie. i_size > remote_i_size). The update needs to limit zero_point to remote_i_size, not i_size as i_size is a local phenomenon reflecting updates made locally to the pagecache, not stuff written to the server. remote_i_size tracks the server's i_size. [*] The zero point is the file position from which we can assume that the server will just return zeros, so we can avoid generating reads. Note that netfs_invalidate_folio() probably doesn't need fixing as zero_point should be updated by setattr after truncation or fallocate. Found with: fsx -q -N 1000000 -p 10000 -o 128000 -l 600000 \ /xfstest.test/junk --replay-ops=junk.fsxops using the following as junk.fsxops: truncate 0x0 0x1bbae 0x82864 write 0x3ef2e 0xf9c8 0x1bbae write 0x67e05 0xcb5a 0x4e8f6 mapread 0x57781 0x85b6 0x7495f copy_range 0x5d3d 0x10329 0x54fac 0x7495f write 0x64710 0x1c2b 0x7495f mapread 0x64000 0x1000 0x7495f on cifs with the default cache option. It shows read-gaps on folio 0x64 failing with a short read (ie. it hits EOF) if the FMODE_READ check is commented out in netfs_perform_write(): if (//(file->f_mode & FMODE_READ) || netfs_is_cache_enabled(ctx)) { and no fscache. This was initially found with the generic/522 xfstest. Fixes: cce6bfa6ca0e ("netfs: Fix trimming of streaming-write folios in netfs_inval_folio()") Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-7-dhowells@redhat.com cc: Paulo Alcantara cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index bad661ff2bec..723571ca1b88 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -307,10 +307,10 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp) return false; netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point); - end = umin(folio_next_pos(folio), i_size); + end = folio_next_pos(folio); if (end > zero_point) { spin_lock(&inode->i_lock); - end = umin(folio_next_pos(folio), inode->i_size); + end = umin(end, ctx->_remote_i_size); if (end > ctx->_zero_point) netfs_write_zero_point(inode, end); spin_unlock(&inode->i_lock); -- cgit v1.2.3 From dc7832d05deb4d632e8035e3299e31a3528fa0d0 Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Tue, 12 May 2026 13:33:44 +0100 Subject: netfs: fix VM_BUG_ON_FOLIO() issue in netfs_write_begin() call The multiple runs of generic/013 test-case is capable to reproduce a kernel BUG at mm/filemap.c:1504 with probability of 30%. while true; do sudo ./check generic/013 done [ 9849.452376] page: refcount:3 mapcount:0 mapping:00000000e58ff252 index:0x10781 pfn:0x1c322 [ 9849.452412] memcg:ffff8881a1915800 [ 9849.452417] aops:ceph_aops ino:1000058db9e dentry name(?):"f9XXXXXX" [ 9849.452432] flags: 0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff) [ 9849.452441] raw: 0017ffffc0000000 0000000000000000 dead000000000122 ffff88816110d248 [ 9849.452445] raw: 0000000000010781 0000000000000000 00000003ffffffff ffff8881a1915800 [ 9849.452447] page dumped because: VM_BUG_ON_FOLIO(!folio_test_locked(folio)) [ 9849.452474] ------------[ cut here ]------------ [ 9849.452476] kernel BUG at mm/filemap.c:1504! [ 9849.478635] Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI [ 9849.481772] CPU: 2 UID: 0 PID: 84223 Comm: fsstress Not tainted 7.0.0-rc1+ #18 PREEMPT(full) [ 9849.482881] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-9.fc43 06/1 0/2025 [ 9849.484539] RIP: 0010:folio_unlock+0x85/0xa0 [ 9849.485076] Code: 89 df 31 f6 e8 1c f3 ff ff 48 8b 5d f8 c9 31 c0 31 d2 31 f6 31 ff c3 cc cc cc cc 48 c7 c6 80 6c d9 a7 48 89 df e8 4b b3 10 00 <0f> 0b 48 89 df e8 21 e6 2c 00 eb 9d 0f 1f 40 00 66 66 2e 0f 1f 84 [ 9849.493818] RSP: 0018:ffff8881bb8076b0 EFLAGS: 00010246 [ 9849.495740] RAX: 0000000000000000 RBX: ffffea00070c8980 RCX: 0000000000000000 [ 9849.498678] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 9849.500559] RBP: ffff8881bb8076b8 R08: 0000000000000000 R09: 0000000000000000 [ 9849.501097] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000010782000 [ 9849.502108] R13: ffff8881935de738 R14: ffff88816110d010 R15: 0000000000001000 [ 9849.502516] FS: 00007e36cbe94740(0000) GS:ffff88824a899000(0000) knlGS:0000000000000000 [ 9849.502996] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 9849.503810] CR2: 000000c0002b0000 CR3: 000000011bbf6004 CR4: 0000000000772ef0 [ 9849.504459] PKRU: 55555554 [ 9849.504626] Call Trace: [ 9849.505242] [ 9849.505379] netfs_write_begin+0x7c8/0x10a0 [ 9849.505877] ? __kasan_check_read+0x11/0x20 [ 9849.506384] ? __pfx_netfs_write_begin+0x10/0x10 [ 9849.507178] ceph_write_begin+0x8c/0x1c0 [ 9849.507934] generic_perform_write+0x391/0x8f0 [ 9849.508503] ? __pfx_generic_perform_write+0x10/0x10 [ 9849.509062] ? file_update_time_flags+0x19a/0x4b0 [ 9849.509581] ? ceph_get_caps+0x63/0xf0 [ 9849.510259] ? ceph_get_caps+0x63/0xf0 [ 9849.510530] ceph_write_iter+0xe79/0x1ae0 [ 9849.511282] ? __pfx_ceph_write_iter+0x10/0x10 [ 9849.511839] ? lock_acquire+0x1ad/0x310 [ 9849.512334] ? ksys_write+0xf9/0x230 [ 9849.512582] ? lock_is_held_type+0xaa/0x140 [ 9849.513128] vfs_write+0x512/0x1110 [ 9849.513634] ? __fget_files+0x33/0x350 [ 9849.513893] ? __pfx_vfs_write+0x10/0x10 [ 9849.514143] ? mutex_lock_nested+0x1b/0x30 [ 9849.514394] ksys_write+0xf9/0x230 [ 9849.514621] ? __pfx_ksys_write+0x10/0x10 [ 9849.514887] ? do_syscall_64+0x25e/0x1520 [ 9849.515122] ? __kasan_check_read+0x11/0x20 [ 9849.515366] ? trace_hardirqs_on_prepare+0x178/0x1c0 [ 9849.515655] __x64_sys_write+0x72/0xd0 [ 9849.515885] ? trace_hardirqs_on+0x24/0x1c0 [ 9849.516130] x64_sys_call+0x22f/0x2390 [ 9849.516341] do_syscall_64+0x12b/0x1520 [ 9849.516545] ? do_syscall_64+0x27c/0x1520 [ 9849.516783] ? do_syscall_64+0x27c/0x1520 [ 9849.517003] ? lock_release+0x318/0x480 [ 9849.517220] ? __x64_sys_io_getevents+0x143/0x2d0 [ 9849.517479] ? percpu_ref_put_many.constprop.0+0x8f/0x210 [ 9849.517779] ? entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 9849.518073] ? do_syscall_64+0x25e/0x1520 [ 9849.518291] ? __kasan_check_read+0x11/0x20 [ 9849.518519] ? trace_hardirqs_on_prepare+0x178/0x1c0 [ 9849.518799] ? do_syscall_64+0x27c/0x1520 [ 9849.519024] ? local_clock_noinstr+0xf/0x120 [ 9849.519262] ? entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 9849.519544] ? do_syscall_64+0x25e/0x1520 [ 9849.519781] ? __kasan_check_read+0x11/0x20 [ 9849.520008] ? trace_hardirqs_on_prepare+0x178/0x1c0 [ 9849.520273] ? do_syscall_64+0x27c/0x1520 [ 9849.520491] ? trace_hardirqs_on_prepare+0x178/0x1c0 [ 9849.520767] ? irqentry_exit+0x10c/0x6c0 [ 9849.520984] ? trace_hardirqs_off+0x86/0x1b0 [ 9849.521224] ? exc_page_fault+0xab/0x130 [ 9849.521472] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 9849.521766] RIP: 0033:0x7e36cbd14907 [ 9849.521989] Code: 10 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 [ 9849.523057] RSP: 002b:00007ffff2d2a968 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 9849.523484] RAX: ffffffffffffffda RBX: 000000000000e549 RCX: 00007e36cbd14907 [ 9849.523885] RDX: 000000000000e549 RSI: 00005bd797ec6370 RDI: 0000000000000004 [ 9849.524277] RBP: 0000000000000004 R08: 0000000000000047 R09: 00005bd797ec6370 [ 9849.524652] R10: 0000000000000078 R11: 0000000000000246 R12: 0000000000000049 [ 9849.525062] R13: 0000000010781a37 R14: 00005bd797ec6370 R15: 0000000000000000 [ 9849.525447] [ 9849.525574] Modules linked in: intel_rapl_msr intel_rapl_common intel_uncore_frequency_common intel_pmc_core pmt_telemetry pmt_discovery pmt_class intel_pmc_ssram_telemetry intel_vsec kvm_intel joydev kvm irqbypass ghash_clmulni_intel aesni_intel input_leds rapl mac_hid psmouse vga16fb serio_raw vgastate floppy i2c_piix4 bochs qemu_fw_cfg i2c_smbus pata_acpi sch_fq_codel rbd msr parport_pc ppdev lp parport efi_pstore [ 9849.529150] ---[ end trace 0000000000000000 ]--- [ 9849.529502] RIP: 0010:folio_unlock+0x85/0xa0 [ 9849.530813] Code: 89 df 31 f6 e8 1c f3 ff ff 48 8b 5d f8 c9 31 c0 31 d2 31 f6 31 ff c3 cc cc cc cc 48 c7 c6 80 6c d9 a7 48 89 df e8 4b b3 10 00 <0f> 0b 48 89 df e8 21 e6 2c 00 eb 9d 0f 1f 40 00 66 66 2e 0f 1f 84 [ 9849.534986] RSP: 0018:ffff8881bb8076b0 EFLAGS: 00010246 [ 9849.536198] RAX: 0000000000000000 RBX: ffffea00070c8980 RCX: 0000000000000000 [ 9849.537718] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 9849.539321] RBP: ffff8881bb8076b8 R08: 0000000000000000 R09: 0000000000000000 [ 9849.540862] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000010782000 [ 9849.542438] R13: ffff8881935de738 R14: ffff88816110d010 R15: 0000000000001000 [ 9849.543996] FS: 00007e36cbe94740(0000) GS:ffff88824b899000(0000) knlGS:0000000000000000 [ 9849.545854] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 9849.547092] CR2: 00007e36cb3ff000 CR3: 000000011bbf6006 CR4: 0000000000772ef0 [ 9849.548679] PKRU: 55555554 The race sequence: 1. Read completes -> netfs_read_collection() runs 2. netfs_wake_rreq_flag(rreq, NETFS_RREQ_IN_PROGRESS, ...) 3. netfs_wait_for_read() returns -EFAULT to netfs_write_begin() 4. The netfs_unlock_abandoned_read_pages() unlocks the folio 5. netfs_write_begin() calls folio_unlock(folio) -> VM_BUG_ON_FOLIO() The key reason of the issue that netfs_unlock_abandoned_read_pages() doesn't check the flag NETFS_RREQ_NO_UNLOCK_FOLIO and executes folio_unlock() unconditionally. This patch implements in netfs_unlock_abandoned_read_pages() logic similar to netfs_unlock_read_folio(). Fixes: ee4cdf7ba857 ("netfs: Speed up buffered reading") Signed-off-by: Viacheslav Dubeyko Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-8-dhowells@redhat.com Reviewed-by: Paulo Alcantara (Red Hat) cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: Ceph Development Signed-off-by: Christian Brauner --- fs/netfs/read_retry.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index 5ec548b996d6..e10eb5a07332 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c @@ -292,8 +292,15 @@ void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq) struct folio *folio = folioq_folio(p, slot); if (folio && !folioq_is_marked2(p, slot)) { - trace_netfs_folio(folio, netfs_folio_trace_abandon); - folio_unlock(folio); + if (folio->index == rreq->no_unlock_folio && + test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, + &rreq->flags)) { + _debug("no unlock"); + } else { + trace_netfs_folio(folio, + netfs_folio_trace_abandon); + folio_unlock(folio); + } } } } -- cgit v1.2.3 From 7e3d8db899d54af39fafb2eb3392b0cdae9973b5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:45 +0100 Subject: netfs: Fix potential uninitialised var in netfs_extract_user_iter() In netfs_extract_user_iter(), if it's given a zero-length iterator, it will fall through the loop without setting ret, and so the error handling behaviour will be undefined, depending on whether ret happens to be negative. The value of ret then propagates back up the callstack. Fix this by presetting ret to 0. Fixes: 85dd2c8ff368 ("netfs: Add a function to extract a UBUF or IOVEC into a BVEC iterator") Closes: https://sashiko.dev/#/patchset/20260414082004.3756080-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-9-dhowells@redhat.com cc: Paulo Alcantara cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/iterator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c index 154a14bb2d7f..6903028b7162 100644 --- a/fs/netfs/iterator.c +++ b/fs/netfs/iterator.c @@ -43,7 +43,7 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, unsigned int max_pages; unsigned int npages = 0; unsigned int i; - ssize_t ret; + ssize_t ret = 0; size_t count = orig_len, offset, len; size_t bv_size, pg_size; -- cgit v1.2.3 From 0aad5704c6b4d14007d4eab15883e8524e4310f4 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 12 May 2026 13:33:46 +0100 Subject: netfs: fix error handling in netfs_extract_user_iter() In netfs_extract_user_iter(), if iov_iter_extract_pages() failed to extract user pages, bail out on -ENOMEM, otherwise return the error code only if @npages == 0, allowing short DIO reads and writes to be issued. This fixes mmapstress02 from LTP tests against CIFS. Fixes: 85dd2c8ff368 ("netfs: Add a function to extract a UBUF or IOVEC into a BVEC iterator") Reported-by: Xiaoli Feng Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-10-dhowells@redhat.com Cc: netfs@lists.linux.dev Cc: stable@vger.kernel.org Cc: linux-cifs@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/iterator.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c index 6903028b7162..429e4396e1b0 100644 --- a/fs/netfs/iterator.c +++ b/fs/netfs/iterator.c @@ -22,7 +22,7 @@ * * Extract the page fragments from the given amount of the source iterator and * build up a second iterator that refers to all of those bits. This allows - * the original iterator to disposed of. + * the original iterator to be disposed of. * * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA be * allowed on the pages extracted. @@ -67,8 +67,8 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, ret = iov_iter_extract_pages(orig, &pages, count, max_pages - npages, extraction_flags, &offset); - if (ret < 0) { - pr_err("Couldn't get user pages (rc=%zd)\n", ret); + if (unlikely(ret <= 0)) { + ret = ret ?: -EIO; break; } @@ -97,6 +97,13 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, npages += cur_npages; } + if (ret < 0 && (ret == -ENOMEM || npages == 0)) { + for (i = 0; i < npages; i++) + unpin_user_page(bv[i].bv_page); + kvfree(bv); + return ret; + } + iov_iter_bvec(new, orig->data_source, bv, npages, orig_len - count); return npages; } -- cgit v1.2.3 From 0ef37eef83fad3542ee06db2940433ae1a92b39d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:47 +0100 Subject: netfs: Fix overrun check in netfs_extract_user_iter() Fix netfs_extract_user_iter() so that if iov_iter_extract_pages() overfills pages[], then those pages don't get included in the iterator constructed at the end of the function. If there was an overfill, memory corruption has already happened. Fixes: 85dd2c8ff368 ("netfs: Add a function to extract a UBUF or IOVEC into a BVEC iterator") Closes: https://sashiko.dev/#/patchset/20260427154639.180684-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-11-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/iterator.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c index 429e4396e1b0..b375567e0520 100644 --- a/fs/netfs/iterator.c +++ b/fs/netfs/iterator.c @@ -72,21 +72,24 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, break; } - if (ret > count) { - pr_err("get_pages rc=%zd more than %zu\n", ret, count); + if (WARN(ret > count, + "%s: extract_pages overrun %zd > %zu bytes\n", + __func__, ret, count)) { + ret = -EIO; break; } - count -= ret; - ret += offset; - cur_npages = DIV_ROUND_UP(ret, PAGE_SIZE); - - if (npages + cur_npages > max_pages) { - pr_err("Out of bvec array capacity (%u vs %u)\n", - npages + cur_npages, max_pages); + cur_npages = DIV_ROUND_UP(offset + ret, PAGE_SIZE); + if (WARN(cur_npages > max_pages - npages, + "%s: extract_pages overrun %u > %u pages\n", + __func__, npages + cur_npages, max_pages)) { + ret = -EIO; break; } + count -= ret; + ret += offset; + for (i = 0; i < cur_npages; i++) { len = ret > PAGE_SIZE ? PAGE_SIZE : ret; bvec_set_page(bv + npages + i, *pages++, len - offset, offset); @@ -97,6 +100,11 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, npages += cur_npages; } + /* Note: Don't try to clean up after EIO. Either we got no pages, so + * nothing to clean up, or we got a buffer overrun, memory corruption + * and can't trust the stuff in the buffer (a WARN was emitted). + */ + if (ret < 0 && (ret == -ENOMEM || npages == 0)) { for (i = 0; i < npages; i++) unpin_user_page(bv[i].bv_page); -- cgit v1.2.3 From 156ac2ec2ee77c44c4eb7439d6d165247ba12247 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:48 +0100 Subject: netfs: Fix netfs_invalidate_folio() to clear dirty bit if all changes gone If a streaming write is made, this will leave the relevant modified folio in a not-uptodate, but dirty state with a netfs_folio struct hung off of folio->private indicating the dirty range. Subsequently truncating the file such that the dirty data in the folio is removed, but the first part of the folio theoretically remains will cause the netfs_folio struct to be discarded... but will leave the dirty flag set. If the folio is then read via mmap(), netfs_read_folio() will see that the page is dirty and jump to netfs_read_gaps() to fill in the missing bits. netfs_read_gaps(), however, expects there to be a netfs_folio struct present and can oops because truncate removed it. Fix this by calling folio_cancel_dirty() in netfs_invalidate_folio() in the event that all the dirty data in the folio is erased (as nfs does). Also add some tracepoints to log modifications to a dirty page. This can be reproduced with something like: dd if=/dev/zero of=/xfstest.test/foo bs=1M count=1 umount /xfstest.test mount /xfstest.test xfs_io -c "w 0xbbbf 0xf96c" \ -c "truncate 0xbbbf" \ -c "mmap -r 0xb000 0x11000" \ -c "mr 0xb000 0x11000" \ /xfstest.test/foo with fscaching disabled (otherwise streaming writes are suppressed) and a change to netfs_perform_write() to disallow streaming writes if the fd is open O_RDWR: if (//(file->f_mode & FMODE_READ) || <--- comment this out netfs_is_cache_enabled(ctx)) { It should be reproducible even without this change, but if prevents the above trivial xfs_io command from reproducing it. Note that the initial dd is important: the file must start out sufficiently large that the zero-point logic doesn't just clear the gaps because it knows there's nothing in the file to read yet. Unmounting and mounting is needed to clear the pagecache (there are other ways to do that that may also work). This was initially reproduced with the generic/522 xfstest on some patches that remove the FMODE_READ restriction. Fixes: 9ebff83e6481 ("netfs: Prep to use folio->private for write grouping and streaming write") Reported-by: Marc Dionne Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-12-dhowells@redhat.com cc: Paulo Alcantara cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/misc.c | 6 +++++- include/trace/events/netfs.h | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 723571ca1b88..24b20e80e9a8 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -263,6 +263,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) /* Move the start of the data. */ finfo->dirty_len = fend - iend; finfo->dirty_offset = offset; + trace_netfs_folio(folio, netfs_folio_trace_invalidate_front); return; } @@ -271,12 +272,14 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) */ if (iend >= fend) { finfo->dirty_len = offset - fstart; + trace_netfs_folio(folio, netfs_folio_trace_invalidate_tail); return; } /* A partial write was split. The caller has already zeroed * it, so just absorb the hole. */ + trace_netfs_folio(folio, netfs_folio_trace_invalidate_middle); } return; @@ -284,8 +287,9 @@ erase_completely: netfs_put_group(netfs_folio_group(folio)); folio_detach_private(folio); folio_clear_uptodate(folio); + folio_cancel_dirty(folio); kfree(finfo); - return; + trace_netfs_folio(folio, netfs_folio_trace_invalidate_all); } EXPORT_SYMBOL(netfs_invalidate_folio); diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 8c936fc575d5..0b702f74aefe 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -194,6 +194,10 @@ EM(netfs_folio_trace_copy_to_cache, "mark-copy") \ EM(netfs_folio_trace_end_copy, "end-copy") \ EM(netfs_folio_trace_filled_gaps, "filled-gaps") \ + EM(netfs_folio_trace_invalidate_all, "inval-all") \ + EM(netfs_folio_trace_invalidate_front, "inval-front") \ + EM(netfs_folio_trace_invalidate_middle, "inval-mid") \ + EM(netfs_folio_trace_invalidate_tail, "inval-tail") \ EM(netfs_folio_trace_kill, "kill") \ EM(netfs_folio_trace_kill_cc, "kill-cc") \ EM(netfs_folio_trace_kill_g, "kill-g") \ -- cgit v1.2.3 From daeb443b92817021c1234e8eded219e164b7c35d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:49 +0100 Subject: netfs: Defer the emission of trace_netfs_folio() Change netfs_perform_write() to keep the netfs_folio trace value in a variable and emit it later to make it easier to choose the value displayed. This is a prerequisite for a subsequent patch. Closes: https://sashiko.dev/#/patchset/20260414082004.3756080-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-13-dhowells@redhat.com cc: Paulo Alcantara cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_write.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index b6ecd059dc4f..278aeb074e75 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -149,6 +149,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, } do { + enum netfs_folio_trace trace; struct netfs_folio *finfo; struct netfs_group *group; unsigned long long fpos; @@ -222,7 +223,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(copied == 0)) goto copy_failed; netfs_set_group(folio, netfs_group); - trace_netfs_folio(folio, netfs_folio_is_uptodate); + trace = netfs_folio_is_uptodate; goto copied; } @@ -238,7 +239,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, folio_zero_segment(folio, offset + copied, flen); __netfs_set_group(folio, netfs_group); folio_mark_uptodate(folio); - trace_netfs_folio(folio, netfs_modify_and_clear); + trace = netfs_modify_and_clear; goto copied; } @@ -256,7 +257,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, } __netfs_set_group(folio, netfs_group); folio_mark_uptodate(folio); - trace_netfs_folio(folio, netfs_whole_folio_modify); + trace = netfs_whole_folio_modify; goto copied; } @@ -283,7 +284,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(copied == 0)) goto copy_failed; netfs_set_group(folio, netfs_group); - trace_netfs_folio(folio, netfs_just_prefetch); + trace = netfs_just_prefetch; goto copied; } @@ -297,7 +298,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, if (offset == 0 && copied == flen) { __netfs_set_group(folio, netfs_group); folio_mark_uptodate(folio); - trace_netfs_folio(folio, netfs_streaming_filled_page); + trace = netfs_streaming_filled_page; goto copied; } @@ -312,7 +313,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, finfo->dirty_len = copied; folio_attach_private(folio, (void *)((unsigned long)finfo | NETFS_FOLIO_INFO)); - trace_netfs_folio(folio, netfs_streaming_write); + trace = netfs_streaming_write; goto copied; } @@ -332,9 +333,9 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, folio_detach_private(folio); folio_mark_uptodate(folio); kfree(finfo); - trace_netfs_folio(folio, netfs_streaming_cont_filled_page); + trace = netfs_streaming_cont_filled_page; } else { - trace_netfs_folio(folio, netfs_streaming_write_cont); + trace = netfs_streaming_write_cont; } goto copied; } @@ -350,6 +351,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, continue; copied: + trace_netfs_folio(folio, trace); flush_dcache_folio(folio); /* Update the inode size if we moved the EOF marker */ -- cgit v1.2.3 From 7b4dcf1b9455a6e52ac7478b4057dbe10359576d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:50 +0100 Subject: netfs: Fix streaming write being overwritten In order to avoid reading whilst writing, netfslib will allow "streaming writes" in which dirty data is stored directly into folios without reading them first. Such folios are marked dirty but may not be marked uptodate. If a folio is entirely written by a streaming write, uptodate will be set, otherwise it will have a netfs_folio struct attached to ->private recording the dirty region. In the event that a partially written streaming write page is to be overwritten entirely by a single write(), netfs_perform_write() will try to copy over it, but doesn't discard the netfs_folio if it succeeds; further, it doesn't correctly handle a partial copy that overwrites some of the dirty data. Fix this by the following: (1) If the folio is successfully overwritten, free the netfs_folio struct before marking the page uptodate. (2) If the copy to the folio partially fails, but short of the dirty data, just ignore the copy. (3) If the copy partially fails and overwrites some of the dirty data, accept the copy, update the netfs_folio struct to record the new data. If the folio is now filled, free the netfs_folio and set uptodate, otherwise return a partial write. Found with: fsx -q -N 1000000 -p 10000 -o 128000 -l 600000 \ /xfstest.test/junk --replay-ops=junk.fsxops using the following as junk.fsxops: truncate 0x0 0 0x927c0 write 0x63fb8 0x53c8 0 copy_range 0xb704 0x19b9 0x24429 0x79380 write 0x2402b 0x144a2 0x90660 * write 0x204d5 0x140a0 0x927c0 * copy_range 0x1f72c 0x137d0 0x7a906 0x927c0 * read 0x00000 0x20000 0x9157c read 0x20000 0x20000 0x9157c read 0x40000 0x20000 0x9157c read 0x60000 0x20000 0x9157c read 0x7e1a0 0xcfb9 0x9157c on cifs with the default cache option. It shows folio 0x24 misbehaving if the FMODE_READ check is commented out in netfs_perform_write(): if (//(file->f_mode & FMODE_READ) || netfs_is_cache_enabled(ctx)) { and no fscache. This was initially found with the generic/522 xfstest. Fixes: 8f52de0077ba ("netfs: Reduce number of conditional branches in netfs_perform_write()") Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-14-dhowells@redhat.com cc: Paulo Alcantara cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_write.c | 47 ++++++++++++++++++++++++++++++++------------ include/trace/events/netfs.h | 3 +++ 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 278aeb074e75..991552724868 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -246,18 +246,38 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, /* See if we can write a whole folio in one go. */ if (!maybe_trouble && offset == 0 && part >= flen) { copied = copy_folio_from_iter_atomic(folio, offset, part, iter); - if (unlikely(copied == 0)) + if (likely(copied == part)) { + if (finfo) { + trace = netfs_whole_folio_modify_filled; + goto folio_now_filled; + } + __netfs_set_group(folio, netfs_group); + folio_mark_uptodate(folio); + trace = netfs_whole_folio_modify; + goto copied; + } + if (copied == 0) goto copy_failed; - if (unlikely(copied < part)) { + if (!finfo || copied <= finfo->dirty_offset) { maybe_trouble = true; iov_iter_revert(iter, copied); copied = 0; folio_unlock(folio); goto retry; } - __netfs_set_group(folio, netfs_group); - folio_mark_uptodate(folio); - trace = netfs_whole_folio_modify; + + /* We overwrote some existing dirty data, so we have to + * accept the partial write. + */ + finfo->dirty_len += finfo->dirty_offset; + if (finfo->dirty_len == flen) { + trace = netfs_whole_folio_modify_filled_efault; + goto folio_now_filled; + } + if (copied > finfo->dirty_len) + finfo->dirty_len = copied; + finfo->dirty_offset = 0; + trace = netfs_whole_folio_modify_efault; goto copied; } @@ -327,16 +347,10 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, goto copy_failed; finfo->dirty_len += copied; if (finfo->dirty_offset == 0 && finfo->dirty_len == flen) { - if (finfo->netfs_group) - folio_change_private(folio, finfo->netfs_group); - else - folio_detach_private(folio); - folio_mark_uptodate(folio); - kfree(finfo); trace = netfs_streaming_cont_filled_page; - } else { - trace = netfs_streaming_write_cont; + goto folio_now_filled; } + trace = netfs_streaming_write_cont; goto copied; } @@ -350,6 +364,13 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, goto out; continue; + folio_now_filled: + if (finfo->netfs_group) + folio_change_private(folio, finfo->netfs_group); + else + folio_detach_private(folio); + folio_mark_uptodate(folio); + kfree(finfo); copied: trace_netfs_folio(folio, trace); flush_dcache_folio(folio); diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 0b702f74aefe..aa9940ba307b 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -177,6 +177,9 @@ EM(netfs_folio_is_uptodate, "mod-uptodate") \ EM(netfs_just_prefetch, "mod-prefetch") \ EM(netfs_whole_folio_modify, "mod-whole-f") \ + EM(netfs_whole_folio_modify_efault, "mod-whole-f!") \ + EM(netfs_whole_folio_modify_filled, "mod-whole-f+") \ + EM(netfs_whole_folio_modify_filled_efault, "mod-whole-f+!") \ EM(netfs_modify_and_clear, "mod-n-clear") \ EM(netfs_streaming_write, "mod-streamw") \ EM(netfs_streaming_write_cont, "mod-streamw+") \ -- cgit v1.2.3 From b6a4ae1634b3ad2aaa05222e53d36da532852faf Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:51 +0100 Subject: netfs: Fix potential deadlock in write-through mode Fix netfs_advance_writethrough() to always unlock the supplied folio and to mark it dirty if it isn't yet written to the end. Unfortunately, it can't be marked for writeback until the folio is done with as that may cause a deadlock against mmapped reads and writes. Even though it has been marked dirty, premature writeback can't occur as the caller is holding both inode->i_rwsem (which will prevent concurrent truncation, fallocation, DIO and other writes) and ictx->wb_lock (which will cause flushing to wait and writeback to skip or wait). Note that this may be easier to deal with once the queuing of folios is split from the generation of subrequests. Fixes: 288ace2f57c9 ("netfs: New writeback implementation") Closes: https://sashiko.dev/#/patchset/20260427154639.180684-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-15-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/write_issue.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index b0e9690bb90c..03961622996b 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -414,12 +414,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, if (streamw) netfs_issue_write(wreq, cache); - /* Flip the page to the writeback state and unlock. If we're called - * from write-through, then the page has already been put into the wb - * state. - */ - if (wreq->origin == NETFS_WRITEBACK) - folio_start_writeback(folio); + folio_start_writeback(folio); folio_unlock(folio); if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) { @@ -647,29 +642,41 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c struct folio *folio, size_t copied, bool to_page_end, struct folio **writethrough_cache) { + int ret; + _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u", wreq->debug_id, wreq->buffer.iter.count, wreq->wsize, copied, to_page_end); - if (!*writethrough_cache) { - if (folio_test_dirty(folio)) - /* Sigh. mmap. */ - folio_clear_dirty_for_io(folio); + /* The folio is locked. */ + if (*writethrough_cache != folio) { + if (*writethrough_cache) { + /* Did the folio get moved? */ + folio_put(*writethrough_cache); + *writethrough_cache = NULL; + } /* We can make multiple writes to the folio... */ - folio_start_writeback(folio); if (wreq->len == 0) trace_netfs_folio(folio, netfs_folio_trace_wthru); else trace_netfs_folio(folio, netfs_folio_trace_wthru_plus); *writethrough_cache = folio; + folio_get(folio); } wreq->len += copied; - if (!to_page_end) + + if (!to_page_end) { + folio_mark_dirty(folio); + folio_unlock(folio); return 0; + } + ret = netfs_write_folio(wreq, wbc, folio); + folio_put(*writethrough_cache); *writethrough_cache = NULL; - return netfs_write_folio(wreq, wbc, folio); + wreq->submitted = wreq->len; + return ret; } /* @@ -683,8 +690,12 @@ ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_c _enter("R=%x", wreq->debug_id); - if (writethrough_cache) + if (writethrough_cache) { + folio_lock(writethrough_cache); netfs_write_folio(wreq, wbc, writethrough_cache); + folio_put(writethrough_cache); + wreq->submitted = wreq->len; + } netfs_end_issue_write(wreq); -- cgit v1.2.3 From a41168aef634356a9b87ec44349e3c82835700a5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:52 +0100 Subject: netfs: Fix read-gaps to remove netfs_folio from filled folio Fix netfs_read_gaps() to remove the netfs_folio record from the folio record before marking the folio uptodate if it successfully fills the gaps around the dirty data in a streaming write folio (dirty, but not uptodate). Found with: fsx -q -N 1000000 -p 10000 -o 128000 -l 600000 \ /xfstest.test/junk --replay-ops=junk.fsxops using the following as junk.fsxops: truncate 0x0 0x138b1 0x8b15d * write 0x507ee 0x10df7 0x927c0 write 0x19993 0x10e04 0x927c0 * mapwrite 0x66214 0x1a253 0x927c0 copy_range 0xb704 0x89b9 0x24429 0x79380 write 0x2402b 0x144a2 0x90660 * mapwrite 0x204d5 0x140a0 0x927c0 * copy_range 0x1f72c 0x137d0 0x7a906 0x927c0 * read 0 0x9157c 0x9157c on cifs with the default cache option. It shows folio 0x24 misbehaving if the FMODE_READ check is commented out in netfs_perform_write(): if (//(file->f_mode & FMODE_READ) || netfs_is_cache_enabled(ctx)) { and no fscache. This was initially found with the generic/522 xfstest. Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-16-dhowells@redhat.com Fixes: ee4cdf7ba857 ("netfs: Speed up buffered reading") cc: Paulo Alcantara cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_read.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index ebd84a6cc3f0..51f844bfbdff 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -395,6 +395,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio) { struct netfs_io_request *rreq; struct address_space *mapping = folio->mapping; + struct netfs_group *group = netfs_folio_group(folio); struct netfs_folio *finfo = netfs_folio_info(folio); struct netfs_inode *ctx = netfs_inode(mapping->host); struct folio *sink = NULL; @@ -461,6 +462,12 @@ static int netfs_read_gaps(struct file *file, struct folio *folio) ret = netfs_wait_for_read(rreq); if (ret >= 0) { + if (group) + folio_change_private(folio, group); + else + folio_detach_private(folio); + kfree(finfo); + trace_netfs_folio(folio, netfs_folio_trace_filled_gaps); flush_dcache_folio(folio); folio_mark_uptodate(folio); } @@ -496,10 +503,8 @@ int netfs_read_folio(struct file *file, struct folio *folio) struct netfs_inode *ctx = netfs_inode(mapping->host); int ret; - if (folio_test_dirty(folio)) { - trace_netfs_folio(folio, netfs_folio_trace_read_gaps); + if (folio_test_dirty(folio)) return netfs_read_gaps(file, folio); - } _enter("%lx", folio->index); -- cgit v1.2.3 From 70a7b9193bbbfceaab5974de66834c64ccc875dd Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:53 +0100 Subject: netfs: Fix write streaming disablement if fd open O_RDWR In netfs_perform_write(), "write streaming" (the caching of dirty data in dirty but !uptodate folios) is performed to avoid the need to read data that is just going to get immediately overwritten. However, this is/will be disabled in three circumstances: if the fd is open O_RDWR, if fscache is in use (as we need to round out the blocks for DIO) or if content encryption is enabled (again for rounding out purposes). The idea behind disabling it if the fd is open O_RDWR is that we'd need to flush the write-streaming page before we could read the data, particularly through mmap. But netfs now fills in the gaps if ->read_folio() is called on the page, so that is unnecessary. Further, this doesn't actually work if a separate fd is open for reading. Fix this by removing the check for O_RDWR, thereby allowing streaming writes even when we might read. This caused a number of problems with the generic/522 xfstest, but those are now fixed. Fixes: c38f4e96e605 ("netfs: Provide func to copy data to pagecache for buffered write") Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-17-dhowells@redhat.com cc: Paulo Alcantara cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_write.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 991552724868..f79fb5996540 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -203,11 +203,11 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, } /* Decide how we should modify a folio. We might be attempting - * to do write-streaming, in which case we don't want to a - * local RMW cycle if we can avoid it. If we're doing local - * caching or content crypto, we award that priority over - * avoiding RMW. If the file is open readably, then we also - * assume that we may want to read what we wrote. + * to do write-streaming, as we don't want to a local RMW cycle + * if we can avoid it. If we're doing local caching or content + * crypto, we award that priority over avoiding RMW. If the + * file is open readably, then we let ->read_folio() fill in + * the gaps. */ finfo = netfs_folio_info(folio); group = netfs_folio_group(folio); @@ -283,12 +283,9 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, /* We don't want to do a streaming write on a file that loses * caching service temporarily because the backing store got - * culled and we don't really want to get a streaming write on - * a file that's open for reading as ->read_folio() then has to - * be able to flush it. + * culled. */ - if ((file->f_mode & FMODE_READ) || - netfs_is_cache_enabled(ctx)) { + if (netfs_is_cache_enabled(ctx)) { if (finfo) { netfs_stat(&netfs_n_wh_wstream_conflict); goto flush_content; -- cgit v1.2.3 From 3e5dd91b87a8b1450217b56a336bee315f40da7d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:54 +0100 Subject: netfs: Fix early put of sink folio in netfs_read_gaps() Fix netfs_read_gaps() to release the sink page it uses after waiting for the request to complete. The way the sink page is used is that an ITER_BVEC-class iterator is created that has the gaps from the target folio at either end, but has the sink page tiled over the middle so that a single read op can fill in both gaps. The bug was found by KASAN detecting a UAF on the generic/075 xfstest in the cifsd kernel thread that handles reception of data from the TCP socket: BUG: KASAN: use-after-free in _copy_to_iter+0x48a/0xa20 Write of size 885 at addr ffff888107f92000 by task cifsd/1285 CPU: 2 UID: 0 PID: 1285 Comm: cifsd Not tainted 7.0.0 #6 PREEMPT(lazy) Call Trace: dump_stack_lvl+0x5d/0x80 print_report+0x17f/0x4f1 kasan_report+0x100/0x1e0 kasan_check_range+0x10f/0x1e0 __asan_memcpy+0x3c/0x60 _copy_to_iter+0x48a/0xa20 __skb_datagram_iter+0x2c9/0x430 skb_copy_datagram_iter+0x6e/0x160 tcp_recvmsg_locked+0xce0/0x1130 tcp_recvmsg+0xeb/0x300 inet_recvmsg+0xcf/0x3a0 sock_recvmsg+0xea/0x100 cifs_readv_from_socket+0x3a6/0x4d0 [cifs] cifs_read_iter_from_socket+0xdd/0x130 [cifs] cifs_readv_receive+0xaad/0xb10 [cifs] cifs_demultiplex_thread+0x1148/0x1740 [cifs] kthread+0x1cf/0x210 Fixes: ee4cdf7ba857 ("netfs: Speed up buffered reading") Reported-by: Steve French Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-18-dhowells@redhat.com Reviewed-by: Paulo Alcantara (Red Hat) cc: Paulo Alcantara cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_read.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 51f844bfbdff..e7ad511e494c 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -457,9 +457,6 @@ static int netfs_read_gaps(struct file *file, struct folio *folio) netfs_read_to_pagecache(rreq, NULL); - if (sink) - folio_put(sink); - ret = netfs_wait_for_read(rreq); if (ret >= 0) { if (group) @@ -471,6 +468,9 @@ static int netfs_read_gaps(struct file *file, struct folio *folio) flush_dcache_folio(folio); folio_mark_uptodate(folio); } + + if (sink) + folio_put(sink); folio_unlock(folio); netfs_put_request(rreq, netfs_rreq_trace_put_return); return ret < 0 ? ret : 0; -- cgit v1.2.3 From 5046a34f0643441f05b0253ea64e1a3af87efe14 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:55 +0100 Subject: netfs: Fix leak of request in netfs_write_begin() error handling Fix netfs_write_begin() to not leak our ref on the request in the event that we get an error from netfs_wait_for_read(). Fixes: 4090b31422a6 ("netfs: Add a function to consolidate beginning a read") Closes: https://sashiko.dev/#/patchset/20260414082004.3756080-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-19-dhowells@redhat.com cc: Paulo Alcantara cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_read.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index e7ad511e494c..004d426c02b4 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -687,9 +687,9 @@ retry: netfs_read_to_pagecache(rreq, NULL); ret = netfs_wait_for_read(rreq); + netfs_put_request(rreq, netfs_rreq_trace_put_return); if (ret < 0) goto error; - netfs_put_request(rreq, netfs_rreq_trace_put_return); have_folio: ret = folio_wait_private_2_killable(folio); -- cgit v1.2.3 From dbe556972100fabb8e5a1b3d2163831ff07b1e8e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:56 +0100 Subject: netfs: Fix potential UAF in netfs_unlock_abandoned_read_pages() netfs_unlock_abandoned_read_pages(rreq) accesses the index of the folios it is wanting to unlock and compares that to rreq->no_unlock_folio so that it doesn't unlock a folio being read for netfs_perform_write() or netfs_write_begin(). However, given that netfs_unlock_abandoned_read_pages() is called _after_ NETFS_RREQ_IN_PROGRESS is cleared, the one folio that it's not allowed to dereference is the one specified by ->no_unlock_folio as ownership immediately reverts to the caller. Fix this by storing the folio pointer instead and using that rather than the index. Also fix netfs_unlock_read_folio() where the same applies. Fixes: ee4cdf7ba857 ("netfs: Speed up buffered reading") Closes: https://sashiko.dev/#/patchset/20260414082004.3756080-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-20-dhowells@redhat.com cc: Paulo Alcantara cc: Viacheslav Dubeyko cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_read.c | 4 ++-- fs/netfs/read_collect.c | 2 +- fs/netfs/read_retry.c | 2 +- include/linux/netfs.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 004d426c02b4..83d0b8153e96 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -670,7 +670,7 @@ retry: ret = PTR_ERR(rreq); goto error; } - rreq->no_unlock_folio = folio->index; + rreq->no_unlock_folio = folio; __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); ret = netfs_begin_cache_read(rreq, ctx); @@ -736,7 +736,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, goto error; } - rreq->no_unlock_folio = folio->index; + rreq->no_unlock_folio = folio; __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); ret = netfs_begin_cache_read(rreq, ctx); if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 3c9b847885c2..23660a590124 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -83,7 +83,7 @@ static void netfs_unlock_read_folio(struct netfs_io_request *rreq, } just_unlock: - if (folio->index == rreq->no_unlock_folio && + if (folio == rreq->no_unlock_folio && test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) { _debug("no unlock"); } else { diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index e10eb5a07332..f59a70f3a086 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c @@ -292,7 +292,7 @@ void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq) struct folio *folio = folioq_folio(p, slot); if (folio && !folioq_is_marked2(p, slot)) { - if (folio->index == rreq->no_unlock_folio && + if (folio == rreq->no_unlock_folio && test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) { _debug("no unlock"); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 4fd1d796ad73..243c0f737938 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -252,7 +252,7 @@ struct netfs_io_request { unsigned long long collected_to; /* Point we've collected to */ unsigned long long cleaned_to; /* Position we've cleaned folios to */ unsigned long long abandon_to; /* Position to abandon folios to */ - pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ + const struct folio *no_unlock_folio; /* Don't unlock this folio after read */ unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ unsigned int debug_id; unsigned int rsize; /* Maximum read size (0 for none) */ -- cgit v1.2.3 From 6d91acc7fb85d33ea58fca9b964a32a453937f4b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:57 +0100 Subject: netfs: Fix partial invalidation of streaming-write folio In netfs_invalidate_folio(), if the region of a partial invalidation overlaps the front (but not all) of a dirty write cached in a streaming write page (dirty, but not uptodate, with the dirty region tracked by a netfs_folio struct), the function modifies the dirty region - but incorrectly as it moves the region forward by setting the start to the start, not the end, of the invalidation region. Fix this by setting finfo->dirty_offset to the end of the invalidation region (iend). Fixes: cce6bfa6ca0e ("netfs: Fix trimming of streaming-write folios in netfs_inval_folio()") Closes: https://sashiko.dev/#/patchset/20260414082004.3756080-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-21-dhowells@redhat.com cc: Paulo Alcantara cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 24b20e80e9a8..5d554512ed23 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -262,7 +262,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) goto erase_completely; /* Move the start of the data. */ finfo->dirty_len = fend - iend; - finfo->dirty_offset = offset; + finfo->dirty_offset = iend; trace_netfs_folio(folio, netfs_folio_trace_invalidate_front); return; } -- cgit v1.2.3 From ccde2ac757c713535b224233a296de40efe5212d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:58 +0100 Subject: netfs: Fix folio->private handling in netfs_perform_write() Under some circumstances, netfs_perform_write() doesn't correctly manipulate folio->private between NULL, NETFS_FOLIO_COPY_TO_CACHE, pointing to a group and pointing to a netfs_folio struct, leading to potential multiple attachments of private data with associated folio ref leaks and also leaks of netfs_folio structs or netfs_group refs. Fix this by consolidating the place at which a folio is marked uptodate in one place and having that look at what's attached to folio->private and decide how to clean it up and then set the new group. Also, the content shouldn't be flushed if group is NULL, even if a group is specified in the netfs_group parameter, as that would be the case for a new folio. A filesystem should always specify netfs_group or never specify netfs_group. The Sashiko auto-review tool noted that it was theoretically possible that the fpos >= ctx->zero_point section might leak if it modified a streaming write folio. This is unlikely, but with a network filesystem, third party changes can happen. It also pointed out that __netfs_set_group() would leak if called multiple times on the same folio from the "whole folio modify section". Fixes: 8f52de0077ba ("netfs: Reduce number of conditional branches in netfs_perform_write()") Closes: https://sashiko.dev/#/patchset/20260414082004.3756080-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-22-dhowells@redhat.com cc: Paulo Alcantara cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_write.c | 134 ++++++++++++++++++++++++++----------------- include/trace/events/netfs.h | 1 + 2 files changed, 82 insertions(+), 53 deletions(-) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index f79fb5996540..6bde3320bcec 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -12,24 +12,6 @@ #include #include "internal.h" -static void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group) -{ - if (netfs_group) - folio_attach_private(folio, netfs_get_group(netfs_group)); -} - -static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group) -{ - void *priv = folio_get_private(folio); - - if (unlikely(priv != netfs_group)) { - if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE)) - folio_attach_private(folio, netfs_get_group(netfs_group)); - else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE) - folio_detach_private(folio); - } -} - /* * Grab a folio for writing and lock it. Attempt to allocate as large a folio * as possible to hold as much of the remaining length as possible in one go. @@ -157,6 +139,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, size_t offset; /* Offset into pagecache folio */ size_t part; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ + void *priv; offset = pos & (max_chunk - 1); part = min(max_chunk - offset, iov_iter_count(iter)); @@ -202,6 +185,25 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, goto error_folio_unlock; } + finfo = netfs_folio_info(folio); + group = netfs_folio_group(folio); + + /* If the requested group differs from the group set on the + * page, then we need to flush out the folio if it has a group + * set (ie. is non-NULL). Note that COPY_TO_CACHE is a special + * case, being a netfs annotation rather than an actual group. + * + * The filesystem isn't permitted to mix writes with groups and + * writes without groups as the NULL group is used to indicate + * that no group is set. + */ + if (unlikely(group != netfs_group) && + group != NETFS_FOLIO_COPY_TO_CACHE && + group) { + WARN_ON_ONCE(!netfs_group); + goto flush_content; + } + /* Decide how we should modify a folio. We might be attempting * to do write-streaming, as we don't want to a local RMW cycle * if we can avoid it. If we're doing local caching or content @@ -209,22 +211,14 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, * file is open readably, then we let ->read_folio() fill in * the gaps. */ - finfo = netfs_folio_info(folio); - group = netfs_folio_group(folio); - - if (unlikely(group != netfs_group) && - group != NETFS_FOLIO_COPY_TO_CACHE) - goto flush_content; - if (folio_test_uptodate(folio)) { if (mapping_writably_mapped(mapping)) flush_dcache_folio(folio); copied = copy_folio_from_iter_atomic(folio, offset, part, iter); if (unlikely(copied == 0)) goto copy_failed; - netfs_set_group(folio, netfs_group); trace = netfs_folio_is_uptodate; - goto copied; + goto copied_uptodate; } /* If the page is above the zero-point then we assume that the @@ -237,24 +231,22 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(copied == 0)) goto copy_failed; folio_zero_segment(folio, offset + copied, flen); - __netfs_set_group(folio, netfs_group); - folio_mark_uptodate(folio); - trace = netfs_modify_and_clear; - goto copied; + if (finfo) + trace = netfs_modify_and_clear_rm_finfo; + else + trace = netfs_modify_and_clear; + goto mark_uptodate; } /* See if we can write a whole folio in one go. */ if (!maybe_trouble && offset == 0 && part >= flen) { copied = copy_folio_from_iter_atomic(folio, offset, part, iter); if (likely(copied == part)) { - if (finfo) { + if (finfo) trace = netfs_whole_folio_modify_filled; - goto folio_now_filled; - } - __netfs_set_group(folio, netfs_group); - folio_mark_uptodate(folio); - trace = netfs_whole_folio_modify; - goto copied; + else + trace = netfs_whole_folio_modify; + goto mark_uptodate; } if (copied == 0) goto copy_failed; @@ -272,7 +264,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, finfo->dirty_len += finfo->dirty_offset; if (finfo->dirty_len == flen) { trace = netfs_whole_folio_modify_filled_efault; - goto folio_now_filled; + goto mark_uptodate; } if (copied > finfo->dirty_len) finfo->dirty_len = copied; @@ -300,11 +292,11 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, copied = copy_folio_from_iter_atomic(folio, offset, part, iter); if (unlikely(copied == 0)) goto copy_failed; - netfs_set_group(folio, netfs_group); trace = netfs_just_prefetch; - goto copied; + goto copied_uptodate; } + /* Do a streaming write on a folio that has nothing in it yet. */ if (!finfo) { ret = -EIO; if (WARN_ON(folio_get_private(folio))) @@ -313,10 +305,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(copied == 0)) goto copy_failed; if (offset == 0 && copied == flen) { - __netfs_set_group(folio, netfs_group); - folio_mark_uptodate(folio); trace = netfs_streaming_filled_page; - goto copied; + goto mark_uptodate; } finfo = kzalloc_obj(*finfo); @@ -345,7 +335,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, finfo->dirty_len += copied; if (finfo->dirty_offset == 0 && finfo->dirty_len == flen) { trace = netfs_streaming_cont_filled_page; - goto folio_now_filled; + goto mark_uptodate; } trace = netfs_streaming_write_cont; goto copied; @@ -361,13 +351,36 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, goto out; continue; - folio_now_filled: - if (finfo->netfs_group) - folio_change_private(folio, finfo->netfs_group); - else - folio_detach_private(folio); + /* Mark a folio as being up to data when we've filled it + * completely. If the folio has a group attached, then it must + * be the same group, otherwise we should have flushed it out + * above. We have to get rid of the netfs_folio struct if + * there was one. + */ + mark_uptodate: folio_mark_uptodate(folio); - kfree(finfo); + + copied_uptodate: + priv = folio_get_private(folio); + if (likely(priv == netfs_group)) { + /* Already set correctly; no change required. */ + } else if (priv == NETFS_FOLIO_COPY_TO_CACHE) { + if (!netfs_group) + folio_detach_private(folio); + else + folio_change_private(folio, netfs_get_group(netfs_group)); + } else if (!priv) { + folio_attach_private(folio, netfs_get_group(netfs_group)); + } else { + WARN_ON_ONCE(!finfo); + if (netfs_group) + /* finfo->netfs_group has a ref */ + folio_change_private(folio, netfs_group); + else + folio_detach_private(folio); + kfree(finfo); + } + copied: trace_netfs_folio(folio, trace); flush_dcache_folio(folio); @@ -530,6 +543,7 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr struct inode *inode = file_inode(file); struct netfs_inode *ictx = netfs_inode(inode); vm_fault_t ret = VM_FAULT_NOPAGE; + void *priv; int err; _enter("%lx", folio->index); @@ -550,7 +564,9 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr } group = netfs_folio_group(folio); - if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) { + if (group && + group != netfs_group && + group != NETFS_FOLIO_COPY_TO_CACHE) { folio_unlock(folio); err = filemap_fdatawrite_range(mapping, folio_pos(folio), @@ -572,7 +588,19 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr trace_netfs_folio(folio, netfs_folio_trace_mkwrite_plus); else trace_netfs_folio(folio, netfs_folio_trace_mkwrite); - netfs_set_group(folio, netfs_group); + + priv = folio_get_private(folio); + if (priv != netfs_group) { + if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE) + folio_detach_private(folio); + else if (netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE) + folio_change_private(folio, netfs_get_group(netfs_group)); + else if (netfs_group && !priv) + folio_attach_private(folio, netfs_get_group(netfs_group)); + else + WARN_ON_ONCE(1); + } + file_update_time(file); set_bit(NETFS_ICTX_MODIFIED_ATTR, &ictx->flags); if (ictx->ops->post_modify) diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index aa9940ba307b..082cb03c6131 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -181,6 +181,7 @@ EM(netfs_whole_folio_modify_filled, "mod-whole-f+") \ EM(netfs_whole_folio_modify_filled_efault, "mod-whole-f+!") \ EM(netfs_modify_and_clear, "mod-n-clear") \ + EM(netfs_modify_and_clear_rm_finfo, "mod-n-clear+") \ EM(netfs_streaming_write, "mod-streamw") \ EM(netfs_streaming_write_cont, "mod-streamw+") \ EM(netfs_flush_content, "flush") \ -- cgit v1.2.3 From ded0c6f1606061148c202825f7e53d711f9f84cf Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:33:59 +0100 Subject: netfs: Fix netfs_read_folio() to wait on writeback Fix netfs_read_folio() to wait for an ongoing writeback to complete so that it can trust the dirty flag and whatever is attached to folio->private (folio->private may get cleaned up by the collector before it clears the writeback flag). Fixes: ee4cdf7ba857 ("netfs: Speed up buffered reading") Closes: https://sashiko.dev/#/patchset/20260414082004.3756080-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-23-dhowells@redhat.com cc: Paulo Alcantara cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_read.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 83d0b8153e96..76d0f6a29aba 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -503,6 +503,8 @@ int netfs_read_folio(struct file *file, struct folio *folio) struct netfs_inode *ctx = netfs_inode(mapping->host); int ret; + folio_wait_writeback(folio); + if (folio_test_dirty(folio)) return netfs_read_gaps(file, folio); -- cgit v1.2.3 From 9871938f99cc6cb266a77265491660e2375271f5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:34:00 +0100 Subject: netfs, afs: Fix write skipping in dir/link writepages Fix netfs_write_single() and afs_single_writepages() to better handle a write that would be skipped due to lock contention and WB_SYNC_NONE by returning 1 from netfs_write_single() if it skipped and making afs_single_writepages() skip also. If a skip occurs, the inode must be re-marked as the VFS may have cleared the mark. This is really only theoretical for directories in netfs_write_single() as the only path to that is through afs_single_writepages() that takes the ->validate_lock around it, thereby serialising it. Fixes: 6dd80936618c ("afs: Use netfslib for directories") Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-24-dhowells@redhat.com cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/afs/dir.c | 11 ++++++++++- fs/netfs/write_issue.c | 7 ++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index aaaa55878ffd..d1542a1a50bf 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -2206,7 +2206,14 @@ int afs_single_writepages(struct address_space *mapping, /* Need to lock to prevent the folio queue and folios from being thrown * away. */ - down_read(&dvnode->validate_lock); + if (!down_read_trylock(&dvnode->validate_lock)) { + if (wbc->sync_mode == WB_SYNC_NONE) { + /* The VFS will have undirtied the inode. */ + netfs_single_mark_inode_dirty(&dvnode->netfs.inode); + return 0; + } + down_read(&dvnode->validate_lock); + } if (is_dir ? test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) : @@ -2214,6 +2221,8 @@ int afs_single_writepages(struct address_space *mapping, iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size_read(&dvnode->netfs.inode)); ret = netfs_writeback_single(mapping, wbc, &iter); + if (ret == 1) + ret = 0; /* Skipped write due to lock conflict. */ } up_read(&dvnode->validate_lock); diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 03961622996b..c03c7cc45e47 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -830,6 +830,9 @@ static int netfs_write_folio_single(struct netfs_io_request *wreq, * * Write a monolithic, non-pagecache object back to the server and/or * the cache. + * + * Return: 0 if successful; 1 if skipped due to lock conflict and WB_SYNC_NONE; + * or a negative error code. */ int netfs_writeback_single(struct address_space *mapping, struct writeback_control *wbc, @@ -846,8 +849,10 @@ int netfs_writeback_single(struct address_space *mapping, if (!mutex_trylock(&ictx->wb_lock)) { if (wbc->sync_mode == WB_SYNC_NONE) { + /* The VFS will have undirtied the inode. */ + netfs_single_mark_inode_dirty(&ictx->inode); netfs_stat(&netfs_n_wb_lock_skip); - return 0; + return 1; } netfs_stat(&netfs_n_wb_lock_wait); mutex_lock(&ictx->wb_lock); -- cgit v1.2.3 From c0410adf3da6db46f3513411fcf95e63c2f1d1ad Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2026 13:34:01 +0100 Subject: afs: Fix the locking used by afs_get_link() The afs filesystem in the kernel doesn't do locking correctly for symbolic links. There are a number of problems: (1) It doesn't do any locking around afs_read_single() to prevent races between multiple ->get_link() calls, thereby allowing the possibility of leaks. (2) It doesn't use RCU barriering when accessing the buffer pointers during RCU pathwalk. (3) It can race with another thread updating the contents of the symlink if a third party updated it on the server. Fix this by the following means: (0) Move symlink handling into its own file as this makes it more complicated. (1) Take the validate_lock around afs_read_single() to prevent races between multiple ->get_link() calls. (2) Keep a separate copy of the symlink contents with an rcu_head. This is always going to be a lot smaller than a page, so it can be kmalloc'd and save quite a bit of memory. It also needs a refcount for non-RCU pathwalk. (3) Split the symlink read and write-to-cache routines in afs from those for directories. (4) Discard the I/O buffer as soon as the write-to-cache completes as this is a full page (plus a folio_queue). (5) If there's no cache, discard the I/O buffer immediately after reading and copying if there is no cache. Fixes: eae9e78951bb ("afs: Use netfslib for symlinks, allowing them to be cached") Fixes: 6698c02d64b2 ("afs: Locally initialise the contents of a new symlink on creation") Closes: https://sashiko.dev/#/patchset/20260326104544.509518-1-dhowells%40redhat.com Signed-off-by: David Howells Link: https://patch.msgid.link/20260512123404.719402-25-dhowells@redhat.com cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/afs/Makefile | 1 + fs/afs/dir.c | 68 ++++++------- fs/afs/fsclient.c | 4 +- fs/afs/inode.c | 96 +----------------- fs/afs/internal.h | 34 +++++-- fs/afs/symlink.c | 278 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/afs/validation.c | 14 ++- fs/afs/yfsclient.c | 4 +- 8 files changed, 357 insertions(+), 142 deletions(-) create mode 100644 fs/afs/symlink.c diff --git a/fs/afs/Makefile b/fs/afs/Makefile index b49b8fe682f3..0d8f1982d596 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -30,6 +30,7 @@ kafs-y := \ server.o \ server_list.o \ super.o \ + symlink.o \ validation.o \ vlclient.o \ vl_alias.o \ diff --git a/fs/afs/dir.c b/fs/afs/dir.c index d1542a1a50bf..498b99ccdf0e 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -44,6 +44,8 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir, static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); +static int afs_dir_writepages(struct address_space *mapping, + struct writeback_control *wbc); const struct file_operations afs_dir_file_operations = { .open = afs_dir_open, @@ -68,7 +70,7 @@ const struct inode_operations afs_dir_inode_operations = { }; const struct address_space_operations afs_dir_aops = { - .writepages = afs_single_writepages, + .writepages = afs_dir_writepages, }; const struct dentry_operations afs_fs_dentry_operations = { @@ -233,22 +235,13 @@ static ssize_t afs_do_read_single(struct afs_vnode *dvnode, struct file *file) struct iov_iter iter; ssize_t ret; loff_t i_size; - bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) && - !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags)); i_size = i_size_read(&dvnode->netfs.inode); - if (is_dir) { - if (i_size < AFS_DIR_BLOCK_SIZE) - return afs_bad(dvnode, afs_file_error_dir_small); - if (i_size > AFS_DIR_BLOCK_SIZE * 1024) { - trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big); - return -EFBIG; - } - } else { - if (i_size > AFSPATHMAX) { - trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big); - return -EFBIG; - } + if (i_size < AFS_DIR_BLOCK_SIZE) + return afs_bad(dvnode, afs_file_error_dir_small); + if (i_size > AFS_DIR_BLOCK_SIZE * 1024) { + trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big); + return -EFBIG; } /* Expand the storage. TODO: Shrink the storage too. */ @@ -277,24 +270,18 @@ static ssize_t afs_do_read_single(struct afs_vnode *dvnode, struct file *file) * buffer. */ ret = -ESTALE; - } else if (is_dir) { + } else { int ret2 = afs_dir_check(dvnode); if (ret2 < 0) ret = ret2; - } else if (i_size < folioq_folio_size(dvnode->directory, 0)) { - /* NUL-terminate a symlink. */ - char *symlink = kmap_local_folio(folioq_folio(dvnode->directory, 0), 0); - - symlink[i_size] = 0; - kunmap_local(symlink); } } return ret; } -ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file) +static ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file) { ssize_t ret; @@ -1763,13 +1750,20 @@ error: return ret; } +static void afs_symlink_put(struct afs_operation *op) +{ + kfree(op->create.symlink); + op->create.symlink = NULL; + afs_create_put(op); +} + static const struct afs_operation_ops afs_symlink_operation = { .issue_afs_rpc = afs_fs_symlink, .issue_yfs_rpc = yfs_fs_symlink, .success = afs_create_success, .aborted = afs_check_for_remote_deletion, .edit_dir = afs_create_edit_dir, - .put = afs_create_put, + .put = afs_symlink_put, }; /* @@ -1779,7 +1773,9 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *content) { struct afs_operation *op; + struct afs_symlink *symlink; struct afs_vnode *dvnode = AFS_FS_I(dir); + size_t clen = strlen(content); int ret; _enter("{%llx:%llu},{%pd},%s", @@ -1791,12 +1787,20 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir, goto error; ret = -EINVAL; - if (strlen(content) >= AFSPATHMAX) + if (clen >= AFSPATHMAX) + goto error; + + ret = -ENOMEM; + symlink = kmalloc_flex(struct afs_symlink, content, clen + 1, GFP_KERNEL); + if (!symlink) goto error; + refcount_set(&symlink->ref, 1); + memcpy(symlink->content, content, clen + 1); op = afs_alloc_operation(NULL, dvnode->volume); if (IS_ERR(op)) { ret = PTR_ERR(op); + kfree(symlink); goto error; } @@ -1808,7 +1812,7 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir, op->dentry = dentry; op->ops = &afs_symlink_operation; op->create.reason = afs_edit_dir_for_symlink; - op->create.symlink = content; + op->create.symlink = symlink; op->mtime = current_time(dir); ret = afs_do_sync_operation(op); afs_dir_unuse_cookie(dvnode, ret); @@ -2192,15 +2196,13 @@ error: } /* - * Write the file contents to the cache as a single blob. + * Write the directory contents to the cache as a single blob. */ -int afs_single_writepages(struct address_space *mapping, - struct writeback_control *wbc) +static int afs_dir_writepages(struct address_space *mapping, + struct writeback_control *wbc) { struct afs_vnode *dvnode = AFS_FS_I(mapping->host); struct iov_iter iter; - bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) && - !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags)); int ret = 0; /* Need to lock to prevent the folio queue and folios from being thrown @@ -2215,9 +2217,7 @@ int afs_single_writepages(struct address_space *mapping, down_read(&dvnode->validate_lock); } - if (is_dir ? - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) : - atomic64_read(&dvnode->cb_expires_at) != AFS_NO_CB_PROMISE) { + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size_read(&dvnode->netfs.inode)); ret = netfs_writeback_single(mapping, wbc, &iter); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 95494d5f2b8a..a2ffd60889f8 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -886,7 +886,7 @@ void afs_fs_symlink(struct afs_operation *op) namesz = name->len; padsz = (4 - (namesz & 3)) & 3; - c_namesz = strlen(op->create.symlink); + c_namesz = strlen(op->create.symlink->content); c_padsz = (4 - (c_namesz & 3)) & 3; reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4); @@ -910,7 +910,7 @@ void afs_fs_symlink(struct afs_operation *op) bp = (void *) bp + padsz; } *bp++ = htonl(c_namesz); - memcpy(bp, op->create.symlink, c_namesz); + memcpy(bp, op->create.symlink->content, c_namesz); bp = (void *) bp + c_namesz; if (c_padsz > 0) { memset(bp, 0, c_padsz); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 19fe2e392885..3f48458694ba 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -25,96 +25,6 @@ #include "internal.h" #include "afs_fs.h" -void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op) -{ - size_t size = strlen(op->create.symlink) + 1; - size_t dsize = 0; - char *p; - - if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &dsize, size, - mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0) - return; - - vnode->directory_size = dsize; - p = kmap_local_folio(folioq_folio(vnode->directory, 0), 0); - memcpy(p, op->create.symlink, size); - kunmap_local(p); - set_bit(AFS_VNODE_DIR_READ, &vnode->flags); - netfs_single_mark_inode_dirty(&vnode->netfs.inode); -} - -static void afs_put_link(void *arg) -{ - struct folio *folio = virt_to_folio(arg); - - kunmap_local(arg); - folio_put(folio); -} - -const char *afs_get_link(struct dentry *dentry, struct inode *inode, - struct delayed_call *callback) -{ - struct afs_vnode *vnode = AFS_FS_I(inode); - struct folio *folio; - char *content; - ssize_t ret; - - if (!dentry) { - /* RCU pathwalk. */ - if (!test_bit(AFS_VNODE_DIR_READ, &vnode->flags) || !afs_check_validity(vnode)) - return ERR_PTR(-ECHILD); - goto good; - } - - if (test_bit(AFS_VNODE_DIR_READ, &vnode->flags)) - goto fetch; - - ret = afs_validate(vnode, NULL); - if (ret < 0) - return ERR_PTR(ret); - - if (!test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) && - test_bit(AFS_VNODE_DIR_READ, &vnode->flags)) - goto good; - -fetch: - ret = afs_read_single(vnode, NULL); - if (ret < 0) - return ERR_PTR(ret); - set_bit(AFS_VNODE_DIR_READ, &vnode->flags); - -good: - folio = folioq_folio(vnode->directory, 0); - folio_get(folio); - content = kmap_local_folio(folio, 0); - set_delayed_call(callback, afs_put_link, content); - return content; -} - -int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen) -{ - DEFINE_DELAYED_CALL(done); - const char *content; - int len; - - content = afs_get_link(dentry, d_inode(dentry), &done); - if (IS_ERR(content)) { - do_delayed_call(&done); - return PTR_ERR(content); - } - - len = umin(strlen(content), buflen); - if (copy_to_user(buffer, content, len)) - len = -EFAULT; - do_delayed_call(&done); - return len; -} - -static const struct inode_operations afs_symlink_inode_operations = { - .get_link = afs_get_link, - .readlink = afs_readlink, -}; - static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode) { static unsigned long once_only; @@ -214,7 +124,7 @@ static int afs_inode_init_from_status(struct afs_operation *op, inode->i_mode = S_IFLNK | status->mode; inode->i_op = &afs_symlink_inode_operations; } - inode->i_mapping->a_ops = &afs_dir_aops; + inode->i_mapping->a_ops = &afs_symlink_aops; inode_nohighmem(inode); mapping_set_release_always(inode->i_mapping); break; @@ -769,12 +679,14 @@ void afs_evict_inode(struct inode *inode) .range_end = LLONG_MAX, }; - afs_single_writepages(inode->i_mapping, &wbc); + inode->i_mapping->a_ops->writepages(inode->i_mapping, &wbc); } netfs_wait_for_outstanding_io(inode); truncate_inode_pages_final(&inode->i_data); netfs_free_folioq_buffer(vnode->directory); + if (vnode->symlink) + afs_evict_symlink(vnode); afs_set_cache_aux(vnode, &aux); netfs_clear_inode_writeback(inode, &aux); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 816dc848ea71..0b72a8566299 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -710,6 +710,7 @@ struct afs_vnode { #define AFS_VNODE_DIR_READ 11 /* Set if we've read a dir's contents */ struct folio_queue *directory; /* Directory contents */ + struct afs_symlink __rcu *symlink; /* Symlink content */ struct list_head wb_keys; /* List of keys available for writeback */ struct list_head pending_locks; /* locks waiting to be granted */ struct list_head granted_locks; /* locks granted on this file */ @@ -776,6 +777,15 @@ struct afs_permits { struct afs_permit permits[] __counted_by(nr_permits); /* List of permits sorted by key pointer */ }; +/* + * Copy of symlink content for normal use. + */ +struct afs_symlink { + struct rcu_head rcu; + refcount_t ref; + char content[]; +}; + /* * Error prioritisation and accumulation. */ @@ -887,7 +897,7 @@ struct afs_operation { struct { int reason; /* enum afs_edit_dir_reason */ mode_t mode; - const char *symlink; + struct afs_symlink *symlink; } create; struct { bool need_rehash; @@ -1098,13 +1108,10 @@ extern const struct inode_operations afs_dir_inode_operations; extern const struct address_space_operations afs_dir_aops; extern const struct dentry_operations afs_fs_dentry_operations; -ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file); ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file) __acquires(&dvnode->validate_lock); extern void afs_d_release(struct dentry *); extern void afs_check_for_remote_deletion(struct afs_operation *); -int afs_single_writepages(struct address_space *mapping, - struct writeback_control *wbc); /* * dir_edit.c @@ -1247,10 +1254,6 @@ extern void afs_fs_probe_cleanup(struct afs_net *); */ extern const struct afs_operation_ops afs_fetch_status_operation; -void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op); -const char *afs_get_link(struct dentry *dentry, struct inode *inode, - struct delayed_call *callback); -int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen); extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *); extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *); extern int afs_ilookup5_test_by_fid(struct inode *, void *); @@ -1600,6 +1603,21 @@ void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server extern int __init afs_fs_init(void); extern void afs_fs_exit(void); +/* + * symlink.c + */ +extern const struct inode_operations afs_symlink_inode_operations; +extern const struct address_space_operations afs_symlink_aops; + +void afs_invalidate_symlink(struct afs_vnode *vnode); +void afs_evict_symlink(struct afs_vnode *vnode); +void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op); +const char *afs_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback); +int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen); +int afs_symlink_writepages(struct address_space *mapping, + struct writeback_control *wbc); + /* * validation.c */ diff --git a/fs/afs/symlink.c b/fs/afs/symlink.c new file mode 100644 index 000000000000..ed5868369f37 --- /dev/null +++ b/fs/afs/symlink.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS filesystem symbolic link handling + * + * Copyright (C) 2026 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include +#include +#include +#include +#include +#include "internal.h" + +static void afs_put_symlink(struct afs_symlink *symlink) +{ + if (refcount_dec_and_test(&symlink->ref)) + kfree_rcu(symlink, rcu); +} + +static void afs_replace_symlink(struct afs_vnode *vnode, struct afs_symlink *symlink) +{ + struct afs_symlink *old; + + old = rcu_replace_pointer(vnode->symlink, symlink, + lockdep_is_held(&vnode->validate_lock)); + if (old) + afs_put_symlink(old); +} + +/* + * In the event that a third-party update of a symlink occurs, dispose of the + * copy of the old contents. Called under ->validate_lock. + */ +void afs_invalidate_symlink(struct afs_vnode *vnode) +{ + afs_replace_symlink(vnode, NULL); +} + +/* + * Dispose of a symlink copy during inode deletion. + */ +void afs_evict_symlink(struct afs_vnode *vnode) +{ + struct afs_symlink *old; + + old = rcu_replace_pointer(vnode->symlink, NULL, true); + if (old) + afs_put_symlink(old); + +} + +/* + * Set up a locally created symlink inode for immediate write to the cache. + */ +void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op) +{ + struct afs_symlink *symlink = op->create.symlink; + size_t dsize = 0; + size_t size = strlen(symlink->content) + 1; + char *p; + + rcu_assign_pointer(vnode->symlink, symlink); + op->create.symlink = NULL; + + if (!fscache_cookie_enabled(netfs_i_cookie(&vnode->netfs))) + return; + + if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &dsize, size, + mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0) + return; + + vnode->directory_size = dsize; + p = kmap_local_folio(folioq_folio(vnode->directory, 0), 0); + memcpy(p, symlink->content, size); + kunmap_local(p); + netfs_single_mark_inode_dirty(&vnode->netfs.inode); +} + +/* + * Read a symlink in a single download. + */ +static ssize_t afs_do_read_symlink(struct afs_vnode *vnode) +{ + struct afs_symlink *symlink; + struct iov_iter iter; + ssize_t ret; + loff_t i_size; + + i_size = i_size_read(&vnode->netfs.inode); + if (i_size > PAGE_SIZE - 1) { + trace_afs_file_error(vnode, -EFBIG, afs_file_error_dir_big); + return -EFBIG; + } + + if (!vnode->directory) { + size_t cur_size = 0; + + ret = netfs_alloc_folioq_buffer(NULL, + &vnode->directory, &cur_size, PAGE_SIZE, + mapping_gfp_mask(vnode->netfs.inode.i_mapping)); + vnode->directory_size = PAGE_SIZE - 1; + if (ret < 0) + return ret; + } + + iov_iter_folio_queue(&iter, ITER_DEST, vnode->directory, 0, 0, PAGE_SIZE); + + /* AFS requires us to perform the read of a symlink as a single unit to + * avoid issues with the content being changed between reads. + */ + ret = netfs_read_single(&vnode->netfs.inode, NULL, &iter); + if (ret >= 0) { + i_size = ret; + if (i_size > PAGE_SIZE - 1) { + trace_afs_file_error(vnode, -EFBIG, afs_file_error_dir_big); + return -EFBIG; + } + vnode->directory_size = i_size; + + /* Copy the symlink. */ + symlink = kmalloc_flex(struct afs_symlink, content, i_size + 1, + GFP_KERNEL); + if (!symlink) + return -ENOMEM; + + refcount_set(&symlink->ref, 1); + symlink->content[i_size] = 0; + + const char *s = kmap_local_folio(folioq_folio(vnode->directory, 0), 0); + + memcpy(symlink->content, s, i_size); + kunmap_local(s); + + afs_replace_symlink(vnode, symlink); + } + + if (!fscache_cookie_enabled(netfs_i_cookie(&vnode->netfs))) { + netfs_free_folioq_buffer(vnode->directory); + vnode->directory = NULL; + vnode->directory_size = 0; + } + + return ret; +} + +static ssize_t afs_read_symlink(struct afs_vnode *vnode) +{ + ssize_t ret; + + fscache_use_cookie(afs_vnode_cache(vnode), false); + ret = afs_do_read_symlink(vnode); + fscache_unuse_cookie(afs_vnode_cache(vnode), NULL, NULL); + return ret; +} + +static void afs_put_link(void *arg) +{ + afs_put_symlink(arg); +} + +const char *afs_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + struct afs_symlink *symlink; + struct afs_vnode *vnode = AFS_FS_I(inode); + ssize_t ret; + + if (!dentry) { + /* RCU pathwalk. */ + symlink = rcu_dereference(vnode->symlink); + if (!symlink || !afs_check_validity(vnode)) + return ERR_PTR(-ECHILD); + set_delayed_call(callback, NULL, NULL); + return symlink->content; + } + + if (vnode->symlink) { + ret = afs_validate(vnode, NULL); + if (ret < 0) + return ERR_PTR(ret); + + down_read(&vnode->validate_lock); + if (vnode->symlink) + goto good; + up_read(&vnode->validate_lock); + } + + if (down_write_killable(&vnode->validate_lock) < 0) + return ERR_PTR(-ERESTARTSYS); + if (!vnode->symlink) { + ret = afs_read_symlink(vnode); + if (ret < 0) { + up_write(&vnode->validate_lock); + return ERR_PTR(ret); + } + } + + downgrade_write(&vnode->validate_lock); + +good: + symlink = rcu_dereference_protected(vnode->symlink, + lockdep_is_held(&vnode->validate_lock)); + refcount_inc(&symlink->ref); + up_read(&vnode->validate_lock); + + set_delayed_call(callback, afs_put_link, symlink); + return symlink->content; +} + +int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen) +{ + DEFINE_DELAYED_CALL(done); + const char *content; + int len; + + content = afs_get_link(dentry, d_inode(dentry), &done); + if (IS_ERR(content)) { + do_delayed_call(&done); + return PTR_ERR(content); + } + + len = umin(strlen(content), buflen); + if (copy_to_user(buffer, content, len)) + len = -EFAULT; + do_delayed_call(&done); + return len; +} + +/* + * Write the symlink contents to the cache as a single blob. We then throw + * away the page we used to receive it. + */ +int afs_symlink_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + struct iov_iter iter; + int ret = 0; + + if (!down_read_trylock(&vnode->validate_lock)) { + if (wbc->sync_mode == WB_SYNC_NONE) { + /* The VFS will have undirtied the inode. */ + netfs_single_mark_inode_dirty(&vnode->netfs.inode); + return 0; + } + down_read(&vnode->validate_lock); + } + + if (vnode->directory && + atomic64_read(&vnode->cb_expires_at) != AFS_NO_CB_PROMISE) { + iov_iter_folio_queue(&iter, ITER_SOURCE, vnode->directory, 0, 0, + i_size_read(&vnode->netfs.inode)); + ret = netfs_writeback_single(mapping, wbc, &iter); + } + + if (ret == 0) { + mutex_lock(&vnode->netfs.wb_lock); + netfs_free_folioq_buffer(vnode->directory); + vnode->directory = NULL; + vnode->directory_size = 0; + mutex_unlock(&vnode->netfs.wb_lock); + } else if (ret == 1) { + ret = 0; /* Skipped write due to lock conflict. */ + } + + up_read(&vnode->validate_lock); + return ret; +} + +const struct inode_operations afs_symlink_inode_operations = { + .get_link = afs_get_link, + .readlink = afs_readlink, +}; + +const struct address_space_operations afs_symlink_aops = { + .writepages = afs_symlink_writepages, +}; diff --git a/fs/afs/validation.c b/fs/afs/validation.c index 0ba8336c9025..e997563af658 100644 --- a/fs/afs/validation.c +++ b/fs/afs/validation.c @@ -465,11 +465,17 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) vnode->cb_ro_snapshot = cb_ro_snapshot; vnode->cb_scrub = cb_scrub; - /* if the vnode's data version number changed then its contents are - * different */ + /* If the vnode's data version number changed then its contents are + * different. Note that afs_apply_status() doesn't set ZAP_DATA on + * directories. + */ zap |= test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); - if (zap) - afs_zap_data(vnode); + if (zap) { + if (S_ISREG(vnode->netfs.inode.i_mode)) + afs_zap_data(vnode); + else if (S_ISLNK(vnode->netfs.inode.i_mode)) + afs_invalidate_symlink(vnode); + } up_write(&vnode->validate_lock); _leave(" = 0"); return 0; diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 24fb562ebd33..d941179730a9 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -960,7 +960,7 @@ void yfs_fs_symlink(struct afs_operation *op) _enter(""); - contents_sz = strlen(op->create.symlink); + contents_sz = strlen(op->create.symlink->content); call = afs_alloc_flat_call(op->net, &yfs_RXYFSSymlink, sizeof(__be32) + sizeof(struct yfs_xdr_RPCFlags) + @@ -981,7 +981,7 @@ void yfs_fs_symlink(struct afs_operation *op) bp = xdr_encode_u32(bp, 0); /* RPC flags */ bp = xdr_encode_YFSFid(bp, &dvp->fid); bp = xdr_encode_name(bp, name); - bp = xdr_encode_string(bp, op->create.symlink, contents_sz); + bp = xdr_encode_string(bp, op->create.symlink->content, contents_sz); bp = xdr_encode_YFSStoreStatus(bp, &mode, &op->mtime); yfs_check_req(call, bp); -- cgit v1.2.3 From 5e121a81667a83e9a01d62b429e340f5a4a84abc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 12 May 2026 09:48:49 +0200 Subject: spi: ep93xx: fix error pointer deref after DMA setup failure The driver falls back to PIO mode if DMA setup fails during probe. Make sure to the clear the DMA channel pointers on setup failure to avoid dereferencing an error pointer on later probe errors or driver unbind. This issue was flagged by Sashiko when reviewing a devres allocation conversion patch. Fixes: e79e7c2df627 ("spi: ep93xx: add DT support for Cirrus EP93xx") Link: https://sashiko.dev/#/patchset/20260429091333.165363-1-johan%40kernel.org?part=10 Cc: stable@vger.kernel.org # 6.12 Cc: Nikita Shubin Signed-off-by: Johan Hovold Acked-by: Nikita Shubin Link: https://patch.msgid.link/20260512074849.915143-1-johan@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-ep93xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c index db50018050e5..f716c9607be4 100644 --- a/drivers/spi/spi-ep93xx.c +++ b/drivers/spi/spi-ep93xx.c @@ -582,12 +582,14 @@ static int ep93xx_spi_setup_dma(struct device *dev, struct ep93xx_spi *espi) espi->dma_rx = dma_request_chan(dev, "rx"); if (IS_ERR(espi->dma_rx)) { ret = dev_err_probe(dev, PTR_ERR(espi->dma_rx), "rx DMA setup failed"); + espi->dma_rx = NULL; goto fail_free_page; } espi->dma_tx = dma_request_chan(dev, "tx"); if (IS_ERR(espi->dma_tx)) { ret = dev_err_probe(dev, PTR_ERR(espi->dma_tx), "tx DMA setup failed"); + espi->dma_tx = NULL; goto fail_release_rx; } -- cgit v1.2.3 From 2cb156213093a62b80cf40b1ec71738e93491971 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 9 May 2026 00:13:36 +0200 Subject: net: ethernet: cortina: No mapping is a dropped rx Increase stats.rx_dropped++ even if this is the first fragment (skb == NULL) so we are doing proper accounting. Fixes: b266bacba796 ("net: ethernet: cortina: Drop half-assembled SKB") Link: https://sashiko.dev/#/patchset/20260505-gemini-ethernet-fix-v2-1-997c31d06079%40kernel.org Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20260509-gemini-ethernet-fixes-v1-1-6c5d20ddc35b@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cortina/gemini.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 065cbbf52686..466445c9e08b 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1491,9 +1491,9 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) gpage = gmac_get_queue_page(geth, port, mapping + PAGE_SIZE); if (!gpage) { dev_err(geth->dev, "could not find mapping\n"); + port->stats.rx_dropped++; if (skb) { napi_free_frags(&port->napi); - port->stats.rx_dropped++; skb = NULL; } continue; -- cgit v1.2.3 From 06937db21ee311ed07eba47954447245041a982d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 9 May 2026 00:13:37 +0200 Subject: net: ethernet: cortina: Make RX SKB per-port The SKB used to assemble packets from fragments in gmac_rx() is static local, but the Gemini has two ethernet ports, meaning there can be races between the ports on a bad day if a device is using both. Make the RX SKB a per-port variable and carry it over between invocations in the port struct instead. Zero the pointer once we call napi_gro_frags(), on error (after calling napi_free_frags()) or if the port is stopped. Zero it in some place where not strictly necessary just to emphasize what is going on. This was found by Sashiko during normal patch review. Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet") Link: https://sashiko.dev/#/patchset/20260505-gemini-ethernet-fix-v2-1-997c31d06079%40kernel.org Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20260509-gemini-ethernet-fixes-v1-2-6c5d20ddc35b@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cortina/gemini.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 466445c9e08b..d5a56366fb43 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -122,6 +122,8 @@ struct gemini_ethernet_port { struct napi_struct napi; struct hrtimer rx_coalesce_timer; unsigned int rx_coalesce_nsecs; + struct sk_buff *rx_skb; + unsigned int freeq_refill; struct gmac_txq txq[TX_QUEUE_NUM]; unsigned int txq_order; @@ -1442,10 +1444,10 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) unsigned short m = (1 << port->rxq_order) - 1; struct gemini_ethernet *geth = port->geth; void __iomem *ptr_reg = port->rxq_rwptr; + struct sk_buff *skb = port->rx_skb; unsigned int frame_len, frag_len; struct gmac_rxdesc *rx = NULL; struct gmac_queue_page *gpage; - static struct sk_buff *skb; union gmac_rxdesc_0 word0; union gmac_rxdesc_1 word1; union gmac_rxdesc_3 word3; @@ -1504,6 +1506,7 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) if (skb) { napi_free_frags(&port->napi); port->stats.rx_dropped++; + skb = NULL; } skb = gmac_skb_if_good_frame(port, word0, frame_len); @@ -1554,6 +1557,7 @@ err_drop: port->stats.rx_dropped++; } + port->rx_skb = skb; writew(r, ptr_reg); return budget; } @@ -1881,6 +1885,7 @@ static int gmac_stop(struct net_device *netdev) gmac_disable_tx_rx(netdev); gmac_stop_dma(port); napi_disable(&port->napi); + port->rx_skb = NULL; gmac_enable_irq(netdev, 0); gmac_cleanup_rxq(netdev); -- cgit v1.2.3 From ebd8ec2b309e3a447851b456ccaf8fb39f3661e7 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 9 May 2026 00:13:38 +0200 Subject: net: ethernet: cortina: Carry over frag counter The gmac_rx() NAPI poll function assembles packets in an SKB from a ring buffer. If the ring buffer gets completely emptied during a poll cycle, we exit gmac_rx(), but the packet is not yet completely assembled in the SKB, yet the fragment counter frag_nr is reset to zero on the next invocation. Solve this by making the RX fragment counter a part of the port struct, and carry it over between invocations. Reset the fragment counter only right after calling napi_gro_frags(), on error (after calling napi_free_frags()) or if stopping the port. Reset it in some place where not strictly necessary just to emphasize what is going on. This was found by Sashiko during normal patch review. Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet") Link: https://sashiko.dev/#/patchset/20260505-gemini-ethernet-fix-v2-1-997c31d06079%40kernel.org Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20260509-gemini-ethernet-fixes-v1-3-6c5d20ddc35b@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cortina/gemini.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index d5a56366fb43..4c762229ce42 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -123,6 +123,7 @@ struct gemini_ethernet_port { struct hrtimer rx_coalesce_timer; unsigned int rx_coalesce_nsecs; struct sk_buff *rx_skb; + unsigned int rx_frag_nr; unsigned int freeq_refill; struct gmac_txq txq[TX_QUEUE_NUM]; @@ -1444,6 +1445,7 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) unsigned short m = (1 << port->rxq_order) - 1; struct gemini_ethernet *geth = port->geth; void __iomem *ptr_reg = port->rxq_rwptr; + unsigned int frag_nr = port->rx_frag_nr; struct sk_buff *skb = port->rx_skb; unsigned int frame_len, frag_len; struct gmac_rxdesc *rx = NULL; @@ -1457,7 +1459,6 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) unsigned short r, w; union dma_rwptr rw; dma_addr_t mapping; - int frag_nr = 0; spin_lock_irqsave(&geth->irq_lock, flags); rw.bits32 = readl(ptr_reg); @@ -1497,6 +1498,7 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) if (skb) { napi_free_frags(&port->napi); skb = NULL; + frag_nr = 0; } continue; } @@ -1507,6 +1509,7 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) napi_free_frags(&port->napi); port->stats.rx_dropped++; skb = NULL; + frag_nr = 0; } skb = gmac_skb_if_good_frame(port, word0, frame_len); @@ -1541,6 +1544,7 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) if (word3.bits32 & EOF_BIT) { napi_gro_frags(&port->napi); skb = NULL; + frag_nr = 0; --budget; } continue; @@ -1549,6 +1553,7 @@ err_drop: if (skb) { napi_free_frags(&port->napi); skb = NULL; + frag_nr = 0; } if (mapping) @@ -1558,6 +1563,7 @@ err_drop: } port->rx_skb = skb; + port->rx_frag_nr = frag_nr; writew(r, ptr_reg); return budget; } @@ -1886,6 +1892,7 @@ static int gmac_stop(struct net_device *netdev) gmac_stop_dma(port); napi_disable(&port->napi); port->rx_skb = NULL; + port->rx_frag_nr = 0; gmac_enable_irq(netdev, 0); gmac_cleanup_rxq(netdev); -- cgit v1.2.3 From 36a8d04a8293afcb9304cf0cd3741f67698f2a1a Mon Sep 17 00:00:00 2001 From: Ethan Nelson-Moore Date: Fri, 8 May 2026 19:37:28 -0700 Subject: net: ethernet: cs89x0: remove stale CONFIG_MACH_MX31ADS reference The legacy ARM board file for MACH_MX31ADS was removed in commit c93197b0041d ("ARM: imx: Remove i.MX31 board files"), but a reference to it remained in the cs89x0 driver. Drop this unused code. Signed-off-by: Ethan Nelson-Moore Fixes: c93197b0041d ("ARM: imx: Remove i.MX31 board files") Link: https://patch.msgid.link/20260509023732.42256-1-enelsonmoore@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cirrus/cs89x0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c index fa5857923db4..b4bfd6c174e7 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.c +++ b/drivers/net/ethernet/cirrus/cs89x0.c @@ -1271,7 +1271,6 @@ static const struct net_device_ops net_ops = { static void __init reset_chip(struct net_device *dev) { -#if !defined(CONFIG_MACH_MX31ADS) struct net_local *lp = netdev_priv(dev); unsigned long reset_start_time; @@ -1298,7 +1297,6 @@ static void __init reset_chip(struct net_device *dev) while ((readreg(dev, PP_SelfST) & INIT_DONE) == 0 && time_before(jiffies, reset_start_time + 2)) ; -#endif /* !CONFIG_MACH_MX31ADS */ } /* This is the real probe routine. -- cgit v1.2.3 From 55dda532bbc261aef495e403c8900c5e2ab5fa34 Mon Sep 17 00:00:00 2001 From: Nicolas Escande Date: Wed, 6 May 2026 15:42:38 +0200 Subject: wifi: ath11k: fix error path leaks in some WMI WOW calls Fix two instances where we used to directly return the result of ath11k_wmi_cmd_send(...). Because we did not check the return value, we also did not free the skb in the error path. Fixes: 79802b13a492 ("ath11k: implement WoW enable and wakeup commands") Signed-off-by: Nicolas Escande Reviewed-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20260506134240.2284016-2-nico.escande@gmail.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/wmi.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 40747fba3b0c..024c2aad9fb4 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -9332,6 +9332,7 @@ int ath11k_wmi_wow_host_wakeup_ind(struct ath11k *ar) struct wmi_wow_host_wakeup_ind *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd); skb = ath11k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -9345,14 +9346,20 @@ int ath11k_wmi_wow_host_wakeup_ind(struct ath11k *ar) ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "tlv wow host wakeup ind\n"); - return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID); + ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_WOW_HOSTWAKEUP_FROM_SLEEP_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath11k_wmi_wow_enable(struct ath11k *ar) { struct wmi_wow_enable_cmd *cmd; struct sk_buff *skb; - int len; + int ret, len; len = sizeof(*cmd); skb = ath11k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -9367,7 +9374,13 @@ int ath11k_wmi_wow_enable(struct ath11k *ar) cmd->pause_iface_config = WOW_IFACE_PAUSE_ENABLED; ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "tlv wow enable\n"); - return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_CMDID); + ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_WOW_ENABLE_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath11k_wmi_scan_prob_req_oui(struct ath11k *ar, -- cgit v1.2.3 From ebad0b48996fd4919c36bbcb07289d37d046de74 Mon Sep 17 00:00:00 2001 From: Nicolas Escande Date: Wed, 6 May 2026 15:42:39 +0200 Subject: wifi: ath11k: fix error path leaks in some WMI calls This is the same pattern that was previously identified as problematic: direct 'return ath11k_wmi_cmd_send(...)' will leak the skb in the error path if it is not explicitly handled. Fixes: c417b247ba04 ("ath11k: implement hardware data filter") Fixes: 9cbd7fc9be82 ("ath11k: support MAC address randomization in scan") Fixes: ba9177fcef21 ("ath11k: Add basic WoW functionalities") Fixes: fec4b898f369 ("ath11k: Add WoW net-detect functionality") Fixes: c3c36bfe998b ("ath11k: support ARP and NS offload") Fixes: a16d9b50cfba ("ath11k: support GTK rekey offload") Fixes: 652f69ed9c1b ("ath11k: Add support for SAR") Fixes: 0f84a156aa3b ("ath11k: Handle keepalive during WoWLAN suspend and resume") Signed-off-by: Nicolas Escande Reviewed-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20260506134240.2284016-3-nico.escande@gmail.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/wmi.c | 112 +++++++++++++++++++++++++++++----- 1 file changed, 96 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 024c2aad9fb4..dca6e011cc40 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -9299,7 +9299,7 @@ int ath11k_wmi_hw_data_filter_cmd(struct ath11k *ar, u32 vdev_id, { struct wmi_hw_data_filter_cmd *cmd; struct sk_buff *skb; - int len; + int ret, len; len = sizeof(*cmd); skb = ath11k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -9324,7 +9324,13 @@ int ath11k_wmi_hw_data_filter_cmd(struct ath11k *ar, u32 vdev_id, "hw data filter enable %d filter_bitmap 0x%x\n", enable, filter_bitmap); - return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_HW_DATA_FILTER_CMDID); + ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_HW_DATA_FILTER_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_HW_DATA_FILTER_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath11k_wmi_wow_host_wakeup_ind(struct ath11k *ar) @@ -9389,7 +9395,7 @@ int ath11k_wmi_scan_prob_req_oui(struct ath11k *ar, struct sk_buff *skb; struct wmi_scan_prob_req_oui_cmd *cmd; u32 prob_req_oui; - int len; + int ret, len; prob_req_oui = (((u32)mac_addr[0]) << 16) | (((u32)mac_addr[1]) << 8) | mac_addr[2]; @@ -9408,7 +9414,13 @@ int ath11k_wmi_scan_prob_req_oui(struct ath11k *ar, ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "scan prob req oui %d\n", prob_req_oui); - return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_SCAN_PROB_REQ_OUI_CMDID); + ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_SCAN_PROB_REQ_OUI_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_SCAN_PROB_REQ_OUI_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath11k_wmi_wow_add_wakeup_event(struct ath11k *ar, u32 vdev_id, @@ -9418,6 +9430,7 @@ int ath11k_wmi_wow_add_wakeup_event(struct ath11k *ar, u32 vdev_id, struct wmi_wow_add_del_event_cmd *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd); skb = ath11k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -9435,7 +9448,13 @@ int ath11k_wmi_wow_add_wakeup_event(struct ath11k *ar, u32 vdev_id, ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "tlv wow add wakeup event %s enable %d vdev_id %d\n", wow_wakeup_event(event), enable, vdev_id); - return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID); + ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_WOW_ENABLE_DISABLE_WAKE_EVENT_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath11k_wmi_wow_add_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id, @@ -9448,6 +9467,7 @@ int ath11k_wmi_wow_add_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id, struct sk_buff *skb; u8 *ptr; size_t len; + int ret; len = sizeof(*cmd) + sizeof(*tlv) + /* array struct */ @@ -9540,7 +9560,13 @@ int ath11k_wmi_wow_add_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id, ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "tlv wow add pattern vdev_id %d pattern_id %d pattern_offset %d\n", vdev_id, pattern_id, pattern_offset); - return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ADD_WAKE_PATTERN_CMDID); + ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_ADD_WAKE_PATTERN_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_WOW_ADD_WAKE_PATTERN_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath11k_wmi_wow_del_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id) @@ -9548,6 +9574,7 @@ int ath11k_wmi_wow_del_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id) struct wmi_wow_del_pattern_cmd *cmd; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd); skb = ath11k_wmi_alloc_skb(ar->wmi->wmi_ab, len); @@ -9566,7 +9593,13 @@ int ath11k_wmi_wow_del_pattern(struct ath11k *ar, u32 vdev_id, u32 pattern_id) ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "tlv wow del pattern vdev_id %d pattern_id %d\n", vdev_id, pattern_id); - return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_DEL_WAKE_PATTERN_CMDID); + ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_WOW_DEL_WAKE_PATTERN_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_WOW_DEL_WAKE_PATTERN_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } static struct sk_buff * @@ -9710,6 +9743,7 @@ int ath11k_wmi_wow_config_pno(struct ath11k *ar, u32 vdev_id, struct wmi_pno_scan_req *pno_scan) { struct sk_buff *skb; + int ret; if (pno_scan->enable) skb = ath11k_wmi_op_gen_config_pno_start(ar, vdev_id, pno_scan); @@ -9719,7 +9753,13 @@ int ath11k_wmi_wow_config_pno(struct ath11k *ar, u32 vdev_id, if (IS_ERR_OR_NULL(skb)) return -ENOMEM; - return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID); + ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_NETWORK_LIST_OFFLOAD_CONFIG_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } static void ath11k_wmi_fill_ns_offload(struct ath11k *ar, @@ -9837,6 +9877,7 @@ int ath11k_wmi_arp_ns_offload(struct ath11k *ar, u8 *buf_ptr; size_t len; u8 ns_cnt, ns_ext_tuples = 0; + int ret; offload = &arvif->arp_ns_offload; ns_cnt = offload->ipv6_count; @@ -9875,7 +9916,13 @@ int ath11k_wmi_arp_ns_offload(struct ath11k *ar, if (ns_ext_tuples) ath11k_wmi_fill_ns_offload(ar, offload, &buf_ptr, enable, 1); - return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_SET_ARP_NS_OFFLOAD_CMDID); + ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_SET_ARP_NS_OFFLOAD_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_SET_ARP_NS_OFFLOAD_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath11k_wmi_gtk_rekey_offload(struct ath11k *ar, @@ -9883,7 +9930,7 @@ int ath11k_wmi_gtk_rekey_offload(struct ath11k *ar, { struct wmi_gtk_rekey_offload_cmd *cmd; struct ath11k_rekey_data *rekey_data = &arvif->rekey_data; - int len; + int ret, len; struct sk_buff *skb; __le64 replay_ctr; @@ -9917,14 +9964,20 @@ int ath11k_wmi_gtk_rekey_offload(struct ath11k *ar, ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "offload gtk rekey vdev: %d %d\n", arvif->vdev_id, enable); - return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_GTK_OFFLOAD_CMDID offload\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath11k_wmi_gtk_rekey_getinfo(struct ath11k *ar, struct ath11k_vif *arvif) { struct wmi_gtk_rekey_offload_cmd *cmd; - int len; + int ret, len; struct sk_buff *skb; len = sizeof(*cmd); @@ -9941,7 +9994,13 @@ int ath11k_wmi_gtk_rekey_getinfo(struct ath11k *ar, ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "get gtk rekey vdev_id: %d\n", arvif->vdev_id); - return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + ret = ath11k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_GTK_OFFLOAD_CMDID getinfo\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath11k_wmi_pdev_set_bios_sar_table_param(struct ath11k *ar, const u8 *sar_val) @@ -9951,6 +10010,7 @@ int ath11k_wmi_pdev_set_bios_sar_table_param(struct ath11k *ar, const u8 *sar_va struct sk_buff *skb; u8 *buf_ptr; u32 len, sar_len_aligned, rsvd_len_aligned; + int ret; sar_len_aligned = roundup(BIOS_SAR_TABLE_LEN, sizeof(u32)); rsvd_len_aligned = roundup(BIOS_SAR_RSVD1_LEN, sizeof(u32)); @@ -9981,7 +10041,13 @@ int ath11k_wmi_pdev_set_bios_sar_table_param(struct ath11k *ar, const u8 *sar_va tlv->header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ARRAY_BYTE) | FIELD_PREP(WMI_TLV_LEN, rsvd_len_aligned); - return ath11k_wmi_cmd_send(wmi, skb, WMI_PDEV_SET_BIOS_SAR_TABLE_CMDID); + ret = ath11k_wmi_cmd_send(wmi, skb, WMI_PDEV_SET_BIOS_SAR_TABLE_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_PDEV_SET_BIOS_SAR_TABLE_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath11k_wmi_pdev_set_bios_geo_table_param(struct ath11k *ar) @@ -9992,6 +10058,7 @@ int ath11k_wmi_pdev_set_bios_geo_table_param(struct ath11k *ar) struct sk_buff *skb; u8 *buf_ptr; u32 len, rsvd_len_aligned; + int ret; rsvd_len_aligned = roundup(BIOS_SAR_RSVD2_LEN, sizeof(u32)); len = sizeof(*cmd) + TLV_HDR_SIZE + rsvd_len_aligned; @@ -10011,7 +10078,13 @@ int ath11k_wmi_pdev_set_bios_geo_table_param(struct ath11k *ar) tlv->header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ARRAY_BYTE) | FIELD_PREP(WMI_TLV_LEN, rsvd_len_aligned); - return ath11k_wmi_cmd_send(wmi, skb, WMI_PDEV_SET_BIOS_GEO_TABLE_CMDID); + ret = ath11k_wmi_cmd_send(wmi, skb, WMI_PDEV_SET_BIOS_GEO_TABLE_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_PDEV_SET_BIOS_GEO_TABLE_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } int ath11k_wmi_sta_keepalive(struct ath11k *ar, @@ -10022,6 +10095,7 @@ int ath11k_wmi_sta_keepalive(struct ath11k *ar, struct wmi_sta_keepalive_arp_resp *arp; struct sk_buff *skb; size_t len; + int ret; len = sizeof(*cmd) + sizeof(*arp); skb = ath11k_wmi_alloc_skb(wmi->wmi_ab, len); @@ -10053,7 +10127,13 @@ int ath11k_wmi_sta_keepalive(struct ath11k *ar, "sta keepalive vdev %d enabled %d method %d interval %d\n", arg->vdev_id, arg->enabled, arg->method, arg->interval); - return ath11k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID); + ret = ath11k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID); + if (ret) { + ath11k_warn(ar->ab, "failed to send WMI_STA_KEEPALIVE_CMDID\n"); + dev_kfree_skb(skb); + } + + return ret; } bool ath11k_wmi_supports_6ghz_cc_ext(struct ath11k *ar) -- cgit v1.2.3 From 7320d6eb861e9913193a7801834c661381756a79 Mon Sep 17 00:00:00 2001 From: Nicolas Escande Date: Wed, 6 May 2026 15:42:40 +0200 Subject: wifi: ath11k: fix error path leak in ath11k_tm_cmd_wmi_ftm() This is similar to what was fixed by previous patches. We have a call to ath11k_wmi_cmd_send() which does check the return value, but forgot to free the related skb on error. Fixes: b43310e44edc ("wifi: ath11k: factory test mode support") Signed-off-by: Nicolas Escande Reviewed-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20260506134240.2284016-4-nico.escande@gmail.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/testmode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath11k/testmode.c b/drivers/net/wireless/ath/ath11k/testmode.c index a9751ea2a0b7..c72eed358f6d 100644 --- a/drivers/net/wireless/ath/ath11k/testmode.c +++ b/drivers/net/wireless/ath/ath11k/testmode.c @@ -457,6 +457,7 @@ static int ath11k_tm_cmd_wmi_ftm(struct ath11k *ar, struct nlattr *tb[]) ret = ath11k_wmi_cmd_send(wmi, skb, cmd_id); if (ret) { ath11k_warn(ar->ab, "failed to send wmi ftm command: %d\n", ret); + dev_kfree_skb(skb); goto out; } -- cgit v1.2.3 From 54a5b38e4396530e5b2f12b54d3844e860ab6784 Mon Sep 17 00:00:00 2001 From: Kang Yang Date: Tue, 28 Apr 2026 14:17:37 +0800 Subject: wifi: ath10k: skip WMI and beacon transmission when device is wedged In ath10k_wmi_cmd_send(), the current code detects ATH10K_STATE_WEDGED and sets ret to -ESHUTDOWN, but still proceeds to transmit pending beacons and calls ath10k_wmi_cmd_send_nowait(). This can lead to incorrect behavior, as WMI commands and beacons are still sent after the device has been marked as wedged, and the original -ESHUTDOWN return value may be overwritten by the result of the send path. The wedged state indicates the hardware is already unreliable, and no further interaction with firmware is expected or meaningful in this state. Fix this by skipping beacon transmission and the WMI send path entirely once ATH10K_STATE_WEDGED is detected, ensuring consistent return values and avoiding unnecessary firmware interaction. Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00288-QCARMSWPZ-1 Tested-on: QCA6174 hw3.2 SDIO WLAN.RMH.4.4.1-00189 Fixes: c256a94d1b1b ("wifi: ath10k: shutdown driver when hardware is unreliable") Signed-off-by: Kang Yang Reviewed-by: Rameshkumar Sundaram Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20260428061737.37-1-kang.yang@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/wmi.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 0bdb38edd915..e57588c19c80 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -3,7 +3,6 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ @@ -1947,15 +1946,15 @@ int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id) ret = -ESHUTDOWN; ath10k_dbg(ar, ATH10K_DBG_WMI, "drop wmi command %d, hardware is wedged\n", cmd_id); - } - /* try to send pending beacons first. they take priority */ - ath10k_wmi_tx_beacons_nowait(ar); + } else { + /* try to send pending beacons first. they take priority */ + ath10k_wmi_tx_beacons_nowait(ar); - ret = ath10k_wmi_cmd_send_nowait(ar, skb, cmd_id); - - if (ret && test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags)) - ret = -ESHUTDOWN; + ret = ath10k_wmi_cmd_send_nowait(ar, skb, cmd_id); + if (ret && test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags)) + ret = -ESHUTDOWN; + } (ret != -EAGAIN); }), 3 * HZ); -- cgit v1.2.3 From 7cee43fcb0c3f71441d2faaa8c2202b6a88b6bef Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 May 2026 12:28:55 -0700 Subject: net: shaper: flip the polarity of the valid flag The usual way of inserting entries which are not yet fully ready into XArray is to have a VALID flag. The shaper code has a NOT_VALID flag. Since XArray code does not let us create entries with marks already set - the creation of entries is currently not atomic. Flip the polarity of the VALID flag. This closes the tiny race in net_shaper_pre_insert() of entries being created without the NOT_VALID flag. Fixes: 93954b40f6a4 ("net-shapers: implement NL set and delete operations") Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20260510192904.3987113-2-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/shaper/shaper.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 1069fa4eb9f6..d2b8f1f951b1 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -275,11 +275,13 @@ static void net_shaper_default_parent(const struct net_shaper_handle *handle, parent->id = 0; } -/* - * MARK_0 is already in use due to XA_FLAGS_ALLOC, can't reuse such flag as - * it's cleared by xa_store(). +/* MARK_0 is already in use due to XA_FLAGS_ALLOC. The VALID mark is set on + * an entry only after the device-side configuration has completed + * successfully (see net_shaper_commit()). Lookups and dumps must filter on + * this mark to avoid exposing tentative entries inserted by + * net_shaper_pre_insert() while the driver call is still in flight. */ -#define NET_SHAPER_NOT_VALID XA_MARK_1 +#define NET_SHAPER_VALID XA_MARK_1 static struct net_shaper * net_shaper_lookup(struct net_shaper_binding *binding, @@ -289,8 +291,8 @@ net_shaper_lookup(struct net_shaper_binding *binding, struct net_shaper_hierarchy *hierarchy; hierarchy = net_shaper_hierarchy_rcu(binding); - if (!hierarchy || xa_get_mark(&hierarchy->shapers, index, - NET_SHAPER_NOT_VALID)) + if (!hierarchy || !xa_get_mark(&hierarchy->shapers, index, + NET_SHAPER_VALID)) return NULL; return xa_load(&hierarchy->shapers, index); @@ -370,13 +372,10 @@ static int net_shaper_pre_insert(struct net_shaper_binding *binding, goto free_id; } - /* Mark 'tentative' shaper inside the hierarchy container. - * xa_set_mark is a no-op if the previous store fails. + /* Insert as 'tentative' (no VALID mark). The mark will be set by + * net_shaper_commit() once the driver-side configuration succeeds. */ - xa_lock(&hierarchy->shapers); - prev = __xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL); - __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_NOT_VALID); - xa_unlock(&hierarchy->shapers); + prev = xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL); if (xa_err(prev)) { NL_SET_ERR_MSG(extack, "Can't insert shaper into device store"); kfree_rcu(cur, rcu); @@ -413,8 +412,7 @@ static void net_shaper_commit(struct net_shaper_binding *binding, /* Successful update: drop the tentative mark * and update the hierarchy container. */ - __xa_clear_mark(&hierarchy->shapers, index, - NET_SHAPER_NOT_VALID); + __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_VALID); *cur = shapers[i]; } xa_unlock(&hierarchy->shapers); @@ -431,8 +429,9 @@ static void net_shaper_rollback(struct net_shaper_binding *binding) return; xa_lock(&hierarchy->shapers); - xa_for_each_marked(&hierarchy->shapers, index, cur, - NET_SHAPER_NOT_VALID) { + xa_for_each(&hierarchy->shapers, index, cur) { + if (xa_get_mark(&hierarchy->shapers, index, NET_SHAPER_VALID)) + continue; __xa_erase(&hierarchy->shapers, index); kfree(cur); } @@ -836,7 +835,8 @@ int net_shaper_nl_get_dumpit(struct sk_buff *skb, goto out_unlock; for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index, - U32_MAX, XA_PRESENT)); ctx->start_index++) { + U32_MAX, NET_SHAPER_VALID)); + ctx->start_index++) { ret = net_shaper_fill_one(skb, binding, shaper, info); if (ret) break; -- cgit v1.2.3 From 235fb5376139c3419f2218349f1fa2f06f24f7ad Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 May 2026 12:28:56 -0700 Subject: net: shaper: fix trivial ordering issue in net_shaper_commit() We should update the entry before we mark it as valid. Fixes: 93954b40f6a4 ("net-shapers: implement NL set and delete operations") Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20260510192904.3987113-3-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/shaper/shaper.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index d2b8f1f951b1..86319ddbf290 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -295,6 +295,10 @@ net_shaper_lookup(struct net_shaper_binding *binding, NET_SHAPER_VALID)) return NULL; + /* Pairs with smp_wmb() in net_shaper_commit(): if the entry is + * valid, its contents must be visible too. + */ + smp_rmb(); return xa_load(&hierarchy->shapers, index); } @@ -412,8 +416,9 @@ static void net_shaper_commit(struct net_shaper_binding *binding, /* Successful update: drop the tentative mark * and update the hierarchy container. */ - __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_VALID); *cur = shapers[i]; + smp_wmb(); + __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_VALID); } xa_unlock(&hierarchy->shapers); } @@ -837,6 +842,10 @@ int net_shaper_nl_get_dumpit(struct sk_buff *skb, for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index, U32_MAX, NET_SHAPER_VALID)); ctx->start_index++) { + /* Pairs with smp_wmb() in net_shaper_commit(): the entry + * is marked VALID, so its contents must be visible too. + */ + smp_rmb(); ret = net_shaper_fill_one(skb, binding, shaper, info); if (ret) break; -- cgit v1.2.3 From a9a2fa1da619f276580b0d4c5d12efac89e8642b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 May 2026 12:28:57 -0700 Subject: net: shaper: reject duplicate leaves in GROUP request net_shaper_nl_group_doit() does not deduplicate NET_SHAPER_A_LEAVES entries. When userspace supplies the same leaf handle twice, the same old-parent pointer lands twice in old_nodes[]. The cleanup loop double frees the parent. Of course the same parent may still be in old_nodes[] twice if we are moving multiple of its leaves. Note that this patch also implicitly fixes the fact that the i >= leaves_count path forgets to set ret. Fixes: 5d5d4700e75d ("net-shapers: implement NL group operation") Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20260510192904.3987113-4-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/shaper/shaper.c | 60 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 86319ddbf290..c8960821cf23 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -941,6 +941,46 @@ static int net_shaper_handle_cmp(const struct net_shaper_handle *a, return memcmp(a, b, sizeof(*a)); } +static int net_shaper_parse_leaves(struct net_shaper_binding *binding, + struct genl_info *info, + const struct net_shaper *node, + struct net_shaper *leaves, + int leaves_count) +{ + struct nlattr *attr; + int i, j, ret, rem; + + i = 0; + nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES, + genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + if (WARN_ON_ONCE(i >= leaves_count)) + return -EINVAL; + + ret = net_shaper_parse_leaf(binding, attr, info, + node, &leaves[i]); + if (ret) + return ret; + + /* Reject duplicates */ + for (j = 0; j < i; j++) { + if (net_shaper_handle_cmp(&leaves[i].handle, + &leaves[j].handle)) + continue; + + NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, + "Duplicate leaf shaper %d:%d", + leaves[i].handle.scope, + leaves[i].handle.id); + return -EINVAL; + } + + i++; + } + + return 0; +} + static int net_shaper_parent_from_leaves(int leaves_count, const struct net_shaper *leaves, struct net_shaper *node, @@ -1197,10 +1237,9 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) struct net_shaper **old_nodes, *leaves, node = {}; struct net_shaper_hierarchy *hierarchy; struct net_shaper_binding *binding; - int i, ret, rem, leaves_count; + int i, ret, leaves_count; int old_nodes_count = 0; struct sk_buff *msg; - struct nlattr *attr; if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_LEAVES)) return -EINVAL; @@ -1228,19 +1267,10 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) if (ret) goto free_leaves; - i = 0; - nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES, - genlmsg_data(info->genlhdr), - genlmsg_len(info->genlhdr), rem) { - if (WARN_ON_ONCE(i >= leaves_count)) - goto free_leaves; - - ret = net_shaper_parse_leaf(binding, attr, info, - &node, &leaves[i]); - if (ret) - goto free_leaves; - i++; - } + ret = net_shaper_parse_leaves(binding, info, &node, + leaves, leaves_count); + if (ret) + goto free_leaves; /* Prepare the msg reply in advance, to avoid device operation * rollback on allocation failure. -- cgit v1.2.3 From 6e8ae9d805d4b9ecec49bb9e457d9bae0b21b540 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 May 2026 12:28:58 -0700 Subject: selftests: drv-net: add shaper test for duplicate leaves Add test exercising duplicate leaves. Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20260510192904.3987113-5-kuba@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/drivers/net/shaper.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/shaper.py b/tools/testing/selftests/drivers/net/shaper.py index 11310f19bfa0..e39d270e688d 100755 --- a/tools/testing/selftests/drivers/net/shaper.py +++ b/tools/testing/selftests/drivers/net/shaper.py @@ -1,7 +1,10 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 -from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_true, KsftSkipEx +import errno + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, ksft_raises, ksft_true, KsftSkipEx from lib.py import EthtoolFamily, NetshaperFamily from lib.py import NetDrvEnv from lib.py import NlError @@ -438,6 +441,21 @@ def queue_update(cfg, nl_shaper) -> None: nl_shaper.delete({'ifindex': cfg.ifindex, 'handle': {'scope': 'queue', 'id': i}}) +def dup_leaves(cfg, nl_shaper) -> None: + """ Ensure that the kernel rejects duplicate leaves. """ + if not cfg.groups: + raise KsftSkipEx("device does not support node scope") + + with ksft_raises(NlError) as cm: + nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 0}}, + {'handle': {'scope': 'queue', 'id': 0}}], + 'handle': {'scope':'node'}, + 'metric': 'bps', + 'bw-max': 10000}) + ksft_eq(cm.exception.error, errno.EINVAL) + def main() -> None: with NetDrvEnv(__file__, queue_count=4) as cfg: cfg.queues = False @@ -453,7 +471,9 @@ def main() -> None: basic_groups, qgroups, delegation, - queue_update], args=(cfg, NetshaperFamily())) + dup_leaves, + queue_update], + args=(cfg, NetshaperFamily())) ksft_exit() -- cgit v1.2.3 From 8054f85b83f42a37d482fc77ea7c9ff06a9407d9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 May 2026 12:28:59 -0700 Subject: net: shaper: set ret to -ENOMEM when genlmsg_new() fails in group_doit genlmsg_new() alloc failure path in net_shaper_nl_group_doit() forgets to set ret before jumping to error handling. Fixes: 5d5d4700e75d ("net-shapers: implement NL group operation") Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20260510192904.3987113-6-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/shaper/shaper.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index c8960821cf23..12e5e0c18643 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -1276,8 +1276,10 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) * rollback on allocation failure. */ msg = genlmsg_new(net_shaper_handle_size(), GFP_KERNEL); - if (!msg) + if (!msg) { + ret = -ENOMEM; goto free_leaves; + } hierarchy = net_shaper_hierarchy_setup(binding); if (!hierarchy) { -- cgit v1.2.3 From 0f9a857e34d0f8c018a3e4435c6f0e92e8d2f38c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 May 2026 12:29:00 -0700 Subject: net: shaper: fix undersized reply skb allocation in GROUP command net_shaper_group_send_reply() writes both the NET_SHAPER_A_IFINDEX attribute (via net_shaper_fill_binding()) and the nested NET_SHAPER_A_HANDLE attribute (via net_shaper_fill_handle()), but the reply skb at the call site in net_shaper_nl_group_doit() is allocated using net_shaper_handle_size(), which only accounts for the nested handle. The allocation is therefore short by nla_total_size(sizeof(u32)) (8 bytes) for the IFINDEX attribute. In practice the slab allocator rounds up the small allocation so the bug is latent, but the size accounting is wrong and could bite if the reply grew further. Introduce net_shaper_group_reply_size() that accounts for the full reply payload and use it both at the genlmsg_new() call site and in the defensive WARN_ONCE message. Fixes: 5d5d4700e75d ("net-shapers: implement NL group operation") Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20260510192904.3987113-7-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/shaper/shaper.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 12e5e0c18643..08fde2d9e8aa 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -90,6 +90,12 @@ static int net_shaper_handle_size(void) nla_total_size(sizeof(u32))); } +static int net_shaper_group_reply_size(void) +{ + return nla_total_size(sizeof(u32)) + /* NET_SHAPER_A_IFINDEX */ + net_shaper_handle_size(); /* NET_SHAPER_A_HANDLE */ +} + static int net_shaper_fill_binding(struct sk_buff *msg, const struct net_shaper_binding *binding, u32 type) @@ -1227,7 +1233,7 @@ static int net_shaper_group_send_reply(struct net_shaper_binding *binding, free_msg: /* Should never happen as msg is pre-allocated with enough space. */ WARN_ONCE(true, "calculated message payload length (%d)", - net_shaper_handle_size()); + net_shaper_group_reply_size()); nlmsg_free(msg); return -EMSGSIZE; } @@ -1275,7 +1281,7 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) /* Prepare the msg reply in advance, to avoid device operation * rollback on allocation failure. */ - msg = genlmsg_new(net_shaper_handle_size(), GFP_KERNEL); + msg = genlmsg_new(net_shaper_group_reply_size(), GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto free_leaves; -- cgit v1.2.3 From fbf5df34a4dbcd09d433dd4f0916bf9b2ddb16de Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 May 2026 12:29:01 -0700 Subject: tools: ynl: add scope qualifier for definitions Using definitions in kernel policies is awkward right now. On one hand we want defines for max values and such. On the other we don't have a way of adding kernel-only defines. Adding unnecessary defines to uAPI is a bad idea, we won't be able to delete them. And when it comes to policy user space should just query it via the policy dump, not use hard coded defines. Add a "scope" property to definitions, which will let us tell the codegen that a definition is for kernel use only. Support following values: - uapi: render into the uAPI header (default, today's behavior) - kernel: render to kernel header only - user: same as kernel but for the user-side generated header Definitions may have a header property (definition is "external", provided by existing header). Extend the scope to headers, too. If definition has both scope and header properties we will only generate the includes in the right scope. Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20260510192904.3987113-8-kuba@kernel.org Signed-off-by: Paolo Abeni --- Documentation/netlink/genetlink-c.yaml | 9 +++++++++ Documentation/netlink/genetlink-legacy.yaml | 9 +++++++++ Documentation/netlink/genetlink.yaml | 9 +++++++++ Documentation/netlink/netlink-raw.yaml | 9 +++++++++ tools/net/ynl/pyynl/ynl_gen_c.py | 31 +++++++++++++++++++++++++++-- 5 files changed, 65 insertions(+), 2 deletions(-) diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index 57f59fe23e3f..4ea31e8fc4d1 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -69,6 +69,15 @@ properties: header: description: For C-compatible languages, header which already defines this value. type: string + scope: + description: | + Visibility of this definition. "uapi" (default) renders into + the uAPI header, "kernel" renders into the kernel-side + generated header, "user" renders into the user-side + generated header. When combined with `header:`, the + definition is not rendered, and the named header is + included only by code matching the scope. + enum: [ uapi, kernel, user ] type: enum: [ const, enum, flags ] doc: diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index 66fb8653a344..f9c44747729a 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -83,6 +83,15 @@ properties: header: description: For C-compatible languages, header which already defines this value. type: string + scope: + description: | + Visibility of this definition. "uapi" (default) renders into + the uAPI header, "kernel" renders into the kernel-side + generated header, "user" renders into the user-side + generated header. When combined with `header:`, the + definition is not rendered, and the named header is + included only by code matching the scope. + enum: [ uapi, kernel, user ] type: enum: [ const, enum, flags, struct ] # Trim doc: diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index a1194d5d93fc..d3f3f3399ddf 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -55,6 +55,15 @@ properties: header: description: For C-compatible languages, header which already defines this value. type: string + scope: + description: | + Visibility of this definition. "uapi" (default) renders into + the uAPI header, "kernel" renders into the kernel-side + generated header, "user" renders into the user-side + generated header. When combined with `header:`, the + definition is not rendered, and the named header is + included only by code matching the scope. + enum: [ uapi, kernel, user ] type: enum: [ const, enum, flags ] doc: diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index dd98dda55bd0..4c436b59a34b 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -87,6 +87,15 @@ properties: header: description: For C-compatible languages, header which already defines this value. type: string + scope: + description: | + Visibility of this definition. "uapi" (default) renders into + the uAPI header, "kernel" renders into the kernel-side + generated header, "user" renders into the user-side + generated header. When combined with `header:`, the + definition is not rendered, and the named header is + included only by code matching the scope. + enum: [ uapi, kernel, user ] type: enum: [ const, enum, flags, struct ] # Trim doc: diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 0e1e486c1185..cdc3646f2642 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -3212,6 +3212,8 @@ def render_uapi(family, cw): for const in family['definitions']: if const.get('header'): continue + if const.get('scope', 'uapi') != 'uapi': + continue if const['type'] != 'const': cw.writes_defines(defines) @@ -3339,6 +3341,25 @@ def render_uapi(family, cw): cw.p(f'#endif /* {hdr_prot} */') +def render_scoped_consts(family, cw, scope): + defines = [] + for const in family['definitions']: + if const['type'] != 'const': + continue + if const.get('header'): + continue + if const.get('scope') != scope: + continue + name_pfx = const.get('name-prefix', f"{family.ident_name}-") + defines.append([ + c_upper(family.get('c-define-name', + f"{name_pfx}{const['name']}")), + const['value']]) + if defines: + cw.writes_defines(defines) + cw.nl() + + def _render_user_ntf_entry(ri, op): if not ri.family.is_classic(): ri.cw.block_start(line=f"[{op.enum_name}] = ") @@ -3504,8 +3525,12 @@ def main(): cw.p('#include "ynl.h"') headers = [] for definition in parsed['definitions'] + parsed['attribute-sets']: - if 'header' in definition: - headers.append(definition['header']) + if 'header' not in definition: + continue + scope = definition.get('scope', 'uapi') + if scope != 'uapi' and scope != args.mode: + continue + headers.append(definition['header']) if args.mode == 'user': headers.append(parsed.uapi_header) seen_header = [] @@ -3522,6 +3547,7 @@ def main(): for one in args.user_header: cw.p(f'#include "{one}"') else: + render_scoped_consts(parsed, cw, 'user') cw.p('struct ynl_sock;') cw.nl() render_user_family(parsed, cw, True) @@ -3529,6 +3555,7 @@ def main(): if args.mode == "kernel": if args.header: + render_scoped_consts(parsed, cw, 'kernel') for _, struct in sorted(parsed.pure_nested_structs.items()): if struct.request: cw.p('/* Common nested types */') -- cgit v1.2.3 From 8d5806c600fddb907ebe378f9c366d4b52ac3a39 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 May 2026 12:29:02 -0700 Subject: net: shaper: reject handle IDs exceeding internal bit-width net_shaper_parse_handle() reads the user-supplied handle ID via nla_get_u32(), accepting the full u32 range. However, the xarray key is built by net_shaper_handle_to_index() using FIELD_PREP(NET_SHAPER_ID_MASK, handle->id), where NET_SHAPER_ID_MASK is GENMASK(25, 0) - only 26 bits wide. FIELD_PREP silently masks off the upper bits at runtime. A user-supplied NODE id like 0x04000123 becomes id 0x123. Additionally, a user-supplied id equal to NET_SHAPER_ID_UNSPEC (0x03FFFFFF, which is NET_SHAPER_ID_MASK itself) would collide with the sentinel used internally by the group operation to signal "allocate a new NODE id". Reject user-supplied IDs >= NET_SHAPER_ID_MASK (i.e., >= 0x03FFFFFF) in the policy. Fixes: 4b623f9f0f59 ("net-shapers: implement NL get operation") Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20260510192904.3987113-9-kuba@kernel.org Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/net_shaper.yaml | 7 +++++++ net/shaper/shaper.c | 4 +++- net/shaper/shaper_nl_gen.c | 7 ++++++- net/shaper/shaper_nl_gen.h | 2 ++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/Documentation/netlink/specs/net_shaper.yaml b/Documentation/netlink/specs/net_shaper.yaml index 3f2ad772b64b..de01f922040a 100644 --- a/Documentation/netlink/specs/net_shaper.yaml +++ b/Documentation/netlink/specs/net_shaper.yaml @@ -33,6 +33,11 @@ doc: | @cap-get operation. definitions: + - + type: const + name: max-handle-id + value: 0x3fffffe + scope: kernel - type: enum name: scope @@ -140,6 +145,8 @@ attribute-sets: - name: id type: u32 + checks: + max: max-handle-id doc: | Numeric identifier of a shaper. The id semantic depends on the scope. For @queue scope it's the queue id and for @node diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 08fde2d9e8aa..eb049847fed6 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -21,6 +21,8 @@ #define NET_SHAPER_ID_UNSPEC NET_SHAPER_ID_MASK +static_assert(NET_SHAPER_ID_UNSPEC == NET_SHAPER_MAX_HANDLE_ID + 1); + struct net_shaper_hierarchy { struct xarray shapers; }; @@ -360,7 +362,7 @@ static int net_shaper_pre_insert(struct net_shaper_binding *binding, handle->id == NET_SHAPER_ID_UNSPEC) { u32 min, max; - handle->id = NET_SHAPER_ID_MASK - 1; + handle->id = NET_SHAPER_MAX_HANDLE_ID; max = net_shaper_handle_to_index(handle); handle->id = 0; min = net_shaper_handle_to_index(handle); diff --git a/net/shaper/shaper_nl_gen.c b/net/shaper/shaper_nl_gen.c index 9b29be3ef19a..76eff85ec66d 100644 --- a/net/shaper/shaper_nl_gen.c +++ b/net/shaper/shaper_nl_gen.c @@ -11,10 +11,15 @@ #include +/* Integer value ranges */ +static const struct netlink_range_validation net_shaper_a_handle_id_range = { + .max = NET_SHAPER_MAX_HANDLE_ID, +}; + /* Common nested types */ const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1] = { [NET_SHAPER_A_HANDLE_SCOPE] = NLA_POLICY_MAX(NLA_U32, 3), - [NET_SHAPER_A_HANDLE_ID] = { .type = NLA_U32, }, + [NET_SHAPER_A_HANDLE_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &net_shaper_a_handle_id_range), }; const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1] = { diff --git a/net/shaper/shaper_nl_gen.h b/net/shaper/shaper_nl_gen.h index 42c46c52c775..2406652a9014 100644 --- a/net/shaper/shaper_nl_gen.h +++ b/net/shaper/shaper_nl_gen.h @@ -12,6 +12,8 @@ #include +#define NET_SHAPER_MAX_HANDLE_ID 67108862 + /* Common nested types */ extern const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1]; extern const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1]; -- cgit v1.2.3 From b62b29e6de6711f5918940aa6ff2bbab6d6af502 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 May 2026 12:29:03 -0700 Subject: net: shaper: enforce singleton NETDEV scope with id 0 The NETDEV scope represents a singleton root shaper in the per-device hierarchy. All code assumes NETDEV shapers have id 0: net_shaper_default_parent() hardcodes parent->id = 0 when returning the NETDEV parent for QUEUE/NODE children, and the UAPI documentation describes NETDEV scope as "the main shaper" (singular, not plural). Make sure we reject non-0 IDs. Fixes: 4b623f9f0f59 ("net-shapers: implement NL get operation") Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20260510192904.3987113-10-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/shaper/shaper.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index eb049847fed6..4ae3ee6764a0 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -482,6 +482,12 @@ static int net_shaper_parse_handle(const struct nlattr *attr, else if (handle->scope == NET_SHAPER_SCOPE_NODE) id = NET_SHAPER_ID_UNSPEC; + if (id && handle->scope == NET_SHAPER_SCOPE_NETDEV) { + NL_SET_ERR_MSG_ATTR(info->extack, id_attr, + "Netdev scope is a singleton, must use ID 0"); + return -EINVAL; + } + handle->id = id; return 0; } -- cgit v1.2.3 From ce372e869f9f492f3d5aa9a0ae75ed52c61d2d6f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 May 2026 12:29:04 -0700 Subject: net: shaper: reject QUEUE scope handle with missing id net_shaper_parse_handle() does not enforce that the user provides the handle ID. For NODE the ID defaults to UNSPEC for all other cases it defaults to 0. For NETDEV 0 is the only option. For QUEUE defaulting to 0 makes less intuitive sense. Specifically because the behavior should (IMHO) be the same for all cases where there may be more than one ID (QUEUE and NODE). We should either document this as intentional or reject. I picked the latter with no strong conviction. Fixes: 4b623f9f0f59 ("net-shapers: implement NL get operation") Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20260510192904.3987113-11-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/shaper/shaper.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 4ae3ee6764a0..b1c65110f04d 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -477,10 +477,15 @@ static int net_shaper_parse_handle(const struct nlattr *attr, * shaper (any other value). */ id_attr = tb[NET_SHAPER_A_HANDLE_ID]; - if (id_attr) + if (id_attr) { id = nla_get_u32(id_attr); - else if (handle->scope == NET_SHAPER_SCOPE_NODE) + } else if (handle->scope == NET_SHAPER_SCOPE_NODE) { id = NET_SHAPER_ID_UNSPEC; + } else if (handle->scope == NET_SHAPER_SCOPE_QUEUE) { + NL_SET_ERR_ATTR_MISS(info->extack, attr, + NET_SHAPER_A_HANDLE_ID); + return -EINVAL; + } if (id && handle->scope == NET_SHAPER_SCOPE_NETDEV) { NL_SET_ERR_MSG_ATTR(info->extack, id_attr, -- cgit v1.2.3 From 603ab5ea6482c723216b59cb733e8ba248619ee9 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 11 May 2026 21:55:23 -0500 Subject: SMB3.1.1: add missing QUERY_DIR info levels New Infolevels for QUERY_DIR (and QUERY_INFO) levels 78 through 81 are now being used by Windows clients and were added to the documentation. Add defines for them (and correct some typos in documentation). See MS-SMB2 2.2.33 and MS-FSCC 2.4 Signed-off-by: Steve French --- fs/smb/common/fscc.h | 4 ++-- fs/smb/common/smb2pdu.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/smb/common/fscc.h b/fs/smb/common/fscc.h index b4ccddca9256..bc3012cc295d 100644 --- a/fs/smb/common/fscc.h +++ b/fs/smb/common/fscc.h @@ -260,12 +260,12 @@ typedef struct { char FileName[]; } __packed FILE_DIRECTORY_INFO; /* level 0x101 FF resp data */ -/* See MS-FSCC 2.4.13 */ +/* See MS-FSCC 2.4.14 */ struct smb2_file_eof_info { /* encoding of request for level 10 */ __le64 EndOfFile; /* new end of file value */ } __packed; /* level 20 Set */ -/* See MS-FSCC 2.4.14 */ +/* See MS-FSCC 2.4.15 */ typedef struct { __le32 NextEntryOffset; __u32 FileIndex; diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index a4b12eb8df81..aeb0a245c532 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1566,6 +1566,10 @@ struct validate_negotiate_info_rsp { #define FILE_STANDARD_LINK_INFORMATION 54 #define FILE_ID_INFORMATION 59 #define FILE_ID_EXTD_DIRECTORY_INFORMATION 60 /* also for QUERY_DIR */ +#define FileId64ExtdDirectoryInformation 78 /* also for QUERY_DIR */ +#define FileId64ExtdBothDirectoryInformation 79 /* also for QUERY_DIR */ +#define FileIdAllExtdDirectoryInformation 80 /* also for QUERY_DIR */ +#define FileIdAllExtdBothDirectoryInformation 81 /* also for QUERY_DIR */ /* Used for Query Info and Find File POSIX Info for SMB3.1.1 and SMB1 */ #define SMB_FIND_FILE_POSIX_INFO 0x064 -- cgit v1.2.3 From 17ee873dba04d05090dfc5b2b9e08cfc8e4f147f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 3 Mar 2026 10:48:54 +0100 Subject: HID: hid-sjoy: race between init and usage The driver uses an initial IO to set the device to a default state. That initialization is currently being done after the device node has been created. That means that the single buffer used for output can be altered while IO is in progress. Move the intialization before announcement to user space. Fixes: fac733f029251 ("HID: force feedback support for SmartJoy PLUS PS2/USB adapter") Signed-off-by: Oliver Neukum Signed-off-by: Jiri Kosina --- drivers/hid/hid-sjoy.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/hid/hid-sjoy.c b/drivers/hid/hid-sjoy.c index bab93d71b760..963c45113204 100644 --- a/drivers/hid/hid-sjoy.c +++ b/drivers/hid/hid-sjoy.c @@ -91,17 +91,17 @@ static int sjoyff_init(struct hid_device *hid) set_bit(FF_RUMBLE, dev->ffbit); - error = input_ff_create_memless(dev, sjoyff, hid_sjoyff_play); - if (error) { - kfree(sjoyff); - return error; - } - sjoyff->report = report; sjoyff->report->field[0]->value[0] = 0x01; sjoyff->report->field[0]->value[1] = 0x00; sjoyff->report->field[0]->value[2] = 0x00; hid_hw_request(hid, sjoyff->report, HID_REQ_SET_REPORT); + + error = input_ff_create_memless(dev, sjoyff, hid_sjoyff_play); + if (error) { + kfree(sjoyff); + return error; + } } hid_info(hid, "Force feedback for SmartJoy PLUS PS2/USB adapter\n"); -- cgit v1.2.3 From 637ad3a56a3b889527d1dacea6fea2a8bd648140 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Mon, 11 May 2026 22:51:51 +0100 Subject: block: don't overwrite bip_vcnt in bio_integrity_copy_user() bio_integrity_add_page() already sets bip_vcnt to 1 for the bounce segment. Overwriting it with nr_vecs breaks bip_vcnt <= bip_max_vcnt on WRITE (bip_max_vcnt is 1), so the gap-merge checks in block/blk.h read past the bip_vec[] flex array. On READ the read is in bounds but lands on a saved user bvec instead of the bounce. The line was added for split propagation, but bio_integrity_clone() doesn't copy bip_vcnt and BIP_CLONE_FLAGS excludes BIP_COPY_USER. Fixes: 3991657ae707 ("block: set bip_vcnt correctly") Signed-off-by: David Carlier Reviewed-by: Christoph Hellwig Link: https://patch.msgid.link/20260511215151.346228-1-devnexen@gmail.com Signed-off-by: Jens Axboe --- block/bio-integrity.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index e54c6e06e1cb..78e678d4104b 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -308,7 +308,6 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec, } bip->bip_flags |= BIP_COPY_USER; - bip->bip_vcnt = nr_vecs; return 0; free_bip: bio_integrity_free(bio); -- cgit v1.2.3 From 2c6e6a18a37b905cb584eb0dda3ae482162a81ca Mon Sep 17 00:00:00 2001 From: Casey Chen Date: Mon, 11 May 2026 15:22:30 -0600 Subject: block: recompute nr_integrity_segments in blk_insert_cloned_request blk_insert_cloned_request() already recomputes nr_phys_segments against the bottom queue, because "the queue settings related to segment counting may differ from the original queue." The exact same reasoning applies to integrity segments: a stacked driver's underlying queue can have tighter virt_boundary_mask, seg_boundary_mask, or max_segment_size than the top queue, in which case blk_rq_count_integrity_sg() against the bottom queue produces a different count than the cached rq->nr_integrity_segments inherited from the source request by blk_rq_prep_clone(). When the cached count is lower than the bottom queue's actual count, blk_rq_map_integrity_sg() trips BUG_ON(segments > rq->nr_integrity_segments); on dispatch. The same families of stacked setups that motivated the existing nr_phys_segments recompute -- dm-multipath fanning out to nvme-rdma in particular -- can produce this. Mirror the nr_phys_segments handling: when the request carries integrity, recompute nr_integrity_segments against the bottom queue and reject the request if it exceeds the bottom queue's max_integrity_segments. blk_rq_count_integrity_sg() and queue_max_integrity_segments() are both already available via , which blk-mq.c includes. This closes a latent gap in the stacking contract and brings the integrity-segment accounting in line with the existing phys-segment accounting. Fixes: 76c313f658d2 ("blk-integrity: improved sg segment mapping") Signed-off-by: Casey Chen Reviewed-by: Christoph Hellwig Link: https://patch.msgid.link/20260511212230.27511-1-cachen@purestorage.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index 4c5c16cce4f8..d0c37daf568f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3307,6 +3307,25 @@ blk_status_t blk_insert_cloned_request(struct request *rq) return BLK_STS_IOERR; } + /* + * Integrity segment counting depends on the same queue limits + * (virt_boundary_mask, seg_boundary_mask, max_segment_size) that + * vary across stacked queues, so recompute against the bottom + * queue just like nr_phys_segments above. + */ + if (blk_integrity_rq(rq) && rq->bio) { + unsigned short max_int_segs = queue_max_integrity_segments(q); + + rq->nr_integrity_segments = + blk_rq_count_integrity_sg(rq->q, rq->bio); + if (rq->nr_integrity_segments > max_int_segs) { + printk(KERN_ERR "%s: over max integrity segments limit. (%u > %u)\n", + __func__, rq->nr_integrity_segments, + max_int_segs); + return BLK_STS_IOERR; + } + } + if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq))) return BLK_STS_IOERR; -- cgit v1.2.3 From 5f90dcfa8dc32a488581b78e575cdd7808ba5c78 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 5 Feb 2026 09:11:31 +0100 Subject: HID: quirks: really enable the intended work around for appledisplay Commit c7fabe4ad921 ("HID: quirks: work around VID/PID conflict for appledisplay") intends to add a quirk for kernels built with Apple Cinema Display support, but it refers to the non-existing config option CONFIG_APPLEDISPLAY, whereas the config option for Apple Cinema Display support is named CONFIG_USB_APPLEDISPLAY. Refer to the intended config option CONFIG_USB_APPLEDISPLAY in the ifdef directive. Fixes: c7fabe4ad921 ("HID: quirks: work around VID/PID conflict for appledisplay") Signed-off-by: Lukas Bulwahn Signed-off-by: Jiri Kosina --- drivers/hid/hid-quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 9e88c9d6c6dc..512049963978 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -235,7 +235,7 @@ static const struct hid_device_id hid_quirks[] = { * used as a driver. See hid_scan_report(). */ static const struct hid_device_id hid_have_special_driver[] = { -#if IS_ENABLED(CONFIG_APPLEDISPLAY) +#if IS_ENABLED(CONFIG_USB_APPLEDISPLAY) { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, 0x9218) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, 0x9219) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, 0x921c) }, -- cgit v1.2.3 From 8582792cf23b3d94674d4d838f7cde9a28d0fcaf Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Tue, 12 May 2026 01:09:29 -0400 Subject: block: bio-integrity: Fix null-ptr-deref in bio_integrity_map_user() pin_user_pages_fast() can partially succeed and return the number of pages that were actually pinned. However, the bio_integrity_map_user() does not handle this partial pinning. This leads to a general protection fault since bvec_from_pages() dereferences an unpinned page address, which is 0. To fix this, add a check to verify that all requested memory is pinned. If partial pinning occurs, unpin the memory and return -EFAULT. Kernel Oops: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] CPU: 0 UID: 0 PID: 1061 Comm: nvme-passthroug Not tainted 7.0.0-11783-g90957f9314e8-dirty #16 PREEMPT(lazy) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014 RIP: 0010:bio_integrity_map_user.cold+0x1b0/0x9d6 Fixes: 492c5d455969 ("block: bio-integrity: directly map user buffers") Acked-by: Chao Shi Acked-by: Weidong Zhu Acked-by: Dave Tian Signed-off-by: Sungwoo Kim Tested-by: Shin'ichiro Kawasaki Link: https://github.com/linux-blktests/blktests/pull/244 Link: https://patch.msgid.link/20260512050929.541397-2-iam@sung-woo.kim Signed-off-by: Jens Axboe --- block/bio-integrity.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 78e678d4104b..e796de1a749e 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -402,6 +402,24 @@ int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter) if (unlikely(ret < 0)) goto free_bvec; + /* + * Handle partial pinning. This can happen when pin_user_pages_fast() + * returns fewer pages than requested. + */ + if (user_backed_iter(iter) && unlikely(ret != bytes)) { + if (ret > 0) { + int npinned = DIV_ROUND_UP(offset + ret, PAGE_SIZE); + int i; + + for (i = 0; i < npinned; i++) + unpin_user_page(pages[i]); + } + if (pages != stack_pages) + kvfree(pages); + ret = -EFAULT; + goto free_bvec; + } + nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset, &is_p2p); if (pages != stack_pages) -- cgit v1.2.3 From 11f152c0acaa924d93339000cb785d34e003aff5 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 21 Apr 2026 16:27:01 +0200 Subject: xen/arm: Replace __ASSEMBLY__ with __ASSEMBLER__ in interface.h While the GCC and Clang compilers already define __ASSEMBLER__ automatically when compiling assembly code, __ASSEMBLY__ is a macro that only gets defined by the Makefiles in the kernel. This can be very confusing when switching between userspace and kernelspace coding, or when dealing with uapi headers that rather should use __ASSEMBLER__ instead. So let's standardize now on the __ASSEMBLER__ macro that is provided by the compilers. Signed-off-by: Thomas Huth Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <20260421142701.548978-1-thuth@redhat.com> --- include/xen/arm/interface.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xen/arm/interface.h b/include/xen/arm/interface.h index c3eada2642aa..61360b89da40 100644 --- a/include/xen/arm/interface.h +++ b/include/xen/arm/interface.h @@ -30,7 +30,7 @@ #define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* Explicitly size integers that represent pfns in the interface with * Xen so that we can have one ABI that works for 32 and 64 bit guests. * Note that this means that the xen_pfn_t type may be capable of -- cgit v1.2.3 From f097d246677b03db814c5862f368cea341b76a00 Mon Sep 17 00:00:00 2001 From: Florian Pradines Date: Sat, 9 May 2026 09:45:17 +0000 Subject: HID: mcp2221: fix OOB write in mcp2221_raw_event() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mcp2221_raw_event() copies device-supplied data into mcp->rxbuf at offset rxbuf_idx without checking that the copy fits within the destination buffer. A device responding with up to 60 bytes to a small I2C/SMBus read can overflow the buffer. Add a rxbuf_size field to struct mcp2221, set it alongside rxbuf in mcp_i2c_smbus_read(), and check rxbuf_idx + data[3] <= rxbuf_size before the memcpy. Reported-by: Benoît Sevens Signed-off-by: Florian Pradines Signed-off-by: Jiri Kosina --- drivers/hid/hid-mcp2221.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index be80970ab48e..e4ddd8e9293b 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -128,6 +128,7 @@ struct mcp2221 { u8 *rxbuf; u8 txbuf[64]; int rxbuf_idx; + int rxbuf_size; int status; u8 cur_i2c_clk_div; struct gpio_chip *gc; @@ -330,12 +331,14 @@ static int mcp_i2c_smbus_read(struct mcp2221 *mcp, mcp->txbuf[3] = (u8)(msg->addr << 1); total_len = msg->len; mcp->rxbuf = msg->buf; + mcp->rxbuf_size = msg->len; } else { mcp->txbuf[1] = smbus_len; mcp->txbuf[2] = 0; mcp->txbuf[3] = (u8)(smbus_addr << 1); total_len = smbus_len; mcp->rxbuf = smbus_buf; + mcp->rxbuf_size = smbus_len; } ret = mcp_send_data_req_status(mcp, mcp->txbuf, 4); @@ -919,6 +922,10 @@ static int mcp2221_raw_event(struct hid_device *hdev, mcp->status = -EINVAL; break; } + if (mcp->rxbuf_idx + data[3] > mcp->rxbuf_size) { + mcp->status = -EINVAL; + break; + } buf = mcp->rxbuf; memcpy(&buf[mcp->rxbuf_idx], &data[4], data[3]); mcp->rxbuf_idx = mcp->rxbuf_idx + data[3]; -- cgit v1.2.3 From d93ba918a185aca2594da63e92fdc5495b559c0f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 16 Apr 2026 14:16:54 +0100 Subject: HID: magicmouse: Prevent out-of-bounds (OOB) read during DOUBLE_REPORT_ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is currently possible for a malicious or misconfigured USB device to cause an out-of-bounds (OOB) read when submitting reports using DOUBLE_REPORT_ID by specifying a large report length and providing a smaller one. Let's prevent that by comparing the specified report length with the actual size of the data read in from userspace. If the actual data length ends up being smaller than specified, we'll politely warn the user and prevent any further processing. Signed-off-by: Lee Jones Reviewed-by: Günther Noack Signed-off-by: Jiri Kosina --- drivers/hid/hid-magicmouse.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index e70bd3dc07ab..802a3479e24b 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -390,6 +390,10 @@ static int magicmouse_raw_event(struct hid_device *hdev, struct input_dev *input = msc->input; int x = 0, y = 0, ii, clicks = 0, npoints; + /* Protect against zero sized recursive calls from DOUBLE_REPORT_ID */ + if (size < 1) + return 0; + switch (data[0]) { case TRACKPAD_REPORT_ID: case TRACKPAD2_BT_REPORT_ID: @@ -490,6 +494,18 @@ static int magicmouse_raw_event(struct hid_device *hdev, /* Sometimes the trackpad sends two touch reports in one * packet. */ + + /* Ensure that we have at least 2 elements (report type and size) */ + if (size < 2) + return 0; + + if (size < data[1] + 2) { + hid_warn(hdev, + "received report length (%d) was smaller than specified (%d)", + size, data[1] + 2); + return 0; + } + magicmouse_raw_event(hdev, report, data + 2, data[1]); magicmouse_raw_event(hdev, report, data + 2 + data[1], size - 2 - data[1]); -- cgit v1.2.3 From cac61b58a3b6340c52afa06bb15eac033158db2f Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Fri, 17 Apr 2026 08:47:02 -0700 Subject: HID: playstation: Clamp num_touch_reports A device would never lie about the number of touch reports would it? If it does the loop in dualshock4_parse_report will read off the end of the touch_reports array, up to about 2 KiB for the maximum number of 256 loop iteraions. The data that is read is emitted via evdev if the DS4_TOUCH_POINT_INACTIVE bit happens to be set. Protect against this by clamping the num_touch_reports value provided by the device to the maximum size of the touch_reports array. Fixes: 752038248808 ("HID: playstation: add DualShock4 touchpad support.") Cc: stable@vger.kernel.org Reported-by: Xingyu Jin Signed-off-by: T.J. Mercier Signed-off-by: Jiri Kosina --- drivers/hid/hid-playstation.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index c43caac20b61..e48537331675 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -2384,7 +2384,8 @@ static int dualshock4_parse_report(struct ps_device *ps_dev, struct hid_report * } ds4_report = &usb->common; - num_touch_reports = usb->num_touch_reports; + num_touch_reports = min_t(u8, usb->num_touch_reports, + ARRAY_SIZE(usb->touch_reports)); touch_reports = usb->touch_reports; } else if (hdev->bus == BUS_BLUETOOTH && report->id == DS4_INPUT_REPORT_BT && size == DS4_INPUT_REPORT_BT_SIZE) { @@ -2404,7 +2405,8 @@ static int dualshock4_parse_report(struct ps_device *ps_dev, struct hid_report * } ds4_report = &bt->common; - num_touch_reports = bt->num_touch_reports; + num_touch_reports = min_t(u8, bt->num_touch_reports, + ARRAY_SIZE(bt->touch_reports)); touch_reports = bt->touch_reports; } else if (hdev->bus == BUS_BLUETOOTH && report->id == DS4_INPUT_REPORT_BT_MINIMAL && -- cgit v1.2.3 From 4db2af929279c799b5653a39eb0795c72baffca4 Mon Sep 17 00:00:00 2001 From: Sangyun Kim Date: Mon, 20 Apr 2026 14:13:17 +0900 Subject: HID: appletb-kbd: fix UAF in inactivity-timer cleanup path Commit 38224c472a03 ("HID: appletb-kbd: fix slab use-after-free bug in appletb_kbd_probe") added timer_delete_sync(&kbd->inactivity_timer) to both the probe close_hw error path and appletb_kbd_remove(), but the way it was wired in left the inactivity timer reachable during driver tear-down via two distinct windows. Window A -- put_device() before timer_delete_sync(): put_device(&kbd->backlight_dev->dev); timer_delete_sync(&kbd->inactivity_timer); The inactivity_timer softirq reads kbd->backlight_dev and calls backlight_device_set_brightness() -> mutex_lock(&ops_lock). If a concurrent hid_appletb_bl unbind drops the last devm reference between these two calls, the backlight_device is freed and the mutex_lock() touches freed memory. Window B -- backlight cleanup before hid_hw_stop(): if (kbd->backlight_dev) { timer_delete_sync(...); put_device(...); } hid_hw_close(hdev); hid_hw_stop(hdev); Even after Window A is closed, hid_hw_close()/hid_hw_stop() still run afterwards, so a late ".event" callback from the HID core (USB URB completion on real Apple hardware) can arrive after timer_delete_sync() drained the softirq but before put_device() drops the reference. That callback reaches reset_inactivity_timer(), which calls mod_timer() and re-arms the timer. The freshly re-armed timer can then fire on the about-to-be-freed backlight_device. Both windows produce the same KASAN slab-use-after-free: BUG: KASAN: slab-use-after-free in __mutex_lock+0x1aab/0x21c0 Read of size 8 at addr ffff88803ee9a108 by task swapper/0/0 Call Trace: __mutex_lock backlight_device_set_brightness appletb_inactivity_timer call_timer_fn run_timer_softirq handle_softirqs Allocated by task N: devm_backlight_device_register appletb_bl_probe Freed by task M: (concurrent hid_appletb_bl unbind path) Close both windows at once by reworking the tear-down in appletb_kbd_remove() and in the probe close_hw error path so that 1) hid_hw_close()/hid_hw_stop() run before the backlight cleanup, guaranteeing no further .event callback can fire and re-arm the timer, and 2) inside the "if (kbd->backlight_dev)" block, timer_delete_sync() runs before put_device(), so the softirq is drained before the final reference is dropped. Fixes: 38224c472a03 ("HID: appletb-kbd: fix slab use-after-free bug in appletb_kbd_probe") Cc: stable@vger.kernel.org Signed-off-by: Sangyun Kim Signed-off-by: Jiri Kosina --- drivers/hid/hid-appletb-kbd.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c index 0fdc0968b9ef..8feac9e3589b 100644 --- a/drivers/hid/hid-appletb-kbd.c +++ b/drivers/hid/hid-appletb-kbd.c @@ -440,13 +440,13 @@ static int appletb_kbd_probe(struct hid_device *hdev, const struct hid_device_id unregister_handler: input_unregister_handler(&kbd->inp_handler); close_hw: - if (kbd->backlight_dev) { - put_device(&kbd->backlight_dev->dev); - timer_delete_sync(&kbd->inactivity_timer); - } hid_hw_close(hdev); stop_hw: hid_hw_stop(hdev); + if (kbd->backlight_dev) { + timer_delete_sync(&kbd->inactivity_timer); + put_device(&kbd->backlight_dev->dev); + } return ret; } @@ -457,13 +457,13 @@ static void appletb_kbd_remove(struct hid_device *hdev) appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF); input_unregister_handler(&kbd->inp_handler); + hid_hw_close(hdev); + hid_hw_stop(hdev); + if (kbd->backlight_dev) { - put_device(&kbd->backlight_dev->dev); timer_delete_sync(&kbd->inactivity_timer); + put_device(&kbd->backlight_dev->dev); } - - hid_hw_close(hdev); - hid_hw_stop(hdev); } static int appletb_kbd_suspend(struct hid_device *hdev, pm_message_t msg) -- cgit v1.2.3 From 1654e53349d4e657b331de354313461f401f5063 Mon Sep 17 00:00:00 2001 From: Sangyun Kim Date: Mon, 20 Apr 2026 14:13:18 +0900 Subject: HID: appletb-kbd: run inactivity autodim from workqueues The autodim code in hid-appletb-kbd takes backlight_device->ops_lock via backlight_device_set_brightness() -> mutex_lock() from two different atomic contexts: * appletb_inactivity_timer() is a struct timer_list callback, so it runs in softirq context. Every expiry triggers BUG: sleeping function called from invalid context at kernel/locking/mutex.c:591 Call Trace: __might_resched __mutex_lock backlight_device_set_brightness appletb_inactivity_timer call_timer_fn run_timer_softirq * reset_inactivity_timer() is called from appletb_kbd_hid_event() and appletb_kbd_inp_event(). On real USB hardware these run in softirq/IRQ context (URB completion and input-event dispatch). When the Touch Bar has already been dimmed or turned off, the reset path calls backlight_device_set_brightness() directly to restore brightness, producing the same warning. Both call sites hit the same mutex_lock()-from-atomic bug. Fix them together by moving the blocking work onto the system workqueue: * Convert the inactivity timer from struct timer_list to struct delayed_work; the callback (appletb_inactivity_work) now runs in process context where mutex_lock() is legal. * Add a dedicated struct work_struct restore_brightness_work and have reset_inactivity_timer() schedule it instead of calling backlight_device_set_brightness() directly. Cancel both works synchronously during driver tear-down alongside the existing backlight reference drop. The semantics are unchanged (same delays, same state transitions on dim, turn-off and user activity); only the execution context of the sleeping call changes. The timer field and callback are renamed to match their new type; reset_inactivity_timer() keeps its name because it is invoked from input event paths that read naturally as "reset the inactivity timer". Fixes: 93a0fc489481 ("HID: hid-appletb-kbd: add support for automatic brightness control while using the touchbar") Cc: stable@vger.kernel.org Signed-off-by: Sangyun Kim Signed-off-by: Jiri Kosina --- drivers/hid/hid-appletb-kbd.c | 44 +++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c index 8feac9e3589b..462010a75899 100644 --- a/drivers/hid/hid-appletb-kbd.c +++ b/drivers/hid/hid-appletb-kbd.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include "hid-ids.h" @@ -62,7 +62,8 @@ struct appletb_kbd { struct input_handle kbd_handle; struct input_handle tpd_handle; struct backlight_device *backlight_dev; - struct timer_list inactivity_timer; + struct delayed_work inactivity_work; + struct work_struct restore_brightness_work; bool has_dimmed; bool has_turned_off; u8 saved_mode; @@ -164,16 +165,18 @@ static int appletb_tb_key_to_slot(unsigned int code) } } -static void appletb_inactivity_timer(struct timer_list *t) +static void appletb_inactivity_work(struct work_struct *work) { - struct appletb_kbd *kbd = timer_container_of(kbd, t, inactivity_timer); + struct appletb_kbd *kbd = container_of(to_delayed_work(work), + struct appletb_kbd, + inactivity_work); if (kbd->backlight_dev && appletb_tb_autodim) { if (!kbd->has_dimmed) { backlight_device_set_brightness(kbd->backlight_dev, 1); kbd->has_dimmed = true; - mod_timer(&kbd->inactivity_timer, - jiffies + secs_to_jiffies(appletb_tb_idle_timeout)); + mod_delayed_work(system_wq, &kbd->inactivity_work, + secs_to_jiffies(appletb_tb_idle_timeout)); } else if (!kbd->has_turned_off) { backlight_device_set_brightness(kbd->backlight_dev, 0); kbd->has_turned_off = true; @@ -181,16 +184,25 @@ static void appletb_inactivity_timer(struct timer_list *t) } } +static void appletb_restore_brightness_work(struct work_struct *work) +{ + struct appletb_kbd *kbd = container_of(work, struct appletb_kbd, + restore_brightness_work); + + if (kbd->backlight_dev) + backlight_device_set_brightness(kbd->backlight_dev, 2); +} + static void reset_inactivity_timer(struct appletb_kbd *kbd) { if (kbd->backlight_dev && appletb_tb_autodim) { if (kbd->has_dimmed || kbd->has_turned_off) { - backlight_device_set_brightness(kbd->backlight_dev, 2); kbd->has_dimmed = false; kbd->has_turned_off = false; + schedule_work(&kbd->restore_brightness_work); } - mod_timer(&kbd->inactivity_timer, - jiffies + secs_to_jiffies(appletb_tb_dim_timeout)); + mod_delayed_work(system_wq, &kbd->inactivity_work, + secs_to_jiffies(appletb_tb_dim_timeout)); } } @@ -408,9 +420,11 @@ static int appletb_kbd_probe(struct hid_device *hdev, const struct hid_device_id dev_err_probe(dev, -ENODEV, "Failed to get backlight device\n"); } else { backlight_device_set_brightness(kbd->backlight_dev, 2); - timer_setup(&kbd->inactivity_timer, appletb_inactivity_timer, 0); - mod_timer(&kbd->inactivity_timer, - jiffies + secs_to_jiffies(appletb_tb_dim_timeout)); + INIT_DELAYED_WORK(&kbd->inactivity_work, appletb_inactivity_work); + INIT_WORK(&kbd->restore_brightness_work, + appletb_restore_brightness_work); + mod_delayed_work(system_wq, &kbd->inactivity_work, + secs_to_jiffies(appletb_tb_dim_timeout)); } kbd->inp_handler.event = appletb_kbd_inp_event; @@ -444,7 +458,8 @@ close_hw: stop_hw: hid_hw_stop(hdev); if (kbd->backlight_dev) { - timer_delete_sync(&kbd->inactivity_timer); + cancel_delayed_work_sync(&kbd->inactivity_work); + cancel_work_sync(&kbd->restore_brightness_work); put_device(&kbd->backlight_dev->dev); } return ret; @@ -461,7 +476,8 @@ static void appletb_kbd_remove(struct hid_device *hdev) hid_hw_stop(hdev); if (kbd->backlight_dev) { - timer_delete_sync(&kbd->inactivity_timer); + cancel_delayed_work_sync(&kbd->inactivity_work); + cancel_work_sync(&kbd->restore_brightness_work); put_device(&kbd->backlight_dev->dev); } } -- cgit v1.2.3 From b08665fe80fab0956e64741c07d9bbcec635c34d Mon Sep 17 00:00:00 2001 From: Myeonghun Pak Date: Fri, 24 Apr 2026 21:50:41 +0900 Subject: HID: google: hammer: stop hardware on devres action failure hammer_probe() starts the HID hardware before registering the devres action that stops it. If devm_add_action() fails, probe returns an error with the hardware still started because the cleanup action was never registered and the driver's remove callback is not called after a failed probe. Use devm_add_action_or_reset() so the stop action runs immediately on registration failure while preserving the existing devres-managed cleanup path for later probe failures and remove. Signed-off-by: Myeonghun Pak Signed-off-by: Jiri Kosina --- drivers/hid/hid-google-hammer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 1af477e58480..c99c3c0d442e 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -496,7 +496,7 @@ static int hammer_probe(struct hid_device *hdev, if (error) return error; - error = devm_add_action(&hdev->dev, hammer_stop, hdev); + error = devm_add_action_or_reset(&hdev->dev, hammer_stop, hdev); if (error) return error; -- cgit v1.2.3 From 2c85c61d1332e1e16f020d76951baf167dcb6f7a Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 4 May 2026 10:47:22 +0200 Subject: HID: pass the buffer size to hid_report_raw_event commit 0a3fe972a7cb ("HID: core: Mitigate potential OOB by removing bogus memset()") enforced the provided data to be at least the size of the declared buffer in the report descriptor to prevent a buffer overflow. However, we can try to be smarter by providing both the buffer size and the data size, meaning that hid_report_raw_event() can make better decision whether we should plaining reject the buffer (buffer overflow attempt) or if we can safely memset it to 0 and pass it to the rest of the stack. Fixes: 0a3fe972a7cb ("HID: core: Mitigate potential OOB by removing bogus memset()") Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Acked-by: Johan Hovold Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jiri Kosina --- drivers/hid/bpf/hid_bpf_dispatch.c | 6 ++++-- drivers/hid/hid-core.c | 42 +++++++++++++++++++++++++------------- drivers/hid/hid-gfrm.c | 4 ++-- drivers/hid/hid-logitech-hidpp.c | 2 +- drivers/hid/hid-multitouch.c | 2 +- drivers/hid/hid-primax.c | 2 +- drivers/hid/hid-vivaldi-common.c | 2 +- drivers/hid/wacom_sys.c | 6 +++--- drivers/staging/greybus/hid.c | 2 +- include/linux/hid.h | 4 ++-- include/linux/hid_bpf.h | 14 ++++++++----- 11 files changed, 53 insertions(+), 33 deletions(-) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 50c7b45c59e3..d0130658091b 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -24,7 +24,8 @@ EXPORT_SYMBOL(hid_ops); u8 * dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type, u8 *data, - u32 *size, int interrupt, u64 source, bool from_bpf) + size_t *buf_size, u32 *size, int interrupt, u64 source, + bool from_bpf) { struct hid_bpf_ctx_kern ctx_kern = { .ctx = { @@ -74,6 +75,7 @@ dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type *size = ret; } + *buf_size = ctx_kern.ctx.allocated_size; return ctx_kern.data; } EXPORT_SYMBOL_GPL(dispatch_hid_bpf_device_event); @@ -505,7 +507,7 @@ __hid_bpf_input_report(struct hid_bpf_ctx *ctx, enum hid_report_type type, u8 *b if (ret) return ret; - return hid_ops->hid_input_report(ctx->hid, type, buf, size, 0, (u64)(long)ctx, true, + return hid_ops->hid_input_report(ctx->hid, type, buf, size, size, 0, (u64)(long)ctx, true, lock_already_taken); } diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 61afec5915ec..a806820df7e5 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2033,24 +2033,32 @@ int __hid_request(struct hid_device *hid, struct hid_report *report, } EXPORT_SYMBOL_GPL(__hid_request); -int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, - int interrupt) +int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data, + size_t bufsize, u32 size, int interrupt) { struct hid_report_enum *report_enum = hid->report_enum + type; struct hid_report *report; struct hid_driver *hdrv; int max_buffer_size = HID_MAX_BUFFER_SIZE; u32 rsize, csize = size; + size_t bsize = bufsize; u8 *cdata = data; int ret = 0; report = hid_get_report(report_enum, data); if (!report) - goto out; + return 0; + + if (unlikely(bsize < csize)) { + hid_warn_ratelimited(hid, "Event data for report %d is incorrect (%d vs %ld)\n", + report->id, csize, bsize); + return -EINVAL; + } if (report_enum->numbered) { cdata++; csize--; + bsize--; } rsize = hid_compute_report_size(report); @@ -2063,11 +2071,16 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 * else if (rsize > max_buffer_size) rsize = max_buffer_size; + if (bsize < rsize) { + hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %ld)\n", + report->id, rsize, bsize); + return -EINVAL; + } + if (csize < rsize) { - hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %d)\n", - report->id, rsize, csize); - ret = -EINVAL; - goto out; + dbg_hid("report %d is too short, (%d < %d)\n", report->id, + csize, rsize); + memset(cdata + csize, 0, rsize - csize); } if ((hid->claimed & HID_CLAIMED_HIDDEV) && hid->hiddev_report_event) @@ -2075,7 +2088,7 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 * if (hid->claimed & HID_CLAIMED_HIDRAW) { ret = hidraw_report_event(hid, data, size); if (ret) - goto out; + return ret; } if (hid->claimed != HID_CLAIMED_HIDRAW && report->maxfield) { @@ -2087,15 +2100,15 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 * if (hid->claimed & HID_CLAIMED_INPUT) hidinput_report_event(hid, report); -out: + return ret; } EXPORT_SYMBOL_GPL(hid_report_raw_event); static int __hid_input_report(struct hid_device *hid, enum hid_report_type type, - u8 *data, u32 size, int interrupt, u64 source, bool from_bpf, - bool lock_already_taken) + u8 *data, size_t bufsize, u32 size, int interrupt, u64 source, + bool from_bpf, bool lock_already_taken) { struct hid_report_enum *report_enum; struct hid_driver *hdrv; @@ -2120,7 +2133,8 @@ static int __hid_input_report(struct hid_device *hid, enum hid_report_type type, report_enum = hid->report_enum + type; hdrv = hid->driver; - data = dispatch_hid_bpf_device_event(hid, type, data, &size, interrupt, source, from_bpf); + data = dispatch_hid_bpf_device_event(hid, type, data, &bufsize, &size, interrupt, + source, from_bpf); if (IS_ERR(data)) { ret = PTR_ERR(data); goto unlock; @@ -2149,7 +2163,7 @@ static int __hid_input_report(struct hid_device *hid, enum hid_report_type type, goto unlock; } - ret = hid_report_raw_event(hid, type, data, size, interrupt); + ret = hid_report_raw_event(hid, type, data, bufsize, size, interrupt); unlock: if (!lock_already_taken) @@ -2171,7 +2185,7 @@ unlock: int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt) { - return __hid_input_report(hid, type, data, size, interrupt, 0, + return __hid_input_report(hid, type, data, size, size, interrupt, 0, false, /* from_bpf */ false /* lock_already_taken */); } diff --git a/drivers/hid/hid-gfrm.c b/drivers/hid/hid-gfrm.c index 699186ff2349..d2a56bf92b41 100644 --- a/drivers/hid/hid-gfrm.c +++ b/drivers/hid/hid-gfrm.c @@ -66,7 +66,7 @@ static int gfrm_raw_event(struct hid_device *hdev, struct hid_report *report, switch (data[1]) { case GFRM100_SEARCH_KEY_DOWN: ret = hid_report_raw_event(hdev, HID_INPUT_REPORT, search_key_dn, - sizeof(search_key_dn), 1); + sizeof(search_key_dn), sizeof(search_key_dn), 1); break; case GFRM100_SEARCH_KEY_AUDIO_DATA: @@ -74,7 +74,7 @@ static int gfrm_raw_event(struct hid_device *hdev, struct hid_report *report, case GFRM100_SEARCH_KEY_UP: ret = hid_report_raw_event(hdev, HID_INPUT_REPORT, search_key_up, - sizeof(search_key_up), 1); + sizeof(search_key_up), sizeof(search_key_up), 1); break; default: diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index b1330d23bd2d..b3ff9265377b 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -3673,7 +3673,7 @@ static int hidpp10_consumer_keys_raw_event(struct hidpp_device *hidpp, memcpy(&consumer_report[1], &data[3], 4); /* We are called from atomic context */ hid_report_raw_event(hidpp->hid_dev, HID_INPUT_REPORT, - consumer_report, 5, 1); + consumer_report, sizeof(consumer_report), 5, 1); return 1; } diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index e82a3c4e5b44..eeab0b6e32cc 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -533,7 +533,7 @@ static void mt_get_feature(struct hid_device *hdev, struct hid_report *report) } ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, buf, - size, 0); + size, size, 0); if (ret) dev_warn(&hdev->dev, "failed to report feature\n"); } diff --git a/drivers/hid/hid-primax.c b/drivers/hid/hid-primax.c index e44d79dff8de..8db054280afb 100644 --- a/drivers/hid/hid-primax.c +++ b/drivers/hid/hid-primax.c @@ -44,7 +44,7 @@ static int px_raw_event(struct hid_device *hid, struct hid_report *report, data[0] |= (1 << (data[idx] - 0xE0)); data[idx] = 0; } - hid_report_raw_event(hid, HID_INPUT_REPORT, data, size, 0); + hid_report_raw_event(hid, HID_INPUT_REPORT, data, size, size, 0); return 1; default: /* unknown report */ diff --git a/drivers/hid/hid-vivaldi-common.c b/drivers/hid/hid-vivaldi-common.c index bf734055d4b6..b12bb5cc091a 100644 --- a/drivers/hid/hid-vivaldi-common.c +++ b/drivers/hid/hid-vivaldi-common.c @@ -85,7 +85,7 @@ void vivaldi_feature_mapping(struct hid_device *hdev, } ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, report_data, - report_len, 0); + report_len, report_len, 0); if (ret) { dev_warn(&hdev->dev, "failed to report feature %d\n", field->report->id); diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 0d1c6d90fe21..a32320b351e3 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -90,7 +90,7 @@ static void wacom_wac_queue_flush(struct hid_device *hdev, kfree(buf); continue; } - err = hid_report_raw_event(hdev, HID_INPUT_REPORT, buf, size, false); + err = hid_report_raw_event(hdev, HID_INPUT_REPORT, buf, size, size, false); if (err) { hid_warn(hdev, "%s: unable to flush event due to error %d\n", __func__, err); @@ -334,7 +334,7 @@ static void wacom_feature_mapping(struct hid_device *hdev, data, n, WAC_CMD_RETRIES); if (ret == n && features->type == HID_GENERIC) { ret = hid_report_raw_event(hdev, - HID_FEATURE_REPORT, data, n, 0); + HID_FEATURE_REPORT, data, n, n, 0); } else if (ret == 2 && features->type != HID_GENERIC) { features->touch_max = data[1]; } else { @@ -395,7 +395,7 @@ static void wacom_feature_mapping(struct hid_device *hdev, data, n, WAC_CMD_RETRIES); if (ret == n) { ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, - data, n, 0); + data, n, n, 0); } else { hid_warn(hdev, "%s: could not retrieve sensor offsets\n", __func__); diff --git a/drivers/staging/greybus/hid.c b/drivers/staging/greybus/hid.c index 1f58c907c036..f1f9f6fbc00e 100644 --- a/drivers/staging/greybus/hid.c +++ b/drivers/staging/greybus/hid.c @@ -201,7 +201,7 @@ static void gb_hid_init_report(struct gb_hid *ghid, struct hid_report *report) * we just need to setup the input fields, so using * hid_report_raw_event is safe. */ - hid_report_raw_event(ghid->hid, report->type, ghid->inbuf, size, 1); + hid_report_raw_event(ghid->hid, report->type, ghid->inbuf, ghid->bufsize, size, 1); } static void gb_hid_init_reports(struct gb_hid *ghid) diff --git a/include/linux/hid.h b/include/linux/hid.h index 442a80d79e89..ac432a2ef415 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1298,8 +1298,8 @@ static inline u32 hid_report_len(struct hid_report *report) return DIV_ROUND_UP(report->size, 8) + (report->id > 0); } -int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, - int interrupt); +int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data, + size_t bufsize, u32 size, int interrupt); /* HID quirks API */ unsigned long hid_lookup_quirk(const struct hid_device *hdev); diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index a2e47dbcf82c..19fffa4574a4 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -72,8 +72,8 @@ struct hid_ops { int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len, u64 source, bool from_bpf); int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type, - u8 *data, u32 size, int interrupt, u64 source, bool from_bpf, - bool lock_already_taken); + u8 *data, size_t bufsize, u32 size, int interrupt, u64 source, + bool from_bpf, bool lock_already_taken); struct module *owner; const struct bus_type *bus_type; }; @@ -200,7 +200,8 @@ struct hid_bpf { #ifdef CONFIG_HID_BPF u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, - u32 *size, int interrupt, u64 source, bool from_bpf); + size_t *buf_size, u32 *size, int interrupt, u64 source, + bool from_bpf); int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, u32 size, enum hid_report_type rtype, @@ -215,8 +216,11 @@ int hid_bpf_device_init(struct hid_device *hid); const u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned int *size); #else /* CONFIG_HID_BPF */ static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, - u8 *data, u32 *size, int interrupt, - u64 source, bool from_bpf) { return data; } + u8 *data, size_t *buf_size, u32 *size, + int interrupt, u64 source, bool from_bpf) +{ + return data; +} static inline int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, unsigned char reportnum, u8 *buf, u32 size, enum hid_report_type rtype, -- cgit v1.2.3 From 206342541fc887ae919774a43942dc883161fece Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 4 May 2026 10:47:23 +0200 Subject: HID: core: introduce hid_safe_input_report() hid_input_report() is used in too many places to have a commit that doesn't cross subsystem borders. Instead of changing the API, introduce a new one when things matters in the transport layers: - usbhid - i2chid This effectively revert to the old behavior for those two transport layers. Fixes: 0a3fe972a7cb ("HID: core: Mitigate potential OOB by removing bogus memset()") Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 25 +++++++++++++++++++++++++ drivers/hid/i2c-hid/i2c-hid-core.c | 7 ++++--- drivers/hid/usbhid/hid-core.c | 11 ++++++----- include/linux/hid.h | 2 ++ 4 files changed, 37 insertions(+), 8 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index a806820df7e5..b3596851c719 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2181,6 +2181,7 @@ unlock: * @interrupt: distinguish between interrupt and control transfers * * This is data entry for lower layers. + * Legacy, please use hid_safe_input_report() instead. */ int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt) @@ -2191,6 +2192,30 @@ int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data } EXPORT_SYMBOL_GPL(hid_input_report); +/** + * hid_safe_input_report - report data from lower layer (usb, bt...) + * + * @hid: hid device + * @type: HID report type (HID_*_REPORT) + * @data: report contents + * @bufsize: allocated size of the data buffer + * @size: useful size of data parameter + * @interrupt: distinguish between interrupt and control transfers + * + * This is data entry for lower layers. + * Please use this function instead of the non safe version because we provide + * here the size of the buffer, allowing hid-core to make smarter decisions + * regarding the incoming buffer. + */ +int hid_safe_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, + size_t bufsize, u32 size, int interrupt) +{ + return __hid_input_report(hid, type, data, bufsize, size, interrupt, 0, + false, /* from_bpf */ + false /* lock_already_taken */); +} +EXPORT_SYMBOL_GPL(hid_safe_input_report); + bool hid_match_one_id(const struct hid_device *hdev, const struct hid_device_id *id) { diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 5a183af3d5c6..e0a302544cef 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -574,9 +574,10 @@ static void i2c_hid_get_input(struct i2c_hid *ihid) if (ihid->hid->group != HID_GROUP_RMI) pm_wakeup_event(&ihid->client->dev, 0); - hid_input_report(ihid->hid, HID_INPUT_REPORT, - ihid->inbuf + sizeof(__le16), - ret_size - sizeof(__le16), 1); + hid_safe_input_report(ihid->hid, HID_INPUT_REPORT, + ihid->inbuf + sizeof(__le16), + ihid->bufsize - sizeof(__le16), + ret_size - sizeof(__le16), 1); } return; diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index fbbfc0f60829..5af93b9b1fb5 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -283,9 +283,9 @@ static void hid_irq_in(struct urb *urb) break; usbhid_mark_busy(usbhid); if (!test_bit(HID_RESUME_RUNNING, &usbhid->iofl)) { - hid_input_report(urb->context, HID_INPUT_REPORT, - urb->transfer_buffer, - urb->actual_length, 1); + hid_safe_input_report(urb->context, HID_INPUT_REPORT, + urb->transfer_buffer, urb->transfer_buffer_length, + urb->actual_length, 1); /* * autosuspend refused while keys are pressed * because most keyboards don't wake up when @@ -482,9 +482,10 @@ static void hid_ctrl(struct urb *urb) switch (status) { case 0: /* success */ if (usbhid->ctrl[usbhid->ctrltail].dir == USB_DIR_IN) - hid_input_report(urb->context, + hid_safe_input_report(urb->context, usbhid->ctrl[usbhid->ctrltail].report->type, - urb->transfer_buffer, urb->actual_length, 0); + urb->transfer_buffer, urb->transfer_buffer_length, + urb->actual_length, 0); break; case -ESHUTDOWN: /* unplug */ unplug = 1; diff --git a/include/linux/hid.h b/include/linux/hid.h index ac432a2ef415..bfb9859f391e 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1030,6 +1030,8 @@ struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_ty int hid_set_field(struct hid_field *, unsigned, __s32); int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt); +int hid_safe_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, + size_t bufsize, u32 size, int interrupt); struct hid_field *hidinput_get_led_field(struct hid_device *hid); unsigned int hidinput_count_leds(struct hid_device *hid); __s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code); -- cgit v1.2.3 From a991aa5e89365ba1959fae6847fd288125b209e5 Mon Sep 17 00:00:00 2001 From: Xu Rao Date: Sat, 9 May 2026 16:21:32 +0800 Subject: HID: i2c-hid: add reset quirk for BLTP7853 touchpad The BLTP7853 I2C HID touchpad may fail to probe after reboot or reprobe because reset completion is not signalled to the host. The driver then waits for the reset-complete interrupt until it times out and the device probe fails: i2c_hid i2c-BLTP7853:00: failed to reset device. i2c_hid i2c-BLTP7853:00: can't add hid device: -61 i2c_hid: probe of i2c-BLTP7853:00 failed with error -61 Add I2C_HID_QUIRK_NO_IRQ_AFTER_RESET for the device so i2c-hid does not wait for a reset interrupt that may never arrive. Signed-off-by: Xu Rao Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/i2c-hid/i2c-hid-core.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 8cfec7dced66..4657d96fb083 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -277,6 +277,9 @@ #define USB_VENDOR_ID_BIGBEN 0x146b #define USB_DEVICE_ID_BIGBEN_PS3OFMINIPAD 0x0902 +#define I2C_VENDOR_ID_BLTP 0x36b6 +#define I2C_PRODUCT_ID_BLTP7853 0xc001 + #define USB_VENDOR_ID_BTC 0x046e #define USB_DEVICE_ID_BTC_EMPREX_REMOTE 0x5578 #define USB_DEVICE_ID_BTC_EMPREX_REMOTE_2 0x5577 diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index e0a302544cef..3adb16366e93 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -149,6 +149,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_BOGUS_IRQ }, { I2C_VENDOR_ID_GOODIX, I2C_DEVICE_ID_GOODIX_0D42, I2C_HID_QUIRK_DELAY_WAKEUP_AFTER_RESUME }, + { I2C_VENDOR_ID_BLTP, I2C_PRODUCT_ID_BLTP7853, + I2C_HID_QUIRK_NO_IRQ_AFTER_RESET }, { 0, 0 } }; -- cgit v1.2.3 From 48d1677779ad6816978ad4a4f7588aec5ec960fe Mon Sep 17 00:00:00 2001 From: Tomasz Pakuła Date: Sun, 10 May 2026 14:23:52 +0200 Subject: HID: pidff: Fix integer overflow in pidff_rescale MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rescaling values close to the max (U16_MAX) temporarily creates values that exceed the s32 range. This caused value overflow in case when, for example, a periodic effect phase was higer than 180 degrees. In turn, rescale function could return values outised of the logical range of the HID field. Fix by using 64 bit signed integer to store the value during calculation but still return only 32 bit integer. Closes: https://github.com/JacKeTUs/universal-pidff/issues/116 Fixes: 224ee88fe395 ("Input: add force feedback driver for PID devices") Cc: stable@vger.kernel.org Signed-off-by: Tomasz Pakuła Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hid-pidff.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hid/usbhid/hid-pidff.c b/drivers/hid/usbhid/hid-pidff.c index aee8a4443305..c45f182d0448 100644 --- a/drivers/hid/usbhid/hid-pidff.c +++ b/drivers/hid/usbhid/hid-pidff.c @@ -11,6 +11,7 @@ #include "hid-pidff.h" #include #include +#include #include #include #include @@ -326,8 +327,10 @@ static s32 pidff_clamp(s32 i, struct hid_field *field) */ static int pidff_rescale(int i, int max, struct hid_field *field) { - return i * (field->logical_maximum - field->logical_minimum) / max + - field->logical_minimum; + /* 64 bits needed for big values during rescale */ + s64 result = field->logical_maximum - field->logical_minimum; + + return div_s64(result * i, max) + field->logical_minimum; } /* -- cgit v1.2.3 From 64ffa2e5e02ff54b23221d0282155f37283fabea Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Tue, 12 May 2026 13:22:44 +0000 Subject: HID: logitech-hidpp: Add support for newer Bluetooth keyboards Add product IDs (PIDs) for several newer Logitech Bluetooth keyboards to the hidpp_devices matching table, enabling full HID++ support for them. The added keyboards are: - Logitech Signature K650 & B2B - Logitech Pebble Keys 2 K380S - Logitech Casa Pop-Up Desk & B2B - Logitech Wave Keys & B2B - Logitech Signature Slim K950 & B2B - Logitech MX Keys S & B2B - Logitech Keys-To-Go 2 - Logitech Pop Icon Keys - Logitech MX Keys Mini & B2B - Logitech Signature Slim Solar+ K980 B2B - Logitech Bluetooth Keyboard K250/K251 - Logitech Signature Comfort K880 & B2B Signed-off-by: Alain Michaud Reviewed-by: Olivier Gay Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index b3ff9265377b..ccbf28869a96 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4685,6 +4685,44 @@ static const struct hid_device_id hidpp_devices[] = { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb391) }, { /* MX Master 4 mouse over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb042) }, + { /* Logitech Signature K650 over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb36f) }, + { /* Logitech Signature K650 B2B over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb370) }, + { /* Logitech Pebble Keys 2 K380S over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb377) }, + { /* Logitech Casa Pop-Up Desk over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb371) }, + { /* Logitech Casa Pop-Up Desk B2B over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb374) }, + { /* Logitech Wave Keys over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb383) }, + { /* Logitech Wave Keys B2B over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb384) }, + { /* Logitech Signature Slim K950 over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb386) }, + { /* Logitech Signature Slim K950 B2B over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb388) }, + { /* Logitech MX Keys S over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb378) }, + { /* Logitech MX Keys S B2B over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb380) }, + { /* Logitech Keys-To-Go 2 over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb38c) }, + { /* Logitech Pop Icon Keys over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb38f) }, + { /* Logitech MX Keys Mini over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb369) }, + { /* Logitech MX Keys Mini B2B over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb36e) }, + { /* Logitech Signature Slim Solar+ K980 B2B over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb394) }, + { /* Logitech Bluetooth Keyboard K250/K251 over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb397) }, + { /* Logitech Signature Comfort K880 over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb39c) }, + { /* Logitech Signature Comfort K880 B2B over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb39d) }, {} }; -- cgit v1.2.3 From aa16b2bc0f02709919e2435f531406531e5bcc69 Mon Sep 17 00:00:00 2001 From: Zack McKevitt Date: Thu, 30 Apr 2026 12:39:01 -0700 Subject: accel/qaic: Add overflow check to remap_pfn_range during mmap The call to remap_pfn_range in qaic_gem_object_mmap is susceptible to (re)mapping beyond the VMA if the BO is too large. This can cause use after free issues when munmap() unmaps only the VMA region and not the additional mappings. To prevent this, check the remaining size of the VMA before remapping and truncate the remapped length if sg->length is too large. Reported-by: Lukas Maar Fixes: ff13be830333 ("accel/qaic: Add datapath") Reviewed-by: Karol Wachowski Signed-off-by: Zack McKevitt Reviewed-by: Jeff Hugo [jhugo: fix braces from checkpatch --strict] Signed-off-by: Jeff Hugo Link: https://patch.msgid.link/20260430193858.1178641-1-zachary.mckevitt@oss.qualcomm.com --- drivers/accel/qaic/qaic_data.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index 95300c2f7d8a..1e4c579d2725 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -606,8 +606,11 @@ static const struct vm_operations_struct drm_vm_ops = { static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { struct qaic_bo *bo = to_qaic_bo(obj); + unsigned long remap_start; unsigned long offset = 0; + unsigned long remap_end; struct scatterlist *sg; + unsigned long length; int ret = 0; if (drm_gem_is_imported(obj)) @@ -615,11 +618,27 @@ static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struc for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) { if (sg_page(sg)) { + /* if sg is too large for the VMA, so truncate it to fit */ + if (check_add_overflow(vma->vm_start, offset, &remap_start)) + return -EINVAL; + if (check_add_overflow(remap_start, sg->length, &remap_end)) + return -EINVAL; + + if (remap_end > vma->vm_end) { + if (check_sub_overflow(vma->vm_end, remap_start, &length)) + return -EINVAL; + } else { + length = sg->length; + } + + if (length == 0) + goto out; + ret = remap_pfn_range(vma, vma->vm_start + offset, page_to_pfn(sg_page(sg)), - sg->length, vma->vm_page_prot); + length, vma->vm_page_prot); if (ret) goto out; - offset += sg->length; + offset += length; } } -- cgit v1.2.3 From b7cdd59de5ae8062d2cb0121c429a271eb70daec Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 18:25:17 +0200 Subject: ACPI: PAD: xen: Check ACPI_COMPANION() against NULL Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the Xen variant of the ACPI processor aggregator device (PAD) driver. Fixes: 112b2f978afe ("ACPI: PAD: xen: Convert to a platform driver") Signed-off-by: Rafael J. Wysocki Acked-by: Juergen Gross Link: https://patch.msgid.link/3427762.aeNJFYEL58@rafael.j.wysocki --- drivers/xen/xen-acpi-pad.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xen-acpi-pad.c b/drivers/xen/xen-acpi-pad.c index 75a39862c1df..5b98e0e93807 100644 --- a/drivers/xen/xen-acpi-pad.c +++ b/drivers/xen/xen-acpi-pad.c @@ -110,9 +110,13 @@ static void acpi_pad_notify(acpi_handle handle, u32 event, static int acpi_pad_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_device *device; acpi_status status; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + strcpy(acpi_device_name(device), ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_PROCESSOR_AGGREGATOR_CLASS); -- cgit v1.2.3 From aed3c3346765e4317bb2ec6ff872e1c952e128ab Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 9 May 2026 11:47:53 +0200 Subject: Documentation: security-bugs: do not systematically Cc the security team With the increase of automated reports, the security team is dealing with way more messages than really needed. The reporting process works well with most teams so there is no need to systematically involve the security team in reports. Let's suggest to keep it for small lists of recipients and new reporters only. This should continue to cover the risk of lost messages while reducing the volume from prolific reporters. Cc: Greg KH Cc: Leon Romanovsky Reviewed-by: Leon Romanovsky Reviewed-by: Greg Kroah-Hartman Signed-off-by: Willy Tarreau Signed-off-by: Jonathan Corbet Message-ID: <20260509094755.2838-2-w@1wt.eu> --- Documentation/process/security-bugs.rst | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst index 27b028e85861..6dc525858125 100644 --- a/Documentation/process/security-bugs.rst +++ b/Documentation/process/security-bugs.rst @@ -148,7 +148,15 @@ run additional tests. Reports where the reporter does not respond promptly or cannot effectively discuss their findings may be abandoned if the communication does not quickly improve. -The report must be sent to maintainers, with the security team in ``Cc:``. +The report must be sent to maintainers. If there are two or fewer +recipients in your message, you must also always Cc: the Linux kernel +security team who will ensure the message is delivered to the proper +people, and will be able to assist small maintainer teams with processes +they may not be familiar with. For larger teams, Cc: the Linux kernel +security team for your first few reports or when seeking specific help, +such as when resending a message which got no response within a week. +Once you have become comfortable with the process for a few reports, it is +no longer necessary to Cc: the security list when sending to large teams. The Linux kernel security team can be contacted by email at . This is a private list of security officers who will help verify the bug report and assist developers working on a fix. -- cgit v1.2.3 From a03ef333fbd6cd861c8457c3d055ee3643a9baad Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 9 May 2026 11:47:54 +0200 Subject: Documentation: security-bugs: explain what is and is not a security bug The use of automated tools to find bugs in random locations of the kernel induces a raise of security reports even if most of them should just be reported as regular bugs. This patch is an attempt at drawing a line between what qualifies as a security bug and what does not, hoping to improve the situation and ease decision on the reporter's side. It defers the enumeration to a new file, threat-model.rst, that tries to enumerate various classes of issues that are and are not security bugs. This should permit to more easily update this file for various subsystem-specific rules without having to revisit the security bug reporting guide. Cc: Greg KH Cc: Leon Romanovsky Suggested-by: Leon Romanovsky Suggested-by: Greg KH Reviewed-by: Leon Romanovsky Reviewed-by: Shuah Khan Signed-off-by: Willy Tarreau Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jonathan Corbet Message-ID: <20260509094755.2838-3-w@1wt.eu> --- Documentation/process/index.rst | 1 + Documentation/process/security-bugs.rst | 38 ++++- Documentation/process/threat-model.rst | 236 ++++++++++++++++++++++++++++++++ 3 files changed, 274 insertions(+), 1 deletion(-) create mode 100644 Documentation/process/threat-model.rst diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst index dbd6ea16aca7..aa7c959a52b8 100644 --- a/Documentation/process/index.rst +++ b/Documentation/process/index.rst @@ -86,6 +86,7 @@ regressions and security problems. debugging/index handling-regressions security-bugs + threat-model cve embargoed-hardware-issues diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst index 6dc525858125..54260dbfc64d 100644 --- a/Documentation/process/security-bugs.rst +++ b/Documentation/process/security-bugs.rst @@ -66,6 +66,42 @@ In addition, the following information are highly desirable: the issue appear. It is useful to share them, as they can be helpful to keep end users protected during the time it takes them to apply the fix. +What qualifies as a security bug +-------------------------------- + +It is important that most bugs are handled publicly so as to involve the widest +possible audience and find the best solution. By nature, bugs that are handled +in closed discussions between a small set of participants are less likely to +produce the best possible fix (e.g., risk of missing valid use cases, limited +testing abilities). + +It turns out that the majority of the bugs reported via the security team are +just regular bugs that have been improperly qualified as security bugs due to +a lack of awareness of the Linux kernel's threat model, as described in +Documentation/process/threat-model.rst, and ought to have been sent through +the normal channels described in Documentation/admin-guide/reporting-issues.rst +instead. + +The security list exists for urgent bugs that grant an attacker a capability +they are not supposed to have on a correctly configured production system, and +can be easily exploited, representing an imminent threat to many users. Before +reporting, consider whether the issue actually crosses a trust boundary on such +a system. + +**If you resorted to AI assistance to identify a bug, you must treat it as +public**. While you may have valid reasons to believe it is not, the security +team's experience shows that bugs discovered this way systematically surface +simultaneously across multiple researchers, often on the same day. In this +case, do not publicly share a reproducer, as this could cause unintended harm; +just mention that one is available and maintainers might ask for it privately +if they need it. + +If you are unsure whether an issue qualifies, err on the side of reporting +privately: the security team would rather triage a borderline report than miss +a real vulnerability. Reporting ordinary bugs to the security list, however, +does not make them move faster and consumes triage capacity that other reports +need. + Identifying contacts -------------------- @@ -74,7 +110,7 @@ affected subsystem's maintainers and Cc: the Linux kernel security team. Do not send it to a public list at this stage, unless you have good reasons to consider the issue as being public or trivial to discover (e.g. result of a widely available automated vulnerability scanning tool that can be repeated by -anyone). +anyone, or use of AI-based tools). If you're sending a report for issues affecting multiple parts in the kernel, even if they're fairly similar issues, please send individual messages (think diff --git a/Documentation/process/threat-model.rst b/Documentation/process/threat-model.rst new file mode 100644 index 000000000000..ecb432390e79 --- /dev/null +++ b/Documentation/process/threat-model.rst @@ -0,0 +1,236 @@ +.. _threatmodel: + +The Linux Kernel threat model +============================= + +There are a lot of assumptions regarding what the kernel does and does not +protect against. These assumptions tend to cause confusion for bug reports +(:doc:`security-related ones ` vs :doc:`non-security ones +<../admin-guide/reporting-issues>`), and can complicate security enforcement +when the responsibilities for some boundaries is not clear between the kernel, +distros, administrators and users. + +This document tries to clarify the responsibilities of the kernel in this +domain. + +The kernel's responsibilities +----------------------------- + +The kernel abstracts access to local hardware resources and to remote systems +in a way that allows multiple local users to get a fair share of the available +resources granted to them, and, when the underlying hardware permits, to assign +a level of confidentiality to their communications and to the data they are +processing or storing. + +The kernel assumes that the underlying hardware behaves according to its +specifications. This includes the integrity of the CPU's instruction set, the +transparency of the branch prediction unit and the cache units, the consistency +of the Memory Management Unit (MMU), the isolation of DMA-capable peripherals +(e.g., via IOMMU), state transitions in controllers, ranges of values read from +registers, the respect of documented hardware limitations, etc. + +When hardware fails to maintain its specified isolation (e.g., CPU bugs, +side-channels, hardware response to unexpected inputs), the kernel will usually +attempt to implement reasonable mitigations. These are best-effort measures +intended to reduce the attack surface or elevate the cost of an attack within +the limits of the hardware's facilities; they do not constitute a +kernel-provided safety guarantee. + +Users always perform their activities under the authority of an administrator +who is able to grant or deny various types of permissions that may affect how +users benefit from available resources, or the level of confidentiality of +their activities. Administrators may also delegate all or part of their own +permissions to some users, particularly via capabilities but not only. All this +is performed via configuration (sysctl, file-system permissions etc). + +The Linux Kernel applies a certain collection of default settings that match +its threat model. Distros have their own threat model and will come with their +own configuration presets, that the administrator may have to adjust to better +suit their expectations (relax or restrict). + +By default, the Linux Kernel guarantees the following protections when running +on common processors featuring privilege levels and memory management units: + +* **User-based isolation**: an unprivileged user may restrict access to their + own data from other unprivileged users running on the same system. This + includes: + + * stored data, via file system permissions + * in-memory data (pages are not accessible by default to other users) + * process activity (ptrace is not permitted to other users) + * inter-process communication (other users may not observe data exchanged via + UNIX domain sockets or other IPC mechanisms). + * network communications within the same or with other systems + +* **Capability-based protection**: + + * users not having the ``CAP_SYS_ADMIN`` capability may not alter the + kernel's configuration, memory nor state, change other users' view of the + file system layout, grant any user capabilities they do not have, nor + affect the system's availability (shutdown, reboot, panic, hang, or making + the system unresponsive via unbounded resource exhaustion). + * users not having the ``CAP_NET_ADMIN`` capability may not alter the network + configuration, intercept nor spoof network communications from other users + nor systems. + * users not having ``CAP_SYS_PTRACE`` may not observe other users' processes + activities. + +When ``CONFIG_USER_NS`` is set, the kernel also permits unprivileged users to +create their own user namespace in which they have all capabilities, but with a +number of restrictions (they may not perform actions that have impacts on the +initial user namespace, such as changing time, loading modules or mounting +block devices). Please refer to ``user_namespaces(7)`` for more details, the +possibilities of user namespaces are not covered in this document. + +The kernel also offers a lot of troubleshooting and debugging facilities, which +can constitute attack vectors when placed in wrong hands. While some of them +are designed to be accessible to regular local users with a low risk (e.g. +kernel logs via ``/proc/kmsg``), some would expose enough information to +represent a risk in most places and the decision to expose them is under the +administrator's responsibility (perf events, traces), and others are not +designed to be accessed by non-privileged users (e.g. debugfs). Access to these +facilities by a user who has been explicitly granted permission by an +administrator does not constitute a security breach. + +Bugs that permit to violate the principles above constitute security breaches. +However, bugs that permit one violation only once another one was already +achieved are only weaknesses. The kernel applies a number of self-protection +measures whose purpose is to avoid crossing a security boundary when certain +classes of bugs are found, but a failure of these extra protections do not +constitute a vulnerability alone. + +What does not constitute a security bug +--------------------------------------- + +In the Linux kernel's threat model, the following classes of problems are +**NOT** considered as Linux Kernel security bugs. However, when it is believed +that the kernel could do better, they should be reported, so that they can be +reviewed and fixed where reasonably possible, but they will be handled as any +regular bug: + +* **Configuration**: + + * outdated kernels and particularly end-of-life branches are out of the scope + of the kernel's threat model: administrators are responsible for keeping + their system up to date. For a bug to qualify as a security bug, it must be + demonstrated that it affects actively maintained versions. + + * build-level: changes to the kernel configuration that are explicitly + documented as lowering the security level (e.g. ``CONFIG_NOMMU``), or + targeted at developers only. + + * OS-level: changes to command line parameters, sysctls, filesystem + permissions, user capabilities, exposure of privileged interfaces, that + explicitly increase exposure by either offering non-default access to + unprivileged users, or reduce the kernel's ability to enforce some + protections or mitigations. Example: write access to procfs or debugfs. + + * issues triggered only when using features intended for development or + debugging (e.g., LOCKDEP, KASAN, FAULT_INJECTION): these features are known + to introduce overhead and potential instability and are not intended for + production use. + + * issues affecting drivers exposed under CONFIG_STAGING, as well as features + marked EXPERIMENTAL in the configuration. + + * loading of explicitly insecure/broken/staging modules, and generally any + using any subsystem marked as experimental or not intended for production + use. + + * running out-of-tree modules or unofficial kernel forks; these should be + reported to the relevant vendor. + +* **Excess of initial privileges**: + + * actions performed by a user already possessing the privileges required to + perform that action or modify that state (e.g. ``CAP_SYS_ADMIN``, + ``CAP_NET_ADMIN``, ``CAP_SYS_RAWIO``, ``CAP_SYS_MODULE`` with no further + boundary being crossed). + + * actions performed in user namespace that do not bypass the restrictions + imposed to the initial user (e.g. ptrace usage, signal delivery, resource + usage, access to FS/device/sysctl/memory, network binding, system/network + configuration etc). + + * anything performed by the root user in the initial namespace (e.g. kernel + oops when writing to a privileged device). + +* **Out of production use**: + + This covers theoretical/probabilistic attacks that rely on laboratory + conditions with zero system noise, or those requiring an unrealistic number + of attempts (e.g., billions of trials) that would be detected by standard + system monitoring long before success, such as: + + * prediction of random numbers that only works in a totally silent + environment (such as IP ID, TCP ports or sequence numbers that can only be + guessed in a lab). + + * activity observation and information leaks based on probabilistic + approaches that are prone to measurement noise and not realistically + reproducible on a production system. + + * issues that can only be triggered by heavy attacks (e.g. brute force) whose + impact on the system makes it unlikely or impossible to remain undetected + before they succeed (e.g. consuming all memory before succeeding). + + * problems seen only under development simulators, emulators, or combinations + that do not exist on real systems at the time of reporting (issues + involving tens of millions of threads, tens of thousands of CPUs, + unrealistic CPU frequencies, RAM sizes or disk capacities, network speeds. + + * issues whose reproduction requires hardware modification or emulation, + including fake USB devices that pretend to be another one. + + * as well as issues that can be triggered at a cost that is orders of + magnitude higher than the expected benefits (e.g. fully functional keyboard + emulator only to retrieve 7 uninitialized bytes in a structure, or + brute-force method involving millions of connection attempts to guess a + port number). + +* **Hardening failures**: + + * ability to bypass some of the kernel's hardening measures with no + demonstrable exploit path (e.g. ASLR bypass, events timing or probing with + no demonstrable consequence). These are just weaknesses, not + vulnerabilities. + + * missing argument checks and failure to report certain errors with no + immediate consequence. + +* **Random information leaks**: + + This concerns information leaks of small data parts that happen to be there + and that cannot be chosen by the attacker, or face access restrictions: + + * structure padding reported by syscalls or other interfaces. + + * identifiers, partial data, non-terminated strings reported in error + messages. + + * Leaks of kernel memory addresses/pointers do not constitute an immediately + exploitable vector and are not security bugs, though they must be reported + and fixed. + +* **Crafted file system images**: + + * bugs triggered by mounting a corrupted or maliciously crafted file system + image are generally not security bugs, as the kernel assumes the underlying + storage media is under the administrator's control, unless the filesystem + driver is specifically documented as being hardened against untrusted media. + + * issues that are resolved, mitigated, or detected by running a filesystem + consistency check (fsck) on the image prior to mounting. + +* **Physical access**: + + Issues that require physical access to the machine, hardware modification, or + the use of specialized hardware (e.g., logic analyzers, DMA-attack tools over + PCI-E/Thunderbolt) are out of scope unless the system is explicitly + configured with technologies meant to defend against such attacks + (e.g. IOMMU). + +* **Functional and performance regressions**: + + Any issue that can be mitigated by setting proper permissions and limits + doesn't qualify as a security bug. -- cgit v1.2.3 From 4bf85afb9f3ecd7c3b5d15a85b0902f8e725cd06 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 9 May 2026 11:47:55 +0200 Subject: Documentation: security-bugs: clarify requirements for AI-assisted reports AI tools are increasingly used to assist in bug discovery. While these tools can identify valid issues, reports that are submitted without manual verification often lack context, contain speculative impact assessments, or include unnecessary formatting. Such reports increase triage effort, waste maintainers' time and may be ignored. Reports where the reporter has verified the issue and the proposed fix typically meet quality standards. This documentation outlines specific requirements for length, formatting, and impact evaluation to reduce the effort needed to deal with these reports. Cc: Greg KH Acked-by: Greg Kroah-Hartman Reviewed-by: Leon Romanovsky Signed-off-by: Willy Tarreau Signed-off-by: Jonathan Corbet Message-ID: <20260509094755.2838-4-w@1wt.eu> --- Documentation/process/security-bugs.rst | 57 +++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst index 54260dbfc64d..f85c65f31f12 100644 --- a/Documentation/process/security-bugs.rst +++ b/Documentation/process/security-bugs.rst @@ -167,6 +167,63 @@ the Linux kernel security team only. Your message will be triaged, and you will receive instructions about whom to contact, if needed. Your message may equally be forwarded as-is to the relevant maintainers. +Responsible use of AI to find bugs +---------------------------------- + +A significant fraction of bug reports submitted to the security team are +actually the result of code reviews assisted by AI tools. While this can be an +efficient means to find bugs in rarely explored areas, it causes an overload on +maintainers, who are sometimes forced to ignore such reports due to their poor +quality or accuracy. As such, reporters must be particularly cautious about a +number of points which tend to make these reports needlessly difficult to +handle: + + * **Length**: AI-generated reports tend to be excessively long, containing + multiple sections and excessive detail. This makes it difficult to spot + important information such as affected files, versions, and impact. Please + ensure that a clear summary of the problem and all critical details are + presented first. Do not require triage engineers to scan multiple pages of + text. Configure your tools to produce concise, human-style reports. + + * **Formatting**: Most AI-generated reports are littered with Markdown tags. + These decorations complicate the search for important information and do + not survive the quoting processes involved in forwarding or replying. + Please **always convert your report to plain text** without any formatting + decorations before sending it. + + * **Impact Evaluation**: Many AI-generated reports lack an understanding of + the kernel's threat model and go to great lengths inventing theoretical + consequences. This adds noise and complicates triage. Please stick to + verifiable facts (e.g., "this bug permits any user to gain CAP_NET_ADMIN") + without enumerating speculative implications. Have your tool read this + documentation as part of the evaluation process. + + * **Reproducer**: AI-based tools are often capable of generating reproducers. + Please always ensure your tool provides one and **test it thoroughly**. If + the reproducer does not work, or if the tool cannot produce one, the + validity of the report should be seriously questioned. Note that since the + report will be posted to a public list, the reproducer should only be + shared upon maintainers' request. + + * **Propose a Fix**: Many AI tools are actually better at writing code than + evaluating it. Please ask your tool to propose a fix and **test it** before + reporting the problem. If the fix cannot be tested because it relies on + rare hardware or almost extinct network protocols, the issue is likely not + a security bug. In any case, if a fix is proposed, it must adhere to + Documentation/process/submitting-patches.rst and include a 'Fixes:' tag + designating the commit that introduced the bug. + +Failure to consider these points exposes your report to the risk of being +ignored. + +Use common sense when evaluating the report. If the affected file has not been +touched for more than one year and is maintained by a single individual, it is +likely that usage has declined and exposed users are virtually non-existent +(e.g., drivers for very old hardware, obsolete filesystems). In such cases, +there is no need to consume a maintainer's time with an unimportant report. If +the issue is clearly trivial and publicly discoverable, you should report it +directly to the public mailing lists. + Sending the report ------------------ -- cgit v1.2.3 From 6f89d96fff65aec1ff12bc566fca0eb1bb59e16e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 18:26:54 +0200 Subject: Input: atlas - check ACPI_COMPANION() against NULL Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the atlas_btns driver. Fixes: b8303880b641 ("Input: atlas - convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/8696590.T7Z3S40VBb@rafael.j.wysocki Signed-off-by: Dmitry Torokhov --- drivers/input/misc/atlas_btns.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/input/misc/atlas_btns.c b/drivers/input/misc/atlas_btns.c index 47b31725e850..835ad45a9d65 100644 --- a/drivers/input/misc/atlas_btns.c +++ b/drivers/input/misc/atlas_btns.c @@ -60,11 +60,15 @@ static acpi_status acpi_atlas_button_handler(u32 function, static int atlas_acpi_button_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_device *device; acpi_status status; int i; int err; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + input_dev = input_allocate_device(); if (!input_dev) { pr_err("unable to allocate input device\n"); -- cgit v1.2.3 From 793d2a057f7e7bc647c5401413f7bf7d4f08b969 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 11 May 2026 21:54:51 +0200 Subject: hwmon: (acpi_power_meter) Check ACPI_COMPANION() against NULL Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the acpi_power_meter hwmon driver. Fixes: afc6c4aedea5 ("hwmon: (acpi_power_meter) Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/5068745.GXAFRqVoOG@rafael.j.wysocki Signed-off-by: Guenter Roeck --- drivers/hwmon/acpi_power_meter.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index be7f702dcde9..0c9b9f4180fb 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -884,10 +884,14 @@ static void acpi_power_meter_notify(acpi_handle handle, u32 event, void *data) static int acpi_power_meter_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); struct acpi_power_meter_resource *resource; + struct acpi_device *device; int res; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + resource = kzalloc_obj(*resource); if (!resource) return -ENOMEM; -- cgit v1.2.3 From f06035ab324e52a845079c2e5f2380fa3cebde9b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 11 May 2026 21:56:14 +0200 Subject: hwmon: (asus_atk0110) Check ACPI_COMPANION() against NULL Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_HANDLE() check against NULL to the asus_atk0110 hwmon driver. Fixes: ee1752590733 ("hwmon: (asus_atk0110) Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/2261594.irdbgypaU6@rafael.j.wysocki Signed-off-by: Guenter Roeck --- drivers/hwmon/asus_atk0110.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c index 5688ff5f7c28..109318b0434d 100644 --- a/drivers/hwmon/asus_atk0110.c +++ b/drivers/hwmon/asus_atk0110.c @@ -1273,15 +1273,20 @@ static int atk_probe(struct platform_device *pdev) struct acpi_buffer buf; union acpi_object *obj; struct atk_data *data; + acpi_handle handle; dev_dbg(&pdev->dev, "adding...\n"); + handle = ACPI_HANDLE(&pdev->dev); + if (!handle) + return -ENODEV; + data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; data->dev = &pdev->dev; - data->atk_handle = ACPI_HANDLE(&pdev->dev); + data->atk_handle = handle; INIT_LIST_HEAD(&data->sensor_list); data->disable_ec = false; -- cgit v1.2.3 From d289478cfc0bcf81c7914200d6abdcb78bd04ded Mon Sep 17 00:00:00 2001 From: Raphael Zimmer Date: Tue, 12 May 2026 09:29:30 +0200 Subject: libceph: handle rbtree insertion error in decode_choose_args() A message of type CEPH_MSG_OSD_MAP contains an OSD map that itself contains a CRUSH map. The received CRUSH map may optionally contain choose_args that get decoded in decode_choose_args(). In this function, num_choose_arg_maps is read from the message, and a corresponding number of crush_choose_arg_maps gets decoded afterwards. Each crush_choose_arg_map has a choose_args_index, which serves as the key when inserting it into the choose_args rbtree of the decoded crush_map. If a (potentially corrupted) message contains two crush_choose_arg_maps with the same index, the assertion in insert_choose_arg_map() triggers a kernel BUG when trying to insert the second crush_choose_arg_map. This patch fixes the issue by switching to the non-asserting rbtree insertion function and rejecting the message if the insertion fails. [ idryomov: changelog ] Cc: stable@vger.kernel.org Signed-off-by: Raphael Zimmer Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- net/ceph/osdmap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 2095e73ccf6c..0752a3808b91 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -392,7 +392,10 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c) goto e_inval; } - insert_choose_arg_map(&c->choose_args, arg_map); + if (!__insert_choose_arg_map(&c->choose_args, arg_map)) { + ret = -EEXIST; + goto fail; + } } return 0; -- cgit v1.2.3 From 28b0a2ab8c82d0bbdeb8013029c67c978ce6e4bf Mon Sep 17 00:00:00 2001 From: Raphael Zimmer Date: Tue, 12 May 2026 18:16:40 +0200 Subject: libceph: Fix potential null-ptr-deref in decode_choose_args() A message of type CEPH_MSG_OSD_MAP contains an OSD map that itself contains a CRUSH map. When decoding this CRUSH map in crush_decode(), an array of max_buckets CRUSH buckets is decoded, where some indices may not refer to actual buckets and are therefore set to NULL. The received CRUSH map may optionally contain choose_args that get decoded in decode_choose_args(). When decoding a crush_choose_arg_map, a series of choose_args for different buckets is decoded, with the bucket_index being read from the incoming message. It is only checked that the bucket index does not exceed max_buckets, but not that it doesn't point to an index with a NULL bucket. If a (potentially corrupted) message contains a crush_choose_arg_map including such a bucket_index, a null pointer dereference may occur in the subsequent processing when attempting to access the bucket with the given index. This patch fixes the issue by extending the affected check. Now, it is only attempted to access the bucket if it is not NULL. Cc: stable@vger.kernel.org Signed-off-by: Raphael Zimmer Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- net/ceph/osdmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 0752a3808b91..8b5b0587a0cf 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -388,7 +388,8 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c) goto fail; if (arg->ids_size && - arg->ids_size != c->buckets[bucket_index]->size) + (!c->buckets[bucket_index] || + arg->ids_size != c->buckets[bucket_index]->size)) goto e_inval; } -- cgit v1.2.3 From e4a640475e43f406fdfd56d370b1f34b0cbbc18d Mon Sep 17 00:00:00 2001 From: Sergio Correia Date: Tue, 12 May 2026 14:28:33 +0100 Subject: audit: fix incorrect inheritable capability in CAPSET records __audit_log_capset() records the effective capability set into the inheritable field due to a copy-paste error. Every CAPSET audit record therefore reports cap_pi (process inheritable) with the value of cap_effective instead of cap_inheritable. This silently corrupts audit data used for compliance and forensic analysis: an attacker who modifies inheritable capabilities to prepare for a privilege-escalating exec would have the change masked in the audit trail. The bug has been present since the original introduction of CAPSET audit records in 2008. Cc: stable@vger.kernel.org Fixes: e68b75a027bb ("When the capset syscall is used it is not possible for audit to record the actual capbilities being added/removed. This patch adds a new record type which emits the target pid and the eff, inh, and perm cap sets.") Reviewed-by: Ricardo Robaina Assisted-by: Claude:claude-opus-4-6 Signed-off-by: Sergio Correia Signed-off-by: Paul Moore --- kernel/auditsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ab54fccba215..abdf8da3be93 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2786,7 +2786,7 @@ void __audit_log_capset(const struct cred *new, const struct cred *old) context->capset.pid = task_tgid_nr(current); context->capset.cap.effective = new->cap_effective; - context->capset.cap.inheritable = new->cap_effective; + context->capset.cap.inheritable = new->cap_inheritable; context->capset.cap.permitted = new->cap_permitted; context->capset.cap.ambient = new->cap_ambient; context->type = AUDIT_CAPSET; -- cgit v1.2.3 From f9e1c1324b4d98d591a6f7568fdebf5cf456dfc2 Mon Sep 17 00:00:00 2001 From: Sergio Correia Date: Tue, 12 May 2026 14:28:59 +0100 Subject: audit: enforce AUDIT_LOCKED for AUDIT_TRIM and AUDIT_MAKE_EQUIV AUDIT_ADD_RULE and AUDIT_DEL_RULE correctly check for AUDIT_LOCKED and return -EPERM, but AUDIT_TRIM and AUDIT_MAKE_EQUIV do not. This allows a process with CAP_AUDIT_CONTROL to modify directory tree watches and equivalence mappings even when the audit configuration has been locked, undermining the purpose of the lock. Add AUDIT_LOCKED checks to both commands. Cc: stable@vger.kernel.org Reviewed-by: Ricardo Robaina Assisted-by: Claude:claude-opus-4-6 Signed-off-by: Sergio Correia Signed-off-by: Paul Moore --- kernel/audit.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/audit.c b/kernel/audit.c index e1d489bc2dff..34dc7cb246ff 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1468,6 +1468,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh, err = audit_list_rules_send(skb, seq); break; case AUDIT_TRIM: + if (audit_enabled == AUDIT_LOCKED) + return -EPERM; audit_trim_trees(); audit_log_common_recv_msg(audit_context(), &ab, AUDIT_CONFIG_CHANGE); @@ -1480,6 +1482,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh, size_t msglen = data_len; char *old, *new; + if (audit_enabled == AUDIT_LOCKED) + return -EPERM; err = -EINVAL; if (msglen < 2 * sizeof(u32)) break; -- cgit v1.2.3 From 577a8d3bae0531f0e5ccfac919cd8192f920a804 Mon Sep 17 00:00:00 2001 From: Aaron Sacks Date: Tue, 12 May 2026 02:07:42 -0400 Subject: KVM: Reject wrapped offset in kvm_reset_dirty_gfn() kvm_reset_dirty_gfn() guards the gfn range with if (!memslot || (offset + __fls(mask)) >= memslot->npages) return; but offset is u64 and the addition is unchecked. The check can be silently bypassed by a u64 wrap. The dirty ring backing those entries is MAP_SHARED at KVM_DIRTY_LOG_PAGE_OFFSET of the vcpu fd, so the VMM can rewrite the slot and offset fields of any entry between when the kernel pushes them and when KVM_RESET_DIRTY_RINGS consumes them. On reset, kvm_dirty_ring_reset() re-reads the values via READ_ONCE() and feeds them straight back into this check; only the flags handshake is treated as the handover, the slot/offset payload is taken on trust. Crafting two entries entry[i].offset = 0xffffffffffffffc1 entry[i+1].offset = 0 makes the coalescing loop in kvm_dirty_ring_reset() compute delta = (s64)(0 - 0xffffffffffffffc1) = 63 which falls in [0, BITS_PER_LONG), so it folds entry[i+1] into the existing mask by setting bit 63. The trailing kvm_reset_dirty_gfn() call then sees offset = 0xffffffffffffffc1 and __fls(mask) = 63; the sum is 0 in u64 and the bounds check passes. That offset propagates into kvm_arch_mmu_enable_log_dirty_pt_masked() unchanged. On the legacy MMU path -- kvm_memslots_have_rmaps() == true, i.e. shadow paging, any VM that has allocated shadow roots, or a write-tracked slot -- it reaches gfn_to_rmap(), which indexes slot->arch.rmap[0][] with a near-U64_MAX gfn. That is an out-of-bounds load of a kvm_rmap_head, followed by a conditional clear of PT_WRITABLE_MASK in whatever the loaded pointer points at. The path is reachable from any process holding /dev/kvm. Range-check offset on its own first, so the addition cannot wrap. memslot->npages is bounded well below U64_MAX, so once offset < npages holds, offset + __fls(mask) (with __fls(mask) < BITS_PER_LONG) stays in range. Fixes: fb04a1eddb1a ("KVM: X86: Implement ring-based dirty memory tracking") Cc: stable@vger.kernel.org Signed-off-by: Aaron Sacks Link: https://patch.msgid.link/20260512060742.1628959-1-contact@xchglabs.com/ Signed-off-by: Paolo Bonzini --- virt/kvm/dirty_ring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c index 02bc6b00d76c..572b854edf74 100644 --- a/virt/kvm/dirty_ring.c +++ b/virt/kvm/dirty_ring.c @@ -63,7 +63,8 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask) memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id); - if (!memslot || (offset + __fls(mask)) >= memslot->npages) + if (!memslot || offset >= memslot->npages || + offset + __fls(mask) >= memslot->npages) return; KVM_MMU_LOCK(kvm); -- cgit v1.2.3 From 2b72f1674e427c56e3772c5ccf785fdda2138820 Mon Sep 17 00:00:00 2001 From: Qiang Ma Date: Tue, 12 May 2026 09:53:13 +0800 Subject: KVM: x86: Fix Xen hypercall tracepoint argument assignment TRACE_EVENT(kvm_xen_hypercall) stores a5 in __entry->a4 instead of __entry->a5. That overwrites the recorded a4 argument and leaves a5 unset in the trace entry. Fix the typo so both arguments are captured correctly. Signed-off-by: Qiang Ma Link: https://patch.msgid.link/20260512015313.1685784-1-maqianga@uniontech.com/ Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index e7fdbe9efc90..0db25bba17f6 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -154,7 +154,7 @@ TRACE_EVENT(kvm_xen_hypercall, __entry->a2 = a2; __entry->a3 = a3; __entry->a4 = a4; - __entry->a4 = a5; + __entry->a5 = a5; ), TP_printk("cpl %d nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx", -- cgit v1.2.3 From 5bd1ddb7911ba7e94b61cf429970963f1b22dd76 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 8 May 2026 14:33:21 -0700 Subject: KVM: nSVM: Never use L0's PAUSE loop exiting while L2 is running Never use L0's (KVM's) PAUSE loop exiting controls while L2 is running, and instead always configure vmcb02 according to L1's exact capabilities and desires. The purpose of intercepting PAUSE after N attempts is to detect when the vCPU may be stuck waiting on a lock, so that KVM can schedule in a different vCPU that may be holding said lock. Barring a very interesting setup, L1 and L2 do not share locks, and it's extremely unlikely that an L1 vCPU would hold a spinlock while running L2. I.e. having a vCPU executing in L1 yield to a vCPU running in L2 will not allow the L1 vCPU to make forward progress, and vice versa. While teaching KVM's "on spin" logic to only yield to other vCPUs in L2 is doable, in all likelihood it would do more harm than good for most setups. KVM has limited visibility into which L2 "vCPUs" belong to the same VM, and thus share a locking domain. And even if L2 vCPUs are in the same VM, KVM has no visilibity into L2 vCPU's that are scheduled out by the L1 hypervisor. Furthermore, KVM doesn't actually steal PAUSE exits from L1. If L1 is intercepting PAUSE, KVM will route PAUSE exits to L1, not L0, as nested_svm_intercept() gives priority to the vmcb12 intercept. As such, overriding the count/threshold fields in vmcb02 with vmcb01's values is nonsensical, as doing so clobbers all the training/learning that has been done in L1. Even worse, if L1 is not intercepting PAUSE, i.e. KVM is handling PAUSE exits, then KVM will adjust the PLE knobs based on L2 behavior, which could very well be detrimental to L1, e.g. due to essentially poisoning L1 PLE training with bad data. And copying the count from vmcb02 to vmcb01 on a nested VM-Exit makes even less sense, because again, the purpose of PLE is to detect spinning vCPUs. Whether or not a vCPU is spinning in L2 at the time of a nested VM-Exit has no relevance as to the behavior of the vCPU when it executes in L1. The only scenarios where any of this actually works is if at least one of KVM or L1 is NOT intercepting PAUSE for the guest. Per the original changelog, those were the only scenarios considered to be supported. Disabling KVM's use of PLE makes it so the VM is always in a "supported" mode. Last, but certainly not least, using KVM's count/threshold instead of the values provided by L1 is a blatant violation of the SVM architecture. Fixes: 74fd41ed16fd ("KVM: x86: nSVM: support PAUSE filtering when L0 doesn't intercept PAUSE") Cc: Maxim Levitsky Tested-by: David Kaplan Signed-off-by: Sean Christopherson Link: https://patch.msgid.link/20260508213321.373309-1-seanjc@google.com/ Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 43 ++++++++++++++----------------------------- arch/x86/kvm/svm/svm.c | 15 +++++++++++++-- 2 files changed, 27 insertions(+), 31 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 961804df5f45..b340dc9991ad 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -160,6 +160,16 @@ void nested_vmcb02_recalc_intercepts(struct vcpu_svm *svm) if (!intercept_smi) vmcb_clr_intercept(&vmcb02->control, INTERCEPT_SMI); + /* + * Intercept PAUSE if and only if L1 wants to. KVM intercepts PAUSE so + * that a vCPU that may be spinning waiting for a lock can be scheduled + * out in favor of the vCPU that holds said lock. KVM doesn't support + * yielding across L2 vCPUs, as KVM has limited visilibity into which + * L2 vCPUs are in the same L2 VM, i.e. may be contending for locks. + */ + if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) + vmcb_clr_intercept(&vmcb02->control, INTERCEPT_PAUSE); + if (nested_vmcb_needs_vls_intercept(svm)) { /* * If the virtual VMLOAD/VMSAVE is not enabled for the L2, @@ -819,7 +829,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; struct vmcb *vmcb01 = svm->vmcb01.ptr; struct kvm_vcpu *vcpu = &svm->vcpu; - u32 pause_count12, pause_thresh12; nested_svm_transition_tlb_flush(vcpu); @@ -947,31 +956,13 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) vmcb02->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE; if (guest_cpu_cap_has(vcpu, X86_FEATURE_PAUSEFILTER)) - pause_count12 = vmcb12_ctrl->pause_filter_count; + vmcb02->control.pause_filter_count = vmcb12_ctrl->pause_filter_count; else - pause_count12 = 0; + vmcb02->control.pause_filter_count = 0; if (guest_cpu_cap_has(vcpu, X86_FEATURE_PFTHRESHOLD)) - pause_thresh12 = vmcb12_ctrl->pause_filter_thresh; + vmcb02->control.pause_filter_thresh = vmcb12_ctrl->pause_filter_thresh; else - pause_thresh12 = 0; - if (kvm_pause_in_guest(svm->vcpu.kvm)) { - /* use guest values since host doesn't intercept PAUSE */ - vmcb02->control.pause_filter_count = pause_count12; - vmcb02->control.pause_filter_thresh = pause_thresh12; - - } else { - /* start from host values otherwise */ - vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count; - vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh; - - /* ... but ensure filtering is disabled if so requested. */ - if (vmcb12_is_intercept(vmcb12_ctrl, INTERCEPT_PAUSE)) { - if (!pause_count12) - vmcb02->control.pause_filter_count = 0; - if (!pause_thresh12) - vmcb02->control.pause_filter_thresh = 0; - } - } + vmcb02->control.pause_filter_thresh = 0; /* * Take ALLOW_LARGER_RAP from vmcb12 even though it should be safe to @@ -1298,12 +1289,6 @@ void nested_svm_vmexit(struct vcpu_svm *svm) /* in case we halted in L2 */ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE); - if (!kvm_pause_in_guest(vcpu->kvm)) { - vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count; - vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS); - - } - /* * Invalidate last_bus_lock_rip unless KVM is still waiting for the * guest to make forward progress before re-enabling bus lock detection. diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e7fdd7a9c280..e02a38da5296 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -913,7 +913,15 @@ static void grow_ple_window(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; int old = control->pause_filter_count; - if (kvm_pause_in_guest(vcpu->kvm)) + /* Adjusting pause_filter_count makes no sense if PLE is disabled. */ + WARN_ON_ONCE(kvm_pause_in_guest(vcpu->kvm)); + + /* + * While running L2, KVM should intercept PAUSE if and only if L1 wants + * to intercept PAUSE, and L1's intercept should take priority, i.e. + * KVM should never handle a PAUSE intercept from L2. + */ + if (WARN_ON_ONCE(is_guest_mode(vcpu))) return; control->pause_filter_count = __grow_ple_window(old, @@ -934,7 +942,10 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; int old = control->pause_filter_count; - if (kvm_pause_in_guest(vcpu->kvm)) + /* Adjusting pause_filter_count makes no sense if PLE is disabled. */ + WARN_ON_ONCE(kvm_pause_in_guest(vcpu->kvm)); + + if (is_guest_mode(vcpu)) return; control->pause_filter_count = -- cgit v1.2.3 From 80f4a7b8ce7513c203562191426e4d4cc635b095 Mon Sep 17 00:00:00 2001 From: Ninad Naik Date: Mon, 11 May 2026 23:13:02 +0530 Subject: Documentation: kvm: update links in the references section of AMD Memory Encryption Replace non-working links in the reference section with the working ones. Signed-off-by: Ninad Naik Link: https://patch.msgid.link/20260511174302.811918-1-ninadnaik07@gmail.com/ Reviewed-by: Liam Merwick Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/x86/amd-memory-encryption.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst index b2395dd4769d..bd04a908a8db 100644 --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst @@ -656,8 +656,8 @@ References See [white-paper]_, [api-spec]_, [amd-apm]_, [kvm-forum]_, and [snp-fw-abi]_ for more info. -.. [white-paper] https://developer.amd.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf -.. [api-spec] https://support.amd.com/TechDocs/55766_SEV-KM_API_Specification.pdf -.. [amd-apm] https://support.amd.com/TechDocs/24593.pdf (section 15.34) +.. [white-paper] https://docs.amd.com/v/u/en-US/memory-encryption-white-paper +.. [api-spec] https://docs.amd.com/v/u/en-US/55766_PUB_3.24_SEV_API +.. [amd-apm] https://docs.amd.com/v/u/en-US/24593_3.44_APM_Vol2 (section 15.34) .. [kvm-forum] https://www.linux-kvm.org/images/7/74/02x08A-Thomas_Lendacky-AMDs_Virtualizatoin_Memory_Encryption_Technology.pdf -.. [snp-fw-abi] https://www.amd.com/system/files/TechDocs/56860.pdf +.. [snp-fw-abi] https://www.amd.com/content/dam/amd/en/documents/developer/56860.pdf -- cgit v1.2.3 From 87c810160ed738cd983e4a65ebe9709927c702c9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 12 May 2026 08:56:34 -0700 Subject: KVM: selftests: Ensure gmem file sizes are multiple of host page size When creating a guest_memfd file and associated memslot to validate shared guest memory, size the file+memslot to the maximum of the host or guest page size. Attempting to allocate a single guest page will fail if the host page size is greater than the guest page size, as KVM requires that the size of memslots and guest_memfd files are a multiple of the host page size. For simplicity, verify the entire file can be shared between guest and host, e.g. instead of trying to validate "partial" mappings. Fixes: 42188667be38 ("KVM: selftests: Add guest_memfd testcase to fault-in on !mmap()'d memory") Reported-by: Zenghui Yu Closes: https://lore.kernel.org/all/0064952b-048c-455d-ad89-e27e5cb82591@linux.dev Signed-off-by: Sean Christopherson Message-ID: <20260512155634.772602-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/guest_memfd_test.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index d6528c6f5e03..253e748c1d4a 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -510,7 +510,12 @@ static void test_guest_memfd_guest(void) "Default VM type should support INIT_SHARED, supported flags = 0x%x", vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); - size = vm->page_size; + /* + * Use the max of the host or guest page size for all operations, as + * KVM requires guest_memfd files and memslots to be sized to multiples + * of the host page size. + */ + size = max_t(size_t, vm->page_size, page_size); fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_INIT_SHARED); vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0); @@ -519,7 +524,7 @@ static void test_guest_memfd_guest(void) memset(mem, 0xaa, size); kvm_munmap(mem, size); - virt_pg_map(vm, gpa, gpa); + virt_map(vm, gpa, gpa, size / vm->page_size); vcpu_args_set(vcpu, 2, gpa, size); vcpu_run(vcpu); -- cgit v1.2.3 From 6b72d0578ca6f77b835d773d7c77c2f872d3e924 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 3 May 2026 23:09:17 +0200 Subject: KVM: x86: use again the flush argument of __link_shadow_page() Except in the case of parentless nested-TDP pages, mmu_page_zap_pte() clears the SPTE but leaves the invalid_list empty. In this case, using kvm_flush_remote_tlbs() as kvm_mmu_remote_flush_or_zap() does is overkill. Avoid flushing the entirety of the remote TLBs unless the invalid_list was populated: instead, use a more efficient gfn-targeting flush (if available) and skip it altogether if the caller guarantees that a TLB flush is not necessary. Based-on: <20260503201029.106481-1-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini Message-ID: <20260503210917.121840-1-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 892246204435..f0144ae8d891 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2526,6 +2526,23 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) __shadow_walk_next(iterator, *iterator->sptep); } +/* + * Note: while normally KVM uses a "bool flush" return value to let + * the caller batch flushes, __link_shadow_page() flushes immediately + * before populating the parent PTE with the new shadow page. The + * typical callers, direct_map() and FNAME(fetch)(), are not going + * to zap more than one huge SPTE anyway. + * + * The only exception, where @flush can be false, is when a huge SPTE + * is replaced with a shadow page SPTE with a fully populated page table, + * which can happen from shadow_mmu_split_huge_page(). In this case, + * no memory is unmapped across the change to the page tables and no + * immediate flush is needed for correctness. + * + * Even in that case, calls to kvm_mmu_commit_zap_page() are not + * batched. Doing so would require adding an invalid_list argument + * all the way down to __walk_slot_rmaps(). + */ static void __link_shadow_page(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, u64 *sptep, struct kvm_mmu_page *sp, bool flush) @@ -2541,8 +2558,10 @@ static void __link_shadow_page(struct kvm *kvm, parent_sp = sptep_to_sp(sptep); WARN_ON_ONCE(parent_sp->role.level == PG_LEVEL_4K); - mmu_page_zap_pte(kvm, parent_sp, sptep, &invalid_list); - kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, true); + if (mmu_page_zap_pte(kvm, parent_sp, sptep, &invalid_list)) + kvm_mmu_commit_zap_page(kvm, &invalid_list); + else if (flush) + kvm_flush_remote_tlbs_sptep(kvm, sptep); } spte = make_nonleaf_spte(sp->spt, sp_ad_disabled(sp)); -- cgit v1.2.3 From 3098c076c83ea2913245cb915cdcba98eb24214c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 6 May 2026 14:35:14 -0700 Subject: KVM: x86: Swap the dst and src operand for MOVNTDQA Swap the MOVNTDQA operands, as MOVNTDQA does NOT in fact have "the same characteristics as 0F E7 (MOVNTDQ)"; MOVNTDQA loads from memory and stores to registers, while MOVNTDQ loads from registers and stores to memory. Per the SDM: MOVNTDQ - Move packed integer values in xmm1 to m128 using non-temporal hint. MOVNTDQA - Move double quadword from m128 to xmm1 using non-temporal hint if WC memory type. Reported-by: Josh Eads Fixes: c57d9bafbd0b ("KVM: x86: Add support for emulating MOVNTDQA") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-ID: <20260506213514.2781948-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c8c6cc0406d6..8013dccb3110 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4481,7 +4481,7 @@ static const struct opcode opcode_map_0f_38[256] = { X16(N), X16(N), /* 0x20 - 0x2f */ X8(N), - X2(N), GP(SrcReg | DstMem | ModRM | Mov | Aligned, &pfx_0f_e7_0f_38_2a), N, N, N, N, N, + X2(N), GP(SrcMem | DstReg | ModRM | Mov | Aligned, &pfx_0f_e7_0f_38_2a), N, N, N, N, N, /* 0x30 - 0x7f */ X16(N), X16(N), X16(N), X16(N), X16(N), /* 0x80 - 0xef */ -- cgit v1.2.3 From 39e25a2100604320e8d9df54c6c31258f7a3df29 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 May 2026 10:30:00 -1000 Subject: sched_ext: Drop NONE early return in scx_disable_and_exit_task() d3e73a0808dd ("sched_ext: Handle SCX_TASK_NONE in disable/switched_from paths") skipped the trailing scx_set_task_sched(p, NULL) on NONE tasks. After scx_fail_parent() parks a task at NONE/sched=parent and the parent is later freed via queue_rcu_work() during root_disable, the preserved p->scx.sched dangles - print_scx_info() from sched_show_task() reads sch->ops.name from freed memory. Drop the early return. __scx_disable_and_exit_task() already short- circuits on NONE and the SUB_INIT block was cleared by scx_fail_parent()'s earlier call, so clearing p->scx.sched is the only work left - and the one thing the path actually needs. v2: Extend the SUB_INIT block comment to note that the flag is only set on the sub-enable path, so it's always clear on the NONE re-entry (Andrea). Fixes: d3e73a0808dd ("sched_ext: Handle SCX_TASK_NONE in disable/switched_from paths") Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 9354da79e162..68120f679178 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3703,22 +3703,14 @@ static void scx_sub_init_cancel_task(struct scx_sched *sch, struct task_struct * static void scx_disable_and_exit_task(struct scx_sched *sch, struct task_struct *p) { - /* - * %NONE means @p is already detached at the SCX level (e.g. handed - * back to the parent by scx_fail_parent() with no init to undo). - * Skip to avoid clobbering scx_task_sched() and writing %NONE again - * on a state that's already %NONE. - */ - if (scx_get_task_state(p) == SCX_TASK_NONE) - return; - __scx_disable_and_exit_task(sch, p); /* * If set, @p exited between __scx_init_task() and scx_enable_task() in * scx_sub_enable() and is initialized for both the associated sched and * its parent. Exit for the child too - scx_enable_task() never ran for - * it, so undo only init_task. + * it, so undo only init_task. The flag is only set on the sub-enable + * path, so it's always clear when @p arrives here in %SCX_TASK_NONE. */ if (p->scx.flags & SCX_TASK_SUB_INIT) { if (!WARN_ON_ONCE(!scx_enabling_sub_sched)) -- cgit v1.2.3 From b273b75b8d677aea06dd06d80b61b3bb06e94680 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 11 May 2026 13:18:19 -1000 Subject: sched_ext: INIT_LIST_HEAD() &sch->all in scx_alloc_and_add_sched() On scx_link_sched() error paths (parent disabled, hash insert failure), &sch->all is never added to scx_sched_all. The cleanup path runs scx_unlink_sched() unconditionally, which calls list_del_rcu(&sch->all) on a list_head that was never initialized triggering a corruption warning. Initialize &sch->all. Fixes: 54be8de4236a ("sched_ext: Factor out scx_link_sched() and scx_unlink_sched()") Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 68120f679178..6d69ba29cfd7 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6635,6 +6635,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops, rcu_assign_pointer(ops->priv, sch); sch->kobj.kset = scx_kset; + INIT_LIST_HEAD(&sch->all); #ifdef CONFIG_EXT_SUB_SCHED char *buf = kzalloc(PATH_MAX, GFP_KERNEL); -- cgit v1.2.3 From cceb874eee46fe4b3d3c6c496f19125d9a3a9a8f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 11 May 2026 13:18:23 -1000 Subject: sched_ext: Defer sub_kset base put to scx_sched_free_rcu_work scx_sub_enable_workfn() pins parent->kobj before dropping scx_sched_lock, but that does not pin parent->sub_kset. Concurrent disable can kset_unregister and free sub_kset before scx_alloc_and_add_sched() dereferences it. Split sub_kset teardown: kobject_del() at disable keeps sysfs removal; defer kobject_put() to scx_sched_free_rcu_work so the memory survives. A racing child sees state_in_sysfs=0 with valid memory, sysfs_create_dir() fails, and the existing exit_kind gate in scx_link_sched() turns it away with -ENOENT. Fixes: 411d3ef1a705 ("sched_ext: Unregister sub_kset on scheduler disable") Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 6d69ba29cfd7..23f7b3f63b09 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4821,6 +4821,8 @@ static void scx_sched_free_rcu_work(struct work_struct *work) kfree(sch->cgrp_path); if (sch_cgroup(sch)) cgroup_put(sch_cgroup(sch)); + if (sch->sub_kset) + kobject_put(&sch->sub_kset->kobj); #endif /* CONFIG_EXT_SUB_SCHED */ for_each_possible_cpu(cpu) { @@ -5861,7 +5863,7 @@ static void scx_sub_disable(struct scx_sched *sch) if (sch->ops.exit) SCX_CALL_OP(sch, exit, NULL, sch->exit_info); if (sch->sub_kset) - kset_unregister(sch->sub_kset); + kobject_del(&sch->sub_kset->kobj); kobject_del(&sch->kobj); } #else /* CONFIG_EXT_SUB_SCHED */ @@ -5995,7 +5997,7 @@ static void scx_root_disable(struct scx_sched *sch) */ #ifdef CONFIG_EXT_SUB_SCHED if (sch->sub_kset) - kset_unregister(sch->sub_kset); + kobject_del(&sch->sub_kset->kobj); #endif kobject_del(&sch->kobj); -- cgit v1.2.3 From 2c308cf34284420963607d677d576a2b4124d8bd Mon Sep 17 00:00:00 2001 From: Zoran Ilievski Date: Mon, 11 May 2026 08:40:02 +0200 Subject: net: atlantic: preserve PCI wake-from-D3 on shutdown when WOL enabled The shutdown handler aq_pci_shutdown() unconditionally calls pci_wake_from_d3(pdev, false), clearing the PCI PME_En bit even when wake-on-LAN has been configured. While aq_nic_shutdown() correctly programs the NIC firmware via aq_nic_set_power() to listen for magic packets, the PCI subsystem will not propagate the resulting PME wake event from D3, so the system never wakes after poweroff. WOL from suspend (S3) is unaffected because aq_suspend_common() does not touch pci_wake_from_d3() and relies on the PM core's wake configuration via device_may_wakeup(). This affects all atlantic-supported NICs (AQC107/108/111/112/113); users have reported that WOL works if the atlantic driver is never loaded, but breaks once it has run its shutdown path. Pass the configured WOL state to pci_wake_from_d3() instead of a literal false, so the PCI PME_En bit is preserved when the user has armed WOL via ethtool. Fixes: 90869ddfefeb ("net: aquantia: Implement pci shutdown callback") Cc: stable@vger.kernel.org Signed-off-by: Zoran Ilievski Reviewed-by: Sukhdeep Singh Link: https://patch.msgid.link/20260511064002.1857-1-goodboy@rexbytes.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index e9e38af680c3..39e1b606a75a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -371,7 +371,7 @@ static void aq_pci_shutdown(struct pci_dev *pdev) pci_disable_device(pdev); if (system_state == SYSTEM_POWER_OFF) { - pci_wake_from_d3(pdev, false); + pci_wake_from_d3(pdev, self->aq_hw->aq_nic_cfg->wol); pci_set_power_state(pdev, PCI_D3hot); } } -- cgit v1.2.3 From e3adf69f8eb121a9128c2b0029efd050d3649153 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 9 May 2026 22:50:46 +0100 Subject: net: ethtool: phy: avoid NULL deref when PHY driver is unbound phydev->drv can become NULL while the phy_device is still attached to its net_device, namely after the PHY driver is unbound via sysfs: echo > /sys/bus/mdio_bus/drivers//unbind phy_remove() clears phydev->drv but doesn't call phy_detach(), so the phy_device stays in the link topology xarray and ethnl_req_get_phydev() still hands it back. ETHTOOL_MSG_PHY_GET then oopses on: rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL); drvname is already treated as optional by phy_reply_size(), phy_fill_reply() and phy_cleanup_data(), so just skip the allocation when there is no driver bound. Fixes: 9dd2ad5e92b9 ("net: ethtool: phy: Convert the PHY_GET command to generic phy dump") Cc: stable@vger.kernel.org # 6.13.x Signed-off-by: David Carlier Reviewed-by: Maxime Chevallier Tested-by: Maxime Chevallier Link: https://patch.msgid.link/20260509215046.107157-1-devnexen@gmail.com Signed-off-by: Jakub Kicinski --- net/ethtool/phy.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index f76d94d848d6..ddc6eab701ed 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -94,10 +94,12 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info, if (!rep_data->name) return -ENOMEM; - rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL); - if (!rep_data->drvname) { - ret = -ENOMEM; - goto err_free_name; + if (phydev->drv) { + rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL); + if (!rep_data->drvname) { + ret = -ENOMEM; + goto err_free_name; + } } rep_data->upstream_type = pdn->upstream_type; -- cgit v1.2.3 From f9e2342046ef1560d35bcd4a4b1197648ffd151d Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 9 May 2026 20:23:58 +0800 Subject: net: atm: fix skb leak in sigd_send() default branch The default branch in sigd_send() calls sock_put() and returns -EINVAL without freeing the skb, while all other exit paths do so. Add the missing dev_kfree_skb() before sock_put() to fix the leak. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Wei Yang Link: https://patch.msgid.link/20260509122358.1102997-1-albin_yang@163.com Signed-off-by: Jakub Kicinski --- net/atm/signaling.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 358fbe5e4d1d..b991d937205a 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -179,6 +179,7 @@ as_indicate_complete: break; default: pr_alert("bad message type %d\n", (int)msg->type); + dev_kfree_skb(skb); /* Paired with find_get_vcc(msg->vcc) above */ sock_put(sk); return -EINVAL; -- cgit v1.2.3 From a3fdd924d88c30b9f488636ce0e4696012cf5511 Mon Sep 17 00:00:00 2001 From: Nicolò Coccia Date: Sun, 10 May 2026 12:34:13 -0400 Subject: net/smc: fix sleep-inside-lock in __smc_setsockopt() causing local DoS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A logic flaw in __smc_setsockopt() allows a local unprivileged user to cause a Denial of Service (DoS) by holding the socket lock indefinitely. The function __smc_setsockopt() calls copy_from_sockptr() while holding lock_sock(sk). By passing a userfaultfd-monitored memory page (or FUSE-backed memory on systems where unprivileged userfaultfd is disabled) as the optval, an attacker can halt execution during the copy operation, keeping the lock held. Combined with asynchronous tear-down operations like shutdown(), this exhausts the kernel wq (kworkers) and triggers the hung task watchdog. [ 240.123456] INFO: task kworker/u8:2 blocked for more than 120 seconds. [ 240.123489] Call Trace: [ 240.123501] smc_shutdown+... [ 240.123512] lock_sock_nested+... This patch moves the user-space copy outside the lock_sock() critical section to prevent the issue. Fixes: a6a6fe27bab4 ("net/smc: Dynamic control handshake limitation by socket options") Signed-off-by: Nicolò Coccia Reviewed-by: Dust Li Tested-by: Dust Li Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 185dbed7de5d..da28652f6810 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3054,18 +3054,17 @@ static int __smc_setsockopt(struct socket *sock, int level, int optname, smc = smc_sk(sk); + /* pre-fetch user data outside the lock */ + if (optname == SMC_LIMIT_HS) { + if (optlen < sizeof(int)) + return -EINVAL; + if (copy_from_sockptr(&val, optval, sizeof(int))) + return -EFAULT; + } + lock_sock(sk); switch (optname) { case SMC_LIMIT_HS: - if (optlen < sizeof(int)) { - rc = -EINVAL; - break; - } - if (copy_from_sockptr(&val, optval, sizeof(int))) { - rc = -EFAULT; - break; - } - smc->limit_smc_hs = !!val; rc = 0; break; -- cgit v1.2.3 From 7bf563badd37cb796df5477d2b78bb64148a1268 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Sun, 10 May 2026 15:26:40 -0700 Subject: net/smc: avoid NULL deref of conn->lnk in smc_msg_event tracepoint The smc_msg_event tracepoint class, shared by smc_tx_sendmsg and smc_rx_recvmsg, unconditionally dereferences smc->conn.lnk: __string(name, smc->conn.lnk->ibname) conn->lnk is only set for SMC-R; for SMC-D it is NULL. Other code on these paths already handles this (e.g. !conn->lnk in SMC_STAT_RMB_TX_SIZE_SMALL()). With the tracepoint enabled, the first sendmsg()/recvmsg() on an SMC-D socket crashes: Oops: general protection fault, probably for non-canonical address KASAN: null-ptr-deref in range [...] RIP: 0010:strlen+0x1e/0xa0 Call Trace: trace_event_raw_event_smc_msg_event (net/smc/smc_tracepoint.h:44) smc_rx_recvmsg (net/smc/smc_rx.c:515) smc_recvmsg (net/smc/af_smc.c:2859) __sys_recvfrom (net/socket.c:2315) __x64_sys_recvfrom (net/socket.c:2326) do_syscall_64 The faulting address 0x3e0 is offsetof(struct smc_link, ibname), confirming the NULL ->lnk deref. Enabling the tracepoint requires root, but the trigger itself is unprivileged: socket(AF_SMC, ...) has no capability check, and SMC-D negotiation needs no admin step on s390 or on x86 with the loopback ISM device loaded. Log an empty device name for SMC-D instead of dereferencing NULL. Fixes: aff3083f10bf ("net/smc: Introduce tracepoints for tx and rx msg") Reported-by: Weiming Shi Signed-off-by: Xiang Mei Reviewed-by: Dust Li Reviewed-by: Sidraya Jayagond Signed-off-by: Jakub Kicinski --- net/smc/smc_tracepoint.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h index a9a6e3c1113a..53da84f57fd6 100644 --- a/net/smc/smc_tracepoint.h +++ b/net/smc/smc_tracepoint.h @@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(smc_msg_event, __field(const void *, smc) __field(u64, net_cookie) __field(size_t, len) - __string(name, smc->conn.lnk->ibname) + __string(name, smc->conn.lnk ? smc->conn.lnk->ibname : "") ), TP_fast_assign( -- cgit v1.2.3 From 3d042592ebd4c7e44974d556de0b727cb7db4dab Mon Sep 17 00:00:00 2001 From: Chenguang Zhao Date: Mon, 11 May 2026 09:43:43 +0800 Subject: ethtool: fix ethnl_bitmap32_not_zero() bit interval semantics ethnl_bitmap32_not_zero() should return true if some bit in [start, end) is set: - Fix inverted memchr_inv() sense: return true when the scan finds a non-zero byte, not when the middle words are all zero. - Return false for an empty interval (end <= start). - When end is 32-bit aligned, indices in [start, end) do not include any bits from map[end_word]; return false after earlier checks found no non-zero data. Fixes: 10b518d4e6dd ("ethtool: netlink bitset handling") Signed-off-by: Chenguang Zhao Signed-off-by: Jakub Kicinski --- net/ethtool/bitset.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index 8bb98d3ea3db..a3a2cc6480a0 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -92,7 +92,7 @@ static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start, u32 mask; if (end <= start) - return true; + return false; if (start % 32) { mask = ethnl_upper_bits(start); @@ -105,11 +105,11 @@ static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start, start_word++; } - if (!memchr_inv(map + start_word, '\0', - (end_word - start_word) * sizeof(u32))) + if (memchr_inv(map + start_word, '\0', + (end_word - start_word) * sizeof(u32))) return true; if (end % 32 == 0) - return true; + return false; return map[end_word] & ethnl_lower_bits(end); } -- cgit v1.2.3 From f5b2772d14884f4be9e718644f1203d4d0e6f0d6 Mon Sep 17 00:00:00 2001 From: Niklas Söderlund Date: Sun, 10 May 2026 12:30:17 +0200 Subject: net: ethernet: ravb: Do not check URAM suspension when WoL is active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When updating the driver to match latest datasheet to suspend access to URAM when suspending DMA transfers a corner-case was missed, URAM access will not be suspended if WoL is enabled. This lead to the error message (correctly) being triggered as URAM access is not suspended even tho it's requested as part of stopping DMA. Avoid checking if URAM access is suspended and printing the error message if WoL is enabled when we suspend the system, as we know it will not be. Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdWnjV%3DHGE1o08zLhUfTgOSene5fYx1J5GG10mB%2BToq8qg@mail.gmail.com/ Fixes: 353d8e7989b6 ("net: ethernet: ravb: Suspend and resume the transmission flow") Signed-off-by: Niklas Söderlund Reviewed-by: Sai Krishna Tested-by: Geert Uytterhoeven Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 1dbfadb2a881..5f88733094d0 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1108,9 +1108,12 @@ static int ravb_stop_dma(struct net_device *ndev) /* Request for transmission suspension */ ravb_modify(ndev, CCC, CCC_DTSR, CCC_DTSR); - error = ravb_wait(ndev, CSR, CSR_DTS, CSR_DTS); - if (error) - netdev_err(ndev, "failed to stop AXI BUS\n"); + /* Access to URAM will not be suspended if WoL is enabled. */ + if (!priv->wol_enabled) { + error = ravb_wait(ndev, CSR, CSR_DTS, CSR_DTS); + if (error) + netdev_err(ndev, "failed to stop AXI BUS\n"); + } /* Stop AVB-DMAC process */ return ravb_set_opmode(ndev, CCC_OPC_CONFIG); -- cgit v1.2.3 From 3812a9e84265a5cdd90d29fe8d97a023e91fb945 Mon Sep 17 00:00:00 2001 From: Hardik Prakash Date: Tue, 12 May 2026 13:01:38 +0530 Subject: pinctrl-amd: enable IRQ for WACF2200 touchscreen on Lenovo Yoga 7 14AGP11 On Lenovo Yoga 7 14AGP11 (83TD), the WACF2200 touchscreen controller is wired via I2C2 (AMDI0010:02) with its interrupt on GPIO pin 157 (confirmed via ACPI _CRS GpioInt decode). After amd_gpio_irq_init() clears all GPIO interrupts at boot, pin 157 is never re-enabled, preventing the touchscreen from signalling the driver. Windows keeps GPIO 157 INTERRUPT_ENABLE (bit 11) and INTERRUPT_MASK (bit 12) set after initialisation. Add a DMI quirk to restore these bits after amd_gpio_irq_init() on this hardware. Assisted-by: Claude:claude-sonnet-4-6 Assisted-by: GPT-Codex:gpt-5.2-codex Signed-off-by: Hardik Prakash Acked-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index e3128b0045d2..64315b0edf2a 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,39 @@ static struct amd_gpio *pinctrl_dev; #endif +static const struct dmi_system_id amd_gpio_quirk_yoga7_14agp11[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83TD"), + DMI_MATCH(DMI_BOARD_NAME, "LNVNB161216"), + }, + }, + { } +}; + +static void amd_gpio_apply_quirks(struct amd_gpio *gpio_dev) +{ + const unsigned int pin = 157; /* WACF2200 GpioInt per ACPI _CRS */ + unsigned long flags; + u32 reg; + + if (!dmi_check_system(amd_gpio_quirk_yoga7_14agp11)) + return; + if (pin >= gpio_dev->gc.ngpio) + return; + + raw_spin_lock_irqsave(&gpio_dev->lock, flags); + reg = readl(gpio_dev->base + pin * 4); + reg |= BIT(INTERRUPT_ENABLE_OFF) | BIT(INTERRUPT_MASK_OFF); + writel(reg, gpio_dev->base + pin * 4); + raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); + + dev_info(&gpio_dev->pdev->dev, + "Enabled IRQ for GPIO %u (Yoga 7 14AGP11 touchscreen)\n", + pin); +} + static int amd_gpio_get_direction(struct gpio_chip *gc, unsigned offset) { unsigned long flags; @@ -1219,6 +1253,7 @@ static int amd_gpio_probe(struct platform_device *pdev) /* Disable and mask interrupts */ amd_gpio_irq_init(gpio_dev); + amd_gpio_apply_quirks(gpio_dev); girq = &gpio_dev->gc.irq; gpio_irq_chip_set_chip(girq, &amd_gpio_irqchip); -- cgit v1.2.3 From 5f7c7c63ffb1a187eb90c80864469db45f3bd2a8 Mon Sep 17 00:00:00 2001 From: Yang Xiuwei Date: Wed, 13 May 2026 17:43:03 +0800 Subject: io_uring/rw: drop unused attr_type_mask from io_prep_rw_pi() io_prep_rw_pi() never used the attr_type_mask argument. Callers already validate sqe->attr_type_mask before invoking the helper (only IORING_RW_ATTR_FLAG_PI is supported today). Remove the dead parameter to avoid implying further interpretation happens here. Signed-off-by: Yang Xiuwei Link: https://patch.msgid.link/20260513094303.866533-1-yangxiuwei@kylinos.cn Signed-off-by: Jens Axboe --- io_uring/rw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index e729e0e7657e..0c4834645279 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -230,7 +230,7 @@ static inline void io_meta_restore(struct io_async_rw *io, struct kiocb *kiocb) } static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir, - u64 attr_ptr, u64 attr_type_mask) + u64 attr_ptr) { struct io_uring_attr_pi pi_attr; struct io_async_rw *io; @@ -305,7 +305,7 @@ static int __io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, return -EINVAL; attr_ptr = READ_ONCE(sqe->attr_ptr); - return io_prep_rw_pi(req, rw, ddir, attr_ptr, attr_type_mask); + return io_prep_rw_pi(req, rw, ddir, attr_ptr); } return 0; } -- cgit v1.2.3 From cb6f19552b49be16db5d50d5d59c0ec2b5c38f13 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 5 Apr 2026 16:33:57 +0200 Subject: dt-bindings: display/msm: dp-controller: Correct SM8650 IO range DP on Qualcomm SM8650 come with nine address ranges, so describe the remaining ones as optional to keep ABI backwards compatible. Driver also does not need them to operate correctly. Reviewed-by: Dmitry Baryshkov Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring (Arm) Patchwork: https://patchwork.freedesktop.org/patch/716450/ Link: https://lore.kernel.org/r/20260405-dts-qcom-display-regs-v2-1-34f4024c65dc@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- .../bindings/display/msm/dp-controller.yaml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml index 8239adb7f7d3..e4f17d29343b 100644 --- a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml +++ b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml @@ -277,7 +277,6 @@ allOf: - qcom,sc8180x-dp - qcom,sdm845-dp - qcom,sm8350-dp - - qcom,sm8650-dp then: properties: reg: @@ -290,6 +289,24 @@ allOf: minItems: 6 maxItems: 6 + - if: + properties: + compatible: + contains: + enum: + - qcom,sm8650-dp + then: + properties: + reg: + minItems: 5 + maxItems: 9 + clocks: + minItems: 6 + maxItems: 6 + clocks-names: + minItems: 6 + maxItems: 6 + - if: properties: compatible: -- cgit v1.2.3 From 557226acef41c0b651588dc9f8911ec950f39101 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 5 Apr 2026 16:33:58 +0200 Subject: dt-bindings: display/msm: dp-controller: Allow DAI on SM8650 and others DisplayPort on Qualcomm SoCs like SM8650 and compatible SM8750 supports audio and there is already DTS having cells and sound-name-prefix. The "else:" clause for non-EDP and non-aux-bus cases already requires '#sound-dai-cells', so it should actually reference the dai-common.yaml for other properties, as pointed out by dtbs_check warnings like: sm8650-hdk-display-card-rear-camera-card.dtb: displayport-controller@af54000 (qcom,sm8650-dp): Unevaluated properties are not allowed ('sound-name-prefix' was unexpected) Signed-off-by: Krzysztof Kozlowski Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/716452/ Link: https://lore.kernel.org/r/20260405-dts-qcom-display-regs-v2-2-34f4024c65dc@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- Documentation/devicetree/bindings/display/msm/dp-controller.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml index e4f17d29343b..b96e1ea40d3f 100644 --- a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml +++ b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml @@ -219,6 +219,7 @@ allOf: - required: - "#sound-dai-cells" else: + $ref: /schemas/sound/dai-common.yaml# properties: aux-bus: false required: -- cgit v1.2.3 From bef8a15a6ee2b6c5940add7ef6bf61379905a783 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 5 Apr 2026 16:33:59 +0200 Subject: dt-bindings: display/msm: sm8650: Correct VBIF range in example VBIF register range is 0x3000 long, so correct the example. No practical impact, except when existing code is being re-used in new contributions. Reviewed-by: Dmitry Baryshkov Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring (Arm) Patchwork: https://patchwork.freedesktop.org/patch/716454/ Link: https://lore.kernel.org/r/20260405-dts-qcom-display-regs-v2-3-34f4024c65dc@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml | 2 +- Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml index dccac525d202..134321b50897 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml @@ -70,7 +70,7 @@ examples: display-controller@ae01000 { compatible = "qcom,sm8650-dpu"; reg = <0x0ae01000 0x8f000>, - <0x0aeb0000 0x2008>; + <0x0aeb0000 0x3000>; reg-names = "mdp", "vbif"; clocks = <&gcc_axi_clk>, diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml index a1c53e191033..0f7f79527748 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml @@ -112,7 +112,7 @@ examples: display-controller@ae01000 { compatible = "qcom,sm8650-dpu"; reg = <0x0ae01000 0x8f000>, - <0x0aeb0000 0x2008>; + <0x0aeb0000 0x3000>; reg-names = "mdp", "vbif"; clocks = <&gcc_axi_clk>, -- cgit v1.2.3 From 399f7748789ced4d8bf6a3f9aa550d007283d005 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 5 Apr 2026 16:34:00 +0200 Subject: dt-bindings: display/msm: sm8750-mdss: Correct DPU and DP ranges in example VBIF register range is 0x3000 long. DisplayPort block has few too short ranges and misses four more address spaces. No practical impact, except when existing code is being re-used in new contributions. Reviewed-by: Dmitry Baryshkov Signed-off-by: Krzysztof Kozlowski Patchwork: https://patchwork.freedesktop.org/patch/716453/ Link: https://lore.kernel.org/r/20260405-dts-qcom-display-regs-v2-4-34f4024c65dc@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- .../bindings/display/msm/qcom,sm8750-mdss.yaml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml index a38c2261ef1a..46dc0d28da29 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml @@ -117,7 +117,7 @@ examples: display-controller@ae01000 { compatible = "qcom,sm8750-dpu"; reg = <0x0ae01000 0x93000>, - <0x0aeb0000 0x2008>; + <0x0aeb0000 0x3000>; reg-names = "mdp", "vbif"; @@ -389,11 +389,15 @@ examples: displayport-controller@af54000 { compatible = "qcom,sm8750-dp", "qcom,sm8650-dp"; - reg = <0xaf54000 0x104>, - <0xaf54200 0xc0>, - <0xaf55000 0x770>, - <0xaf56000 0x9c>, - <0xaf57000 0x9c>; + reg = <0x0af54000 0x200>, + <0x0af54200 0x200>, + <0x0af55000 0xc00>, + <0x0af56000 0x400>, + <0x0af57000 0x400>, + <0x0af58000 0x400>, + <0x0af59000 0x400>, + <0x0af5a000 0x600>, + <0x0af5b000 0x600>; interrupts-extended = <&mdss 12>; -- cgit v1.2.3 From 795b19cbcf43414748261b9dcc783bebfbceb89a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 5 Apr 2026 16:34:01 +0200 Subject: dt-bindings: display/msm: qcom,eliza-mdss: Correct DPU and DP ranges in example VBIF register range is 0x3000 long. DisplayPort block has few too short ranges and misses four more address spaces. Similarly first part of DSI space should be 0x300 long. No practical impact, except when existing code is being re-used in new contributions. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Dmitry Baryshkov Acked-by: Rob Herring (Arm) Patchwork: https://patchwork.freedesktop.org/patch/716460/ Link: https://lore.kernel.org/r/20260405-dts-qcom-display-regs-v2-5-34f4024c65dc@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- .../bindings/display/msm/qcom,eliza-mdss.yaml | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/display/msm/qcom,eliza-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,eliza-mdss.yaml index 47938d13d1ca..bd4ba91a171f 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,eliza-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,eliza-mdss.yaml @@ -119,7 +119,7 @@ examples: mdss_mdp: display-controller@ae01000 { compatible = "qcom,eliza-dpu"; reg = <0x0ae01000 0x93000>, - <0x0aeb0000 0x2008>; + <0x0aeb0000 0x3000>; reg-names = "mdp", "vbif"; @@ -304,7 +304,7 @@ examples: mdss_dsi0_phy: phy@ae95000 { compatible = "qcom,eliza-dsi-phy-4nm", "qcom,sm8650-dsi-phy-4nm"; reg = <0x0ae95000 0x200>, - <0x0ae95200 0x280>, + <0x0ae95200 0x300>, <0x0ae95500 0x400>; reg-names = "dsi_phy", "dsi_phy_lane", @@ -388,7 +388,7 @@ examples: mdss_dsi1_phy: phy@ae97000 { compatible = "qcom,eliza-dsi-phy-4nm", "qcom,sm8650-dsi-phy-4nm"; reg = <0x0ae97000 0x200>, - <0x0ae97200 0x280>, + <0x0ae97200 0x300>, <0x0ae97500 0x400>; reg-names = "dsi_phy", "dsi_phy_lane", @@ -407,11 +407,15 @@ examples: displayport-controller@af54000 { compatible = "qcom,eliza-dp", "qcom,sm8650-dp"; - reg = <0xaf54000 0x104>, - <0xaf54200 0xc0>, - <0xaf55000 0x770>, - <0xaf56000 0x9c>, - <0xaf57000 0x9c>; + reg = <0x0af54000 0x200>, + <0x0af54200 0x200>, + <0x0af55000 0xc00>, + <0x0af56000 0x400>, + <0x0af57000 0x400>, + <0x0af58000 0x400>, + <0x0af59000 0x400>, + <0x0af5a000 0x600>, + <0x0af5b000 0x600>; interrupts-extended = <&mdss 12>; -- cgit v1.2.3 From 933430f1709b089a0bf0b23ef0f047014ef899e7 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 14 Apr 2026 17:14:30 +0200 Subject: drm/msm/dpu: fix UV scanlines calculation for YUV UBWC formats The UV scanlines is calculated with (height + 1) / 2 unlike the Y scanlines, add back the correct scanlines calculation for UBWC YUV formats. Fixes: 2f3ff6ab8f5c ("drm/msm/dpu: use standard functions in _dpu_format_populate_plane_sizes_ubwc()") Fixes: ada4a19ed21c ("drm/msm/dpu: rewrite _dpu_format_populate_plane_sizes_ubwc()") Signed-off-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/718309/ Link: https://lore.kernel.org/r/20260414-topic-sm8x50-msm-dpu1-formats-qc10c-v1-1-0b62325b9030@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c index 6e8883dbfad4..590922c4f69b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c @@ -61,7 +61,7 @@ static int _dpu_format_populate_plane_sizes_ubwc( bool meta = MSM_FORMAT_IS_UBWC(fmt); if (MSM_FORMAT_IS_YUV(fmt)) { - unsigned int stride, sclines; + unsigned int stride, y_sclines, uv_sclines; unsigned int y_tile_width, y_tile_height; unsigned int y_meta_stride, y_meta_scanlines; unsigned int uv_meta_stride, uv_meta_scanlines; @@ -77,23 +77,25 @@ static int _dpu_format_populate_plane_sizes_ubwc( y_tile_width = 32; } - sclines = round_up(fb->height, 16); + y_sclines = round_up(fb->height, 16); + uv_sclines = round_up((fb->height+1)>>1, 16); y_tile_height = 4; } else { stride = round_up(fb->width, 128); y_tile_width = 32; - sclines = round_up(fb->height, 32); + y_sclines = round_up(fb->height, 32); + uv_sclines = round_up((fb->height+1)>>1, 32); y_tile_height = 8; } layout->plane_pitch[0] = stride; layout->plane_size[0] = round_up(layout->plane_pitch[0] * - sclines, DPU_UBWC_PLANE_SIZE_ALIGNMENT); + y_sclines, DPU_UBWC_PLANE_SIZE_ALIGNMENT); layout->plane_pitch[1] = stride; layout->plane_size[1] = round_up(layout->plane_pitch[1] * - sclines, DPU_UBWC_PLANE_SIZE_ALIGNMENT); + uv_sclines, DPU_UBWC_PLANE_SIZE_ALIGNMENT); if (!meta) return 0; -- cgit v1.2.3 From d03279f0d9fdbe6f6761f191a76093c395930018 Mon Sep 17 00:00:00 2001 From: Mahadevan P Date: Tue, 28 Apr 2026 17:14:25 +0530 Subject: drm/msm/dpu: Fix Kaanapali CWB register configuration The Kaanapali DPU catalog defines kaanapali_cwb[] with the correct CWB base addresses for this platform (0x169200, 0x169600, 0x16a200, 0x16a600), but the dpu_kaanapali_cfg struct was mistakenly pointing to sm8650_cwb instead. The SM8650 CWB blocks sit at completely different offsets (0x66200, 0x66600, 0x7E200, 0x7E600), so using them on Kaanapali would program CWB registers at wrong addresses, corrupting unrelated hardware blocks and breaking writeback capture. Fix this by pointing .cwb to the correct kaanapali_cwb array. Fixes: 83fe2cd56b1d ("drm/msm/dpu: Add support for Kaanapali DPU") Signed-off-by: Mahadevan P Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/721444/ Link: https://lore.kernel.org/r/20260428-kaanapali_cwb-v1-1-51fdb2c65498@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_13_0_kaanapali.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_13_0_kaanapali.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_13_0_kaanapali.h index b7b06e45b529..06da1583fb1e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_13_0_kaanapali.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_13_0_kaanapali.h @@ -480,7 +480,7 @@ const struct dpu_mdss_cfg dpu_kaanapali_cfg = { .wb_count = ARRAY_SIZE(kaanapali_wb), .wb = kaanapali_wb, .cwb_count = ARRAY_SIZE(kaanapali_cwb), - .cwb = sm8650_cwb, + .cwb = kaanapali_cwb, .intf_count = ARRAY_SIZE(kaanapali_intf), .intf = kaanapali_intf, .vbif = &sm8650_vbif, -- cgit v1.2.3 From 5b49a46baa853b26dbefa65c6c75dd9ff69f63d4 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 28 Apr 2026 20:21:38 +0300 Subject: drm/msm/dsi: don't dump registers past the mapped region On DSI 6G platforms the IO address space is internally adjusted by io_offset. Later this adjusted address might be used for memory dumping. However the size that is used for memory dumping isn't adjusted to account for the io_offset, leading to the potential access to the unmapped region. Lower ctrl_size by the io_offset value to prevent access past the mapped area. msm_disp_snapshot_add_block+0x1d4/0x3c8 [msm] (P) msm_dsi_host_snapshot+0x4c/0x78 [msm] msm_dsi_snapshot+0x28/0x50 [msm] msm_disp_snapshot_capture_state+0x74/0x140 [msm] msm_disp_snapshot_state_sync+0x60/0x90 [msm] _msm_disp_snapshot_work+0x30/0x90 [msm] kthread_worker_fn+0xdc/0x460 kthread+0x120/0x140 Fixes: bac2c6a62ed9 ("drm/msm: get rid of msm_iomap_size") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/721747/ Link: https://lore.kernel.org/r/20260428-msm-fix-dsi-dump-v1-1-5d4cb5ccfac7@oss.qualcomm.com --- drivers/gpu/drm/msm/dsi/dsi_host.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 565d425f88b8..982abaaac00d 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -2033,6 +2033,7 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi) /* fixup base address by io offset */ msm_host->ctrl_base += cfg->io_offset; + msm_host->ctrl_size -= cfg->io_offset; ret = devm_regulator_bulk_get_const(&pdev->dev, cfg->num_regulators, cfg->regulator_data, -- cgit v1.2.3 From c0c70a11365cba7fba25a77463582bcec0f7846e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 5 May 2026 03:24:58 +0300 Subject: drm/msm/dpu: don't mix devm and drmm functions Mixing devm and drmm functions will result in a use-after-free on msm driver teardown if userspace keeps a reference on the drm device: The WB connector data will be destroyed because of the use of devm_kzalloc()), while the usersoace still can try interacting with the WB connector (which uses drmm_ functions). Change dpu_writeback_init() to use drmm_. Fixes: 0b37ac63fc9d ("drm/msm/dpu: use drmm_writeback_connector_init()") Reported-by: Christophe JAILLET Closes: https://lore.kernel.org/r/78c764b8-44cf-4db5-88e7-807a85954518@wanadoo.fr Signed-off-by: Dmitry Baryshkov Reviewed-by: John.Harrison@Igalia.com Patchwork: https://patchwork.freedesktop.org/patch/722656/ Link: https://lore.kernel.org/r/20260505-wb-drop-encoder-v5-1-42567b7c7af2@oss.qualcomm.com --- drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c index 7545c0293efb..6f2370c9dd98 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c @@ -5,6 +5,7 @@ #include #include +#include #include "dpu_writeback.h" @@ -125,7 +126,7 @@ int dpu_writeback_init(struct drm_device *dev, struct drm_encoder *enc, struct dpu_wb_connector *dpu_wb_conn; int rc = 0; - dpu_wb_conn = devm_kzalloc(dev->dev, sizeof(*dpu_wb_conn), GFP_KERNEL); + dpu_wb_conn = drmm_kzalloc(dev, sizeof(*dpu_wb_conn), GFP_KERNEL); if (!dpu_wb_conn) return -ENOMEM; -- cgit v1.2.3 From 2d5d3fc593c9b7e41bee86175d7b9e11f470072e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 May 2026 16:58:48 +0200 Subject: KVM: VMX: introduce module parameter to disable CET There have been reports of host hangs caused by CET virtualization. Until these are analyzed further, introduce a module parameter that makes it possible to easily disable it. Link: https://lore.kernel.org/all/85548beb-1486-40f9-beb4-632c78e3360b@proxmox.com/ Cc: David Riley Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/capabilities.h | 1 + arch/x86/kvm/vmx/vmx.c | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 56cacc06225e..31568274d8bb 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -14,6 +14,7 @@ extern bool __read_mostly flexpriority_enabled; extern bool __read_mostly enable_ept; extern bool __read_mostly enable_unrestricted_guest; extern bool __read_mostly enable_ept_ad_bits; +extern bool __read_mostly enable_cet; extern bool __read_mostly enable_pml; extern int __read_mostly pt_mode; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5c2c33a5f7dc..49feecb286b2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -108,6 +108,9 @@ module_param_named(unrestricted_guest, bool __read_mostly enable_ept_ad_bits = 1; module_param_named(eptad, enable_ept_ad_bits, bool, 0444); +bool __read_mostly enable_cet = 1; +module_param_named(cet, enable_cet, bool, 0444); + static bool __read_mostly emulate_invalid_guest_state = true; module_param(emulate_invalid_guest_state, bool, 0444); @@ -4476,7 +4479,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) * SSP is reloaded from IA32_PL3_SSP. Check SDM Vol.2A/B Chapter * 3 and 4 for details. */ - if (cpu_has_load_cet_ctrl()) { + if (enable_cet) { vmcs_writel(HOST_S_CET, kvm_host.s_cet); vmcs_writel(HOST_SSP, 0); vmcs_writel(HOST_INTR_SSP_TABLE, 0); @@ -4532,6 +4535,10 @@ static u32 vmx_get_initial_vmentry_ctrl(void) if (vmx_pt_mode_is_system()) vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | VM_ENTRY_LOAD_IA32_RTIT_CTL); + + if (!enable_cet) + vmentry_ctrl &= ~VM_ENTRY_LOAD_CET_STATE; + /* * IA32e mode, and loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically. */ @@ -4546,6 +4553,9 @@ static u32 vmx_get_initial_vmexit_ctrl(void) { u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; + if (!enable_cet) + vmexit_ctrl &= ~VM_EXIT_LOAD_CET_STATE; + /* * Not used by KVM and never set in vmcs01 or vmcs02, but emulated for * nested virtualization and thus allowed to be set in vmcs12. @@ -8155,7 +8165,7 @@ static __init void vmx_set_cpu_caps(void) * VMX_BASIC[bit56] == 0, inject #CP at VMX entry with error code * fails, so disable CET in this case too. */ - if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest || + if (!enable_cet || !enable_unrestricted_guest || !cpu_has_vmx_basic_no_hw_errcode_cc()) { kvm_cpu_cap_clear(X86_FEATURE_SHSTK); kvm_cpu_cap_clear(X86_FEATURE_IBT); @@ -8630,6 +8640,9 @@ __init int vmx_hardware_setup(void) !cpu_has_vmx_invept_global()) enable_ept = 0; + if (!cpu_has_load_cet_ctrl()) + enable_cet = 0; + /* NX support is required for shadow paging. */ if (!enable_ept && !boot_cpu_has(X86_FEATURE_NX)) { pr_err_ratelimited("NX (Execute Disable) not supported\n"); -- cgit v1.2.3 From 836efd35c472d89c838d7b17ef339ddb3286ffc5 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 13 May 2026 20:11:29 +0900 Subject: block: fix handling of dead zone write plugs Shin'ichiro reported hard to reproduce unaligned write errors with zoned block devices. Under normal operation conditions (e.g. running XFS on an SMR disk), these errors are nearly impossible to trigger. But using a "slow" kernel with many debug options enables and some specific use cases (e.g. fio zbd test case 46), the errors can be reproduced fairly easily. The unaligned write errors come from mishandling a valid reference counting pattern of zone write plugs. Such pattern triggers for instance if a process A writes a zone (not necessarilly to the full state), another process B immediately resets the zone and immediately following the completion of the zone reset, starts issuing writes to the zone. With such pattern, in some cases, the zone write plugs worker thread of the device may still be holding a reference to the zone write plug of the zone taken when process A was writing to the zone. The following zone reset from process B marks the zone as dead but does not remove the zone write plug from the device hash table as a reference to the plug still exist. Once process B starts issuing new writes, the zone write plug is seen as dead and the writes from process B are immediately failed, despite this write pattern being perfectly legal. Fix this by allowing restoring a dead zone write plug to a live state if a write is issued to the zone when the zone is: marked as dead, empty and the write sector corresponds to the first sector of the zone (that is, the write is aligned to the zone write pointer). This is done with the new helper function disk_check_zone_wplug_dead(), which restores a dead zone write plug to a live state by clearing the BLK_ZONE_WPLUG_DEAD flag and restoring the initial reference to the zone write plug taken when the plug was added to the device hash table. Reported-by: Shin'ichiro Kawasaki Fixes: b7d4ffb51037 ("block: fix zone write plug removal") Signed-off-by: Damien Le Moal Tested-by: Shin'ichiro Kawasaki Link: https://patch.msgid.link/20260513111129.108809-1-dlemoal@kernel.org Signed-off-by: Jens Axboe --- block/blk-zoned.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 30cad2bb9291..42ef830054dc 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -623,6 +623,28 @@ static void disk_mark_zone_wplug_dead(struct blk_zone_wplug *zwplug) } } +static inline bool disk_check_zone_wplug_dead(struct blk_zone_wplug *zwplug) +{ + if (!(zwplug->flags & BLK_ZONE_WPLUG_DEAD)) + return false; + + /* + * If a new write is received right after a zone reset completes and + * while the disk_zone_wplugs_worker() thread has not yet released the + * reference on the zone write plug after processing the last write to + * the zone, then the new write BIO will see the zone write plug marked + * as dead. This case is however a false positive and a perfectly valid + * pattern. In such case, restore the zone write plug to a live one. + */ + if (!zwplug->wp_offset && bio_list_empty(&zwplug->bio_list)) { + zwplug->flags &= ~BLK_ZONE_WPLUG_DEAD; + refcount_inc(&zwplug->ref); + return false; + } + + return true; +} + static bool disk_zone_wplug_submit_bio(struct gendisk *disk, struct blk_zone_wplug *zwplug); @@ -1444,12 +1466,12 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs) spin_lock_irqsave(&zwplug->lock, flags); /* - * If we got a zone write plug marked as dead, then the user is issuing - * writes to a full zone, or without synchronizing with zone reset or - * zone finish operations. In such case, fail the BIO to signal this - * invalid usage. + * Check if we got a zone write plug marked as dead. If yes, then the + * user is likely issuing writes to a full zone, or without + * synchronizing with zone reset or zone finish operations. In such + * case, fail the BIO to signal this invalid usage. */ - if (zwplug->flags & BLK_ZONE_WPLUG_DEAD) { + if (disk_check_zone_wplug_dead(zwplug)) { spin_unlock_irqrestore(&zwplug->lock, flags); disk_put_zone_wplug(zwplug); bio_io_error(bio); -- cgit v1.2.3 From 87d0740b7c4cc847be1b6f307ab6d8547cb1a726 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 13 May 2026 18:19:40 +0800 Subject: selftests: ublk: cap nthreads to kernel's actual nr_hw_queues dev->nthreads is derived from the user-requested queue count before the ADD command, but the kernel may reduce nr_hw_queues (capped to nr_cpu_ids). When the VM has fewer CPUs than requested queues, the daemon creates more handler threads than there are kernel queues. In non-batch mode, the extra threads access uninitialized queues (q_depth=0), submit zero io_uring SQEs, and block forever in io_cqring_wait. In batch mode, the extra threads cause similar hangs during device removal. In both cases, the stuck threads prevent the daemon from closing the char device, holding the last ublk_device reference and causing ublk_ctrl_del_dev() to hang in wait_event_interruptible(). Fix by capping dev->nthreads to the kernel-returned nr_hw_queues after the ADD command completes. per_io_tasks mode is excluded because threads interleave across all queues, so nthreads > nr_hw_queues is valid. Fixes: abe54c160346 ("selftests: ublk: kublk: decouple ublk_queues from ublk server threads") Signed-off-by: Ming Lei Link: https://patch.msgid.link/20260513101941.1373998-1-tom.leiming@gmail.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index fbd9b1e7342a..0b23c09daea5 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1735,6 +1735,17 @@ static int __cmd_dev_add(const struct dev_ctx *ctx) goto fail; } + /* + * The kernel may reduce nr_hw_queues (e.g. capped to nr_cpu_ids). + * Cap nthreads to the actual queue count to avoid creating extra + * handler threads that will hang during device removal. + * + * per_io_tasks mode is excluded: threads interleave across all + * queues so nthreads > nr_hw_queues is valid and intentional. + */ + if (!ctx->per_io_tasks && dev->nthreads > info->nr_hw_queues) + dev->nthreads = info->nr_hw_queues; + ret = ublk_start_daemon(ctx, dev); ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret); if (ret < 0) -- cgit v1.2.3 From 8fb70afe671cc8c1f6237a39aabd50714fcd1189 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 10 May 2026 22:56:05 +0200 Subject: drm/xe: Drop unused ggtt_balloon field During recent GGTT refactoring we missed to drop now unused field from the xe_tile. Drop it now. Fixes: e904c56ba6e0 ("drm/xe: Rewrite GGTT VF initialization") Signed-off-by: Michal Wajdeczko Reviewed-by: Maarten Lankhorst Link: https://patch.msgid.link/20260510205605.642-1-michal.wajdeczko@intel.com (cherry picked from commit 21d5a871f57909dc4d8e4f5d3bf92f9ccf2597b2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_tile_types.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_tile_types.h b/drivers/gpu/drm/xe/xe_tile_types.h index 33932fd547d7..0048100ccb72 100644 --- a/drivers/gpu/drm/xe/xe_tile_types.h +++ b/drivers/gpu/drm/xe/xe_tile_types.h @@ -106,8 +106,6 @@ struct xe_tile { struct xe_lmtt lmtt; } pf; struct { - /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ - struct xe_ggtt_node *ggtt_balloon[2]; /** @sriov.vf.self_config: VF configuration data */ struct xe_tile_sriov_vf_selfconfig self_config; } vf; -- cgit v1.2.3 From ea324444ece9f301b5c4ff71b258cc68990c4d61 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Mon, 16 Mar 2026 16:12:00 +0100 Subject: x86/mce: Restore MCA polling interval halving RongQing reported that the MCA polling interval doesn't halve when an error gets logged. It was traced down to the commit in Fixes:, because: mce_timer_fn() |-> mce_poll_banks() |-> machine_check_poll() |-> mce_log() which will queue the work and return. Now, back in mce_timer_fn(): /* * Alert userspace if needed. If we logged an MCE, reduce the polling * interval, otherwise increase the polling interval. */ if (mce_notify_irq()) <--- here we haven't ran the notifier chain yet so mce_need_notify is not set yet so this won't hit and we won't halve the interval iv. Now the notifier chain runs. mce_early_notifier() sets the bit, does mce_notify_irq(), that clears the bit and then the notifier chain a little later logs the error. So this is a silly timing issue. But, that's all unnecessary. All it needs to happen here is, the "should we notify of a logged MCE" mce_notify_irq() asks, should be simply a question to the mce gen pool: "Are you empty?" And that then turns into a simple yes or no answer and it all JustWorks(tm). So do that and also distribute the functionality where it belongs: - Print that MCE events have been logged in mce_log() - Trigger the mcelog tool specific work in the first notifier As a result, mce_notify_irq() can go now. Fixes: 011d82611172 ("RAS: Add a Corrected Errors Collector") Reported-by: Li RongQing Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Qiuxu Zhuo Tested-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20260112082747.2842-1-lirongqing@baidu.com --- arch/x86/kernel/cpu/mce/core.c | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 8dd424ac5de8..f3a793e3a6c8 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -90,7 +90,6 @@ struct mca_config mca_cfg __read_mostly = { }; static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen); -static unsigned long mce_need_notify; /* * MCA banks polled by the period polling timer for corrected events. @@ -152,8 +151,10 @@ EXPORT_PER_CPU_SYMBOL_GPL(injectm); void mce_log(struct mce_hw_err *err) { - if (mce_gen_pool_add(err)) + if (mce_gen_pool_add(err)) { + pr_info(HW_ERR "Machine check events logged\n"); irq_work_queue(&mce_irq_work); + } } EXPORT_SYMBOL_GPL(mce_log); @@ -585,28 +586,6 @@ bool mce_is_correctable(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_correctable); -/* - * Notify the user(s) about new machine check events. - * Can be called from interrupt context, but not from machine check/NMI - * context. - */ -static bool mce_notify_irq(void) -{ - /* Not more than two messages every minute */ - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); - - if (test_and_clear_bit(0, &mce_need_notify)) { - mce_work_trigger(); - - if (__ratelimit(&ratelimit)) - pr_info(HW_ERR "Machine check events logged\n"); - - return true; - } - - return false; -} - static int mce_early_notifier(struct notifier_block *nb, unsigned long val, void *data) { @@ -618,9 +597,7 @@ static int mce_early_notifier(struct notifier_block *nb, unsigned long val, /* Emit the trace record: */ trace_mce_record(err); - set_bit(0, &mce_need_notify); - - mce_notify_irq(); + mce_work_trigger(); return NOTIFY_DONE; } @@ -1804,7 +1781,7 @@ static void mce_timer_fn(struct timer_list *t) * Alert userspace if needed. If we logged an MCE, reduce the polling * interval, otherwise increase the polling interval. */ - if (mce_notify_irq()) + if (!mce_gen_pool_empty()) iv = max(iv / 2, (unsigned long) HZ/100); else iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); -- cgit v1.2.3 From 6d3790bc689de9f18fae01c21f02e7d6d425534c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 27 Apr 2026 18:25:03 -0700 Subject: KVM: selftests: Include sys/mman.h *and* linux/mman.h, via kvm_syscalls.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include both linux/mman.h (the kernel provided version) and sys/mman.h (the libc provided version) throughout KVM selftests, by way of kvm_syscalls.h (which should have been including sys/mman.h anyways). Pulling in the kernel's version fixes compilation errors with the guest_memfd test on older versions of libc due to a recent commit adding MADV_COLLAPSE testing. In file included from include/kvm_util.h:8, from guest_memfd_test.c:21: guest_memfd_test.c: In function ‘test_collapse’: guest_memfd_test.c:219:47: error: ‘MADV_COLLAPSE’ undeclared (first use in this function); did you mean ‘MADV_COLD’? 219 | TEST_ASSERT_EQ(madvise(mem, pmd_size, MADV_COLLAPSE), -1); | ^~~~~~~~~~~~~ include/test_util.h:62:16: note: in definition of macro ‘TEST_ASSERT_EQ’ 62 | typeof(a) __a = (a); \ | ^ guest_memfd_test.c:219:47: note: each undeclared identifier is reported only once for each function it appears in 219 | TEST_ASSERT_EQ(madvise(mem, pmd_size, MADV_COLLAPSE), -1); | ^~~~~~~~~~~~~ include/test_util.h:62:16: note: in definition of macro ‘TEST_ASSERT_EQ’ 62 | typeof(a) __a = (a); \ | ^ Route the includes through kvm_syscalls.h to try and avoid a future game of whack-a-mole, i.e. so that future expansion of test coverage doesn't run into the same problem. To discourage use of sys/mman.h, opportunistically include the kernel's version of mman.h in test_util.h as it only needs MAP_SHARED, i.e. only needs the full set of kernel defs, not the libc syscall wrappers. Fixes: 9830209b4ae8 ("KVM: selftests: Test MADV_COLLAPSE on guest_memfd") Reported-by: Rick Edgecombe Closes: https://lore.kernel.org/all/20260427204313.50741-1-rick.p.edgecombe@intel.com Link: https://patch.msgid.link/20260428012503.1213654-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/access_tracking_perf_test.c | 2 +- tools/testing/selftests/kvm/guest_memfd_test.c | 2 +- tools/testing/selftests/kvm/include/kvm_syscalls.h | 10 ++++++++++ tools/testing/selftests/kvm/include/test_util.h | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 2 +- tools/testing/selftests/kvm/memslot_perf_test.c | 2 +- tools/testing/selftests/kvm/s390/shared_zeropage_test.c | 3 +-- tools/testing/selftests/kvm/s390/tprot.c | 2 +- tools/testing/selftests/kvm/set_memory_region_test.c | 2 +- 9 files changed, 18 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index e5bbdb5bbdc3..4415c94b2866 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -41,10 +41,10 @@ #include #include #include -#include #include #include +#include "kvm_syscalls.h" #include "kvm_util.h" #include "test_util.h" #include "memstress.h" diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 253e748c1d4a..832ef4dfb99f 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -14,10 +14,10 @@ #include #include #include -#include #include #include +#include "kvm_syscalls.h" #include "kvm_util.h" #include "numaif.h" #include "test_util.h" diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h index 843c9904c46f..067a4c9cf452 100644 --- a/tools/testing/selftests/kvm/include/kvm_syscalls.h +++ b/tools/testing/selftests/kvm/include/kvm_syscalls.h @@ -2,8 +2,18 @@ #ifndef SELFTEST_KVM_SYSCALLS_H #define SELFTEST_KVM_SYSCALLS_H +/* + * Include both the kernel and libc versions of mman.h. The kernel provides + * the most up-to-date flags and definitions, while libc provides the syscall + * wrappers tests expect. + */ +#include + +#include #include +#include + #define MAP_ARGS0(m,...) #define MAP_ARGS1(m,t,a,...) m(t,a) #define MAP_ARGS2(m,t,a,...) m(t,a), MAP_ARGS1(m,__VA_ARGS__) diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index d9b433b834f1..a56271c237ae 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -19,9 +19,9 @@ #include #include #include -#include #include "kselftest.h" +#include #include #define msecs_to_usecs(msec) ((msec) * 1000ULL) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 2a76eca7029d..e08967ef7b7b 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -5,13 +5,13 @@ * Copyright (C) 2018, Google LLC. */ #include "test_util.h" +#include "kvm_syscalls.h" #include "kvm_util.h" #include "processor.h" #include "ucall_common.h" #include #include -#include #include #include #include diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 3d02db371422..e977e979470f 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -23,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c index a9e5a01200b8..478381e6f84e 100644 --- a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c +++ b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c @@ -4,11 +4,10 @@ * * Copyright (C) 2024, Red Hat, Inc. */ -#include - #include #include "test_util.h" +#include "kvm_syscalls.h" #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" diff --git a/tools/testing/selftests/kvm/s390/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c index 8054d2b178f0..d86179827a18 100644 --- a/tools/testing/selftests/kvm/s390/tprot.c +++ b/tools/testing/selftests/kvm/s390/tprot.c @@ -4,8 +4,8 @@ * * Copyright IBM Corp. 2021 */ -#include #include "test_util.h" +#include "kvm_syscalls.h" #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 9b919a231c93..e639a9db51ee 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -8,11 +8,11 @@ #include #include #include -#include #include #include +#include #include #include -- cgit v1.2.3 From fff82ea9d900b6bbebc58d34b7a63789de1ad10d Mon Sep 17 00:00:00 2001 From: Mikhail Gavrilov Date: Tue, 5 May 2026 04:54:35 +0500 Subject: x86/virt: Silence RCU lockdep splat in emergency virt callback path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit x86_virt_invoke_kvm_emergency_callback() reaches rcu_dereference() through machine_crash_shutdown() with IRQs disabled but with RCU not necessarily watching the crashing CPU, which triggers a suspicious RCU usage splat on debug kernels (CONFIG_PROVE_RCU=y) during panic/kdump: WARNING: suspicious RCU usage arch/x86/virt/hw.c:52 suspicious rcu_dereference_check() usage! rcu_scheduler_active = 2, debug_locks = 1 1 lock held by tee/11119: #0: ffff8881fa32c440 (sb_writers#3){.+.+}-{0:0}, at: ksys_write Call Trace: dump_stack_lvl+0x84/0xd0 lockdep_rcu_suspicious.cold+0x37/0x8f x86_virt_invoke_kvm_emergency_callback+0x5f/0x70 x86_svm_emergency_disable_virtualization_cpu+0x2a/0x30 x86_virt_emergency_disable_virtualization_cpu+0x6b/0x90 native_machine_crash_shutdown+0x72/0x170 __crash_kexec+0x137/0x280 panic+0xce/0xd0 sysrq_handle_crash+0x1f/0x20 __handle_sysrq.cold+0x192/0x335 write_sysrq_trigger+0x8c/0xc0 proc_reg_write+0x1c3/0x3c0 vfs_write+0x1d0/0xf80 ksys_write+0x116/0x250 do_syscall_64+0x11c/0x1480 entry_SYSCALL_64_after_hwframe+0x76/0x7e A truly correct fix is non-trivial: the RCU usage genuinely is wrong in panic context (RCU may ignore the crashing CPU during synchronization), and a concurrent KVM module unload could in principle race with the callback read; see commit 2baa33a8ddd6 ("KVM: x86: Leave user-return notifier registered on reboot/shutdown") which notes that nothing prevents module unload during panic/reboot. However, the alternatives are worse: - smp_store_release()/smp_load_acquire() handles ordering but not liveness; the kernel still needs to keep the module text alive while the callback is in flight. - Taking a lock in the panic path is risky — any lock could be held by a CPU that has already been NMI'd to a halt. Use rcu_dereference_raw() to silence the splat and accept the vanishingly small remaining race. Panic context inherently cannot guarantee complete correctness; the goal here is to keep debug builds quiet on the kdump path so the splat doesn't obscure the actual kernel state being captured. Reproducible on a debug kernel (CONFIG_PROVE_LOCKING=y, CONFIG_PROVE_RCU=y) with kvm_amd or kvm_intel loaded by triggering kdump: echo c > /proc/sysrq-trigger Suggested-by: Sean Christopherson Fixes: 428afac5a8ea ("KVM: x86: Move bulk of emergency virtualizaton logic to virt subsystem") Signed-off-by: Mikhail Gavrilov Acked-by: Sean Christopherson Link: https://patch.msgid.link/20260504235435.90957-1-mikhail.v.gavrilov@gmail.com Signed-off-by: Sean Christopherson --- arch/x86/virt/hw.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/x86/virt/hw.c b/arch/x86/virt/hw.c index f647557d38ac..7e9091c640be 100644 --- a/arch/x86/virt/hw.c +++ b/arch/x86/virt/hw.c @@ -49,7 +49,20 @@ static void x86_virt_invoke_kvm_emergency_callback(void) { cpu_emergency_virt_cb *kvm_callback; - kvm_callback = rcu_dereference(kvm_emergency_callback); + /* + * RCU may not be watching the crashing CPU here, so rcu_dereference() + * triggers a suspicious-RCU-usage splat. In principle, a concurrent + * KVM module unload could race with this read; see commit 2baa33a8ddd6 + * ("KVM: x86: Leave user-return notifier registered on reboot/shutdown") + * which notes that nothing prevents module unload during panic/reboot. + * + * However, taking a lock here would be riskier than the current race: + * the system is going down via NMI shootdown, and any lock could be + * held by an already-stopped CPU. Use rcu_dereference_raw() to silence + * the lockdep splat and accept the comically small remaining race; + * panic context inherently cannot guarantee complete correctness. + */ + kvm_callback = rcu_dereference_raw(kvm_emergency_callback); if (kvm_callback) kvm_callback(); } -- cgit v1.2.3 From 8fe2e698fce4a95a3ac2c25fe59832a3c22534c6 Mon Sep 17 00:00:00 2001 From: Lei Chen Date: Thu, 9 Apr 2026 22:22:26 +0800 Subject: KVM: x86: Rate-limit global clock updates on vCPU load commit 446fcce2a52b ("Revert "x86: kvm: rate-limit global clock updates"") dropped the rate limiting for KVM_REQ_GLOBAL_CLOCK_UPDATE. As a result, kvm_arch_vcpu_load() can queue global clock update requests every time a vCPU is scheduled when the master clock is disabled or when the vCPU is loaded for the first time. Restore the throttling with a per-VM ratelimit state and gate KVM_REQ_GLOBAL_CLOCK_UPDATE through __ratelimit(), so frequent vCPU scheduling does not generate a steady stream of redundant clock update requests. Fixes: 446fcce2a52b ("Revert "x86: kvm: rate-limit global clock updates"") Signed-off-by: Lei Chen Reported-by: Jaroslav Pulchart Closes: https://lore.kernel.org/all/CAK8fFZ5gY8_Mw2A=iZVFNVKQNrXQzVsn-HTd+Me9K6ZfmdgA+Q@mail.gmail.com/ Link: https://patch.msgid.link/20260409142226.2581-1-lei.chen@smartx.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c470e40a00aa..f14009f25a3b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1504,6 +1504,7 @@ struct kvm_arch { bool use_master_clock; u64 master_kernel_ns; u64 master_cycle_now; + struct ratelimit_state kvmclock_update_rs; #ifdef CONFIG_KVM_HYPERV struct kvm_hv hyperv; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0a1b63c63d1a..e01d6984ed04 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5227,8 +5227,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * On a host with synchronized TSC, there is no need to update * kvmclock on vcpu->cpu migration */ - if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1) - kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); + if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1) { + if (__ratelimit(&vcpu->kvm->arch.kvmclock_update_rs)) + kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); + else + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); + } + if (vcpu->cpu != cpu) kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu); vcpu->cpu = cpu; @@ -13366,6 +13371,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) raw_spin_lock_init(&kvm->arch.tsc_write_lock); mutex_init(&kvm->arch.apic_map_lock); seqcount_raw_spinlock_init(&kvm->arch.pvclock_sc, &kvm->arch.tsc_write_lock); + ratelimit_state_init(&kvm->arch.kvmclock_update_rs, HZ, 10); + ratelimit_set_flags(&kvm->arch.kvmclock_update_rs, RATELIMIT_MSG_ON_RELEASE); kvm->arch.kvmclock_offset = -get_kvmclock_base_ns(); raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); -- cgit v1.2.3 From 34065a5f3cf94886e59e2a8b5db00515f32d6cf2 Mon Sep 17 00:00:00 2001 From: Hisam Mehboob Date: Thu, 9 Apr 2026 20:38:47 +0500 Subject: KVM: selftests: Guard execinfo.h inclusion for non-glibc builds The backtrace() function and execinfo.h are GNU extensions available in glibc but not in non-glibc C libraries such as musl. Building KVM selftests with musl-gcc fails with: lib/assert.c:9:10: fatal error: execinfo.h: No such file or directory Fix this by guarding the inclusion of execinfo.h and the stack dumping logic under #ifdef __GLIBC__. For non-glibc builds, provide a local stub for test_dump_stack(). Suggested-by: Aqib Faruqui Suggested-by: Sean Christopherson Signed-off-by: Hisam Mehboob Link: https://patch.msgid.link/20260409153846.1502656-2-hisamshar@gmail.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/assert.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index b49690658c60..8be0d09ecf0f 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -6,11 +6,14 @@ */ #include "test_util.h" -#include + #include #include "kselftest.h" +#ifdef __GLIBC__ +#include + /* Dumps the current stack trace to stderr. */ static void __attribute__((noinline)) test_dump_stack(void); static void test_dump_stack(void) @@ -57,6 +60,9 @@ static void test_dump_stack(void) system(cmd); #pragma GCC diagnostic pop } +#else +static void test_dump_stack(void) {} +#endif static pid_t _gettid(void) { -- cgit v1.2.3 From 950953f774b3f69da6f413e045ef075e1f3da2df Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 8 May 2026 16:44:44 +0200 Subject: drm/gma500/oaktrail_hdmi: fix i2c adapter leak on setup Make sure to drop the reference taken to the I2C adapter (and its module) when setting up HDMI to allow the adapter to be deregistered. Fixes: 1b082ccf5901 ("gma500: Add Oaktrail support") Cc: stable@vger.kernel.org # 3.3 Signed-off-by: Johan Hovold Signed-off-by: Patrik Jakobsson Link: https://patch.msgid.link/20260508144446.59722-2-johan@kernel.org --- drivers/gpu/drm/gma500/oaktrail_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c index 58d7e191fd56..403d21cbb3a2 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c @@ -580,6 +580,7 @@ static int oaktrail_hdmi_get_modes(struct drm_connector *connector) } else { edid = (struct edid *)raw_edid; /* FIXME ? edid = drm_get_edid(connector, i2c_adap); */ + i2c_put_adapter(i2c_adap); } if (edid) { -- cgit v1.2.3 From 657a091ab6d01d0091b77660c75cfed573c9a53e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 8 May 2026 16:44:45 +0200 Subject: drm/gma500/oaktrail_lvds: fix hang on init failure The LVDS init code looks up an I2C adapter using i2c_get_adapter() and tries to read the EDID before falling back to allocating and registering its own adapter. The error handling does not separate these cases so on a late init failure it will try to deregister and free also an adapter that had previously been registered. Since i2c_get_adapter() takes another reference to the adapter, deregistration hangs indefinitely while waiting for the reference to be released. Fix this by only destroying adapters allocated during LVDS init on errors. Fixes: a57ebfc0b4da ("drm/gma500: Make oaktrail lvds use ddc adapter from drm_connector") Cc: stable@vger.kernel.org # 6.0 Cc: Patrik Jakobsson Signed-off-by: Johan Hovold Signed-off-by: Patrik Jakobsson Link: https://patch.msgid.link/20260508144446.59722-3-johan@kernel.org --- drivers/gpu/drm/gma500/oaktrail_lvds.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/gma500/oaktrail_lvds.c b/drivers/gpu/drm/gma500/oaktrail_lvds.c index 884d324f0044..983cc60a1e69 100644 --- a/drivers/gpu/drm/gma500/oaktrail_lvds.c +++ b/drivers/gpu/drm/gma500/oaktrail_lvds.c @@ -293,7 +293,7 @@ void oaktrail_lvds_init(struct drm_device *dev, { struct gma_encoder *gma_encoder; struct gma_connector *gma_connector; - struct gma_i2c_chan *ddc_bus; + struct gma_i2c_chan *ddc_bus = NULL; struct drm_connector *connector; struct drm_encoder *encoder; struct drm_psb_private *dev_priv = to_drm_psb_private(dev); @@ -421,7 +421,8 @@ out: err_unlock: mutex_unlock(&dev->mode_config.mutex); - gma_i2c_destroy(to_gma_i2c_chan(connector->ddc)); + if (!IS_ERR_OR_NULL(ddc_bus)) + gma_i2c_destroy(ddc_bus); drm_encoder_cleanup(encoder); err_connector_cleanup: drm_connector_cleanup(connector); -- cgit v1.2.3 From 84d1c9b416d54afe760ca4c378bd95c89261254c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 8 May 2026 16:44:46 +0200 Subject: drm/gma500/oaktrail_lvds: fix i2c adapter leaks on init The LVDS init code looks up an I2C adapter using i2c_get_adapter() and tries to read the EDID before falling back to allocating and registering its own adapter. Make sure to drop the references taken by i2c_get_adapter() when falling back to allocating an adapter as well as on late errors to allow the looked up adapter to be deregistered. Fixes: 1b082ccf5901 ("gma500: Add Oaktrail support") Cc: stable@vger.kernel.org # 3.3 Signed-off-by: Johan Hovold Signed-off-by: Patrik Jakobsson Link: https://patch.msgid.link/20260508144446.59722-4-johan@kernel.org --- drivers/gpu/drm/gma500/oaktrail_lvds.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/gma500/oaktrail_lvds.c b/drivers/gpu/drm/gma500/oaktrail_lvds.c index 983cc60a1e69..e194d0cce067 100644 --- a/drivers/gpu/drm/gma500/oaktrail_lvds.c +++ b/drivers/gpu/drm/gma500/oaktrail_lvds.c @@ -367,6 +367,8 @@ void oaktrail_lvds_init(struct drm_device *dev, if (edid == NULL && dev_priv->lpc_gpio_base) { ddc_bus = oaktrail_lvds_i2c_init(dev); if (!IS_ERR(ddc_bus)) { + if (i2c_adap) + i2c_put_adapter(i2c_adap); i2c_adap = &ddc_bus->base; edid = drm_get_edid(connector, i2c_adap); } @@ -423,6 +425,8 @@ err_unlock: mutex_unlock(&dev->mode_config.mutex); if (!IS_ERR_OR_NULL(ddc_bus)) gma_i2c_destroy(ddc_bus); + else if (i2c_adap) + i2c_put_adapter(i2c_adap); drm_encoder_cleanup(encoder); err_connector_cleanup: drm_connector_cleanup(connector); -- cgit v1.2.3 From 0b28000b64f40dd29a730507aa0447231960cfb8 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Thu, 7 May 2026 20:50:10 +0800 Subject: RDMA/nldev: Add mutual exclusion in nldev_dellink() We must serialize calls to nldev_dellink() or risk a crash as syzbot reported: KASAN: null-ptr-deref in range [0x0000000000000020-0x0000000000000027] Call Trace: udp_tunnel_sock_release+0x6d/0x80 net/ipv4/udp_tunnel_core.c:197 rxe_release_udp_tunnel drivers/infiniband/sw/rxe/rxe_net.c:294 [inline] rxe_sock_put drivers/infiniband/sw/rxe/rxe_net.c:639 [inline] rxe_net_del+0xfb/0x290 drivers/infiniband/sw/rxe/rxe_net.c:660 rxe_dellink+0x15/0x20 drivers/infiniband/sw/rxe/rxe.c:254 Fixes: a60e3f3d6fba ("RDMA/nldev: Add dellink function pointer") Reported-by: syzbot+d8f76778263ab65c2b21@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d8f76778263ab65c2b21 Tested-by: syzbot+d8f76778263ab65c2b21@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Link: https://patch.msgid.link/tencent_611BEB4B141B1A2526BAA3BBB2335F9E9108@qq.com Reviewed-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 96c745d5bac4..5aaba2b9746b 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -51,6 +51,7 @@ * a controlled QKEY. */ static bool privileged_qkey; +static DEFINE_MUTEX(nldev_dellink_mutex); typedef int (*res_fill_func_t)(struct sk_buff*, bool, struct rdma_restrack_entry*, uint32_t); @@ -1846,7 +1847,9 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, * implicitly scoped to the driver supporting dynamic link deletion like RXE. */ if (device->link_ops && device->link_ops->dellink) { + mutex_lock(&nldev_dellink_mutex); err = device->link_ops->dellink(device); + mutex_unlock(&nldev_dellink_mutex); if (err) return err; } -- cgit v1.2.3 From 0bf1b4dda2d0c89980eab816778722cf51aa404c Mon Sep 17 00:00:00 2001 From: Yi Lai Date: Thu, 7 May 2026 20:51:06 +0800 Subject: selftests/rdma: explicitly skip tests when required modules are missing Currently, the rdma rxe selftests fail with an exit code of 1 when required kernel modules are not present. This causes spurious failures in environments where these modules might not be compiled or available. Include the standard kselftest 'ktap_helpers.sh' and replace the hardcoded error exits with '$KSFT_SKIP'. This ensures the tests are properly marked as skipped rather than failed. Fixes: e01027cab38a ("RDMA/rxe: Add testcase for net namespace rxe") Signed-off-by: Yi Lai Link: https://patch.msgid.link/20260507125106.3114167-1-yi1.lai@intel.com Reviewed-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- tools/testing/selftests/rdma/rxe_ipv6.sh | 6 ++++-- tools/testing/selftests/rdma/rxe_rping_between_netns.sh | 7 +++++++ tools/testing/selftests/rdma/rxe_socket_with_netns.sh | 6 ++++++ tools/testing/selftests/rdma/rxe_test_NETDEV_UNREGISTER.sh | 6 ++++-- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/rdma/rxe_ipv6.sh b/tools/testing/selftests/rdma/rxe_ipv6.sh index b7059bfd6d7c..32dad687a044 100755 --- a/tools/testing/selftests/rdma/rxe_ipv6.sh +++ b/tools/testing/selftests/rdma/rxe_ipv6.sh @@ -8,6 +8,8 @@ RXE_NAME="rxe6" PORT=4791 IP6_ADDR="2001:db8::1/64" +source "$(dirname "$0")/../kselftest/ktap_helpers.sh" + exec > /dev/null # Cleanup function to run on exit (even on failure) @@ -21,8 +23,8 @@ trap cleanup EXIT # 1. Prerequisites check for mod in tun veth rdma_rxe; do if ! modinfo "$mod" >/dev/null 2>&1; then - echo "Error: Kernel module '$mod' not found." - exit 1 + echo "SKIP: Kernel module '$mod' not found." >&2 + exit $KSFT_SKIP fi done diff --git a/tools/testing/selftests/rdma/rxe_rping_between_netns.sh b/tools/testing/selftests/rdma/rxe_rping_between_netns.sh index e5b876f58c6e..e7554fbb8951 100755 --- a/tools/testing/selftests/rdma/rxe_rping_between_netns.sh +++ b/tools/testing/selftests/rdma/rxe_rping_between_netns.sh @@ -8,6 +8,8 @@ IP_A="1.1.1.1" IP_B="1.1.1.2" PORT=4791 +source "$(dirname "$0")/../kselftest/ktap_helpers.sh" + exec > /dev/null # --- Cleanup Routine --- @@ -27,6 +29,11 @@ if [[ $EUID -ne 0 ]]; then exit 1 fi +if ! modinfo rdma_rxe >/dev/null 2>&1; then + echo "SKIP: Kernel module 'rdma_rxe' not found." >&2 + exit $KSFT_SKIP +fi + modprobe rdma_rxe || { echo "Failed to load rdma_rxe"; exit 1; } # --- Setup Network Topology --- diff --git a/tools/testing/selftests/rdma/rxe_socket_with_netns.sh b/tools/testing/selftests/rdma/rxe_socket_with_netns.sh index 002e5098f751..9478657c02c1 100755 --- a/tools/testing/selftests/rdma/rxe_socket_with_netns.sh +++ b/tools/testing/selftests/rdma/rxe_socket_with_netns.sh @@ -4,6 +4,8 @@ PORT=4791 MODS=("tun" "rdma_rxe") +source "$(dirname "$0")/../kselftest/ktap_helpers.sh" + exec > /dev/null # --- Helper: Cleanup Routine --- @@ -26,6 +28,10 @@ if [[ $EUID -ne 0 ]]; then fi for m in "${MODS[@]}"; do + if ! modinfo "$m" >/dev/null 2>&1; then + echo "SKIP: Kernel module '$m' not found." >&2 + exit $KSFT_SKIP + fi modprobe "$m" || { echo "Error: Failed to load $m"; exit 1; } done diff --git a/tools/testing/selftests/rdma/rxe_test_NETDEV_UNREGISTER.sh b/tools/testing/selftests/rdma/rxe_test_NETDEV_UNREGISTER.sh index 021ca451499d..8c18cea7535c 100755 --- a/tools/testing/selftests/rdma/rxe_test_NETDEV_UNREGISTER.sh +++ b/tools/testing/selftests/rdma/rxe_test_NETDEV_UNREGISTER.sh @@ -5,6 +5,8 @@ DEV_NAME="tun0" RXE_NAME="rxe0" RDMA_PORT=4791 +source "$(dirname "$0")/../kselftest/ktap_helpers.sh" + exec > /dev/null # --- Cleanup Routine --- @@ -19,8 +21,8 @@ trap cleanup EXIT # 1. Dependency Check if ! modinfo rdma_rxe >/dev/null 2>&1; then - echo "Error: rdma_rxe module not found." - exit 1 + echo "SKIP: rdma_rxe module not found." >&2 + exit $KSFT_SKIP fi modprobe rdma_rxe -- cgit v1.2.3 From f6b079629becfa977f9c51fe53ad2e6dcc55ef44 Mon Sep 17 00:00:00 2001 From: Lord Ulf Henrik Holmberg Date: Sat, 9 May 2026 10:40:11 +0200 Subject: RDMA/bnxt_re: zero shared page before exposing to userspace bnxt_re_alloc_ucontext() allocates uctx->shpg via __get_free_page(GFP_KERNEL). The buddy allocator does not zero pages without __GFP_ZERO, so the page contains stale kernel data from whatever object most recently freed it. The page is then mapped into userspace via vm_insert_page() under BNXT_RE_MMAP_SH_PAGE in bnxt_re_mmap(). The driver only ever writes 4 bytes (a u32 AVID) at offset BNXT_RE_AVID_OFFT (0x10) inside bnxt_re_create_ah(); the remaining 4092 bytes of the page are exposed to userspace unsanitised, leaking kernel memory contents. Any user with access to /dev/infiniband/uverbsX on a host with a bnxt_re device (typically rdma group membership) can read this data via a single mmap() at pgoff 0 after IB_USER_VERBS_CMD_GET_CONTEXT. Other shared pages in the same file already use get_zeroed_page() correctly: drivers/infiniband/hw/bnxt_re/ib_verbs.c srq->uctx_srq_page = (void *)get_zeroed_page(GFP_KERNEL); cq->uctx_cq_page = (void *)get_zeroed_page(GFP_KERNEL); uctx->shpg is the only outlier. Bring it in line with the existing convention by switching to get_zeroed_page(). Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Signed-off-by: Lord Ulf Henrik Holmberg Link: https://patch.msgid.link/20260509084011.11971-1-pomzm67@gmail.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 7ed294516b7e..365ec2767d25 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -4638,7 +4638,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) uctx->rdev = rdev; - uctx->shpg = (void *)__get_free_page(GFP_KERNEL); + uctx->shpg = (void *)get_zeroed_page(GFP_KERNEL); if (!uctx->shpg) { rc = -ENOMEM; goto fail; -- cgit v1.2.3 From 7d8f3158a51cb40fc710d2a781549141a139b796 Mon Sep 17 00:00:00 2001 From: Yu Miao Date: Wed, 13 May 2026 10:39:07 +0800 Subject: selftests/cgroup: Fix error path leaks in test_percpu_basic When cg_name_indexed() returns NULL partway through the child creation loop, the code returned -1 without running cleanup_children and cleanup. That left the `parent` pathname allocation unreleased and did not remove child cgroup directories already created under the parent. Fix by jumping to cleanup_children instead of returning. When cg_create() fails, `child` (the pathname from cg_name_indexed()) was not freed before cleanup_children. Fix by freeing `child` before branching to cleanup_children. Fixes: 90631e1dea55 ("kselftests: cgroup: add perpcu memory accounting test") Signed-off-by: Yu Miao Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_kmem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index eeabd34bf083..12f59925500b 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -368,11 +368,15 @@ static int test_percpu_basic(const char *root) for (i = 0; i < 1000; i++) { child = cg_name_indexed(parent, "child", i); - if (!child) - return -1; + if (!child) { + ret = -1; + goto cleanup_children; + } - if (cg_create(child)) + if (cg_create(child)) { + free(child); goto cleanup_children; + } free(child); } -- cgit v1.2.3 From 345f40166694e60db6d5cf02233814bb27ac5dec Mon Sep 17 00:00:00 2001 From: sunshaojie Date: Wed, 13 May 2026 18:37:38 +0800 Subject: cgroup/cpuset: Return only actually allocated CPUs during partition invalidation In update_parent_effective_cpumask() with partcmd_invalidate, the CPUs to return to the parent are computed as: adding = cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus); where xcpus = user_xcpus(cs) which returns cs->exclusive_cpus (if set) or cs->cpus_allowed. When exclusive_cpus is not set, user_xcpus(cs) can contain CPUs that were never actually granted to the partition due to sibling exclusion in compute_excpus(). Consequently, the invalidation may return CPUs to the parent that remain in use by sibling partitions, causing overlapping effective_cpus and triggering the WARN_ON_ONCE(1) in generate_sched_domains(). Use cs->effective_xcpus instead, which reflects the CPUs actually granted to this partition. Reproducer (on a 4-CPU machine): cd /sys/fs/cgroup mkdir a1 b1 # a1 becomes partition root with CPUs 0-1 echo "0-1" > a1/cpuset.cpus echo "root" > a1/cpuset.cpus.partition # b1 becomes partition root with CPUs 1-2, but sibling exclusion # reduces its effective_xcpus to CPU 2 only echo "1-2" > b1/cpuset.cpus echo "root" > b1/cpuset.cpus.partition # b1 changes cpus_allowed to 0-1 -> partition invalidation echo "0-1" > b1/cpuset.cpus # Expected: CPUs 2-3 (only CPU 2 returned from b1) # Actual: CPUs 1-3 (CPU 0-1 returned, overlapping with a1) cat cpuset.cpus.effective dmesg will also show a WARNING from generate_sched_domains() reporting overlapping partition root effective_cpus. Fixes: 2a3602030d80 ("cgroup/cpuset: Don't invalidate sibling partitions on cpuset.cpus conflict") Cc: stable@vger.kernel.org # v7.0+ Signed-off-by: sunshaojie Tested-by: Chen Ridong Reviewed-by: Chen Ridong Reviewed-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index e84e801e22cf..5c33ab20cc20 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1718,7 +1718,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, */ if (is_partition_valid(parent)) adding = cpumask_and(tmp->addmask, - xcpus, parent->effective_xcpus); + cs->effective_xcpus, + parent->effective_xcpus); if (old_prs > 0) new_prs = -old_prs; -- cgit v1.2.3 From d6a2d7b04b5a093021a7a0e2e69e9d5237dfa8cc Mon Sep 17 00:00:00 2001 From: Nicholas Carlini Date: Mon, 11 May 2026 18:02:16 +0000 Subject: io-wq: check that the predecessor is hashed in io_wq_remove_pending() io_wq_remove_pending() needs to fix up wq->hash_tail[] if the cancelled work was the tail of its hash bucket. When doing this, it checks whether the preceding entry in acct->work_list has the same hash value, but never checks that the predecessor is hashed at all. io_get_work_hash() is simply atomic_read(&work->flags) >> IO_WQ_HASH_SHIFT, and the hash bits are never set for non-hashed work, so it returns 0. Thus, when a hashed bucket-0 work is cancelled while a non-hashed work is its list predecessor, the check spuriously passes and a pointer to the non-hashed io_kiocb is stored in wq->hash_tail[0]. Because non-hashed work is dequeued via the fast path in io_get_next_work(), which never touches hash_tail[], the stale pointer is never cleared. Therefore, after the non-hashed io_kiocb completes and is freed back to req_cachep, wq->hash_tail[0] is a dangling pointer. The io_wq is per-task (tctx->io_wq) and survives ring open/close, so the dangling pointer persists for the lifetime of the task; the next hashed bucket-0 enqueue dereferences it in io_wq_insert_work() and wq_list_add_after() writes through freed memory. Add the missing io_wq_is_hashed() check so a non-hashed predecessor never inherits a hash_tail[] slot. Cc: stable@vger.kernel.org Fixes: 204361a77f40 ("io-wq: fix hang after cancelling pending hashed work") Signed-off-by: Nicholas Carlini Signed-off-by: Jens Axboe --- io_uring/io-wq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 7a9f94a0ce6f..8cc7b47d3089 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1124,7 +1124,8 @@ static inline void io_wq_remove_pending(struct io_wq *wq, if (io_wq_is_hashed(work) && work == wq->hash_tail[hash]) { if (prev) prev_work = container_of(prev, struct io_wq_work, list); - if (prev_work && io_get_work_hash(prev_work) == hash) + if (prev_work && io_wq_is_hashed(prev_work) && + io_get_work_hash(prev_work) == hash) wq->hash_tail[hash] = prev_work; else wq->hash_tail[hash] = NULL; -- cgit v1.2.3 From ee047fc7a2da90554410128195058c409a391d43 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 24 Apr 2026 14:41:13 -0700 Subject: Documentation: intel_pstate: Fix description of asymmetric packing with SMT Patchset [1], including commits 046a5a95c3b0 ("x86/sched/itmt: Give all SMT siblings of a core the same priority") 995998ebdebd ("x86/sched: Remove SD_ASYM_PACKING from the SMT domain flags") overhauled asym_packing handling in the scheduler on x86 hybrid processors with SMT. It removed SD_ASYM_PACKING from the x86 SMT scheduling domain and made all SMT siblings of a core share the same priority. As a result, asym_packing operates only across physical cores, spreading tasks among them and only using idle SMT siblings once all physical cores are busy. Fix the documentation to reflect this behavior. Fixes: f20af84c29b2 ("cpufreq: intel_pstate: Document hybrid processor support") Link: https://lore.kernel.org/r/20230406203148.19182-1-ricardo.neri-calderon@linux.intel.com [1] Signed-off-by: Ricardo Neri [ rjw: Changelog edits ] Link: https://patch.msgid.link/20260424-rneri-fix-intel-pstate-doc-smt-asym-packing-v1-1-317bf7d5c362@linux.intel.com Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/intel_pstate.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index fde967b0c2e0..25fe5d88fea6 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -355,11 +355,12 @@ HyperThreading (HT) in the context of Intel processors, is enabled on at least one core, ``intel_pstate`` assigns performance-based priorities to CPUs. Namely, the priority of a given CPU reflects its highest HWP performance level which causes the CPU scheduler to generally prefer more performant CPUs, so the less -performant CPUs are used when the other ones are fully loaded. However, SMT -siblings (that is, logical CPUs sharing one physical core) are treated in a -special way such that if one of them is in use, the effective priority of the -other ones is lowered below the priorities of the CPUs located in the other -physical cores. +performant CPUs are used when the other ones are fully loaded. SMT siblings +(that is, logical CPUs sharing one physical core) are given the same priority. +The scheduler can pull tasks from lower-priority cores and place them on any +sibling. Since the scheduler spreads tasks among physical cores, tasks will be +placed on the SMT siblings of physical cores only after all physical cores are +busy. This approach maximizes performance in the majority of cases, but unfortunately it also leads to excessive energy usage in some important scenarios, like video -- cgit v1.2.3 From 0e7c710478b3089cdfe8669347f77b163e836c4f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 21:20:30 +0200 Subject: cpufreq: intel_pstate: Use correct scaling factor on Raptor Lake-E Raptor Lake-E has the same processor ID as Raptor Lake-S, so there is an entry in intel_hybrid_scaling_factor[] for it. It does not contain E-cores though and hybrid_get_cpu_type() returns 0 for its P-cores, so they get the default "core" scaling factor. However, the original Raptor Lake scaling factor for P-cores still needs to be used for mapping the HWP performance levels of the P-cores in Raptor Lake-E to frequency, as though they were part of a real hybrid system. To address this, update hwp_get_cpu_scaling() to return hybrid_scaling_factor, which is the P-core scaling factor retrieved from intel_hybrid_scaling_factor[], for all CPUs that are not enumerated as E-cores. Fixes: 9b18d536b124 ("cpufreq: intel_pstate: Use CPPC to get scaling factors") Link: https://lore.kernel.org/all/20260511235328.2018458-1-srinivas.pandruvada@linux.intel.com/ Reported-by: Henry Tseng Closes: https://lore.kernel.org/linux-pm/20260508063032.3248602-1-henrytseng@qnap.com/ Signed-off-by: Rafael J. Wysocki Cc: All applicable Link: https://patch.msgid.link/4523296.ejJDZkT8p0@rafael.j.wysocki --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 1292da53e5fc..978e2b604ac8 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2279,7 +2279,7 @@ static int hwp_get_cpu_scaling(int cpu) * Return the hybrid scaling factor for P-cores and use the * default core scaling for E-cores. */ - if (hybrid_get_cpu_type(cpu) == INTEL_CPU_TYPE_CORE) + if (hybrid_get_cpu_type(cpu) != INTEL_CPU_TYPE_ATOM) return hybrid_scaling_factor; return core_get_scaling(); -- cgit v1.2.3 From 85cffdb21bc1a928f4c89f88dcdf42a460b04e70 Mon Sep 17 00:00:00 2001 From: Henry Tseng Date: Wed, 13 May 2026 18:28:46 +0800 Subject: cpufreq: intel_pstate: Use HYBRID_SCALING_FACTOR_ADL for Bartlett Lake Bartlett Lake P-core only SKUs (e.g. Intel Core 9 273PE) do not report X86_FEATURE_HYBRID_CPU and are not in intel_hybrid_scaling_factor[]. In hwp_get_cpu_scaling(), the non-hybrid fallback then applies core_get_scaling() (100000), producing cpuinfo_max_freq values that exceed the documented Max Turbo Frequency: intel_pstate: CPU0: PERF_CTL turbo = 57 intel_pstate: CPU0: HWP_CAP guaranteed = 30 intel_pstate: CPU0: HWP_CAP highest = 70 intel_pstate: CPU0: HWP-to-frequency scaling factor: 100000 intel_pstate: set_policy cpuinfo.max 7000000 policy->max 7000000 ... intel_pstate: CPU12: PERF_CTL turbo = 57 intel_pstate: CPU12: HWP_CAP guaranteed = 30 intel_pstate: CPU12: HWP_CAP highest = 73 intel_pstate: CPU12: HWP-to-frequency scaling factor: 100000 intel_pstate: set_policy cpuinfo.max 7300000 policy->max 7300000 ... Per the Intel datasheet [1], the Intel Core 9 273PE specifies: Performance-cores: 12 Efficient-cores: 0 Max Turbo Frequency: 5.7 GHz Intel Thermal Velocity Boost Frequency: 5.7 GHz Intel Turbo Boost Max Technology 3.0 Frequency: 5.6 GHz Performance-core Max Turbo Frequency: 5.4 GHz Performance-core Base Frequency: 2.3 GHz Bartlett Lake P-cores are Raptor Cove cores, per commit d466304c4322 ("x86/cpu: Add CPU model number for Bartlett Lake CPUs with Raptor Cove cores"). The Alder Lake and Raptor Lake P-core entries in intel_hybrid_scaling_factor[] use HYBRID_SCALING_FACTOR_ADL (78741). The same factor applies to Bartlett Lake. Add Bartlett Lake to intel_hybrid_scaling_factor[] with HYBRID_SCALING_FACTOR_ADL so HWP performance levels map to the correct CPU frequencies matching the datasheet's Max Turbo Frequency. Link: https://www.intel.com/content/www/us/en/products/sku/245717/intel-core-9-processor-273pe-36m-cache-up-to-5-70-ghz/specifications.html [1] Signed-off-by: Henry Tseng [ rjw: Changelog tweak ] Link: https://patch.msgid.link/20260513102847.75179-1-henrytseng@qnap.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 978e2b604ac8..1f093e346430 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3734,6 +3734,7 @@ static const struct x86_cpu_id intel_hybrid_scaling_factor[] = { X86_MATCH_VFM(INTEL_RAPTORLAKE, HYBRID_SCALING_FACTOR_ADL), X86_MATCH_VFM(INTEL_RAPTORLAKE_P, HYBRID_SCALING_FACTOR_ADL), X86_MATCH_VFM(INTEL_RAPTORLAKE_S, HYBRID_SCALING_FACTOR_ADL), + X86_MATCH_VFM(INTEL_BARTLETTLAKE, HYBRID_SCALING_FACTOR_ADL), X86_MATCH_VFM(INTEL_METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL), X86_MATCH_VFM(INTEL_LUNARLAKE_M, HYBRID_SCALING_FACTOR_LNL), {} -- cgit v1.2.3 From 32d5019ed3b6ff4439cb075fb275f655c8a2059c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 May 2026 07:01:47 +0200 Subject: block: pass a minsize argument to bio_iov_iter_bounce When bouncing for block size > PAGE_SIZE file systems that require file system block size alignment (e.g. zoned XFS), the bio needs to be big enough to fit an entire block. Fixes: 8dd5e7c75d7b ("block: add helpers to bounce buffer an iov_iter into bios") Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20260507050153.1298375-2-hch@lst.de Signed-off-by: Jens Axboe --- block/bio.c | 23 +++++++++++++---------- fs/iomap/direct-io.c | 2 +- include/linux/bio.h | 3 ++- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/block/bio.c b/block/bio.c index b8972dba68a0..f3e5d8bea08c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1279,11 +1279,12 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter, return bio_iov_iter_align_down(bio, iter, len_align_mask); } -static struct folio *folio_alloc_greedy(gfp_t gfp, size_t *size) +static struct folio *folio_alloc_greedy(gfp_t gfp, size_t *size, + size_t minsize) { struct folio *folio; - while (*size > PAGE_SIZE) { + while (*size > minsize) { folio = folio_alloc(gfp | __GFP_NORETRY, get_order(*size)); if (folio) return folio; @@ -1307,7 +1308,7 @@ static void bio_free_folios(struct bio *bio) } static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter, - size_t maxlen) + size_t maxlen, size_t minsize) { size_t total_len = min(maxlen, iov_iter_count(iter)); @@ -1322,13 +1323,13 @@ static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter, size_t this_len = min(total_len, SZ_1M); struct folio *folio; - if (this_len > PAGE_SIZE * 2) + if (this_len > minsize * 2) this_len = rounddown_pow_of_two(this_len); if (bio->bi_iter.bi_size > BIO_MAX_SIZE - this_len) break; - folio = folio_alloc_greedy(GFP_KERNEL, &this_len); + folio = folio_alloc_greedy(GFP_KERNEL, &this_len, minsize); if (!folio) break; bio_add_folio_nofail(bio, folio, this_len, 0); @@ -1348,12 +1349,12 @@ static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter, } static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter, - size_t maxlen) + size_t maxlen, size_t minsize) { size_t len = min3(iov_iter_count(iter), maxlen, SZ_1M); struct folio *folio; - folio = folio_alloc_greedy(GFP_KERNEL, &len); + folio = folio_alloc_greedy(GFP_KERNEL, &len, minsize); if (!folio) return -ENOMEM; @@ -1390,6 +1391,7 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter, * @bio: bio to send * @iter: iter to read from / write into * @maxlen: maximum size to bounce + * @minsize: minimum folio allocation size * * Helper for direct I/O implementations that need to bounce buffer because * we need to checksum the data or perform other operations that require @@ -1397,11 +1399,12 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter, * copies the data into it. Needs to be paired with bio_iov_iter_unbounce() * called on completion. */ -int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen) +int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen, + size_t minsize) { if (op_is_write(bio_op(bio))) - return bio_iov_iter_bounce_write(bio, iter, maxlen); - return bio_iov_iter_bounce_read(bio, iter, maxlen); + return bio_iov_iter_bounce_write(bio, iter, maxlen, minsize); + return bio_iov_iter_bounce_read(bio, iter, maxlen, minsize); } static void bvec_unpin(struct bio_vec *bv, bool mark_dirty) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index b0a6549b3848..b36ee619cdcd 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -355,7 +355,7 @@ static ssize_t iomap_dio_bio_iter_one(struct iomap_iter *iter, if (dio->flags & IOMAP_DIO_BOUNCE) ret = bio_iov_iter_bounce(bio, dio->submit.iter, - iomap_max_bio_size(&iter->iomap)); + iomap_max_bio_size(&iter->iomap), alignment); else ret = bio_iov_iter_get_pages(bio, dio->submit.iter, alignment - 1); diff --git a/include/linux/bio.h b/include/linux/bio.h index 97d747320b35..dc17780d6c1e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -475,7 +475,8 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen); +int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen, + size_t minsize); void bio_iov_iter_unbounce(struct bio *bio, bool is_error, bool mark_dirty); extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, -- cgit v1.2.3 From e7b8b3c5b2a65595d506ffedafac66f0a11fbdc2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 May 2026 07:01:48 +0200 Subject: block: align down bounces bios Just like for the extract user pages path, we need to align down the size to the supported boundary. Fixes: 8dd5e7c75d7b ("block: add helpers to bounce buffer an iov_iter into bios") Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20260507050153.1298375-3-hch@lst.de Signed-off-by: Jens Axboe --- block/bio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/bio.c b/block/bio.c index f3e5d8bea08c..5f10900b3f42 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1345,7 +1345,7 @@ static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter, if (!bio->bi_iter.bi_size) return -ENOMEM; - return 0; + return bio_iov_iter_align_down(bio, iter, minsize - 1); } static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter, @@ -1383,7 +1383,7 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter, bvec_set_folio(&bio->bi_io_vec[0], folio, bio->bi_iter.bi_size, 0); if (iov_iter_extract_will_pin(iter)) bio_set_flag(bio, BIO_PAGE_PINNED); - return 0; + return bio_iov_iter_align_down(bio, iter, minsize - 1); } /** -- cgit v1.2.3 From c64a647c84f30be368404f50f9052ed6c75c0f17 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 7 May 2026 08:35:46 -0600 Subject: vfio/pci: fix dma-buf kref underflow after revoke MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vfio_pci_dma_buf_move(revoked=true) and vfio_pci_dma_buf_cleanup() ran the same drain sequence: set priv->revoked, invalidate mappings, wait for fences, drop the registered kref, wait for completion. When the VFIO device fd was closed after PCI_COMMAND_MEMORY had been cleared, both ran in turn -- the second kref_put underflowed and the subsequent wait_for_completion() blocked on a completion that the first run had already consumed: refcount_t: underflow; use-after-free. WARNING: lib/refcount.c:28 at refcount_warn_saturate+0x59/0x90 Call Trace: vfio_pci_dma_buf_cleanup+0x163/0x168 [vfio_pci_core] vfio_pci_core_close_device+0x67/0xe0 [vfio_pci_core] vfio_df_close+0x4c/0x80 [vfio] vfio_df_group_close+0x36/0x80 [vfio] vfio_device_fops_release+0x21/0x40 [vfio] __fput+0xe6/0x2b0 __x64_sys_close+0x3d/0x80 Collapse the duplication: vfio_pci_dma_buf_cleanup() now delegates the drain to vfio_pci_dma_buf_move(true), which is idempotent for already-revoked dma-bufs. cleanup retains only list removal and the device registration drop; the dma_resv_lock that bracketed those is dropped along with the in-line drain that required it, memory_lock continues to protect them. Re-arm the kref and the completion at the end of move()'s revoke branch so post-revoke state matches post-creation (kref == 1, completion ready). This keeps cleanup's call into move() a no-op when revoke already ran, and replaces the explicit kref_init() that the un-revoke branch used to perform for the un-revoke -> remap path. Fixes: 1a8a5227f229 ("vfio: Wait for dma-buf invalidation to complete") Reported-by: Joonas Kylmälä Closes: https://lore.kernel.org/all/GVXPR02MB12019AA6014F27EF5D773E89BFB372@GVXPR02MB12019.eurprd02.prod.outlook.com/ Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Reviewed-by: Leon Romanovsky Signed-off-by: Alex Williamson Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20260507143548.1018405-1-alex.williamson@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_dmabuf.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c index f87fd32e4a01..fdc22e8b4656 100644 --- a/drivers/vfio/pci/vfio_pci_dmabuf.c +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c @@ -354,19 +354,18 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked) if (revoked) { kref_put(&priv->kref, vfio_pci_dma_buf_done); wait_for_completion(&priv->comp); - } else { /* - * Kref is initialize again, because when revoke - * was performed the reference counter was decreased - * to zero to trigger completion. + * Re-arm the registered kref reference and the + * completion so the post-revoke state matches the + * post-creation state. An un-revoke followed by a + * new mapping needs the kref to be non-zero before + * kref_get(), and vfio_pci_dma_buf_cleanup() + * delegates its drain back through this revoke + * path on a possibly-already-revoked dma-buf. */ kref_init(&priv->kref); - /* - * There is no need to wait as no mapping was - * performed when the previous status was - * priv->revoked == true. - */ reinit_completion(&priv->comp); + } else { dma_resv_lock(priv->dmabuf->resv, NULL); priv->revoked = false; dma_resv_unlock(priv->dmabuf->resv); @@ -382,21 +381,22 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev) struct vfio_pci_dma_buf *tmp; down_write(&vdev->memory_lock); + + /* + * Drain any active mappings via the revoke path. The move is + * idempotent for dma-bufs already in the revoked state and + * leaves every priv with the kref re-armed and the completion + * ready, so cleanup itself does not need to participate in kref + * bookkeeping. + */ + vfio_pci_dma_buf_move(vdev, true); + list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) { if (!get_file_active(&priv->dmabuf->file)) continue; - dma_resv_lock(priv->dmabuf->resv, NULL); list_del_init(&priv->dmabufs_elm); priv->vdev = NULL; - priv->revoked = true; - dma_buf_invalidate_mappings(priv->dmabuf); - dma_resv_wait_timeout(priv->dmabuf->resv, - DMA_RESV_USAGE_BOOKKEEP, false, - MAX_SCHEDULE_TIMEOUT); - dma_resv_unlock(priv->dmabuf->resv); - kref_put(&priv->kref, vfio_pci_dma_buf_done); - wait_for_completion(&priv->comp); vfio_device_put_registration(&vdev->vdev); fput(priv->dmabuf->file); } -- cgit v1.2.3 From 6ae315d37924435516d697ea7dde0b799a5928e0 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 13 May 2026 13:24:38 +0200 Subject: sched_ext: Use HK_TYPE_DOMAIN_BOOT to detect isolcpus= domain isolation scx_enable() refuses to attach a BPF scheduler when isolcpus=domain is in effect by comparing housekeeping_cpumask(HK_TYPE_DOMAIN) against cpu_possible_mask. Since commit 27c3a5967f05 ("sched/isolation: Convert housekeeping cpumasks to rcu pointers"), HK_TYPE_DOMAIN's cpumask is RCU protected and dereferencing it requires either RCU read lock, the cpu_hotplug write lock, or the cpuset lock; scx_enable() holds none of these, so booting with isolcpus=domain and attaching any BPF scheduler triggers the following lockdep splat: ============================= WARNING: suspicious RCU usage ----------------------------- kernel/sched/isolation.c:60 suspicious rcu_dereference_check() usage! 1 lock held by scx_flash/281: #0: ffffffff8379fce0 (update_mutex){+.+.}-{4:4}, at: bpf_struct_ops_link_create+0x134/0x1c0 Call Trace: dump_stack_lvl+0x6f/0xb0 lockdep_rcu_suspicious.cold+0x37/0x70 housekeeping_cpumask+0xcd/0xe0 scx_enable.isra.0+0x17/0x120 bpf_scx_reg+0x5e/0x80 bpf_struct_ops_link_create+0x151/0x1c0 __sys_bpf+0x1e4b/0x33c0 __x64_sys_bpf+0x21/0x30 do_syscall_64+0x117/0xf80 entry_SYSCALL_64_after_hwframe+0x77/0x7f In addition, commit 03ff73510169 ("cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset") made HK_TYPE_DOMAIN include cpuset isolated partitions as well, which means the current check also rejects BPF schedulers when a cpuset partition is active. That contradicts the original intent of commit 9f391f94a173 ("sched_ext: Disallow loading BPF scheduler if isolcpus= domain isolation is in effect"), which explicitly noted that cpuset partitions are honored through per-task cpumasks and should not be rejected. Switch to housekeeping_enabled(HK_TYPE_DOMAIN_BOOT), which reads only the housekeeping flag bit (no RCU dereference) and reflects exactly the boot-time isolcpus= configuration that the error message refers to. Fixes: 27c3a5967f05 ("sched/isolation: Convert housekeeping cpumasks to rcu pointers") Cc: stable@vger.kernel.org # v7.0+ Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo Acked-by: Frederic Weisbecker --- kernel/sched/ext.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 23f7b3f63b09..a6d0a93d8174 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -7415,8 +7415,7 @@ static s32 scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) static DEFINE_MUTEX(helper_mutex); struct scx_enable_cmd cmd; - if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN), - cpu_possible_mask)) { + if (housekeeping_enabled(HK_TYPE_DOMAIN_BOOT)) { pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n"); return -EINVAL; } -- cgit v1.2.3 From df733ddc263dbe5f471e7c80c8b669532f56bf76 Mon Sep 17 00:00:00 2001 From: Matt Evans Date: Mon, 11 May 2026 07:46:42 -0700 Subject: vfio/pci: Make VFIO_PCI_OFFSET_TO_INDEX() return unsigned VFIO_PCI_OFFSET_TO_INDEX() is used in several places with a signed parameter (e.g. loff_t). Because it makes no sense for a BAR/resource index to be negative, enforce this in the macro. This fixes at least one current issue, where vfio_pci_ioeventfd() uses this macro with an unvalidated signed loff_t returned into a signed type, leading to a possible negative array access. This instance does test against an out-of-bounds positive value, so treating the index as unsigned fixes this issue. Fixes: 89e1f7d4c66d8 ("vfio: Add PCI device driver") Signed-off-by: Matt Evans Link: https://lore.kernel.org/r/20260511144642.2926799-1-mattev@meta.com Signed-off-by: Alex Williamson --- include/linux/vfio_pci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 2ebba746c18f..89165b769e5c 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -21,7 +21,7 @@ #define VFIO_PCI_CORE_H #define VFIO_PCI_OFFSET_SHIFT 40 -#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT) +#define VFIO_PCI_OFFSET_TO_INDEX(off) ((u64)(off) >> VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) -- cgit v1.2.3 From 46e351e84853dda726072bb3d38ba7bd63e7532b Mon Sep 17 00:00:00 2001 From: Alexander Koskovich Date: Sat, 14 Mar 2026 04:14:50 +0000 Subject: drm/msm: Fix GMEM_BASE for A650 Commit dc220915ddb2 ("drm/msm: Fix GMEM_BASE for gen8") changed the GMEM_BASE check from adreno_is_a650_family() & adreno_is_a740_family() to family >= ADRENO_6XX_GEN4. This inadvertently excluded A650 (ADRENO_6XX_GEN3), causing it to report an incorrect GMEM_BASE which results in severe rendering corruption. Update check to also include ADRENO_6XX_GEN3 to fix A650. Fixes: dc220915ddb2 ("drm/msm: Fix GMEM_BASE for gen8") Signed-off-by: Alexander Koskovich Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Reviewed-by: Akhil P Oommen Patchwork: https://patchwork.freedesktop.org/patch/711880/ Message-ID: <20260314-fix-gmem-base-a650-v1-1-3308f60cf74c@pm.me> Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 66f80f2d12f9..a812a4590cc0 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -376,7 +376,7 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx, *value = adreno_gpu->info->gmem; return 0; case MSM_PARAM_GMEM_BASE: - if (adreno_gpu->info->family >= ADRENO_6XX_GEN4) + if (adreno_gpu->info->family >= ADRENO_6XX_GEN3) *value = 0; else *value = 0x100000; -- cgit v1.2.3 From e64bca63647db1d5518198d6c5ca2dbcc66b182b Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Sat, 24 Jan 2026 00:37:38 +0800 Subject: drm/msm/adreno: Fix a reference leak in a6xx_gpu_init() In a6xx_gpu_init(), node is obtained via of_parse_phandle(). While there was a manual of_node_put() at the end of the common path, several early error returns would bypass this call, resulting in a reference leak. Fix this by using the __free(device_node) cleanup handler to release the reference when the variable goes out of scope. Fixes: 5a903a44a984 ("drm/msm/a6xx: Introduce GMU wrapper support") Signed-off-by: Felix Gu Patchwork: https://patchwork.freedesktop.org/patch/700661/ Message-ID: <20260124-a6xx_gpu-v1-1-fa0c8b2dcfb1@gmail.com> Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 615509c8917e..1e455391fbac 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -2621,7 +2621,6 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) struct platform_device *pdev = priv->gpu_pdev; struct adreno_platform_config *config = pdev->dev.platform_data; const struct adreno_info *info = config->info; - struct device_node *node; struct a6xx_gpu *a6xx_gpu; struct adreno_gpu *adreno_gpu; struct msm_gpu *gpu; @@ -2643,7 +2642,8 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = NULL; /* Check if there is a GMU phandle and set it up */ - node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0); + struct device_node *node __free(device_node) = + of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0); /* FIXME: How do we gracefully handle this? */ BUG_ON(!node); @@ -2690,7 +2690,6 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) ret = a6xx_gmu_wrapper_init(a6xx_gpu, node); else ret = a6xx_gmu_init(a6xx_gpu, node); - of_node_put(node); if (ret) { a6xx_destroy(&(a6xx_gpu->base.base)); return ERR_PTR(ret); -- cgit v1.2.3 From af92ee994cc7f7e83a41c2025f32257a2f82a7ef Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Mon, 11 May 2026 21:18:16 +0800 Subject: ksmbd: fix SID memory leak in set_posix_acl_entries_dacl() on overflow Commit 299f962c0b02 ("ksmbd: use check_add_overflow() to prevent u16 DACL size overflow") added check_add_overflow() guards that break out of the ACE-building loops in set_posix_acl_entries_dacl() when the accumulated DACL size would wrap past 65535. However, each iteration allocates a struct smb_sid via kmalloc_obj() at the top of the loop and relies on the kfree(sid) call at the end of the loop body (the 'pass_same_sid' label in the first loop, and the explicit kfree at the tail of the second loop) to release it. The newly introduced 'break' statements bypass those kfree() calls, leaking the sid buffer every time an overflow is detected. A malicious or malformed file with enough POSIX ACL entries to trip the overflow check will leak one or more struct smb_sid allocations on every request that touches the file's DACL, providing a trivial kernel memory exhaustion vector. Free sid before breaking out of the loops to plug the leak. Fixes: 299f962c0b02 ("ksmbd: use check_add_overflow() to prevent u16 DACL size overflow") Cc: stable@vger.kernel.org Signed-off-by: Ferry Meng Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smbacl.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index c1d1f34581d6..9161e9d7ed24 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -643,8 +643,10 @@ static void set_posix_acl_entries_dacl(struct mnt_idmap *idmap, ntace = (struct smb_ace *)((char *)pndace + *size); ace_sz = fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED, flags, pace->e_perm, 0777); - if (check_add_overflow(*size, ace_sz, size)) + if (check_add_overflow(*size, ace_sz, size)) { + kfree(sid); break; + } (*num_aces)++; if (pace->e_tag == ACL_USER) ntace->access_req |= @@ -655,8 +657,10 @@ static void set_posix_acl_entries_dacl(struct mnt_idmap *idmap, ntace = (struct smb_ace *)((char *)pndace + *size); ace_sz = fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED, 0x03, pace->e_perm, 0777); - if (check_add_overflow(*size, ace_sz, size)) + if (check_add_overflow(*size, ace_sz, size)) { + kfree(sid); break; + } (*num_aces)++; if (pace->e_tag == ACL_USER) ntace->access_req |= @@ -698,8 +702,10 @@ posix_default_acl: ntace = (struct smb_ace *)((char *)pndace + *size); ace_sz = fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED, 0x0b, pace->e_perm, 0777); - if (check_add_overflow(*size, ace_sz, size)) + if (check_add_overflow(*size, ace_sz, size)) { + kfree(sid); break; + } (*num_aces)++; if (pace->e_tag == ACL_USER) ntace->access_req |= -- cgit v1.2.3 From 904901561e61a2b559070b20c74a8c95491f30aa Mon Sep 17 00:00:00 2001 From: Jeremy Laratro Date: Wed, 13 May 2026 08:23:26 +0900 Subject: ksmbd: fix null pointer dereference in proc_show_files() When a SMB2 client opens a file with a durable v2 handle and then issues SMB2 SESSION_LOGOFF, session_fd_check() clears fp->tcon = NULL on the reconnectable file pointer but leaves the fp registered in global_ft.idr until the durable scavenger fires (up to fp->durable_timeout seconds later). During that window any read of /proc/fs/ksmbd/files (mode 0400) panics the kernel because proc_show_files() walks global_ft.idr and unconditionally dereferences fp->tcon->id with no NULL guard. Reproducer requires only a successful SMB2 SESSION_SETUP and a share configured with 'durable handles = yes'. KASAN report on mainline 70390501d194: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] RIP: 0010:proc_show_files+0x118/0x740 Call Trace: proc_show_files+0x118/0x740 seq_read_iter+0x4ef/0xe10 proc_reg_read_iter+0x1b7/0x280 ... Guard the dereference. A durable-disconnected fp legitimately has no tcon; report its tree id as 0 rather than oopsing. Fixes: b38f99c1217a ("ksmbd: add procfs interface for runtime monitoring and statistics") Cc: stable@vger.kernel.org Signed-off-by: Jeremy Laratro Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 354c4d8a1cfb..913164c958b1 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -81,7 +81,7 @@ static int proc_show_files(struct seq_file *m, void *v) read_lock(&global_ft.lock); idr_for_each_entry(global_ft.idr, fp, id) { seq_printf(m, "%#-10x %#-10llx %#-10llx %#-10x", - fp->tcon->id, + fp->tcon ? fp->tcon->id : 0, fp->persistent_id, fp->volatile_id, atomic_read(&fp->refcount)); -- cgit v1.2.3 From 4b83cbc4c15f09b000cc06f033f64b0824b6dc87 Mon Sep 17 00:00:00 2001 From: Jeremy Laratro Date: Wed, 13 May 2026 08:26:16 +0900 Subject: ksmbd: fix null pointer dereference in compare_guid_key() session_fd_check() walks the per-inode m_op_list during durable-handle session teardown and sets op->conn = NULL for every opinfo whose conn matched the closing session's connection. The matching opinfo, however, stays linked in its per-ClientGuid lease_table_list entry's lb->lease_list because destroy_lease_table() only runs on full TCP-connection teardown, not on SESSION_LOGOFF. If the same TCP connection then negotiates a fresh session with the same ClientGuid (ClientGuid is bound to NEGOTIATE, not the session, and is unchanged across LOGOFF + SETUP) and issues a SMB2 CREATE with a lease context on a different inode, find_same_lease_key() walks lb->lease_list, reaches the stale opinfo, and calls compare_guid_key(), which unconditionally dereferences opinfo->conn->ClientGUID. The conn pointer is NULL and the kernel panics. Reproducer requires only a successful SMB2 SESSION_SETUP and a share configured with 'durable handles = yes'. KASAN report on mainline 70390501d194: general protection fault, probably for non-canonical address 0xdffffc0000000069: 0000 [#1] SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000348-0x000000000000034f] Workqueue: ksmbd-io handle_ksmbd_work RIP: 0010:bcmp+0x5b/0x230 Call Trace: compare_guid_key+0x4b/0xd0 find_same_lease_key+0x324/0x690 smb2_open+0x6aea/0x8e60 handle_ksmbd_work+0x796/0xee0 ... Faulting address 0x348 is the offset of ClientGUID within struct ksmbd_conn, confirming opinfo->conn was NULL. Read opinfo->conn once and bail out if it has been cleared by a concurrent session_fd_check(). A half-detached opinfo cannot be the owner of an active lease, so returning 0 is the correct match result. Fixes: c8efcc786146 ("ksmbd: add support for durable handles v1/v2") Cc: stable@vger.kernel.org Signed-off-by: Jeremy Laratro Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 8feca02ddbf2..0f5c18520eff 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -481,8 +481,12 @@ static inline int compare_guid_key(struct oplock_info *opinfo, const char *guid1, const char *key1) { const char *guid2, *key2; + struct ksmbd_conn *conn; - guid2 = opinfo->conn->ClientGUID; + conn = READ_ONCE(opinfo->conn); + if (!conn) + return 0; + guid2 = conn->ClientGUID; key2 = opinfo->o_lease->lease_key; if (!memcmp(guid1, guid2, SMB2_CLIENT_GUID_SIZE) && !memcmp(key1, key2, SMB2_LEASE_KEY_SIZE)) -- cgit v1.2.3 From 2b4abf879360ea00a9e2b46d2d15dcdbc0687eed Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 11 Apr 2026 17:59:15 +0300 Subject: drm/msm/adreno: fix userspace-triggered crash on a2xx-a4xx Before a5xx Adreno driver will not try fetching UBWC params (because those generations didn't support UBWC anyway), however it's still possible to query UBWC-related params from the userspace, triggering possible NULL pointer dereference. Check for UBWC config in adreno_get_param() and return sane defaults if there is none. Fixes: a452510aad53 ("drm/msm/adreno: Switch to the common UBWC config struct") Signed-off-by: Dmitry Baryshkov Reviewed-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/717778/ Message-ID: <20260411-adreno-fix-ubwc-v3-1-4983156f3f80@oss.qualcomm.com> Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index a812a4590cc0..03f96a1154e1 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -424,15 +424,21 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx, *value = vm->mm_range; return 0; case MSM_PARAM_HIGHEST_BANK_BIT: + if (!adreno_gpu->ubwc_config) + return UERR(ENOENT, drm, "no UBWC on this platform"); *value = adreno_gpu->ubwc_config->highest_bank_bit; return 0; case MSM_PARAM_RAYTRACING: *value = adreno_gpu->has_ray_tracing; return 0; case MSM_PARAM_UBWC_SWIZZLE: + if (!adreno_gpu->ubwc_config) + return UERR(ENOENT, drm, "no UBWC on this platform"); *value = adreno_gpu->ubwc_config->ubwc_swizzle; return 0; case MSM_PARAM_MACROTILE_MODE: + if (!adreno_gpu->ubwc_config) + return UERR(ENOENT, drm, "no UBWC on this platform"); *value = adreno_gpu->ubwc_config->macrotile_mode; return 0; case MSM_PARAM_UCHE_TRAP_BASE: -- cgit v1.2.3 From 7a529ff48b99011c946e6d8addd071c06d3ccdae Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 11 Apr 2026 08:03:12 -0700 Subject: drm/msm/a6xx: Restore sysprof_active This got lost in the shuffle somehow when moving the vfunc table to catalogue. Fixes inhibiting IFPC when userspace is collecting perfcntr data. Fixes: 491fadb2b818 ("drm/msm/adreno: Move adreno_gpu_func to catalogue") Signed-off-by: Rob Clark Reviewed-by: Akhil P Oommen Patchwork: https://patchwork.freedesktop.org/patch/717780/ Message-ID: <20260411150312.257937-1-robin.clark@oss.qualcomm.com> --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 1e455391fbac..99712f58e792 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -2739,6 +2739,7 @@ const struct adreno_gpu_funcs a6xx_gpu_funcs = { .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, + .sysprof_setup = a6xx_gmu_sysprof_setup, }, .init = a6xx_gpu_init, .get_timestamp = a6xx_gmu_get_timestamp, @@ -2807,6 +2808,7 @@ const struct adreno_gpu_funcs a7xx_gpu_funcs = { .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, + .sysprof_setup = a6xx_gmu_sysprof_setup, }, .init = a6xx_gpu_init, .get_timestamp = a6xx_gmu_get_timestamp, -- cgit v1.2.3 From 78d79c614aaa172ae1ddaea19a3885a9ff3ba857 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 18 Apr 2026 08:08:47 -0700 Subject: drm/msm: Correct modparam description Preemption is enabled for gen8 as well. Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/719256/ Message-ID: <20260418150847.157246-1-robin.clark@oss.qualcomm.com> --- drivers/gpu/drm/msm/adreno/adreno_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 4edfe80c5be7..fc38331ce640 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -17,7 +17,7 @@ MODULE_PARM_DESC(snapshot_debugbus, "Include debugbus sections in GPU devcoredum module_param_named(snapshot_debugbus, snapshot_debugbus, bool, 0600); int enable_preemption = -1; -MODULE_PARM_DESC(enable_preemption, "Enable preemption (A7xx only) (1=on , 0=disable, -1=auto (default))"); +MODULE_PARM_DESC(enable_preemption, "Enable preemption (A7xx+ only) (1=on , 0=disable, -1=auto (default))"); module_param(enable_preemption, int, 0600); bool disable_acd; -- cgit v1.2.3 From 55e0f0d1c1a4ee1e46da7da4d443eb3044fb3851 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Tue, 21 Apr 2026 13:02:38 +0900 Subject: drm/msm: Fix iommu_map_sgtable() return value check and avoid WARN Commit "iommu: return full error code from iommu_map_sg[_atomic]()" changed iommu_map_sgtable() to return an ssize_t and negative values in error cases, rather than a size_t and a zero. Store the return value in the appropriate type and in case of error, return it rather than WARNing. Fixes: ad8f36e4b6b1 ("iommu: return full error code from iommu_map_sg[_atomic]()") Signed-off-by: Mikko Perttunen Patchwork: https://patchwork.freedesktop.org/patch/719685/ Message-ID: <20260421-iommu_map_sgtable-return-v1-3-fb484c07d2a1@nvidia.com> Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_iommu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 271baf4dc4e8..895d03b59da6 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -677,7 +677,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, int prot) { struct msm_iommu *iommu = to_msm_iommu(mmu); - size_t ret; + ssize_t ret; WARN_ON(off != 0); @@ -686,7 +686,8 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, iova |= GENMASK_ULL(63, 49); ret = iommu_map_sgtable(iommu->domain, iova, sgt, prot); - WARN_ON(!ret); + if (ret < 0) + return ret; return (ret == len) ? 0 : -EINVAL; } -- cgit v1.2.3 From b5c7a7f452b885bfbe102bd3a057a5f496802f8b Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 28 Apr 2026 15:35:58 +0800 Subject: drm/msm/a6xx: Check kzalloc return in a8xx_hfi_send_perf_table Check the return value of kzalloc() to prevent a NULL pointer dereference on allocation failure. Fixes: 06cfbca0e1c6 ("drm/msm/a6xx: Share dependency vote table with GMU") Signed-off-by: Chen Ni Reviewed-by: Dmitry Baryshkov Reviewed-by: Akhil P Oommen Patchwork: https://patchwork.freedesktop.org/patch/721342/ Message-ID: <20260428073558.1234238-1-nichen@iscas.ac.cn> Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_hfi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c index 487c2736f2b3..186a73c0b99c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c @@ -289,6 +289,8 @@ static int a8xx_hfi_send_perf_table(struct a6xx_gmu *gmu) (gmu->nr_gpu_freqs * num_gx_votes * sizeof(gmu->gx_arc_votes[0])) + (gmu->nr_gmu_freqs * num_cx_votes * sizeof(gmu->cx_arc_votes[0])); tbl = kzalloc(size, GFP_KERNEL); + if (!tbl) + return -ENOMEM; tbl->type = HFI_TABLE_GPU_PERF; /* First fill GX votes */ -- cgit v1.2.3 From 50030d63b4d3deda4cbea85bb43cfc1785267621 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 13 May 2026 17:04:48 +0200 Subject: driver core: platform: remove software node on release() If we pass a software node to a newly created device using struct platform_device_info, it will not be removed when the device is released. This may happen when a module creating the device is removed or on failure in platform_device_add(). When we try to reuse that software node in a subsequent call to platform_device_register_full(), it will fail with -EBUSY. Provide a wrapper around the existing platform_device_release() that additionally calls device_remove_software_node() and use it to replace the former if we end up adding a software node. While at it: check all three possible situations in which two software nodes for a single platform device can be created/assigned in platform_device_register_full() and bail-out early. Fixes: 0fc434bc2c45 ("driver core: platform: allow attaching software nodes when creating devices") Signed-off-by: Bartosz Golaszewski Link: https://patch.msgid.link/20260513-swnode-remove-on-dev-unreg-v6-1-f9c58939df27@oss.qualcomm.com Signed-off-by: Danilo Krummrich --- drivers/base/platform.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 75b4698d0e58..a19dd22deef2 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -606,6 +606,12 @@ static void platform_device_release(struct device *dev) kfree(pa); } +static void platform_device_release_full(struct device *dev) +{ + device_remove_software_node(dev); + platform_device_release(dev); +} + /** * platform_device_alloc - create a platform device * @name: base name of the device we're adding @@ -848,7 +854,13 @@ struct platform_device *platform_device_register_full(const struct platform_devi int ret; struct platform_device *pdev; - if (pdevinfo->swnode && pdevinfo->properties) + /* + * Only one software node per device is allowed. Make sure we don't + * accept or create two. + */ + if ((pdevinfo->swnode && pdevinfo->properties) || + (pdevinfo->swnode && is_software_node(pdevinfo->fwnode)) || + (pdevinfo->properties && is_software_node(pdevinfo->fwnode))) return ERR_PTR(-EINVAL); pdev = platform_device_alloc(pdevinfo->name, pdevinfo->id); @@ -878,6 +890,8 @@ struct platform_device *platform_device_register_full(const struct platform_devi ret = device_add_software_node(&pdev->dev, pdevinfo->swnode); if (ret) goto err; + + pdev->dev.release = platform_device_release_full; } else if (pdevinfo->properties) { ret = device_create_managed_software_node(&pdev->dev, pdevinfo->properties, NULL); -- cgit v1.2.3 From 57cf4e8d6a57dc2ef5810f4852a23ba4c71b74bb Mon Sep 17 00:00:00 2001 From: Saurav Sachidanand Date: Thu, 7 May 2026 22:11:44 +0000 Subject: i2c: tegra: fix pm_runtime leak on mutex_lock failure If tegra_i2c_mutex_lock() fails, the function returns without calling pm_runtime_put(), leaking the runtime PM reference acquired by the preceding pm_runtime_get_sync(). This prevents the device from ever entering runtime suspend. Add the missing pm_runtime_put() before returning on lock failure. Fixes: 6077cfd716fb ("i2c: tegra: Add support for SW mutex register") Signed-off-by: Saurav Sachidanand Cc: # v7.0+ Reviewed-by: Jon Hunter Acked-by: Thierry Reding Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20260507221145.62183-2-sauravsc@amazon.com --- drivers/i2c/busses/i2c-tegra.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 9fd5ade774a0..c24b8de0a9c7 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1666,8 +1666,10 @@ static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], } ret = tegra_i2c_mutex_lock(i2c_dev); - if (ret) + if (ret) { + pm_runtime_put(i2c_dev->dev); return ret; + } for (i = 0; i < num; i++) { enum msg_end_type end_type = MSG_END_STOP; -- cgit v1.2.3 From 30792d12842901f5276f466a960962d5bfa15cc8 Mon Sep 17 00:00:00 2001 From: Saurav Sachidanand Date: Thu, 7 May 2026 22:11:45 +0000 Subject: i2c: tegra: make tegra_i2c_mutex_unlock() return void tegra_i2c_mutex_unlock() returning an error that overwrites the transfer result causes silent loss of I2C transfer errors. If the transfer failed but the unlock succeeded, the error was lost and the function incorrectly reported success. Rather than propagating the unlock error (which is not actionable by the caller - the I2C message may have been sent regardless), convert the function to return void and WARN on the unexpected condition. If the unlock fails, subsequent lock attempts will fail anyway, making the error visible on the next transfer. Fixes: 6077cfd716fb ("i2c: tegra: Add support for SW mutex register") Signed-off-by: Saurav Sachidanand Cc: # v7.0+ Reviewed-by: Jon Hunter Acked-by: Thierry Reding Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20260507221145.62183-3-sauravsc@amazon.com --- drivers/i2c/busses/i2c-tegra.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index c24b8de0a9c7..479a1667e88d 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -589,25 +589,22 @@ static int tegra_i2c_mutex_lock(struct tegra_i2c_dev *i2c_dev) return ret; } -static int tegra_i2c_mutex_unlock(struct tegra_i2c_dev *i2c_dev) +static void tegra_i2c_mutex_unlock(struct tegra_i2c_dev *i2c_dev) { unsigned int reg = i2c_dev->hw->regs->sw_mutex; u32 val, id; if (!i2c_dev->hw->has_mutex) - return 0; + return; val = readl(i2c_dev->base + reg); id = FIELD_GET(I2C_SW_MUTEX_GRANT, val); - if (id && id != I2C_SW_MUTEX_ID_CCPLEX) { - dev_warn(i2c_dev->dev, "unable to unlock mutex, mutex is owned by: %u\n", id); - return -EPERM; - } + if (WARN(id && id != I2C_SW_MUTEX_ID_CCPLEX, + "unable to unlock mutex, mutex is owned by: %u\n", id)) + return; writel(0, i2c_dev->base + reg); - - return 0; } static void tegra_i2c_mask_irq(struct tegra_i2c_dev *i2c_dev, u32 mask) @@ -1700,7 +1697,7 @@ static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], break; } - ret = tegra_i2c_mutex_unlock(i2c_dev); + tegra_i2c_mutex_unlock(i2c_dev); pm_runtime_put(i2c_dev->dev); return ret ?: i; -- cgit v1.2.3 From 4694efc4164123580f19467141cdcfb73f7a740a Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sat, 9 May 2026 22:04:50 +0100 Subject: FDDI: defza: Sanitise the reset safety timer The reset actions of the DEFZA adapters are exceedingly slow, taking up to 30 seconds to complete by the device spec and typically in the range of 10 seconds in reality, as required for the device RTOS to boot, still quite a lot. Therefore a state machine is used that's interrupt driven, however a safety mechanism is required in case of adapter malfunction, so that if no state change interrupt has arrived in time, then the situation is taken care of. The safety mechanism depends on the origin of the reset. For regular adapter initialisation at the device probe time a sleep is requested. However a reset is also required by the device spec when the adapter has transitioned into the halted state, such as in response to a PC Trace event in the course of ring fault recovery, possibly a common network event. In that case no sleep is possible as a device halt is reported at the hardirq level. A timer is therefore set up to ensure progress in case no adapter state change interrupt has arrived in time, but as from commit 168f6b6ffbee ("timers: Use del_timer_sync() even on UP") a warning is issued as the timer is deleted in the hardirq handler upon an expected state change: defza: v.1.1.4 Oct 6 2018 Maciej W. Rozycki tc2: DEC FDDIcontroller 700 or 700-C at 0x18000000, irq 4 tc2: resetting the board... ------------[ cut here ]------------ WARNING: kernel/time/timer.c:1611 at __timer_delete_sync+0x104/0x120, CPU#0: swapper/0/0 Modules linked in: CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 7.0.0-dirty #2 VOLUNTARY Stack : 9800000002027d08 00000000140120e0 0000000000000000 ffffffff8089d468 0000000000000000 0000000000000000 ffffffff807ed6b8 ffffffff80897458 ffffffff80897400 9800000002027b88 0000000000000000 7070617773203a6d 0000000000000000 9800000002027ba4 0000000000001000 6465746e69617420 0000000000000000 ffffffff807ed6b8 00000000140120e0 0000000000000009 000000000000064b ffffffff800dd14c 0000000000000036 9800000002184000 0000000000000000 0000000000000020 0000000000000000 ffffffff80910000 ffffffff8085c000 9800000002027c70 0000000000000001 ffffffff80045fa0 0000000000000000 0000000000000000 0000000000000000 0000000000000009 000000000000064b ffffffff800502b8 ffffffff807ed6b8 ffffffff80045fa0 ... Call Trace: [] show_stack+0x28/0xf0 [] dump_stack_lvl+0x48/0x7c [] __warn+0xa0/0x128 [] warn_slowpath_fmt+0x64/0xa4 [] __timer_delete_sync+0x104/0x120 [] fza_interrupt+0xc74/0xeb8 [] __handle_irq_event_percpu+0x70/0x228 [] handle_irq_event_percpu+0x18/0x78 [] handle_percpu_irq+0x50/0x80 [] generic_handle_irq+0x90/0xd0 [] do_IRQ+0x1c/0x30 [] handle_int+0x148/0x154 [] do_idle+0x40/0x108 [] cpu_startup_entry+0x2c/0x38 [] kernel_init+0x0/0x108 ---[ end trace 0000000000000000 ]--- tc2: OK tc2: model 700 (DEFZA-AA), MMF PMD, address 08-00-2b-xx-xx-xx tc2: ROM rev. 1.0, firmware rev. 1.2, RMC rev. A, SMT ver. 1 tc2: link unavailable ------------[ cut here ]------------ WARNING: kernel/time/timer.c:1611 at __timer_delete_sync+0x104/0x120, CPU#0: swapper/0/0 Modules linked in: CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Tainted: G W 7.0.0-dirty #2 VOLUNTARY Tainted: [W]=WARN Stack : 9800000002027d08 00000000140120e0 0000000000000000 ffffffff8089d468 0000000000000000 0000000000000000 ffffffff807ed6b8 ffffffff80897458 ffffffff80897400 9800000002027b88 0000000000000000 0000000000000000 0000000000000000 9800000002027ba4 0000000000001000 0000000000000000 0000000000000000 ffffffff807ed6b8 00000000140120e0 0000000000000009 000000000000064b ffffffff800dd14c 0000000000000036 9800000002184000 0000000000000000 0000000000000020 0000000000000000 ffffffff80910000 ffffffff8085c000 9800000002027c70 0000000000000001 ffffffff80045fa0 0000000000000000 0000000000000000 0000000000000000 0000000000000009 000000000000064b ffffffff800502b8 ffffffff807ed6b8 ffffffff80045fa0 ... Call Trace: [] show_stack+0x28/0xf0 [] dump_stack_lvl+0x48/0x7c [] __warn+0xa0/0x128 [] warn_slowpath_fmt+0x64/0xa4 [] __timer_delete_sync+0x104/0x120 [] fza_interrupt+0xc74/0xeb8 [] __handle_irq_event_percpu+0x70/0x228 [] handle_irq_event_percpu+0x18/0x78 [] handle_percpu_irq+0x50/0x80 [] generic_handle_irq+0x90/0xd0 [] do_IRQ+0x1c/0x30 [] handle_int+0x148/0x154 [] arch_local_irq_disable+0x4/0x28 [] do_idle+0x50/0x108 [] cpu_startup_entry+0x2c/0x38 [] kernel_init+0x0/0x108 ---[ end trace 0000000000000000 ]--- tc2: registered as fddi0 The immediate origin of the new warning is the switch away from aliasing del_timer_sync() to del_timer() (timer_delete_sync() to timer_delete() in terms of current function names) for UP configurations, which however is the only choice for this driver anyway as no SMP hardware supports the TURBOchannel bus this device interfaces to. Therefore there is a very remote issue only this is a sign of. Specifically if an adapter reset issued upon a transition to the halted state times out and first triggers fza_reset_timer() for another reset assertion, which then schedules fza_reset_timer() for reset deassertion and then that second call is pre-empted after poking at the hardware, but before the timer has been rearmed and owing to high system load causing exceedingly high scheduling latency control is not handed back before a transition to the uninitialised state has caused the timer to be deleted even before it has been started, then fza_reset_timer() will be called yet again and issue another reset even though by then the adapter has already recovered. Prevent this situation from happening by switching to timer_delete() for the transition to the halted state and protect the code region affected with a spinlock, also to make sure add_timer() has not been called twice in a row due to an execution race between the interrupt handler and the timer handler (though it could only happen on SMP, but let's keep the driver clean). It's a very unlikely sequence of events to happen and therefore there's no point in trying to be overly clever about it, such as by placing printk() calls outside the protection. For the transition to the uninitialised state switch to timer_delete_sync_try() instead, so that a timer isn't deleted that's just been rearmed by the timer handler and needs to watch for the device to come out of reset again (again, an SMP scenario only). Retain timer_delete_sync() invocations outside the hardirq context for a stray timer not to fire once device structures have been released. Fixes: 61414f5ec9834 ("FDDI: defza: Add support for DEC FDDIcontroller 700 TURBOchannel adapter") Signed-off-by: Maciej W. Rozycki Signed-off-by: Jakub Kicinski --- drivers/net/fddi/defza.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c index 064fa484f797..9bfecc87d6b2 100644 --- a/drivers/net/fddi/defza.c +++ b/drivers/net/fddi/defza.c @@ -984,7 +984,7 @@ static irqreturn_t fza_interrupt(int irq, void *dev_id) case FZA_STATE_UNINITIALIZED: netif_carrier_off(dev); - timer_delete_sync(&fp->reset_timer); + timer_delete_sync_try(&fp->reset_timer); fp->ring_cmd_index = 0; fp->ring_uns_index = 0; fp->ring_rmc_tx_index = 0; @@ -1018,7 +1018,9 @@ static irqreturn_t fza_interrupt(int irq, void *dev_id) fp->queue_active = 0; netif_stop_queue(dev); pr_debug("%s: queue stopped\n", fp->name); - timer_delete_sync(&fp->reset_timer); + + spin_lock(&fp->lock); + timer_delete(&fp->reset_timer); pr_warn("%s: halted, reason: %x\n", fp->name, FZA_STATUS_GET_HALT(status)); fza_regs_dump(fp); @@ -1027,6 +1029,8 @@ static irqreturn_t fza_interrupt(int irq, void *dev_id) fp->timer_state = 0; fp->reset_timer.expires = jiffies + 45 * HZ; add_timer(&fp->reset_timer); + spin_unlock(&fp->lock); + break; default: @@ -1046,7 +1050,9 @@ static irqreturn_t fza_interrupt(int irq, void *dev_id) static void fza_reset_timer(struct timer_list *t) { struct fza_private *fp = timer_container_of(fp, t, reset_timer); + unsigned long flags; + spin_lock_irqsave(&fp->lock, flags); if (!fp->timer_state) { pr_err("%s: RESET timed out!\n", fp->name); pr_info("%s: trying harder...\n", fp->name); @@ -1069,6 +1075,7 @@ static void fza_reset_timer(struct timer_list *t) fp->reset_timer.expires = jiffies + 45 * HZ; } add_timer(&fp->reset_timer); + spin_unlock_irqrestore(&fp->lock, flags); } static int fza_set_mac_address(struct net_device *dev, void *addr) -- cgit v1.2.3 From 63451de16e0a08be40f9ab5e7c5c8f5c79676fb1 Mon Sep 17 00:00:00 2001 From: Sunny Patel Date: Sat, 25 Apr 2026 19:05:27 +0530 Subject: mm/migrate_device: fix spinlock leak in migrate_vma_insert_huge_pmd_page When check_stable_address_space() fails after the PMD spinlock has been acquired via pmd_lock(), the code jumps directly to the abort label, bypassing the spin_unlock() call in unlock_abort. This causes the PMD spinlock to be permanently held, leading to a deadlock. Change the goto target from abort to unlock_abort to ensure the spinlock is always released on this error path. Link: https://lore.kernel.org/20260425133537.17463-1-nueralspacetech@gmail.com Fixes: a30b48bf1b24 ("mm/migrate_device: implement THP migration of zone device pages") Signed-off-by: Sunny Patel Reviewed-by: Andrew Morton Acked-by: Zi Yan Acked-by: Balbir Singh Acked-by: David Hildenbrand (Arm) Cc: Alistair Popple Cc: Byungchul Park Cc: Gregory Price Cc: "Huang, Ying" Cc: Joshua Hahn Cc: Matthew Brost Cc: Rakie Kim Cc: Signed-off-by: Andrew Morton --- mm/migrate_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate_device.c b/mm/migrate_device.c index fbfe5715f635..ab49d4dcdb60 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -850,7 +850,7 @@ static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate, ptl = pmd_lock(vma->vm_mm, pmdp); csa_ret = check_stable_address_space(vma->vm_mm); if (csa_ret) - goto abort; + goto unlock_abort; /* * Check for userfaultfd but do not deliver the fault. Instead, -- cgit v1.2.3 From d4e7b5c4cc353f154d5ab8bb2e1ce7714d77a6e9 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 26 Apr 2026 10:36:12 -0700 Subject: mm/damon/sysfs-schemes: call missing mem_cgroup_iter_break() damon_sysfs_memcg_path_to_id() breaks mem_cgroup_iter() loop without calling mem_cgroup_iter_break(). This leaks the cgroup reference. Fix the issue by calling mem_cgroup_iter_break() before the break. The issue was discovered [1] by Sashiko. Link: https://lore.kernel.org/20260426173625.86521-1-sj@kernel.org Link: https://lore.kernel.org/20260423004148.74722-1-sj@kernel.org [1] Fixes: 29cbb9a13f05 ("mm/damon/sysfs-schemes: implement scheme filters") Signed-off-by: SeongJae Park Cc: # 6.3.x Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 245d63808411..04746cbb3327 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -2594,6 +2594,7 @@ static int damon_sysfs_memcg_path_to_id(char *memcg_path, u64 *id) if (damon_sysfs_memcg_path_eq(memcg, path, memcg_path)) { *id = mem_cgroup_id(memcg); found = true; + mem_cgroup_iter_break(NULL, memcg); break; } } -- cgit v1.2.3 From 620072fd783290ad92c2d445a47b0a61b161f352 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 26 Apr 2026 12:31:17 -0700 Subject: mm/damon: fix damos_stat tracepoint format for sz_applied The print format is wrongly marking sz_applied as sz_tried. Fix it. Link: https://lore.kernel.org/20260426193119.88095-1-sj@kernel.org Fixes: 804c26b961da ("mm/damon/core: add trace point for damos stat per apply interval") Signed-off-by: SeongJae Park Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: # 7.0.x Signed-off-by: Andrew Morton --- include/trace/events/damon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h index 24fc402ab3c8..7e25f4469b81 100644 --- a/include/trace/events/damon.h +++ b/include/trace/events/damon.h @@ -41,7 +41,7 @@ TRACE_EVENT(damos_stat_after_apply_interval, ), TP_printk("ctx_idx=%u scheme_idx=%u nr_tried=%lu sz_tried=%lu " - "nr_applied=%lu sz_tried=%lu sz_ops_filter_passed=%lu " + "nr_applied=%lu sz_applied=%lu sz_ops_filter_passed=%lu " "qt_exceeds=%lu nr_snapshots=%lu", __entry->context_idx, __entry->scheme_idx, __entry->nr_tried, __entry->sz_tried, -- cgit v1.2.3 From c416aee7e7d04fec2d2d30786b3c8393108b85d2 Mon Sep 17 00:00:00 2001 From: Illia Ostapyshyn Date: Mon, 27 Apr 2026 16:24:47 +0200 Subject: scripts/gdb: mm: cast untyped symbols in x86_page_ops The symbols phys_base, _text, and _end, used in x86_page_ops are either defined in assembly or implicitly by the linker. Thus, they lack type information and cause a conversion error after gdb.parse_and_eval. Explicitly cast these expressions to unsigned long. Link: https://lore.kernel.org/20260427142448.666117-2-illia@yshyn.com Fixes: 55f8b4518d14 ("scripts/gdb: implement x86_page_ops in mm.py") Signed-off-by: Illia Ostapyshyn Cc: Florian Fainelli Cc: Jan Kiszka Cc: Kieran Bingham Cc: Vlastimil Babka Cc: Hao Li Cc: Harry Yoo Cc: Seongjun Hong Cc: Signed-off-by: Andrew Morton --- scripts/gdb/linux/mm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/gdb/linux/mm.py b/scripts/gdb/linux/mm.py index d78908f6664d..dffadccbb01d 100644 --- a/scripts/gdb/linux/mm.py +++ b/scripts/gdb/linux/mm.py @@ -40,11 +40,11 @@ class x86_page_ops(): self.PAGE_OFFSET = int(gdb.parse_and_eval("page_offset_base")) self.VMEMMAP_START = int(gdb.parse_and_eval("vmemmap_base")) - self.PHYS_BASE = int(gdb.parse_and_eval("phys_base")) + self.PHYS_BASE = int(gdb.parse_and_eval("(unsigned long) phys_base")) self.START_KERNEL_map = 0xffffffff80000000 - self.KERNEL_START = gdb.parse_and_eval("_text") - self.KERNEL_END = gdb.parse_and_eval("_end") + self.KERNEL_START = gdb.parse_and_eval("(unsigned long) &_text") + self.KERNEL_END = gdb.parse_and_eval("(unsigned long) &_end") self.VMALLOC_START = int(gdb.parse_and_eval("vmalloc_base")) if self.VMALLOC_START == 0xffffc90000000000: -- cgit v1.2.3 From 228e25e33325865ebe589da5366449a8ecf7d0da Mon Sep 17 00:00:00 2001 From: Illia Ostapyshyn Date: Mon, 27 Apr 2026 16:24:48 +0200 Subject: scripts/gdb: slab: update field names of struct kmem_cache The commit 5ba6bc27b1f9 ("slab: decouple pointer to barn from kmem_cache_node") reorganized the struct kmem_cache to factor out the per-node fields to the new struct kmem_cache_per_node_ptrs. This causes the gdb scripts for lx-slabinfo and lx-slabtrace fail as they still reference the old structure. Adjust the gdb scripts to match the current state of struct kmem_cache. Link: https://lore.kernel.org/20260427142448.666117-3-illia@yshyn.com Fixes: 5ba6bc27b1f9 ("slab: decouple pointer to barn from kmem_cache_node") Signed-off-by: Illia Ostapyshyn Acked-by: Harry Yoo (Oracle) Acked-by: Vlastimil Babka (SUSE) Cc: Florian Fainelli Cc: Hao Li Cc: Jan Kiszka Cc: Kieran Bingham Cc: Seongjun Hong Signed-off-by: Andrew Morton --- scripts/gdb/linux/slab.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/gdb/linux/slab.py b/scripts/gdb/linux/slab.py index 0e2d93867fe2..ddde25aeca8d 100644 --- a/scripts/gdb/linux/slab.py +++ b/scripts/gdb/linux/slab.py @@ -196,7 +196,7 @@ def slabtrace(alloc, cache_name): if target_cache['flags'] & SLAB_STORE_USER: for i in range(0, nr_node_ids): - cache_node = target_cache['node'][i] + cache_node = target_cache['per_node']['node'][i] if cache_node['nr_slabs']['counter'] == 0: continue process_slab(loc_track, cache_node['partial'], alloc, target_cache) @@ -300,7 +300,7 @@ def slabinfo(): nr_free = 0 nr_slabs = 0 for i in range(0, nr_node_ids): - cache_node = cache['node'][i] + cache_node = cache['per_node']['node'][i] try: nr_slabs += cache_node['nr_slabs']['counter'] nr_objs = int(cache_node['total_objects']['counter']) -- cgit v1.2.3 From 3432cbb291aabf85f8af4b9d1ec37179168ff999 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Mon, 27 Apr 2026 12:03:51 -0400 Subject: selftests/mm: run_vmtests.sh: fix destructive tests invocation Destructive tests should be invoked with -d command-line option, but this won't work today since 'd' is missing in getopts command-line. This commit fixes it. Link: https://lore.kernel.org/214fd9e4-5398-4c26-859e-c982c2e277c3@redhat.com Fixes: f16ff3b692ad ("selftests/mm: run_vmtests.sh: add missing tests") Signed-off-by: Luiz Capitulino Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: SeongJae Park Cc: David Hildenbrand Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/run_vmtests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index d8468451b3a3..c17b133a81d2 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -103,7 +103,7 @@ RUN_ALL=false RUN_DESTRUCTIVE=false TAP_PREFIX="# " -while getopts "aht:n" OPT; do +while getopts "aht:nd" OPT; do case ${OPT} in "a") RUN_ALL=true ;; "h") usage ;; -- cgit v1.2.3 From 77dcdff56d0b52947f110e9e43a1fc846ee8d94a Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 28 Apr 2026 15:48:32 +0300 Subject: MAINTAINERS: add tree for KDUMP and KEXEC Patch series "MAINTAINERS: update KEXEC, KDUMP and LIVE UPDATE". KHO and LiveUpdate team is going to pick kdump and kexec patches to their tree at https://git.kernel.org/pub/scm/linux/kernel/git/liveupdate/linux.git Update MAINTAINERS to reflect this change and add kexec@ list to LIVE UPDATE entry. This patch (of 2): KHO and LiveUpdate team is going to pick kdump and kexec patches to their tree at https://git.kernel.org/pub/scm/linux/kernel/git/liveupdate/linux.git Update MAINTAINERS to reflect it. Link: https://lore.kernel.org/20260428124833.1903302-1-rppt@kernel.org Link: https://lore.kernel.org/20260428124833.1903302-2-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Pasha Tatashin Acked-by: Baoquan He Acked-by: Pratyush Yadav Cc: Mike Rapoport Cc: Dave Young Cc: Eric W. Biederman Signed-off-by: Andrew Morton --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index b2040011a386..44833dc85827 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13863,6 +13863,7 @@ M: Pratyush Yadav R: Dave Young L: kexec@lists.infradead.org S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/liveupdate/linux.git F: Documentation/admin-guide/kdump/ F: fs/proc/vmcore.c F: include/linux/crash_core.h @@ -14179,6 +14180,7 @@ M: Pasha Tatashin M: Pratyush Yadav L: kexec@lists.infradead.org W: http://kernel.org/pub/linux/utils/kernel/kexec/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/liveupdate/linux.git F: include/linux/kexec.h F: include/uapi/linux/kexec.h F: kernel/kexec* -- cgit v1.2.3 From ec9f2ee9a4046b2d5e5a6b6fa6a2ed1542250e73 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 28 Apr 2026 15:48:33 +0300 Subject: MAINTAINERS: add kexec@ list to LIVE UPDATE ENTRY Link: https://lore.kernel.org/20260428124833.1903302-3-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Pasha Tatashin Acked-by: Baoquan He Cc: Dave Young Cc: Eric W. Biederman Cc: Pratyush Yadav Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 44833dc85827..3a1cc86e2c0a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14897,6 +14897,7 @@ LIVE UPDATE M: Pasha Tatashin M: Mike Rapoport M: Pratyush Yadav +L: kexec@lists.infradead.org L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/liveupdate/linux.git -- cgit v1.2.3 From 6a288a4ddb4a994490505ab5f41c445f8e6b6467 Mon Sep 17 00:00:00 2001 From: "David Hildenbrand (Arm)" Date: Tue, 21 Apr 2026 17:39:07 +0200 Subject: mm/page_alloc: fix initialization of tags of the huge zero folio with init_on_free __GFP_ZEROTAGS semantics are currently a bit weird, but effectively this flag is only ever set alongside __GFP_ZERO and __GFP_SKIP_KASAN. If we run with init_on_free, we will zero out pages during __free_pages_prepare(), to skip zeroing on the allocation path. However, when allocating with __GFP_ZEROTAG set, post_alloc_hook() will consequently not only skip clearing page content, but also skip clearing tag memory. Not clearing tags through __GFP_ZEROTAGS is irrelevant for most pages that will get mapped to user space through set_pte_at() later: set_pte_at() and friends will detect that the tags have not been initialized yet (PG_mte_tagged not set), and initialize them. However, for the huge zero folio, which will be mapped through a PMD marked as special, this initialization will not be performed, ending up exposing whatever tags were still set for the pages. The docs (Documentation/arch/arm64/memory-tagging-extension.rst) state that allocation tags are set to 0 when a page is first mapped to user space. That no longer holds with the huge zero folio when init_on_free is enabled. Fix it by decoupling __GFP_ZEROTAGS from __GFP_ZERO, passing to tag_clear_highpages() whether we want to also clear page content. Invert the meaning of the tag_clear_highpages() return value to have clearer semantics. Reproduced with the huge zero folio by modifying the check_buffer_fill arm64/mte selftest to use a 2 MiB area, after making sure that pages have a non-0 tag set when freeing (note that, during boot, we will not actually initialize tags, but only set KASAN_TAG_KERNEL in the page flags). $ ./check_buffer_fill 1..20 ... not ok 17 Check initial tags with private mapping, sync error mode and mmap memory not ok 18 Check initial tags with private mapping, sync error mode and mmap/mprotect memory ... This code needs more cleanups; we'll tackle that next, like decoupling __GFP_ZEROTAGS from __GFP_SKIP_KASAN. [akpm@linux-foundation.org: s/__GPF_ZERO/__GFP_ZERO/, per David] Link: https://lore.kernel.org/20260421-zerotags-v2-1-05cb1035482e@kernel.org Fixes: adfb6609c680 ("mm/huge_memory: initialise the tags of the huge zero folio") Signed-off-by: David Hildenbrand (Arm) Reviewed-by: Catalin Marinas Tested-by: Lance Yang Cc: Brendan Jackman Cc: Dev Jain Cc: Johannes Weiner Cc: Liam Howlett Cc: Lorenzo Stoakes (Oracle) Cc: Mark Brown Cc: Michal Hocko Cc: Mike Rapoport Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Will Deacon Cc: Zi Yan Cc: Signed-off-by: Andrew Morton --- arch/arm64/include/asm/page.h | 2 +- arch/arm64/mm/fault.c | 11 +++++++---- include/linux/gfp_types.h | 10 +++++----- include/linux/highmem.h | 7 ++++--- mm/page_alloc.c | 8 ++++---- 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index e25d0d18f6d7..58200de8a221 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -33,7 +33,7 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, unsigned long vaddr); #define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio -bool tag_clear_highpages(struct page *to, int numpages); +bool tag_clear_highpages(struct page *to, int numpages, bool clear_pages); #define __HAVE_ARCH_TAG_CLEAR_HIGHPAGES #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 0f3c5c7ca054..739800835920 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -1018,7 +1018,7 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, return vma_alloc_folio(flags, 0, vma, vaddr); } -bool tag_clear_highpages(struct page *page, int numpages) +bool tag_clear_highpages(struct page *page, int numpages, bool clear_pages) { /* * Check if MTE is supported and fall back to clear_highpage(). @@ -1026,13 +1026,16 @@ bool tag_clear_highpages(struct page *page, int numpages) * post_alloc_hook() will invoke tag_clear_highpages(). */ if (!system_supports_mte()) - return false; + return clear_pages; /* Newly allocated pages, shouldn't have been tagged yet */ for (int i = 0; i < numpages; i++, page++) { WARN_ON_ONCE(!try_page_mte_tagging(page)); - mte_zero_clear_page_tags(page_address(page)); + if (clear_pages) + mte_zero_clear_page_tags(page_address(page)); + else + mte_clear_page_tags(page_address(page)); set_page_mte_tagged(page); } - return true; + return false; } diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 6c75df30a281..cd4972a7c97c 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -273,11 +273,11 @@ enum { * * %__GFP_ZERO returns a zeroed page on success. * - * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself - * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that - * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting - * memory tags at the same time as zeroing memory has minimal additional - * performance impact. + * %__GFP_ZEROTAGS zeroes memory tags at allocation time. Setting memory tags at + * the same time as zeroing memory (e.g., with __GFP_ZERO) has minimal + * additional performance impact. However, __GFP_ZEROTAGS also zeroes the tags + * even if memory is not getting zeroed at allocation time (e.g., + * with init_on_free). * * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation. * Used for userspace and vmalloc pages; the latter are unpoisoned by diff --git a/include/linux/highmem.h b/include/linux/highmem.h index af03db851a1d..d7aac9de1c8a 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -347,10 +347,11 @@ static inline void clear_highpage_kasan_tagged(struct page *page) #ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGES -/* Return false to let people know we did not initialize the pages */ -static inline bool tag_clear_highpages(struct page *page, int numpages) +/* Returns true if the caller has to initialize the pages */ +static inline bool tag_clear_highpages(struct page *page, int numpages, + bool clear_pages) { - return false; + return clear_pages; } #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 227d58dc3de6..23c7298d3be2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1808,9 +1808,9 @@ static inline bool should_skip_init(gfp_t flags) inline void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags) { + const bool zero_tags = gfp_flags & __GFP_ZEROTAGS; bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) && !should_skip_init(gfp_flags); - bool zero_tags = init && (gfp_flags & __GFP_ZEROTAGS); int i; set_page_private(page, 0); @@ -1832,11 +1832,11 @@ inline void post_alloc_hook(struct page *page, unsigned int order, */ /* - * If memory tags should be zeroed - * (which happens only when memory should be initialized as well). + * Clearing tags can efficiently clear the memory for us as well, if + * required. */ if (zero_tags) - init = !tag_clear_highpages(page, 1 << order); + init = tag_clear_highpages(page, 1 << order, /* clear_pages= */init); if (!should_skip_kasan_unpoison(gfp_flags) && kasan_unpoison_pages(page, order, init)) { -- cgit v1.2.3 From efdadbc180e53fe257a6e85f6bc706cb58088653 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Tue, 21 Apr 2026 09:07:07 +0200 Subject: lib: kunit_iov_iter: fix test fail on powerpc Increase buffer size to accommodate machines with 64K PAGE_SIZE. Link: https://lore.kernel.org/20260421070707.992873-1-lk@c--e.de Fixes: 0913b7554726 ("lib: kunit_iov_iter: add tests for extract_iter_to_sg") Signed-off-by: Christian A. Ehrhardt Reported-by: David Gow Closes: https://lore.kernel.org/34a81ec2-af84-465d-9b5e-7bb5bf01680f@davidgow.net Tested-by: David Gow Tested-by: Josh Law Reviewed-by: Josh Law Signed-off-by: Andrew Morton --- lib/tests/kunit_iov_iter.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/tests/kunit_iov_iter.c b/lib/tests/kunit_iov_iter.c index 37bd6eb25896..f02f7b7aa796 100644 --- a/lib/tests/kunit_iov_iter.c +++ b/lib/tests/kunit_iov_iter.c @@ -1128,7 +1128,7 @@ static void __init iov_kunit_iter_to_sg_kvec(struct kunit *test) struct kvec kvec; size_t bufsize; - bufsize = 0x100000; + bufsize = 0x200000; iov_kunit_iter_to_sg_init(test, bufsize, false, &data); kvec.iov_base = data.buffer; @@ -1146,7 +1146,7 @@ static void __init iov_kunit_iter_to_sg_bvec(struct kunit *test) struct bio_vec *bvec; struct iov_iter iter; - bufsize = 0x100000; + bufsize = 0x200000; iov_kunit_iter_to_sg_init(test, bufsize, false, &data); bvec = kunit_kmalloc_array(test, data.npages, sizeof(*bvec), @@ -1173,7 +1173,7 @@ static void __init iov_kunit_iter_to_sg_folioq(struct kunit *test) struct iov_iter iter; size_t bufsize; - bufsize = 0x100000; + bufsize = 0x200000; iov_kunit_iter_to_sg_init(test, bufsize, false, &data); folioq = iov_kunit_create_folioq(test); @@ -1190,7 +1190,7 @@ static void __init iov_kunit_iter_to_sg_xarray(struct kunit *test) struct iov_iter iter; size_t bufsize; - bufsize = 0x100000; + bufsize = 0x200000; iov_kunit_iter_to_sg_init(test, bufsize, false, &data); xarray = iov_kunit_create_xarray(test); @@ -1206,7 +1206,7 @@ static void __init iov_kunit_iter_to_sg_ubuf(struct kunit *test) struct iov_iter iter; size_t bufsize; - bufsize = 0x100000; + bufsize = 0x200000; iov_kunit_iter_to_sg_init(test, bufsize, true, &data); iov_iter_ubuf(&iter, READ, data.ubuf, bufsize); -- cgit v1.2.3 From 93866f55f7e292fe3d47d36c9efe5ee10213a06b Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 28 Apr 2026 16:52:17 +0800 Subject: mm/memory_hotplug: fix memory block reference leak on remove Patch series "mm: Fix memory block leaks and locking", v2. This series fixes two memory block device reference leaks and one locking issue around the per-memory_block hwpoison counter. This patch (of 2): remove_memory_blocks_and_altmaps() looks up each memory block with find_memory_block(), which acquires a reference to the memory block device. That reference is never dropped on this path, resulting in a leaked device reference when removing memory blocks and their altmaps. Drop the reference after retrieving mem->altmap and clearing mem->altmap, before removing the memory block device. Link: https://lore.kernel.org/20260428085219.1316047-1-songmuchun@bytedance.com Link: https://lore.kernel.org/20260428085219.1316047-2-songmuchun@bytedance.com Fixes: 6b8f0798b85a ("mm/memory_hotplug: split memmap_on_memory requests across memblocks") Signed-off-by: Muchun Song Acked-by: Oscar Salvador Acked-by: David Hildenbrand (Arm) Cc: Danilo Krummrich Cc: Greg Kroah-Hartman Cc: "Huang, Ying" Cc: Miaohe Lin Cc: Naoya Horiguchi Cc: "Rafael J. Wysocki" Cc: Vishal Verma Cc: Signed-off-by: Andrew Morton --- mm/memory_hotplug.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 2a943ec57c85..40c7915dabe0 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1422,6 +1422,8 @@ static void remove_memory_blocks_and_altmaps(u64 start, u64 size) altmap = mem->altmap; mem->altmap = NULL; + /* drop the ref. we got via find_memory_block() */ + put_device(&mem->dev); remove_memory_block_devices(cur_start, memblock_size); -- cgit v1.2.3 From 03a2cc1756a0570f887d624cd6c535ea0cbd4951 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 28 Apr 2026 16:52:18 +0800 Subject: drivers/base/memory: fix memory block reference leak in poison accounting memblk_nr_poison_inc() and memblk_nr_poison_sub() look up a memory block via find_memory_block_by_id(), which acquires a reference to the memory block device. Both helpers use the returned memory block without dropping that reference, leaking the device reference on each successful lookup. Drop the reference after updating nr_hwpoison. Link: https://lore.kernel.org/20260428085219.1316047-3-songmuchun@bytedance.com Fixes: 5033091de814 ("mm/hwpoison: introduce per-memory_block hwpoison counter") Signed-off-by: Muchun Song Reviewed-by: Miaohe Lin Acked-by: Oscar Salvador Acked-by: David Hildenbrand (Arm) Cc: Danilo Krummrich Cc: Greg Kroah-Hartman Cc: "Huang, Ying" Cc: Naoya Horiguchi Cc: "Rafael J. Wysocki" Cc: Vishal Verma Cc: Signed-off-by: Andrew Morton --- drivers/base/memory.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index f806a683b767..6981b55d582a 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -1230,8 +1230,10 @@ void memblk_nr_poison_inc(unsigned long pfn) const unsigned long block_id = pfn_to_block_id(pfn); struct memory_block *mem = find_memory_block_by_id(block_id); - if (mem) + if (mem) { atomic_long_inc(&mem->nr_hwpoison); + put_device(&mem->dev); + } } void memblk_nr_poison_sub(unsigned long pfn, long i) @@ -1239,8 +1241,10 @@ void memblk_nr_poison_sub(unsigned long pfn, long i) const unsigned long block_id = pfn_to_block_id(pfn); struct memory_block *mem = find_memory_block_by_id(block_id); - if (mem) + if (mem) { atomic_long_sub(i, &mem->nr_hwpoison); + put_device(&mem->dev); + } } static unsigned long memblk_nr_poison(struct memory_block *mem) -- cgit v1.2.3 From c0c6ccd9828c3a1950623b546fa57292a77b5c73 Mon Sep 17 00:00:00 2001 From: "David Hildenbrand (Arm)" Date: Thu, 30 Apr 2026 13:31:22 +0200 Subject: mm: fix __vm_normal_page() to handle missing support for pmd_special()/pud_special() On x86 32-bit with THP enabled, zap_huge_pmd() is seen to generate a "WARNING: mm/memory.c:735 at __vm_normal_page+0x6a/0x7d", from the VM_WARN_ON_ONCE(is_zero_pfn(pfn) || is_huge_zero_pfn(pfn)); followed by "BUG: Bad rss-counter state"s, then later "BUG: Bad page state"s when reclaim gets to call shrink_huge_zero_folio_scan(). It's as if the _PAGE_SPECIAL bit never got set in the huge_zero pmd: and indeed, whereas pte_special() and pte_mkspecial() are subject to a dedicated CONFIG_ARCH_HAS_PTE_SPECIAL, pmd_special() and pmd_mkspecial() are subject to CONFIG_ARCH_SUPPORTS_PMD_PFNMAP, which is never enabled on any 32-bit architecture. While the problem was exposed through commit d80a9cb1a64a ("mm/huge_memory: add and use normal_or_softleaf_folio_pmd()"), it was an oversight in commit af38538801c6 ("mm/memory: factor out common code from vm_normal_page_*()") and would result in other problems: * huge zero folio accounted in smaps, pagemap (PAGE_IS_FILE) and numamaps as file-backed THP * folio_walk_start() returning the folio even without FW_ZEROPAGE set. Callers seem to tolerate that, though. ... and triggering the VM_WARN_ON_ONE(), although never reported so far. To fix it, teach vm_normal_page_pmd()/vm_normal_page_pud() to consider whether pmd_special/pud_special is actually implemented. Link: https://lore.kernel.org/20260430-pmd_special-v1-1-dbcbcfd72c20@kernel.org Fixes: af38538801c6 ("mm/memory: factor out common code from vm_normal_page_*()") Signed-off-by: David Hildenbrand (Arm) Reported-by: Hugh Dickins Closes: https://lore.kernel.org/r/74a75b59-2e13-3985-ee99-d5521f39df2a@google.com Reported-by: Bibo Mao Closes: https://lore.kernel.org/r/20260430041121.2839350-1-maobibo@loongson.cn Debugged-by: Hugh Dickins Reviewed-by: Lance Yang Tested-by: Bibo Mao Reviewed-by: Baolin Wang Reviewed-by: Oscar Salvador Reviewed-by: Lorenzo Stoakes Cc: Liam R. Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/memory.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index ea6568571131..c51ad671b95f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -612,6 +612,21 @@ static void print_bad_page_map(struct vm_area_struct *vma, dump_stack(); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } + +static inline bool pgtable_level_has_pxx_special(enum pgtable_level level) +{ + switch (level) { + case PGTABLE_LEVEL_PTE: + return IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL); + case PGTABLE_LEVEL_PMD: + return IS_ENABLED(CONFIG_ARCH_SUPPORTS_PMD_PFNMAP); + case PGTABLE_LEVEL_PUD: + return IS_ENABLED(CONFIG_ARCH_SUPPORTS_PUD_PFNMAP); + default: + return false; + } +} + #define print_bad_pte(vma, addr, pte, page) \ print_bad_page_map(vma, addr, pte_val(pte), page, PGTABLE_LEVEL_PTE) @@ -684,7 +699,7 @@ static inline struct page *__vm_normal_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, bool special, unsigned long long entry, enum pgtable_level level) { - if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) { + if (pgtable_level_has_pxx_special(level)) { if (unlikely(special)) { #ifdef CONFIG_FIND_NORMAL_PAGE if (vma->vm_ops && vma->vm_ops->find_normal_page) @@ -699,8 +714,9 @@ static inline struct page *__vm_normal_page(struct vm_area_struct *vma, return NULL; } /* - * With CONFIG_ARCH_HAS_PTE_SPECIAL, any special page table - * mappings (incl. shared zero folios) are marked accordingly. + * With working pte_special()/pmd_special()..., any special page + * table mappings (incl. shared zero folios) are marked + * accordingly. */ } else { if (unlikely(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))) { -- cgit v1.2.3 From be3f38d05cc5a7c3f13e51994c5dd043ab604d28 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 1 May 2026 16:51:16 +1000 Subject: mm/memory: fix spurious warning when unmapping device-private/exclusive pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Device private and exclusive entries are only supported for anonymous folios. This condition is tested in __migrate_device_pages() and make_device_exclusive() using folio_test_anon(). However the unmap path tests this assumption using vma_is_anonymous(). This is wrong because whilst anonymous VMAs can only contain folios where folio_test_anon() is true the opposite relation does not hold. A folio for which folio_test_anon() is true does not imply vma_is_anonymous() is true. Such a condition can occur if for example a folio is part of a private filebacked mapping. In this case vma_is_anonymous() is false as the mapping is filebacked, but folio_test_anon() may be true, thus permitting devices to migrate the folio to device private memory. This can lead to the following spurious warnings during process teardown: [ 772.737706] ------------[ cut here ]------------ [ 772.739201] WARNING: mm/memory.c:1754 at unmap_page_range.cold+0x26/0x18a, CPU#17: hmm-tests/2041 [ 772.742050] Modules linked in: test_hmm nvidia_uvm(O) nvidia(O) [ 772.743959] CPU: 17 UID: 0 PID: 2041 Comm: hmm-tests Tainted: G W O 7.0.0+ #387 PREEMPT(full) [ 772.747104] Tainted: [W]=WARN, [O]=OOT_MODULE [ 772.748509] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014 [ 772.752117] RIP: 0010:unmap_page_range.cold+0x26/0x18a [ 772.753780] Code: 7e fe ff ff 48 89 4c 24 78 4c 89 44 24 38 e8 f2 ff b1 00 48 8b 4c 24 78 4c 8b 44 24 38 48 8b 44 24 18 48 83 78 48 00 74 04 90 <0f> 0b 90 48 89 ca b8 ff ff 37 00 48 c1 ea 03 48 c1 e0 2a 80 3c 02 [ 772.759602] RSP: 0018:ffff888112607550 EFLAGS: 00010286 [ 772.761310] RAX: ffff88811bbf4dc0 RBX: dffffc0000000000 RCX: ffffea03e9bfffd8 [ 772.763583] RDX: 1ffff1102377e9c1 RSI: 0000000000000008 RDI: ffff88811bbf4e08 [ 772.765914] RBP: 0000000000000006 R08: ffff8881059f7448 R09: ffffed10224c0e68 [ 772.768184] R10: ffff888112607347 R11: 0000000000000001 R12: 0000000000000001 [ 772.770461] R13: ffffea03e9bfffc0 R14: ffff888112607908 R15: ffffea03e9bfffc0 [ 772.772782] FS: 00007f327caa2780(0000) GS:ffff888427b7d000(0000) knlGS:0000000000000000 [ 772.775328] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 772.777187] CR2: 00007f327ca89000 CR3: 00000001994d5000 CR4: 00000000000006f0 [ 772.779135] Call Trace: [ 772.779792] [ 772.780317] ? dmirror_interval_invalidate+0x1a3/0x290 [test_hmm] [ 772.781873] ? vm_normal_page_pud+0x2b0/0x2b0 [ 772.782992] ? __rwlock_init+0x150/0x150 [ 772.784006] ? lock_release+0x216/0x2b0 [ 772.785008] ? __mmu_notifier_invalidate_range_start+0x505/0x6e0 [ 772.786522] ? lock_release+0x216/0x2b0 [ 772.787498] ? unmap_single_vma+0xb6/0x210 [ 772.788573] unmap_vmas+0x27d/0x520 [ 772.789506] ? unmap_single_vma+0x210/0x210 [ 772.790607] ? mas_update_gap.part.0+0x620/0x620 [ 772.791834] unmap_region+0x19e/0x350 [ 772.792769] ? remove_vma+0x130/0x130 [ 772.793684] ? mas_alloc_nodes+0x1f2/0x300 [ 772.794730] vms_complete_munmap_vmas+0x8c1/0xe20 [ 772.795926] ? unmap_region+0x350/0x350 [ 772.796917] do_vmi_align_munmap+0x36a/0x4e0 [ 772.798018] ? lock_release+0x216/0x2b0 [ 772.799024] ? vma_shrink+0x620/0x620 [ 772.799983] do_vmi_munmap+0x150/0x2c0 [ 772.800939] __vm_munmap+0x161/0x2c0 [ 772.801872] ? expand_downwards+0xd60/0xd60 [ 772.802948] ? clockevents_program_event+0x1ef/0x540 [ 772.804217] ? lock_release+0x216/0x2b0 [ 772.805158] __x64_sys_munmap+0x59/0x80 [ 772.805776] do_syscall_64+0xfc/0x670 [ 772.806336] ? irqentry_exit+0xda/0x580 [ 772.806976] entry_SYSCALL_64_after_hwframe+0x4b/0x53 [ 772.807772] RIP: 0033:0x7f327cbb2717 [ 772.808323] Code: 73 01 c3 48 8b 0d f9 76 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 0b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d c9 76 0d 00 f7 d8 64 89 01 48 [ 772.811337] RSP: 002b:00007ffde7f57d38 EFLAGS: 00000202 ORIG_RAX: 000000000000000b [ 772.812564] RAX: ffffffffffffffda RBX: 00007f327cc9c000 RCX: 00007f327cbb2717 [ 772.813733] RDX: 0000000000000000 RSI: 0000000000400000 RDI: 00007f327c289000 [ 772.814867] RBP: 0000000000421360 R08: 000000000000001a R09: 0000000000000000 [ 772.815991] R10: 0000000000000003 R11: 0000000000000202 R12: 00007ffde7f57d74 [ 772.817121] R13: 00007f327c689010 R14: 0000000000100000 R15: 00007f327c289000 [ 772.818272] [ 772.818614] irq event stamp: 0 [ 772.819159] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [ 772.820174] hardirqs last disabled at (0): [] copy_process+0x19f3/0x6440 [ 772.821511] softirqs last enabled at (0): [] copy_process+0x1a40/0x6440 [ 772.822869] softirqs last disabled at (0): [<0000000000000000>] 0x0 [ 772.823871] ---[ end trace 0000000000000000 ]--- Fix this by using the same check for folio_test_anon() in zap_nonpresent_ptes(). Also add a hmm-test case for this. Link: https://lore.kernel.org/20260501065116.2057242-1-apopple@nvidia.com Fixes: 999dad824c39 ("mm/shmem: persist uffd-wp bit across zapping for file-backed") Signed-off-by: Alistair Popple Reported-by: Arsen Arsenović Reviewed-by: Balbir Singh Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: John Hubbard Cc: Leon Romanovsky Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Peter Xu Cc: Matthew Brost Cc: Michal Hocko Cc: Mike Rapoport Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Thomas Hellström Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/memory.c | 2 +- tools/testing/selftests/mm/hmm-tests.c | 50 ++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index c51ad671b95f..86a973119bd4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1755,7 +1755,7 @@ static inline int zap_nonpresent_ptes(struct mmu_gather *tlb, * consider uffd-wp bit when zap. For more information, * see zap_install_uffd_wp_if_needed(). */ - WARN_ON_ONCE(!vma_is_anonymous(vma)); + WARN_ON_ONCE(!folio_test_anon(folio)); rss[mm_counter(folio)]--; folio_remove_rmap_pte(folio, page, vma); folio_put(folio); diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index 788689497e92..77fb4c5d871b 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -985,6 +985,56 @@ TEST_F(hmm, migrate) hmm_buffer_free(buffer); } +/* + * Migrate private file memory to device private memory. + */ +TEST_F(hmm, migrate_file_private) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + int fd; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + fd = hmm_create_file(size); + ASSERT_GE(fd, 0); + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = fd; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + /* * Migrate anonymous memory to device private memory and fault some of it back * to system memory, then try migrating the resulting mix of system and device -- cgit v1.2.3 From 5a30862dec5a70da0a9d259de3f87a7542cc95b2 Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Tue, 12 May 2026 11:03:53 -0300 Subject: ASoC: sdw_utils: Check speaker component string allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit devm_kasprintf() can fail while building the temporary speaker component string. If that happens, spk_components is set to NULL, but the current code can still pass it to strlen() on a later loop iteration or after the loop when appending the speaker component list to card->components. Use NULL to represent the initial "no speaker components" state, and return -ENOMEM immediately if building spk_components fails. Fixes: 0f60ecffbfe3 ("ASoC: sdw_utils: generate combined spk components string") Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260512-asoc-sdw-utils-spk-components-alloc-v1-1-c9bbd6d2e123@gmail.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_utils.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index be45a2e62d3e..e440c2327100 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -1114,7 +1114,7 @@ int asoc_sdw_rtd_init(struct snd_soc_pcm_runtime *rtd) struct asoc_sdw_codec_info *codec_info; struct snd_soc_dai *dai; struct sdw_slave *sdw_peripheral; - const char *spk_components=""; + const char *spk_components = NULL; int dai_index; int ret; int i; @@ -1197,7 +1197,7 @@ skip_add_controls_widgets: else component = codec_info->dais[dai_index].component_name; - if (strlen (spk_components) == 0) + if (!spk_components) spk_components = devm_kasprintf(card->dev, GFP_KERNEL, "%s", component); else @@ -1205,13 +1205,15 @@ skip_add_controls_widgets: spk_components = devm_kasprintf(card->dev, GFP_KERNEL, "%s+%s", spk_components, component); + + if (!spk_components) + return -ENOMEM; } codec_info->dais[dai_index].rtd_init_done = true; - } - if (strlen (spk_components) > 0) { + if (spk_components) { /* Update card components for speaker components */ card->components = devm_kasprintf(card->dev, GFP_KERNEL, "%s spk:%s", card->components, spk_components); -- cgit v1.2.3 From 320fb29ea23cfa1aeef32563da8748247db896ea Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 11 May 2026 14:30:57 -0400 Subject: net/sched: sch_cbs: Call qdisc_reset for child qdisc During a reset, CBS is not calling reset on its child qdisc, which might cause qlen/backlog accounting issues. For example, if we have CBS with a QFQ parent and a netem child with delay, we can create a scenario where the parent's qlen underflows. QFQ, specifically, uses qlen to check whether it should deference a pointer, so this scenario may cause a null-ptr deref in QFQ: [ 43.875639][ T319] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000009: 0000 [#1] SMP KASAN NOPTI [ 43.876124][ T319] KASAN: null-ptr-deref in range [0x0000000000000048-0x000000000000004f] [ 43.876417][ T319] CPU: 10 UID: 0 PID: 319 Comm: ping Not tainted 7.0.0-13039-ge728258debd5 #773 PREEMPT(full) [ 43.876751][ T319] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 43.876949][ T319] RIP: 0010:qfq_dequeue+0x35c/0x1650 [ 43.877123][ T319] Code: 00 fc ff df 80 3c 02 00 0f 85 17 0e 00 00 4c 8d 73 48 48 89 9d b8 02 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 f2 48 c1 ea 03 <80> 3c 02 00 0f 85 76 0c 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8b [ 43.877648][ T319] RSP: 0018:ffff8881017ef4f0 EFLAGS: 00010216 [ 43.877845][ T319] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: dffffc0000000000 [ 43.878073][ T319] RDX: 0000000000000009 RSI: 0000000c40000000 RDI: ffff88810eef02b0 [ 43.878306][ T319] RBP: ffff88810eef0000 R08: ffff88810eef0280 R09: 1ffff1102120fd63 [ 43.878523][ T319] R10: 1ffff1102120fd66 R11: 1ffff1102120fd67 R12: 0000000c40000000 [ 43.878742][ T319] R13: ffff88810eef02b8 R14: 0000000000000048 R15: 0000000020000000 [ 43.878959][ T319] FS: 00007f9c51c47c40(0000) GS:ffff88817a0be000(0000) knlGS:0000000000000000 [ 43.879214][ T319] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 43.879403][ T319] CR2: 000055e69a2230a8 CR3: 000000010c07a000 CR4: 0000000000750ef0 [ 43.879621][ T319] PKRU: 55555554 [ 43.879735][ T319] Call Trace: [ 43.879844][ T319] [ 43.879924][ T319] __qdisc_run+0x169/0x1900 [ 43.880075][ T319] ? dev_qdisc_enqueue+0x8b/0x210 [ 43.880222][ T319] __dev_queue_xmit+0x2346/0x37a0 [ 43.880376][ T319] ? register_lock_class+0x3f/0x800 [ 43.880531][ T319] ? srso_alias_return_thunk+0x5/0xfbef5 [ 43.880684][ T319] ? __pfx___dev_queue_xmit+0x10/0x10 [ 43.880834][ T319] ? srso_alias_return_thunk+0x5/0xfbef5 [ 43.880977][ T319] ? __lock_acquire+0x819/0x1df0 [ 43.881124][ T319] ? srso_alias_return_thunk+0x5/0xfbef5 [ 43.881275][ T319] ? srso_alias_return_thunk+0x5/0xfbef5 [ 43.881418][ T319] ? __asan_memcpy+0x3c/0x60 [ 43.881563][ T319] ? srso_alias_return_thunk+0x5/0xfbef5 [ 43.881708][ T319] ? eth_header+0x165/0x1a0 [ 43.881853][ T319] ? lockdep_hardirqs_on_prepare+0xdb/0x1a0 [ 43.882031][ T319] ? srso_alias_return_thunk+0x5/0xfbef5 [ 43.882174][ T319] ? neigh_resolve_output+0x3cc/0x7e0 [ 43.882325][ T319] ? srso_alias_return_thunk+0x5/0xfbef5 [ 43.882471][ T319] ip_finish_output2+0x6b6/0x1e10 Fix this by calling qdisc_reset for CBS' child qdisc. Sashiko caught an issue which could result in a null ptr deref if qdisc_create_dflt() is invoked on an unitialised cbs qdisc which is exposed by this patch. We add an early return if the qdisc is null to address this. This is a similar approach used by two other fixes[1][2]. The proper fix for this specific issue elucidated by sashiko is to remove the call to qdisc_reset when qdisc_create_dflt fails. Since the dflt qdisc isn't attached anywhere yet at that point, calling the reset callback doesn't make much sense (and as stated has been a source of two other bugs). We plan on submitting this fix in a later patch. [1] https://lore.kernel.org/netdev/20221018063201.306474-2-shaozhengchao@huawei.com/ [2] https://lore.kernel.org/netdev/20221018063201.306474-4-shaozhengchao@huawei.com/ Fixes: 585d763af09c ("net/sched: Introduce Credit Based Shaper (CBS) qdisc") Reported-by: Junyoung Jang Tested-by: Junyoung Jang Tested-by: Victor Nogueira Acked-by: Vinicius Costa Gomes Signed-off-by: Jamal Hadi Salim Signed-off-by: Jakub Kicinski --- net/sched/sch_cbs.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 8c9a0400c862..0f953bd46b58 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -243,6 +243,20 @@ static struct sk_buff *cbs_dequeue(struct Qdisc *sch) return q->dequeue(sch); } +static void cbs_reset(struct Qdisc *sch) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + /* Nothing to do if we couldn't create the underlying qdisc */ + if (!q->qdisc) + return; + + qdisc_reset(q->qdisc); + qdisc_watchdog_cancel(&q->watchdog); + q->credits = 0; + q->last = 0; +} + static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = { [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) }, }; @@ -540,7 +554,7 @@ static struct Qdisc_ops cbs_qdisc_ops __read_mostly = { .dequeue = cbs_dequeue, .peek = qdisc_peek_dequeued, .init = cbs_init, - .reset = qdisc_reset_queue, + .reset = cbs_reset, .destroy = cbs_destroy, .change = cbs_change, .dump = cbs_dump, -- cgit v1.2.3 From 59afae20080a9681014bdc87897cbfd30bedd261 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Mon, 11 May 2026 14:30:58 -0400 Subject: selftests/tc-testing: Add QFQ/CBS qlen underflow test Since CBS was not calling reset for its child qdisc, there are scenarios where it could cause an underflow on its parent's qlen/backlog. When the parent is QFQ, a null-ptr deref could occur. Add a test case that reproduces the underflow followed by a null-ptr deref scenario. Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index b1f856cf62c1..848696c373fc 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -1284,5 +1284,46 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "3a62", + "name": "Try to create a qlen underflow with QFQ/CBS", + "category": [ + "qdisc", + "qfq", + "cbs" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: qfq", + "$TC class add dev $DUMMY classid 1:1 parent 1: qfq", + "$TC class add dev $DUMMY classid 1:2 parent 1: qfq", + "$TC qdisc add dev $DUMMY handle 2: parent 1:1 cbs", + "$TC qdisc add dev $DUMMY handle 3: parent 2: netem delay 5000000000", + "$TC filter add dev $DUMMY parent 1: prio 1 u32 match ip dst 10.10.10.1 classid 1:1 action ok", + "$TC filter add dev $DUMMY parent 1: prio 2 u32 match ip dst 10.10.10.2 classid 1:2 action ok", + "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true", + "$IP l set $DUMMY down", + "$IP l set $DUMMY up", + "$TC qdisc replace dev $DUMMY handle 4: parent 2: pfifo" + ], + "cmdUnderTest": "ping -c 1 10.10.10.2 -W0.01 -I$DUMMY", + "expExitCode": "1", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:1", + "matchJSON": [ + { + "kind": "cbs", + "handle": "2:", + "bytes": 0, + "packets": 0 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] } ] -- cgit v1.2.3 From 6c7674b5b7ae513cecae22aa9dcdcf533862cf5c Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 27 Apr 2026 19:41:05 -0700 Subject: riscv: cfi: reduce shadow stack size limit from 4GB to 2GB Follow the ARM64 GCS (Guarded Control Stack) implementation approach by reducing the shadow stack size allocation from min(RLIMIT_STACK, 4GB) to min(RLIMIT_STACK/2, 2GB). See commit 506496bcbb42 ("arm64/gcs: Ensure that new threads have a GCS") Rationale: 1. Shadow stacks only store return addresses (8 bytes per entry), not local variables, function parameters, or saved registers. A 2GB shadow stack is far more than sufficient for any practical application, even with extremely deep recursion. Using half the size maintains adequate margin while being more resource-efficient. 2. On memory-constrained systems (e.g., platforms with only 4GB of physical memory, which is a common configuration), allocating 4GB of virtual address space for shadow stack per process/thread can lead to virtual memory allocation failures when the overcommit mode is set to OVERCOMMIT_GUESS or OVERCOMMIT_NEVER: Error: "__vm_enough_memory: not enough memory for the allocation" This reduces virtual address space consumption by 50% while maintaining more than adequate space for return address storage. Signed-off-by: Zong Li Link: https://patch.msgid.link/20260428024105.645162-1-zong.li@sifive.com [pjw@kernel.org: clean up patch description] Signed-off-by: Paul Walmsley --- arch/riscv/kernel/usercfi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c index 6eaa0d94fdfe..cbfb4e495e9f 100644 --- a/arch/riscv/kernel/usercfi.c +++ b/arch/riscv/kernel/usercfi.c @@ -109,15 +109,16 @@ void set_indir_lp_lock(struct task_struct *task, bool lock) task->thread_info.user_cfi_state.ufcfi_locked = lock; } /* - * If size is 0, then to be compatible with regular stack we want it to be as big as - * regular stack. Else PAGE_ALIGN it and return back + * The shadow stack only stores the return address and not any variables + * this should be more than sufficient for most applications. + * Else PAGE_ALIGN it and return back */ static unsigned long calc_shstk_size(unsigned long size) { if (size) return PAGE_ALIGN(size); - return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G)); + return PAGE_ALIGN(min(rlimit(RLIMIT_STACK) / 2, SZ_2G)); } /* -- cgit v1.2.3 From 9a390d34d55cb4ecbca4981c660dd95440827c70 Mon Sep 17 00:00:00 2001 From: Sukhdeep Singh Date: Tue, 12 May 2026 18:27:11 +0530 Subject: MAINTAINERS: update atlantic driver maintainer Igor Russkikh and Egor Pomozov have left Marvell. Take over maintenance of the atlantic driver and its PTP subsystem. Signed-off-by: Sukhdeep Singh Signed-off-by: Jakub Kicinski --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8a134cf6e9bb..7a4f2aab78ff 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2021,7 +2021,7 @@ F: Documentation/hwmon/aquacomputer_d5next.rst F: drivers/hwmon/aquacomputer_d5next.c AQUANTIA ETHERNET DRIVER (atlantic) -M: Igor Russkikh +M: Sukhdeep Singh L: netdev@vger.kernel.org S: Maintained W: https://www.marvell.com/ @@ -2030,7 +2030,7 @@ F: Documentation/networking/device_drivers/ethernet/aquantia/atlantic.rst F: drivers/net/ethernet/aquantia/atlantic/ AQUANTIA ETHERNET DRIVER PTP SUBSYSTEM -M: Egor Pomozov +M: Sukhdeep Singh L: netdev@vger.kernel.org S: Maintained W: http://www.aquantia.com -- cgit v1.2.3 From b84c5632c7b31f8910167075a8128cfb9e50fcfe Mon Sep 17 00:00:00 2001 From: Faicker Mo Date: Mon, 11 May 2026 22:05:51 +0800 Subject: net: net_failover: Fix the deadlock in slave register There is netdev_lock_ops() before the NETDEV_REGISTER notifier in register_netdevice(), so use the non-locking functions in net_failover_slave_register(). failover_slave_register() in failover_existing_slave_register() adds lock and unlock ops too. Call Trace: __schedule+0x30d/0x7a0 schedule+0x27/0x90 schedule_preempt_disabled+0x15/0x30 __mutex_lock.constprop.0+0x538/0x9e0 __mutex_lock_slowpath+0x13/0x20 mutex_lock+0x3b/0x50 dev_set_mtu+0x40/0xe0 net_failover_slave_register+0x24/0x280 failover_slave_register+0x103/0x1b0 failover_event+0x15e/0x210 ? dropmon_net_event+0xac/0xe0 notifier_call_chain+0x5e/0xe0 raw_notifier_call_chain+0x16/0x30 call_netdevice_notifiers_info+0x52/0xa0 register_netdevice+0x5f4/0x7c0 register_netdev+0x1e/0x40 _mlx5e_probe+0xe2/0x370 [mlx5_core] mlx5e_probe+0x59/0x70 [mlx5_core] ? __pfx_mlx5e_probe+0x10/0x10 [mlx5_core] Fixes: 4c975fd70002 ("net: hold instance lock during NETDEV_REGISTER/UP") Signed-off-by: Faicker Mo Signed-off-by: Jakub Kicinski --- drivers/net/net_failover.c | 12 ++++++------ net/core/failover.c | 6 +++++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c index d0361aaf25ef..3f7d31033bae 100644 --- a/drivers/net/net_failover.c +++ b/drivers/net/net_failover.c @@ -502,7 +502,7 @@ static int net_failover_slave_register(struct net_device *slave_dev, /* Align MTU of slave with failover dev */ orig_mtu = slave_dev->mtu; - err = dev_set_mtu(slave_dev, failover_dev->mtu); + err = netif_set_mtu(slave_dev, failover_dev->mtu); if (err) { netdev_err(failover_dev, "unable to change mtu of %s to %u register failed\n", slave_dev->name, failover_dev->mtu); @@ -512,11 +512,11 @@ static int net_failover_slave_register(struct net_device *slave_dev, dev_hold(slave_dev); if (netif_running(failover_dev)) { - err = dev_open(slave_dev, NULL); + err = netif_open(slave_dev, NULL); if (err && (err != -EBUSY)) { netdev_err(failover_dev, "Opening slave %s failed err:%d\n", slave_dev->name, err); - goto err_dev_open; + goto err_netif_open; } } @@ -562,10 +562,10 @@ static int net_failover_slave_register(struct net_device *slave_dev, err_vlan_add: dev_uc_unsync(slave_dev, failover_dev); dev_mc_unsync(slave_dev, failover_dev); - dev_close(slave_dev); -err_dev_open: + netif_close(slave_dev); +err_netif_open: dev_put(slave_dev); - dev_set_mtu(slave_dev, orig_mtu); + netif_set_mtu(slave_dev, orig_mtu); done: return err; } diff --git a/net/core/failover.c b/net/core/failover.c index 11bb183c7a1b..e43c59cd6868 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -12,6 +12,7 @@ #include #include #include +#include #include static LIST_HEAD(failover_list); @@ -221,8 +222,11 @@ failover_existing_slave_register(struct net_device *failover_dev) for_each_netdev(net, dev) { if (netif_is_failover(dev)) continue; - if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr)) + if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr)) { + netdev_lock_ops(dev); failover_slave_register(dev); + netdev_unlock_ops(dev); + } } rtnl_unlock(); } -- cgit v1.2.3 From c6690a9030d784d3f099850800b6d5323771ca37 Mon Sep 17 00:00:00 2001 From: Jinliang Zheng Date: Mon, 11 May 2026 23:30:58 +0800 Subject: macsec: introduce dedicated workqueue for SA crypto cleanup Introduce a dedicated ordered workqueue, macsec_wq, which will be used by subsequent patches to defer SA crypto cleanup (crypto_free_aead and related teardown) out of softirq context. Using a dedicated workqueue instead of system_wq allows macsec_exit() to drain exactly the work items belonging to this module via destroy_workqueue(), without interfering with unrelated work items on system_wq or causing unexpected delays elsewhere. rcu_barrier() in macsec_exit() ensures all in-flight rcu_work callbacks have enqueued their work items before destroy_workqueue() drains and destroys the queue, making the two-step teardown correct and complete. The same sequence is kept in the error path of macsec_init() as a precaution, to mirror macsec_exit() and stay safe if work ever becomes queueable before this point in the future. While at it, rename the error labels in macsec_init() from the resource-named style (rtnl:, notifier:, wq:) to the err_xxx: style (err_rtnl:, err_notifier:, err_destroy_wq:) to align with the broader kernel convention. Signed-off-by: Jinliang Zheng Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20260511153102.2640368-2-alexjlzheng@tencent.com Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 6147ee8b1d78..ef5ac634f916 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -26,6 +26,8 @@ #include +static struct workqueue_struct *macsec_wq; + /* SecTAG length = macsec_eth_header without the optional SCI */ #define MACSEC_TAG_LEN 6 @@ -4505,25 +4507,35 @@ static int __init macsec_init(void) { int err; + macsec_wq = alloc_workqueue("macsec", WQ_UNBOUND, 0); + if (!macsec_wq) + return -ENOMEM; + pr_info("MACsec IEEE 802.1AE\n"); err = register_netdevice_notifier(&macsec_notifier); if (err) - return err; + goto err_destroy_wq; err = rtnl_link_register(&macsec_link_ops); if (err) - goto notifier; + goto err_notifier; err = genl_register_family(&macsec_fam); if (err) - goto rtnl; + goto err_rtnl; return 0; -rtnl: +err_rtnl: rtnl_link_unregister(&macsec_link_ops); -notifier: +err_notifier: unregister_netdevice_notifier(&macsec_notifier); +err_destroy_wq: + /* Precautionary, mirrors macsec_exit() to stay safe if work + * ever becomes queueable before this point in the future. + */ + rcu_barrier(); + destroy_workqueue(macsec_wq); return err; } @@ -4533,6 +4545,7 @@ static void __exit macsec_exit(void) rtnl_link_unregister(&macsec_link_ops); unregister_netdevice_notifier(&macsec_notifier); rcu_barrier(); + destroy_workqueue(macsec_wq); } module_init(macsec_init); -- cgit v1.2.3 From 6624bba469a325ecd699feae400b77cd11c76b98 Mon Sep 17 00:00:00 2001 From: Jinliang Zheng Date: Mon, 11 May 2026 23:30:59 +0800 Subject: macsec: use rcu_work to defer RX SA crypto cleanup out of softirq crypto_free_aead() can internally invoke vunmap() (e.g. via dma_free_attrs() in hardware crypto drivers such as hisi_sec2). vunmap() must not be called from softirq context, but free_rxsa() is an RCU callback that runs in softirq, leading to a kernel crash: vunmap+0x4c/0x70 __iommu_dma_free+0xd0/0x138 dma_free_attrs+0xf4/0x100 sec_aead_exit+0x64/0xb8 [hisi_sec2] crypto_destroy_tfm+0x98/0x110 free_rxsa+0x28/0x50 [macsec] rcu_do_batch+0x184/0x460 rcu_core+0xf4/0x1f8 handle_softirqs+0x118/0x330 Use rcu_work to defer the cleanup to a workqueue. rcu_work dispatches the worker asynchronously after the RCU grace period, so no thread blocks waiting, and concurrent releases of multiple SAs naturally share the same grace period. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Jinliang Zheng Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20260511153102.2640368-3-alexjlzheng@tencent.com Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 8 +++++--- include/net/macsec.h | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index ef5ac634f916..e7ad24f1ea5b 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -176,9 +176,10 @@ static void macsec_rxsc_put(struct macsec_rx_sc *sc) call_rcu(&sc->rcu_head, free_rx_sc_rcu); } -static void free_rxsa(struct rcu_head *head) +static void free_rxsa_work(struct work_struct *work) { - struct macsec_rx_sa *sa = container_of(head, struct macsec_rx_sa, rcu); + struct macsec_rx_sa *sa = + container_of(to_rcu_work(work), struct macsec_rx_sa, destroy_work); crypto_free_aead(sa->key.tfm); free_percpu(sa->stats); @@ -188,7 +189,7 @@ static void free_rxsa(struct rcu_head *head) static void macsec_rxsa_put(struct macsec_rx_sa *sa) { if (refcount_dec_and_test(&sa->refcnt)) - call_rcu(&sa->rcu, free_rxsa); + queue_rcu_work(macsec_wq, &sa->destroy_work); } static struct macsec_tx_sa *macsec_txsa_get(struct macsec_tx_sa __rcu *ptr) @@ -1409,6 +1410,7 @@ static int init_rx_sa(struct macsec_rx_sa *rx_sa, char *sak, int key_len, rx_sa->next_pn = 1; refcount_set(&rx_sa->refcnt, 1); spin_lock_init(&rx_sa->lock); + INIT_RCU_WORK(&rx_sa->destroy_work, free_rxsa_work); return 0; } diff --git a/include/net/macsec.h b/include/net/macsec.h index bc7de5b53e54..0980ef36fbf0 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -123,6 +124,7 @@ struct macsec_dev_stats { * @key: key structure * @ssci: short secure channel identifier * @stats: per-SA stats + * @destroy_work: deferred work to free the SA in process context after RCU grace period */ struct macsec_rx_sa { struct macsec_key key; @@ -136,7 +138,7 @@ struct macsec_rx_sa { bool active; struct macsec_rx_sa_stats __percpu *stats; struct macsec_rx_sc *sc; - struct rcu_head rcu; + struct rcu_work destroy_work; }; struct pcpu_rx_sc_stats { -- cgit v1.2.3 From 552cc2306c3d87632f44a655737d1d367c2a3295 Mon Sep 17 00:00:00 2001 From: Jinliang Zheng Date: Mon, 11 May 2026 23:31:00 +0800 Subject: macsec: use rcu_work to defer TX SA crypto cleanup out of softirq free_txsa() is an RCU callback running in softirq context, but calls crypto_free_aead() which can invoke vunmap() internally on hardware crypto drivers (e.g. hisi_sec2), triggering a kernel crash. Use rcu_work to defer the cleanup to a workqueue, for the same reasons as the analogous fix to free_rxsa() in the previous patch. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Jinliang Zheng Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20260511153102.2640368-4-alexjlzheng@tencent.com Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 8 +++++--- include/net/macsec.h | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index e7ad24f1ea5b..f904f4d16b45 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -205,9 +205,10 @@ static struct macsec_tx_sa *macsec_txsa_get(struct macsec_tx_sa __rcu *ptr) return sa; } -static void free_txsa(struct rcu_head *head) +static void free_txsa_work(struct work_struct *work) { - struct macsec_tx_sa *sa = container_of(head, struct macsec_tx_sa, rcu); + struct macsec_tx_sa *sa = + container_of(to_rcu_work(work), struct macsec_tx_sa, destroy_work); crypto_free_aead(sa->key.tfm); free_percpu(sa->stats); @@ -217,7 +218,7 @@ static void free_txsa(struct rcu_head *head) static void macsec_txsa_put(struct macsec_tx_sa *sa) { if (refcount_dec_and_test(&sa->refcnt)) - call_rcu(&sa->rcu, free_txsa); + queue_rcu_work(macsec_wq, &sa->destroy_work); } static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) @@ -1510,6 +1511,7 @@ static int init_tx_sa(struct macsec_tx_sa *tx_sa, char *sak, int key_len, tx_sa->active = false; refcount_set(&tx_sa->refcnt, 1); spin_lock_init(&tx_sa->lock); + INIT_RCU_WORK(&tx_sa->destroy_work, free_txsa_work); return 0; } diff --git a/include/net/macsec.h b/include/net/macsec.h index 0980ef36fbf0..d962093ee923 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -176,6 +176,7 @@ struct macsec_rx_sc { * @key: key structure * @ssci: short secure channel identifier * @stats: per-SA stats + * @destroy_work: deferred work to free the SA in process context after RCU grace period */ struct macsec_tx_sa { struct macsec_key key; @@ -188,7 +189,7 @@ struct macsec_tx_sa { refcount_t refcnt; bool active; struct macsec_tx_sa_stats __percpu *stats; - struct rcu_head rcu; + struct rcu_work destroy_work; }; /** -- cgit v1.2.3 From f44d38a31f1802b7222adaea9ee69f9d280f698a Mon Sep 17 00:00:00 2001 From: Zizhi Wo Date: Thu, 14 May 2026 10:18:47 +0800 Subject: io_uring: validate user-controlled cq.head in io_cqe_cache_refill() A fuzzing run reproduced an unkillable io_uring task stuck at ~100% CPU: [root@fedora io_uring_stress]# ps -ef | grep io_uring root 1240 1 99 13:36 ? 00:01:35 [io_uring_stress] The task loops inside io_cqring_wait() and never returns to userspace, and SIGKILL has no effect. This is caused by the CQ ring exposing rings->cq.head to userspace as writable, while the authoritative tail lives in kernel-private ctx->cached_cq_tail. io_cqe_cache_refill() computes free space as an unsigned subtraction: free = ctx->cq_entries - min(tail - head, ctx->cq_entries); If userspace keeps head within [0, tail], the subtraction is well defined and min() just acts as a defensive clamp. But if userspace advances head past tail, (tail - head) wraps to a huge value, free becomes 0, and io_cqe_cache_refill() fails. The CQE is pushed onto the overflow list and IO_CHECK_CQ_OVERFLOW_BIT is set. The wait loop in io_cqring_wait() relies on an invariant: refill() only fails when the CQ is *physically* full, in which case rings->cq.tail has been advanced to iowq->cq_tail and io_should_wake() returns true. The tampered head breaks this: refill() fails while the ring is not full, no OCQE is copied in, rings->cq.tail never catches up, io_should_wake() stays false, and io_cqring_wait_schedule() keeps returning early because IO_CHECK_CQ_OVERFLOW_BIT is still set. The result is a tight retry loop that never returns to userspace. Introduce io_cqring_queued() as the single point that converts the (tail, head) pair into a trustworthy queued count. Since the real head/tail distance is bounded by cq_entries (far below 2^31), a signed comparison reliably detects userspace moving head past tail; in that case treat the queue as empty so callers see the full cache as free and forward progress is preserved. Suggested-by: Jens Axboe Signed-off-by: Zizhi Wo Link: https://patch.msgid.link/20260514021847.4062782-1-wozizhi@huaweicloud.com [axboe: fixup commit message, kill 'queued' var, and keep it all in io_uring.c] Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 2ebb0ba37c4f..036145ee466c 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -686,13 +686,27 @@ static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx, return ocqe; } +/* + * Compute queued CQEs for free-space calculation, clamped to cq_entries. + */ +static unsigned int io_cqring_queued(struct io_ring_ctx *ctx) +{ + struct io_rings *rings = io_get_rings(ctx); + int diff; + + diff = (int)(ctx->cached_cq_tail - READ_ONCE(rings->cq.head)); + if (diff >= 0) + return min((unsigned int)diff, ctx->cq_entries); + return 0; +} + /* * Fill an empty dummy CQE, in case alignment is off for posting a 32b CQE * because the ring is a single 16b entry away from wrapping. */ static bool io_fill_nop_cqe(struct io_ring_ctx *ctx, unsigned int off) { - if (__io_cqring_events(ctx) < ctx->cq_entries) { + if (io_cqring_queued(ctx) < ctx->cq_entries) { struct io_uring_cqe *cqe = &ctx->rings->cqes[off]; cqe->user_data = 0; @@ -713,7 +727,7 @@ bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow, bool cqe32) { struct io_rings *rings = ctx->rings; unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1); - unsigned int free, queued, len; + unsigned int free, len; /* * Posting into the CQ when there are pending overflowed CQEs may break @@ -733,9 +747,7 @@ bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow, bool cqe32) off = 0; } - /* userspace may cheat modifying the tail, be safe and do min */ - queued = min(__io_cqring_events(ctx), ctx->cq_entries); - free = ctx->cq_entries - queued; + free = ctx->cq_entries - io_cqring_queued(ctx); /* we need a contiguous range, limit based on the current array offset */ len = min(free, ctx->cq_entries - off); if (len < (cqe32 + 1)) -- cgit v1.2.3 From 50da1c9ccb70fc5250c37ac474b54ee072732ea3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 6 Apr 2026 16:23:04 -0700 Subject: riscv: Docs: fix unmatched quote warning 'make htmldocs' complains about ``prctrl` -- so add a second '`' to avoid the warning. Documentation/arch/riscv/zicfilp.rst:79: WARNING: Inline literal start-string without end-string. [docutils] Fixes: 08ee1559052b ("prctl: cfi: change the branch landing pad prctl()s to be more descriptive") Signed-off-by: Randy Dunlap Link: https://patch.msgid.link/20260406232304.1892528-1-rdunlap@infradead.org Signed-off-by: Paul Walmsley --- Documentation/arch/riscv/zicfilp.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/arch/riscv/zicfilp.rst b/Documentation/arch/riscv/zicfilp.rst index ab7d8e62ddaf..12b35969d17a 100644 --- a/Documentation/arch/riscv/zicfilp.rst +++ b/Documentation/arch/riscv/zicfilp.rst @@ -78,7 +78,7 @@ the program. Per-task indirect branch tracking state can be monitored and controlled via the :c:macro:`PR_GET_CFI` and :c:macro:`PR_SET_CFI` -``prctl()` arguments (respectively), by supplying +``prctl()`` arguments (respectively), by supplying :c:macro:`PR_CFI_BRANCH_LANDING_PADS` as the second argument. These are architecture-agnostic, and will return -EINVAL if the underlying functionality is not supported. -- cgit v1.2.3 From b69bcb13ed7024a84d6cd8ad330f1e32782fcf28 Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Wed, 1 Apr 2026 09:53:17 +0800 Subject: riscv: misaligned: Make enabling delegation depend on NONPORTABLE The unaligned access emulation code in Linux has various deficiencies. For example, it doesn't emulate vector instructions [1] [2], and doesn't emulate KVM guest accesses. Therefore, requesting misaligned exception delegation with SBI FWFT actually regresses vector instructions' and KVM guests' behavior. Until Linux can handle it properly, guard these sbi_fwft_set() calls behind RISCV_SBI_FWFT_DELEGATE_MISALIGNED, which in turn depends on NONPORTABLE. Those who are sure that this wouldn't be a problem can enable this option, perhaps getting better performance. The rest of the existing code proceeds as before, except as if SBI_FWFT_MISALIGNED_EXC_DELEG is not available, to handle any remaining address misaligned exceptions on a best-effort basis. The KVM SBI FWFT implementation is also not touched, but it is disabled if the firmware emulates unaligned accesses. Cc: stable@vger.kernel.org Fixes: cf5a8abc6560 ("riscv: misaligned: request misaligned exception from SBI") Reported-by: Songsong Zhang # KVM Link: https://lore.kernel.org/linux-riscv/38ce44c1-08cf-4e3f-8ade-20da224f529c@iscas.ac.cn/ [1] Link: https://lore.kernel.org/linux-riscv/b3cfcdac-0337-4db0-a611-258f2868855f@iscas.ac.cn/ [2] Signed-off-by: Vivian Wang Acked-by: Conor Dooley Link: https://patch.msgid.link/20260401-riscv-misaligned-dont-delegate-v2-1-5014a288c097@iscas.ac.cn Signed-off-by: Paul Walmsley --- arch/riscv/Kconfig | 22 ++++++++++++++++++++++ arch/riscv/kernel/traps_misaligned.c | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index d235396c4514..c5754942cf85 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -937,6 +937,28 @@ config RISCV_VECTOR_MISALIGNED help Enable detecting support for vector misaligned loads and stores. +config RISCV_SBI_FWFT_DELEGATE_MISALIGNED + bool "Request firmware delegation of unaligned access exceptions" + depends on RISCV_SBI + depends on NONPORTABLE + help + Use SBI FWFT to request delegation of load address misaligned and + store address misaligned exceptions, if possible, and prefer Linux + kernel emulation of these accesses to firmware emulation. + + Unfortunately, Linux's emulation is still incomplete. Namely, it + currently does not handle vector instructions and KVM guest accesses. + On platforms where these accesses would have been handled by firmware, + enabling this causes unexpected kernel oopses, userspaces crashes and + KVM guest crashes. If you are sure that these are not a problem for + your platform, you can say Y here, which may improve performance. + + Saying N here will not worsen emulation support for unaligned accesses + even in the case where the firmware also has incomplete support. It + simply keeps the firmware's emulation enabled. + + If you don't know what to do here, say N. + choice prompt "Unaligned Accesses Support" default RISCV_PROBE_UNALIGNED_ACCESS diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 2a27d3ff4ac6..81b7682e6c6d 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -584,7 +584,7 @@ static int cpu_online_check_unaligned_access_emulated(unsigned int cpu) static bool misaligned_traps_delegated; -#ifdef CONFIG_RISCV_SBI +#if defined(CONFIG_RISCV_SBI_FWFT_DELEGATE_MISALIGNED) static int cpu_online_sbi_unaligned_setup(unsigned int cpu) { -- cgit v1.2.3 From 16ca52bc209fa4bf9239cd9e5643e95533476b58 Mon Sep 17 00:00:00 2001 From: Nicolás Bazaes Date: Wed, 13 May 2026 21:35:49 -0400 Subject: Input: synaptics - add LEN2058 to SMBus passlist for ThinkPad E490 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Lenovo ThinkPad E490 (PNP ID: LEN2058) has a Synaptics TM3471-020 touchpad that supports SMBus/RMI4 mode but is not listed in smbus_pnp_ids[]. Without this entry, RMI4 over SMBus is not enabled by default, and the touchpad falls back to PS/2 mode. Adding LEN2058 to the passlist enables automatic RMI4 detection without requiring the psmouse.synaptics_intertouch parameter, and matches the behavior of similar ThinkPad models already in the list (E480/LEN2054, E580/LEN2055). Tested on ThinkPad E490 with kernel 7.0.5-zen1 and Arch Linux. RMI4 over SMBus is confirmed working without any kernel parameters. Signed-off-by: Nicolás Bazaes Assisted-by: Claude:claude-sonnet-4-6 Link: https://patch.msgid.link/20260514013552.14234-1-contacto@bazaes.cl Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 26071128f43a..c70502e24031 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -190,6 +190,7 @@ static const char * const smbus_pnp_ids[] = { "LEN2044", /* L470 */ "LEN2054", /* E480 */ "LEN2055", /* E580 */ + "LEN2058", /* E490 */ "LEN2068", /* T14 Gen 1 */ "SYN1221", /* TUXEDO InfinityBook Pro 14 v5 */ "SYN3003", /* HP EliteBook 850 G1 */ -- cgit v1.2.3 From 31467b23823ffec1f6fff407f8e3ca9af8b7491a Mon Sep 17 00:00:00 2001 From: Sayali Patil Date: Wed, 13 May 2026 13:44:13 +0530 Subject: powerpc/time: Remove redundant preempt_disable|enable() calls from arch_irq_work_raise() A kernel panic is observed when handling machine check exceptions from real mode. BUG: Unable to handle kernel data access on read at 0xc00000006be21300 Oops: Kernel access of bad area, sig: 11 [#1] MSR: 8000000000001003 CR: 88222248 XER: 00000005 CFAR: c00000000003ffc4 DAR: c00000006be21300 DSISR: 40000000 IRQMASK: 0 NIP [c000000000029e40] arch_irq_work_raise+0x10/0x70 LR [c00000000003ffc8] machine_check_queue_event+0xa8/0x150 Call Trace: [c0000000179d3c70] [c00000000003ff64] machine_check_queue_event+0x44/0x150 [c0000000179d3d30] [c0000000000084e0] machine_check_early_common+0x1f0/0x2c0 The crash occurs because arch_irq_work_raise() calls preempt_disable() from machine check exception (MCE) handlers running in real mode. In this context, accessing the preempt_count can fault, leading to the panic. The preempt_disable()/preempt_enable() pair in arch_irq_work_raise() was originally added by commit 0fe1ac48bef0 ("powerpc/perf_event: Fix oops due to perf_event_do_pending call") to avoid races while raising irq work from exception context. Later, commit 471ba0e686cb ("irq_work: Do not raise an IPI when queueing work on the local CPU") added preemption protection in irq_work_queue() path, while commit 20b876918c06 ("irq_work: Use per cpu atomics instead of regular atomics") added equivalent protection in irq_work_queue_on() before reaching arch_irq_work_raise(): irq_work_queue() / irq_work_queue_on() -> preempt_disable() -> __irq_work_queue_local() -> irq_work_raise() -> arch_irq_work_raise() As a result, callers other than mce_irq_work_raise() already execute with preemption disabled, making the additional preempt_disable()/preempt_enable() pair in arch_irq_work_raise() redundant. The arch_irq_work_raise() function executes in NMI context when called from MCE handler. Hence we will not be preempted or scheduled out since we are in NMI context with MSR[EE]=0. Therefore, it is safe to remove the preempt_disable()/preempt_enable() calls from here. Remove it to avoid accessing preempt_count from real mode context. Fixes: cc15ff327569 ("powerpc/mce: Avoid using irq_work_queue() in realmode") Suggested-by: Mahesh Salgaonkar Acked-by: Shrikanth Hegde Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Sayali Patil [Maddy: Fixed the commit title] Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20260513081413.222490-1-sayalip@linux.ibm.com --- arch/powerpc/kernel/time.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 4bbeb8644d3d..b4472288e0d4 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -458,6 +458,10 @@ DEFINE_PER_CPU(u8, irq_work_pending); #endif /* 32 vs 64 bit */ +/* + * Must be called with preemption disabled since it updates + * per-CPU irq_work state and programs the local CPU decrementer. + */ void arch_irq_work_raise(void) { /* @@ -471,10 +475,8 @@ void arch_irq_work_raise(void) * which could get tangled up if we're messing with the same state * here. */ - preempt_disable(); set_irq_work_pending_flag(); set_dec(1); - preempt_enable(); } static void set_dec_or_work(u64 val) -- cgit v1.2.3 From 742b04d0550b0ec89dcbc99537ec88653bd1ad90 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 13 May 2026 10:49:14 -0600 Subject: xfrm: Check for underflow in xfrm_state_mtu Leo Lin reported OOB write issue in esp component: xfrm_state_mtu() returns u32 but performs its arithmetic in unsigned modulo-2^32 space using an attacker-influenced "header_len + authsize + net_adj" subtracted from a small "mtu" argument. A nobody user can install an IPv4 ESP tunnel SA with a large authentication key (XFRMA_ALG_AUTH_TRUNC, e.g. hmac(sha512), 64-byte key, 64-byte trunc), configure a small interface MTU (68 bytes), and set XFRMA_TFCPAD to a large value. When a single UDP datagram is then sent through the tunnel, xfrm_state_mtu() underflows to a near-2^32 value, and esp_output() consumes it as a signed int via: padto = min(x->tfcpad, xfrm_state_mtu(x, mtu_cached)) esp.tfclen = padto - skb->len (assigned to int) esp.tfclen ends up negative (e.g. -207). It is sign-extended to size_t when passed to memset() inside esp_output_fill_trailer(), producing a ~16 EB write of zeroes at skb_tail_pointer(skb). KASAN logs it as "Write of size 18446744073709551537 at addr ffff888...". Check for underflow and return 1. This causes the sendmsg attempt to fail with ENETUNREACH. Fixes: c5c252389374 ("[XFRM]: Optimize MTU calculation") Reported-by: Leo Lin Assisted-by: Codex:26.506.31004 Signed-off-by: David Ahern Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 395d82411a87..589c3b6e4679 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -3114,10 +3114,14 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) const struct xfrm_type *type = READ_ONCE(x->type); struct crypto_aead *aead; u32 blksize, net_adj = 0; + u32 overhead, payload_mtu; if (x->km.state != XFRM_STATE_VALID || - !type || type->proto != IPPROTO_ESP) + !type || type->proto != IPPROTO_ESP) { + if (mtu <= x->props.header_len) + return 1; return mtu - x->props.header_len; + } aead = x->data; blksize = ALIGN(crypto_aead_blocksize(aead), 4); @@ -3140,8 +3144,17 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) break; } - return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - - net_adj) & ~(blksize - 1)) + net_adj - 2; + overhead = x->props.header_len + crypto_aead_authsize(aead) + net_adj; + if (mtu <= overhead) + return 1; + + payload_mtu = mtu - overhead; + payload_mtu &= ~(blksize - 1); + if (payload_mtu <= 2) + return 1; + + return payload_mtu + net_adj - 2; + } EXPORT_SYMBOL_GPL(xfrm_state_mtu); -- cgit v1.2.3 From 1ef2a89584b7b788b2603590d886db076b2f24cc Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Thu, 7 May 2026 16:28:12 +0100 Subject: arm_mpam: Fix monitor instance selection when checking for hardware NRDY In _mpam_ris_hw_probe_hw_nrdy() a new register value to select the first monitor and relevant RIS is prepared in mon_sel. However, it is written to the monitor value register, e.g. MSMON_CSU, rather than MSMON_CFG_MON_SEL. As MSMON_CFG_MON_SEL is a 32 bit register update the type of mon_sel to u32. Write mon_sel to the intended register, MSMON_CFG_MON_SEL. Fixes: 8c90dc68a5de ("arm_mpam: Probe the hardware features resctrl supports") Cc: Signed-off-by: Ben Horgan Reviewed-by: James Morse Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 41b14344b16f..817cb10a8e79 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -731,7 +731,7 @@ static void mpam_enable_quirks(struct mpam_msc *msc) static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg) { u32 now; - u64 mon_sel; + u32 mon_sel; bool can_set, can_clear; struct mpam_msc *msc = ris->vmsc->msc; @@ -740,7 +740,7 @@ static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg) mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, 0) | FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); - _mpam_write_monsel_reg(msc, mon_reg, mon_sel); + mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); _mpam_write_monsel_reg(msc, mon_reg, MSMON___NRDY); now = _mpam_read_monsel_reg(msc, mon_reg); -- cgit v1.2.3 From 4387970bbd84fd14e0c49c3089c5061ccd86b98a Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Thu, 7 May 2026 16:28:13 +0100 Subject: arm_mpam: Pretend that NRDY is always hardware managed Rule ZTXDS of the MPAM specification, IHI009 version B.b, states: "If a monitor does not support automatic updates of NRDY, software can use that bit for any purpose." As software is not reliably informed whether or not the monitor supports automatic updates of NRDY always assume that hardware may manage NRDY but don't rely on it. When NRDY is truly untouched by hardware then, as it is written to 0 on configuration, it will always read 0. At probe it's checked if MSMON_CSU.NRDY and MSMON_MBWU.NRDY are hardware managed but not MSMON_MBWU_L.NDRY. Specialize the checking for hardware managed NRDY to CSU counters as this is the only case where hardware management makes sense. Continue to inform the user if MSMON_CSU.NRDY appears to be hardware managed but the firmware doesn't provide the associated time limit for the automatic clearing of NRDY. Remove the NRDY feature flags as they are now unused. Fixes: 8c90dc68a5de ("arm_mpam: Probe the hardware features resctrl supports") Cc: Signed-off-by: Ben Horgan Reviewed-by: James Morse Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 53 +++++++++++++---------------------------- drivers/resctrl/mpam_internal.h | 2 -- 2 files changed, 17 insertions(+), 38 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 817cb10a8e79..58e0c8970e8c 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -728,7 +728,7 @@ static void mpam_enable_quirks(struct mpam_msc *msc) * Try and see what values stick in this bit. If we can write either value, * its probably not implemented by hardware. */ -static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg) +static bool mpam_ris_hw_probe_csu_nrdy(struct mpam_msc_ris *ris) { u32 now; u32 mon_sel; @@ -742,21 +742,18 @@ static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg) FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); - _mpam_write_monsel_reg(msc, mon_reg, MSMON___NRDY); - now = _mpam_read_monsel_reg(msc, mon_reg); + _mpam_write_monsel_reg(msc, MSMON_CSU, MSMON___NRDY); + now = _mpam_read_monsel_reg(msc, MSMON_CSU); can_set = now & MSMON___NRDY; - _mpam_write_monsel_reg(msc, mon_reg, 0); - now = _mpam_read_monsel_reg(msc, mon_reg); + _mpam_write_monsel_reg(msc, MSMON_CSU, 0); + now = _mpam_read_monsel_reg(msc, MSMON_CSU); can_clear = !(now & MSMON___NRDY); mpam_mon_sel_unlock(msc); return (!can_set || !can_clear); } -#define mpam_ris_hw_probe_hw_nrdy(_ris, _mon_reg) \ - _mpam_ris_hw_probe_hw_nrdy(_ris, MSMON_##_mon_reg) - static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) { int err; @@ -873,20 +870,18 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) mpam_set_feature(mpam_feat_msmon_csu_xcl, props); /* Is NRDY hardware managed? */ - hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, CSU); - if (hw_managed) - mpam_set_feature(mpam_feat_msmon_csu_hw_nrdy, props); - } + hw_managed = mpam_ris_hw_probe_csu_nrdy(ris); - /* - * Accept the missing firmware property if NRDY appears - * un-implemented. - */ - if (err && mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, props)) - dev_err_once(dev, "Counters are not usable because not-ready timeout was not provided by firmware."); + /* + * Accept the missing firmware property if NRDY appears + * un-implemented. + */ + if (err && hw_managed) + dev_err_once(dev, "Counters are not usable because not-ready timeout was not provided by firmware."); + } } if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_MBWU, msmon_features)) { - bool has_long, hw_managed; + bool has_long; u32 mbwumon_idr = mpam_read_partsel_reg(msc, MBWUMON_IDR); props->num_mbwu_mon = FIELD_GET(MPAMF_MBWUMON_IDR_NUM_MON, mbwumon_idr); @@ -905,16 +900,6 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) } else { mpam_set_feature(mpam_feat_msmon_mbwu_31counter, props); } - - /* Is NRDY hardware managed? */ - hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU); - if (hw_managed) - mpam_set_feature(mpam_feat_msmon_mbwu_hw_nrdy, props); - - /* - * Don't warn about any missing firmware property for - * MBWU NRDY - it doesn't make any sense! - */ } } } @@ -1197,7 +1182,6 @@ static void __ris_msmon_read(void *arg) bool reset_on_next_read = false; struct mpam_msc_ris *ris = m->ris; struct msmon_mbwu_state *mbwu_state; - struct mpam_props *rprops = &ris->props; struct mpam_msc *msc = m->ris->vmsc->msc; u32 mon_sel, ctl_val, flt_val, cur_ctl, cur_flt; @@ -1253,8 +1237,7 @@ static void __ris_msmon_read(void *arg) switch (m->type) { case mpam_feat_msmon_csu: now = mpam_read_monsel_reg(msc, CSU); - if (mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, rprops)) - nrdy = now & MSMON___NRDY; + nrdy = now & MSMON___NRDY; now = FIELD_GET(MSMON___VALUE, now); if (mpam_has_quirk(IGNORE_CSU_NRDY, msc) && m->waited_timeout) @@ -1266,8 +1249,7 @@ static void __ris_msmon_read(void *arg) case mpam_feat_msmon_mbwu_63counter: if (m->type != mpam_feat_msmon_mbwu_31counter) { now = mpam_msc_read_mbwu_l(msc); - if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops)) - nrdy = now & MSMON___L_NRDY; + nrdy = now & MSMON___L_NRDY; if (m->type == mpam_feat_msmon_mbwu_63counter) now = FIELD_GET(MSMON___LWD_VALUE, now); @@ -1275,8 +1257,7 @@ static void __ris_msmon_read(void *arg) now = FIELD_GET(MSMON___L_VALUE, now); } else { now = mpam_read_monsel_reg(msc, MBWU); - if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops)) - nrdy = now & MSMON___NRDY; + nrdy = now & MSMON___NRDY; now = FIELD_GET(MSMON___VALUE, now); } diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 1914aefdcba9..04d1a59f02af 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -181,14 +181,12 @@ enum mpam_device_features { mpam_feat_msmon_csu, mpam_feat_msmon_csu_capture, mpam_feat_msmon_csu_xcl, - mpam_feat_msmon_csu_hw_nrdy, mpam_feat_msmon_mbwu, mpam_feat_msmon_mbwu_31counter, mpam_feat_msmon_mbwu_44counter, mpam_feat_msmon_mbwu_63counter, mpam_feat_msmon_mbwu_capture, mpam_feat_msmon_mbwu_rwbw, - mpam_feat_msmon_mbwu_hw_nrdy, mpam_feat_partid_nrw, MPAM_FEATURE_LAST }; -- cgit v1.2.3 From ccad6001be5c38426ccf45790c411467ad3c03c6 Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Thu, 7 May 2026 16:28:14 +0100 Subject: arm_mpam: Improve check for whether or not NRDY is hardware managed mpam_ris_hw_probe_csu_nrdy() sets and clears MSMON_CSU.NRDY and checks whether it's configuration sticks. However, hardware isn't given a chance to disagree. Based on rule LRTGP, in MPAM specification IHI0099 version B.b, the hardware will set NRDY if it needs time to establish a count after a configuration change. Enable the monitor so that NRDY becomes relevant and change the configuration after clearing NRDY to try and coax the hardware into setting it. Fixes: 8c90dc68a5de ("arm_mpam: Probe the hardware features resctrl supports") Cc: Signed-off-by: Ben Horgan Reviewed-by: James Morse Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 58e0c8970e8c..e145828f3f73 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -730,8 +730,7 @@ static void mpam_enable_quirks(struct mpam_msc *msc) */ static bool mpam_ris_hw_probe_csu_nrdy(struct mpam_msc_ris *ris) { - u32 now; - u32 mon_sel; + u32 now, mon_sel, ctl_val; bool can_set, can_clear; struct mpam_msc *msc = ris->vmsc->msc; @@ -742,11 +741,21 @@ static bool mpam_ris_hw_probe_csu_nrdy(struct mpam_msc_ris *ris) FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); + /* Hardware might ignore nrdy if it's not enabled */ + ctl_val = MSMON_CFG_CSU_CTL_TYPE_CSU; + ctl_val |= MSMON_CFG_x_CTL_MATCH_PARTID; + ctl_val |= MSMON_CFG_x_CTL_MATCH_PMG; + ctl_val |= MSMON_CFG_x_CTL_EN; + mpam_write_monsel_reg(msc, CFG_CSU_FLT, 0); + mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val); + _mpam_write_monsel_reg(msc, MSMON_CSU, MSMON___NRDY); now = _mpam_read_monsel_reg(msc, MSMON_CSU); can_set = now & MSMON___NRDY; _mpam_write_monsel_reg(msc, MSMON_CSU, 0); + /* Configuration change to try and coax hardware into setting nrdy */ + mpam_write_monsel_reg(msc, CFG_CSU_FLT, 0x1); now = _mpam_read_monsel_reg(msc, MSMON_CSU); can_clear = !(now & MSMON___NRDY); mpam_mon_sel_unlock(msc); -- cgit v1.2.3 From f1caff3335ea6eab88cdc84ec8f2e3c45ca05486 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 8 May 2026 17:23:38 +0100 Subject: arm_mpam: Fix false positive assert failure during mpam_disable() mpam_assert_partid_sizes_fixed() is used to document that the caller doesn't expect the discovered PARTID size to change while it is walking a list sized by PARTID. Typically the MSC state is not written to until all the MSC have been discovered and this value is set. However, if discovering the MSC fails and schedules mpam_disable(), then the MSC state is written to reset it. In this case the discovered PARTID size may be become smaller - but only PARTID 0 will be used once resctrl_exit() has been called. Skip the WARN_ON_ONCE() if mpam_disable_reason has been set. Fixes: 3bd04fe7d807 ("arm_mpam: Extend reset logic to allow devices to be reset any time") Cc: Signed-off-by: James Morse Reviewed-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index e145828f3f73..5c48812d8573 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -164,11 +164,17 @@ static void mpam_free_garbage(void) /* * Once mpam is enabled, new requestors cannot further reduce the available * partid. Assert that the size is fixed, and new requestors will be turned - * away. + * away. This is needed when walking over structures sized by PARTID. + * + * During mpam_disable() these structures are not fixed, but the MSC state + * is still reset using whatever sizes have been discovered so far. As only + * PARTID 0 will be used after mpam_disable(), any race would be benign. + * Skip the check if a mpam_disable_reason has been set. */ static void mpam_assert_partid_sizes_fixed(void) { - WARN_ON_ONCE(!partid_max_published); + if (!mpam_disable_reason) + WARN_ON_ONCE(!partid_max_published); } static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg) -- cgit v1.2.3 From 6ccbb613b42a1f1ba7bfd547a148f644a902a25c Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 8 May 2026 17:23:39 +0100 Subject: arm_mpam: Check whether the config array is allocated before destroying it __destroy_component_cfg() is called to free the configuration array. It uses the embedded 'garbage' structure, which means the array has to be allocated. If __destroy_component_cfg() is called from mpam_disable() before the configuration was ever allocated, then a NULL pointer is dereferenced. Check for this case and return early if the configuration is not allocated. __destroy_component_cfg() also frees the mbwu_state as this is allocated by __allocate_component_cfg(). As the mbwu_state is allocated after comp->cfg is set, and is also under mpam_list_lock, only the first pointer needs checking. Fixes: 3bd04fe7d807 ("arm_mpam: Extend reset logic to allow devices to be reset any time") Cc: Signed-off-by: James Morse Reviewed-by: Ben Horgan Signed-off-by: Catalin Marinas --- drivers/resctrl/mpam_devices.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 5c48812d8573..988fc291241d 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -2581,6 +2581,9 @@ static void __destroy_component_cfg(struct mpam_component *comp) lockdep_assert_held(&mpam_list_lock); + if (!comp->cfg) + return; + add_to_garbage(comp->cfg); list_for_each_entry(vmsc, &comp->vmsc, comp_list) { msc = vmsc->msc; -- cgit v1.2.3 From 277740023def559a4a2ddc3e8e784ee37a0f16a9 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Sun, 10 May 2026 23:21:38 -0700 Subject: net/smc: reject CHID-0 ACCEPT that matches an empty ism_dev slot On the SMC-D client, slot 0 of ini->ism_dev[]/ini->ism_chid[] is reserved for an SMC-Dv1 device. smc_find_ism_v2_device_clnt() populates V2 entries starting at index 1, so when no V1 device is selected slot 0 is left in its kzalloc()'ed state with ism_dev[0] == NULL and ism_chid[0] == 0. smc_v2_determine_accepted_chid() then matches the peer's CHID against the array starting from index 0 using the CHID alone. A malicious peer replying to a SMC-Dv2-only proposal with d1.chid == 0 matches the empty slot, ini->ism_selected becomes 0, and the subsequent ism_dev[0]->lgr_lock dereference in smc_conn_create() faults at offsetof(struct smcd_dev, lgr_lock) == 0x68: BUG: KASAN: null-ptr-deref in _raw_spin_lock_bh+0x79/0xe0 Write of size 4 at addr 0000000000000068 by task exploit/144 Call Trace: _raw_spin_lock_bh smc_conn_create (net/smc/smc_core.c:1997) __smc_connect (net/smc/af_smc.c:1447) smc_connect (net/smc/af_smc.c:1720) __sys_connect __x64_sys_connect do_syscall_64 Require ism_dev[i] to be non-NULL before accepting a CHID match. Fixes: a7c9c5f4af7f ("net/smc: CLC accept / confirm V2") Reported-by: Weiming Shi Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Xiang Mei Link: https://patch.msgid.link/20260511062138.2839584-1-xmei5@asu.edu Signed-off-by: Paolo Abeni --- net/smc/af_smc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index da28652f6810..dffbd529762d 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1400,7 +1400,8 @@ smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm *aclc, int i; for (i = 0; i < ini->ism_offered_cnt + 1; i++) { - if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) { + if (ini->ism_dev[i] && + ini->ism_chid[i] == ntohs(aclc->d1.chid)) { ini->ism_selected = i; return 0; } -- cgit v1.2.3 From 602d60ebae0f10bfbc7ba90eee026fdbd0203df3 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 22 Apr 2026 11:42:32 +0200 Subject: vdso/gettimeofday: Reload sequence counter after switch to time page in do_aux() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After switching to the real data pages, the sequence counter needs to be reloaded from there. The code using vdso_read_begin_timens() assumed this worked by 'continue' jumping to the *beginning* of the do-while retry loop. However the 'continue' jumps to the *end* of said loop, evaluating the exit condition. If the data page has a sequence counter of '1' it will match the one from the time namespace page and prematurely exit the retry loop. This would result in garbage returned to the caller. Reload the sequence counter after switching the pages by using an inner while loop again, which will loop at most once. The loop generates slightly better code than an explicit reload through 'seq = vdso_read_begin()'. Fixes: ed78b7b2c5ae ("vdso/gettimeofday: Add a helper to read the sequence lock of a time namespace aware clock") Reported-by: Ricardo Ribalda Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Tested-by: Ricardo Ribalda Reviewed-by: Christophe Leroy (CS GROUP) Link: https://patch.msgid.link/20260422-vdso-aux-timens-loop-v1-1-e2dd8c7164cc@linutronix.de Closes: https://lore.kernel.org/lkml/CANiDSCsOy0P1if-gJZqOM5pTJ0RDcwVfru1B7KFbTOEMqjPKJw@mail.gmail.com/ --- lib/vdso/gettimeofday.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index a5798bd26d20..da224011fafd 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -248,11 +248,10 @@ bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_ti vc = &vd->aux_clock_data[idx]; do { - if (vdso_read_begin_timens(vc, &seq)) { + while (vdso_read_begin_timens(vc, &seq)) { + /* Re-read from the real time data page, reload seq by looping */ vd = __arch_get_vdso_u_timens_data(vd); vc = &vd->aux_clock_data[idx]; - /* Re-read from the real time data page */ - continue; } /* Auxclock disabled? */ -- cgit v1.2.3 From 591711b32681a04b57d00c2a404658f8419a081c Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 8 May 2026 18:09:20 +0200 Subject: drm/ttm: Convert -EAGAIN from dmem_cgroup_try_charge to -ENOSPC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dmem_cgroup_try_charge() returns -EAGAIN when the cgroup limit is hit and the charge fails. TTM has no concept of -EAGAIN from resource allocation; -ENOSPC is the canonical error meaning "no space, try eviction". Convert at the source in ttm_resource_alloc() so no caller needs to handle an unexpected error code, and clean up the now-redundant -EAGAIN check in ttm_bo_alloc_resource(). Without this, -EAGAIN escaping ttm_resource_alloc() during an eviction walk causes the walk to terminate early instead of continuing to the next candidate. Cc: Friedrich Vock Cc: Maarten Lankhorst Cc: Tejun Heo Cc: Maxime Ripard Cc: Christian Koenig Cc: dri-devel@lists.freedesktop.org Cc: # v6.14+ Fixes: 2b624a2c1865 ("drm/ttm: Handle cgroup based eviction in TTM") Assisted-by: GitHub_Copilot:claude-sonnet-4.6 Signed-off-by: Thomas Hellström Reviewed-by: Maarten Lankhorst Link: https://patch.msgid.link/20260508160920.230339-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- drivers/gpu/drm/ttm/ttm_resource.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 293401705542..bcd76f6bb7f0 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -739,7 +739,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo, may_evict = (force_space && place->mem_type != TTM_PL_SYSTEM); ret = ttm_resource_alloc(bo, place, res, force_space ? &limit_pool : NULL); if (ret) { - if (ret != -ENOSPC && ret != -EAGAIN) { + if (ret != -ENOSPC) { dmem_cgroup_pool_state_put(limit_pool); return ret; } diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 0e5f1582f13d..154d6739256f 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -398,8 +398,11 @@ int ttm_resource_alloc(struct ttm_buffer_object *bo, if (man->cg) { ret = dmem_cgroup_try_charge(man->cg, bo->base.size, &pool, ret_limit_pool); - if (ret) + if (ret) { + if (ret == -EAGAIN) + ret = -ENOSPC; return ret; + } } ret = man->func->alloc(man, bo, place, res_ptr); -- cgit v1.2.3 From 285943c6e7ca309bbea84b253745154241d9788a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 11 May 2026 10:49:17 -0700 Subject: net: tls: fix off-by-one in sg_chain entry count for wrapped sk_msg ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an sk_msg scatterlist ring wraps (sg.end < sg.start), tls_push_record() chains the tail portion of the ring to the head using sg_chain(). An extra entry in the sg array is reserved for this: struct sk_msg_sg { [...] /* The extra two elements: * 1) used for chaining the front and sections when the list becomes * partitioned (e.g. end < start). The crypto APIs require the * chaining; * 2) to chain tailer SG entries after the message. */ struct scatterlist data[MAX_MSG_FRAGS + 2]; The current code uses MAX_SKB_FRAGS + 1 as the ring size: sg_chain(&msg_pl->sg.data[msg_pl->sg.start], MAX_SKB_FRAGS - msg_pl->sg.start + 1, msg_pl->sg.data); This places the chain pointer at sg_chain(data[start], (MAX_SKB_FRAGS - msg_start + 1) .. = &data[start] + (MAX_SKB_FRAGS - msg_start + 1) - 1 = data[start + (MAX_SKB_FRAGS - start + 1) - 1] = data[MAX_SKB_FRAGS] instead of the true last entry. This is likely due to a "race" of the commit under Fixes landing close to commit 031097d9e079 ("bpf: sk_msg, zap ingress queue on psock down") Convert to ARRAY_SIZE and drop the data[start] / - start (as suggested by Sabrina). Reported-by: 钱一铭 Fixes: 9aaaa56845a0 ("bpf: Sockmap/tls, skmsg can have wrapped skmsg that needs extra chaining") Signed-off-by: Jakub Kicinski Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20260511174920.433155-2-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/tls/tls_sw.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 2590e855f6a5..2608b0c01849 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -800,11 +800,9 @@ static int tls_push_record(struct sock *sk, int flags, sg_mark_end(sk_msg_elem(msg_pl, i)); } - if (msg_pl->sg.end < msg_pl->sg.start) { - sg_chain(&msg_pl->sg.data[msg_pl->sg.start], - MAX_SKB_FRAGS - msg_pl->sg.start + 1, + if (msg_pl->sg.end < msg_pl->sg.start) + sg_chain(msg_pl->sg.data, ARRAY_SIZE(msg_pl->sg.data), msg_pl->sg.data); - } i = msg_pl->sg.start; sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]); -- cgit v1.2.3 From ff26a0e8377dec07e4a7230db7675bed1b9a6d03 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 11 May 2026 10:49:18 -0700 Subject: net: tls: prevent chain-after-chain in plain text SG Sashiko points out that if end = 0 (start != 0) the current code will create a chain link to content type right after the wrap link: This would create a chain where the wrap link points directly to another chain link. The scatterlist API sg_next iterator does not recursively resolve consecutive chain links. meaning this is illegal input to crypto. The wrapping link is unnecessary if end = 0. end is the entry after the last one used so end = 0 means there's nothing pushed after the wrap: end start i v v v [ ]...[ ][ d ][ d ][ d ][ d ][rsv for wrap] Skip the wrapping in this case. TLS 1.3 can use the "wrapping slot" for it's chaining if end = 0. This avoids the chain-after-chain. Move the wrap chaining before marking END and chaining off content type, that feels like more logical ordering to me, but should not matter from functional perspective. Reported-by: Sashiko Fixes: 9aaaa56845a0 ("bpf: Sockmap/tls, skmsg can have wrapped skmsg that needs extra chaining") Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20260511174920.433155-3-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/tls/tls_sw.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 2608b0c01849..3bfdaf5e64f5 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -789,21 +789,33 @@ static int tls_push_record(struct sock *sk, int flags, i = msg_pl->sg.end; sk_msg_iter_var_prev(i); + /* msg_pl->sg.data is a ring; data[MAX+1] is reserved for the wrap + * link (frags won't use it). 'i' is now the last filled entry: + * + * i end start + * v v v [ rsv ] + * [ d ][ d ][ ][ ]...[ ][ d ][ d ][ d ][chain] + * ^ END v + * `-----------------------------------------' + * + * Note that SGL does not allow chain-after-chain, so for TLS 1.3, + * we must make sure we don't create the wrap entry and then chain + * link to content_type immediately at index 0. + */ + if (i < msg_pl->sg.start) + sg_chain(msg_pl->sg.data, ARRAY_SIZE(msg_pl->sg.data), + msg_pl->sg.data); + rec->content_type = record_type; if (prot->version == TLS_1_3_VERSION) { /* Add content type to end of message. No padding added */ sg_set_buf(&rec->sg_content_type, &rec->content_type, 1); sg_mark_end(&rec->sg_content_type); - sg_chain(msg_pl->sg.data, msg_pl->sg.end + 1, - &rec->sg_content_type); + sg_chain(msg_pl->sg.data, i + 2, &rec->sg_content_type); } else { sg_mark_end(sk_msg_elem(msg_pl, i)); } - if (msg_pl->sg.end < msg_pl->sg.start) - sg_chain(msg_pl->sg.data, ARRAY_SIZE(msg_pl->sg.data), - msg_pl->sg.data); - i = msg_pl->sg.start; sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]); -- cgit v1.2.3 From 561458db0d6b08b4e4956c6e4456d7781b18676f Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 May 2026 14:51:29 -0600 Subject: docs: security-bugs: add a link to the threat-model documentation Rather than make readers search for this document, just a link to it where it is referenced. (While I was at it, I removed the unused and unneeded _threatmodel label from the top of threat-model.rst). Acked-by: Willy Tarreau Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jonathan Corbet --- Documentation/process/security-bugs.rst | 13 +++++++------ Documentation/process/threat-model.rst | 2 -- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst index f85c65f31f12..3c51ddde31dd 100644 --- a/Documentation/process/security-bugs.rst +++ b/Documentation/process/security-bugs.rst @@ -191,12 +191,13 @@ handle: Please **always convert your report to plain text** without any formatting decorations before sending it. - * **Impact Evaluation**: Many AI-generated reports lack an understanding of - the kernel's threat model and go to great lengths inventing theoretical - consequences. This adds noise and complicates triage. Please stick to - verifiable facts (e.g., "this bug permits any user to gain CAP_NET_ADMIN") - without enumerating speculative implications. Have your tool read this - documentation as part of the evaluation process. + * **Impact Evaluation**: Many AI-generated reports lack an understanding + of the kernel's threat model (see Documentation/process/threat-model.rst) + and go to great lengths inventing theoretical consequences. This adds + noise and complicates triage. Please stick to verifiable facts (e.g., + "this bug permits any user to gain CAP_NET_ADMIN") without enumerating + speculative implications. Have your tool read this documentation as + part of the evaluation process. * **Reproducer**: AI-based tools are often capable of generating reproducers. Please always ensure your tool provides one and **test it thoroughly**. If diff --git a/Documentation/process/threat-model.rst b/Documentation/process/threat-model.rst index ecb432390e79..91da52f7114f 100644 --- a/Documentation/process/threat-model.rst +++ b/Documentation/process/threat-model.rst @@ -1,5 +1,3 @@ -.. _threatmodel: - The Linux Kernel threat model ============================= -- cgit v1.2.3 From f2e65e4e5b4b4b9ecf43f03c3fdbe8c9a8a43a9e Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 May 2026 14:58:53 -0600 Subject: docs: threat-model: don't limit root capabilities to CAP_SYS_ADMIN The threat-model document says that only users with CAP_SYS_ADMIN can carry out a number of admin-level tasks, but there are numerous capabilities that can confer that sort of power. Generalize the text slightly to make it clear that CAP_SYS_ADMIN is not the only all-powerful capability. Acked-by: Willy Tarreau Signed-off-by: Jonathan Corbet --- Documentation/process/threat-model.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/process/threat-model.rst b/Documentation/process/threat-model.rst index 91da52f7114f..f177b8d3c1ca 100644 --- a/Documentation/process/threat-model.rst +++ b/Documentation/process/threat-model.rst @@ -62,7 +62,8 @@ on common processors featuring privilege levels and memory management units: * **Capability-based protection**: - * users not having the ``CAP_SYS_ADMIN`` capability may not alter the + * users not having elevated capabilities (including but not limited to + CAP_SYS_ADMIN) may not alter the kernel's configuration, memory nor state, change other users' view of the file system layout, grant any user capabilities they do not have, nor affect the system's availability (shutdown, reboot, panic, hang, or making -- cgit v1.2.3 From 67ea9d353d0ba12bdbc9183ff568dead9e949b80 Mon Sep 17 00:00:00 2001 From: Qing Wang Date: Tue, 12 May 2026 11:50:35 +0800 Subject: mm/slub: hold cpus_read_lock around flush_rcu_sheaves_on_cache() flush_rcu_sheaves_on_cache() calls queue_work_on() in a for_each_online_cpu() loop, which requires the cpu to stay online. But cpus_read_lock() is not held in kvfree_rcu_barrier_on_cache() and the set of "online cpus" is subject to change. There are two paths that call flush_rcu_sheaves_on_cache(): // has cpus_read_lock() flush_all_rcu_sheaves() -> flush_rcu_sheaves_on_cache() // no cpus_read_lock() kvfree_rcu_barrier_on_cache() -> flush_rcu_sheaves_on_cache() Fix this by holding cpus_read_lock() in kvfree_rcu_barrier_on_cache(). Why not move cpus_read_lock() from flush_all_rcu_sheaves() into flush_rcu_sheaves_on_cache()? The reason is it would introduce a new lock order (slab_mutex -> cpu_hotplug_lock). The reverse order (cpu_hotplug_lock -> slab_mutex) is established by - cpuhp_setup_state_nocalls(..., slub_cpu_setup, ...) - kmem_cache_destroy() The two orders together would form an AB-BA deadlock. Finally, add lockdep_assert_cpus_held() in flush_rcu_sheaves_on_cache() to catch the same problem in the future. Fixes: 0f35040de593 ("mm/slab: introduce kvfree_rcu_barrier_on_cache() for cache destruction") Cc: Signed-off-by: Qing Wang Link: https://patch.msgid.link/20260512035035.762317-1-wangqing7171@gmail.com Signed-off-by: Vlastimil Babka (SUSE) --- mm/slab_common.c | 2 ++ mm/slub.c | 1 + 2 files changed, 3 insertions(+) diff --git a/mm/slab_common.c b/mm/slab_common.c index d5a70a831a2a..8b661fff5eed 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -2110,7 +2110,9 @@ EXPORT_SYMBOL_GPL(kvfree_rcu_barrier); void kvfree_rcu_barrier_on_cache(struct kmem_cache *s) { if (cache_has_sheaves(s)) { + cpus_read_lock(); flush_rcu_sheaves_on_cache(s); + cpus_read_unlock(); rcu_barrier(); } diff --git a/mm/slub.c b/mm/slub.c index 0baa906f39ab..9ad80b7f601a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4024,6 +4024,7 @@ void flush_rcu_sheaves_on_cache(struct kmem_cache *s) struct slub_flush_work *sfw; unsigned int cpu; + lockdep_assert_cpus_held(); mutex_lock(&flush_lock); for_each_online_cpu(cpu) { -- cgit v1.2.3 From c78bdba7b9666020c0832150a4fc4c0aebc7c6ac Mon Sep 17 00:00:00 2001 From: Sven Schuchmann Date: Tue, 12 May 2026 09:19:47 +0200 Subject: net: phy: DP83TC811: add reading of abilities At this time the driver is not listing any speeds it supports. This should be ETHTOOL_LINK_MODE_100baseT1_Full_BIT for DP83TC811. Add the missing call for phylib to read the abilities. Fixes: b753a9faaf9a ("net: phy: DP83TC811: Introduce support for the DP83TC811 phy") Suggested-by: Andrew Lunn Signed-off-by: Sven Schuchmann Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260512071949.6218-1-schuchmann@schleissheimer.de [pabeni@redhat.com: dropped revision history] Signed-off-by: Paolo Abeni --- drivers/net/phy/dp83tc811.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/dp83tc811.c b/drivers/net/phy/dp83tc811.c index e480c2a07450..252fb12b3e68 100644 --- a/drivers/net/phy/dp83tc811.c +++ b/drivers/net/phy/dp83tc811.c @@ -393,6 +393,7 @@ static struct phy_driver dp83811_driver[] = { .config_init = dp83811_config_init, .config_aneg = dp83811_config_aneg, .soft_reset = dp83811_phy_reset, + .get_features = genphy_c45_pma_read_ext_abilities, .get_wol = dp83811_get_wol, .set_wol = dp83811_set_wol, .config_intr = dp83811_config_intr, -- cgit v1.2.3 From 1d59f36e95f7f7134db0e313c9d787cb0adb2153 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Mon, 11 May 2026 18:24:43 +0200 Subject: drm/ttm: Fix ttm_bo_shrink() infinite LRU walk on backup failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply the same fix as b2ed01e7ad ("drm/ttm: Fix ttm_bo_swapout() infinite LRU walk on swapout failure") to the ttm_bo_shrink() path. Move del_bulk_move from before the backup to after success only, using ttm_resource_del_bulk_move_unevictable() since the resource is now unevictable once fully backed up. Fixes: 70d645deac98 ("drm/ttm: Add helpers for shrinking") Cc: Christian König Cc: Huang Rui Cc: Matthew Auld Cc: Matthew Brost Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Cc: stable@vger.kernel.org # v6.15+ Assisted-by: GitHub_Copilot:claude-opus-4.6 Reviewed-by: Matthew Auld Link: https://patch.msgid.link/20260511162443.24352-1-thomas.hellstrom@linux.intel.com Signed-off-by: Thomas Hellström --- drivers/gpu/drm/ttm/ttm_bo_util.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index f83b7d5ec6c6..3e3c201a0222 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -1112,19 +1112,14 @@ long ttm_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, if (lret < 0) return lret; - if (bo->bulk_move) { - spin_lock(&bdev->lru_lock); - ttm_resource_del_bulk_move(bo->resource, bo); - spin_unlock(&bdev->lru_lock); - } - lret = ttm_tt_backup(bdev, bo->ttm, (struct ttm_backup_flags) {.purge = flags.purge, .writeback = flags.writeback}); - if (lret <= 0 && bo->bulk_move) { + if (lret > 0) { spin_lock(&bdev->lru_lock); - ttm_resource_add_bulk_move(bo->resource, bo); + ttm_resource_del_bulk_move_unevictable(bo->resource, bo); + ttm_resource_move_to_lru_tail(bo->resource); spin_unlock(&bdev->lru_lock); } -- cgit v1.2.3 From e4e9b7b38d5db2cc6a8770bc0596bb8b36b92b1f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 12 May 2026 17:19:47 -0500 Subject: cpufreq/amd-pstate: Drop Kconfig option for dynamic EPP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some performance issues being identified by dynamic EPP and we don't want to have distributions turning it on by default exposing them to users at this time. Drop the kconfig option, and require an explicit opt in from kernel command line or runtime sysfs option to turn it on. Reported-by: Viktor Jägersküpper Closes: https://lore.kernel.org/linux-pm/14a87c99-785c-4b16-bfce-35ecbf053448@freenet.de/ Reported-by: Stuart Meckle Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221473 Signed-off-by: Mario Limonciello Reviewed-by: K Prateek Nayak Link: https://lore.kernel.org/r/20260512221947.1652988-1-mario.limonciello@amd.com (fix sysfs file path) Signed-off-by: Mario Limonciello (AMD) --- Documentation/admin-guide/pm/amd-pstate.rst | 11 +++++------ drivers/cpufreq/Kconfig.x86 | 12 ------------ drivers/cpufreq/amd-pstate.c | 4 ---- 3 files changed, 5 insertions(+), 22 deletions(-) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index f8e7050fc762..a95e2ebce005 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -358,9 +358,9 @@ Dynamic energy performance profile The amd-pstate driver supports dynamically selecting the energy performance profile based on whether the machine is running on AC or DC power. -Whether this behavior is enabled by default depends on the kernel -config option `CONFIG_X86_AMD_PSTATE_DYNAMIC_EPP`. This behavior can also be overridden -at runtime by the sysfs file ``/sys/devices/system/cpu/cpufreq/policyX/dynamic_epp``. +Whether this behavior is enabled by default depends on the kernel command line option +``amd_dynamic_epp`` is set. This behavior can also be overridden +at runtime by the sysfs file ``/sys/devices/system/cpu/amd_pstate/dynamic_epp``. When set to enabled, the driver will select a different energy performance profile when the machine is running on battery or AC power. The driver will @@ -485,9 +485,8 @@ kernel parameter ``amd_prefcore=disable``. ``amd_dynamic_epp`` When AMD pstate is in auto mode, dynamic EPP will control whether the kernel -autonomously changes the EPP mode. The default is configured by -``CONFIG_X86_AMD_PSTATE_DYNAMIC_EPP`` but can be explicitly enabled with -``amd_dynamic_epp=enable`` or disabled with ``amd_dynamic_epp=disable``. +autonomously changes the EPP mode. The default is disabled. It can be enabled +with the kernel parameter ``amd_dynamic_epp=enable``. User Space Interface in ``sysfs`` - General =========================================== diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 027e6ea2e038..a9093cd5e5d1 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -70,18 +70,6 @@ config X86_AMD_PSTATE_DEFAULT_MODE For details, take a look at: . -config X86_AMD_PSTATE_DYNAMIC_EPP - bool "AMD Processor P-State dynamic EPP support" - depends on X86_AMD_PSTATE - default n - help - Allow the kernel to dynamically change the energy performance - value from events like ACPI platform profile and AC adapter plug - events. - - This feature can also be changed at runtime, this configuration - option only sets the kernel default value behavior. - config X86_AMD_PSTATE_UT tristate "selftest for AMD Processor P-State driver" depends on X86 && ACPI_PROCESSOR diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 9eb9c3f4e809..62b5d995281d 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -87,11 +87,7 @@ static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_UNDEFINED; static bool amd_pstate_prefcore = true; -#ifdef CONFIG_X86_AMD_PSTATE_DYNAMIC_EPP -static bool dynamic_epp = CONFIG_X86_AMD_PSTATE_DYNAMIC_EPP; -#else static bool dynamic_epp; -#endif static struct quirk_entry *quirks; /* -- cgit v1.2.3 From e83f5e24da741fa9405aeeff00b08c5ee7c37b88 Mon Sep 17 00:00:00 2001 From: Jiexun Wang Date: Wed, 6 May 2026 19:43:30 +0800 Subject: Bluetooth: serialize accept_q access bt_sock_poll() walks the accept queue without synchronization, while child teardown can unlink the same socket and drop its last reference. The unsynchronized accept queue walk has existed since the initial Bluetooth import. Protect accept_q with a dedicated lock for queue updates and polling. Also rework bt_accept_dequeue() to take temporary child references under the queue lock before dropping it and locking the child socket. Fixes: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Reported-by: Jann Horn Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Jiexun Wang Signed-off-by: Ren Wei Signed-off-by: Jiexun Wang Reviewed-by: Jann Horn Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/bluetooth.h | 1 + net/bluetooth/af_bluetooth.c | 87 +++++++++++++++++++++++++++++---------- 2 files changed, 66 insertions(+), 22 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 69eed69f7f26..3faea66b1979 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -398,6 +398,7 @@ void baswap(bdaddr_t *dst, const bdaddr_t *src); struct bt_sock { struct sock sk; struct list_head accept_q; + spinlock_t accept_q_lock; /* protects accept_q */ struct sock *parent; unsigned long flags; void (*skb_msg_name)(struct sk_buff *, void *, int *); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 33d053d63407..9d68dd86023c 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -154,6 +154,7 @@ struct sock *bt_sock_alloc(struct net *net, struct socket *sock, sock_init_data(sock, sk); INIT_LIST_HEAD(&bt_sk(sk)->accept_q); + spin_lock_init(&bt_sk(sk)->accept_q_lock); sock_reset_flag(sk, SOCK_ZAPPED); @@ -214,6 +215,7 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) { const struct cred *old_cred; struct pid *old_pid; + struct bt_sock *par = bt_sk(parent); BT_DBG("parent %p, sk %p", parent, sk); @@ -224,9 +226,13 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) else lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q); bt_sk(sk)->parent = parent; + spin_lock_bh(&par->accept_q_lock); + list_add_tail(&bt_sk(sk)->accept_q, &par->accept_q); + sk_acceptq_added(parent); + spin_unlock_bh(&par->accept_q_lock); + /* Copy credentials from parent since for incoming connections the * socket is allocated by the kernel. */ @@ -244,8 +250,6 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) bh_unlock_sock(sk); else release_sock(sk); - - sk_acceptq_added(parent); } EXPORT_SYMBOL(bt_accept_enqueue); @@ -254,45 +258,72 @@ EXPORT_SYMBOL(bt_accept_enqueue); */ void bt_accept_unlink(struct sock *sk) { + struct sock *parent = bt_sk(sk)->parent; + BT_DBG("sk %p state %d", sk, sk->sk_state); + spin_lock_bh(&bt_sk(parent)->accept_q_lock); list_del_init(&bt_sk(sk)->accept_q); - sk_acceptq_removed(bt_sk(sk)->parent); + sk_acceptq_removed(parent); + spin_unlock_bh(&bt_sk(parent)->accept_q_lock); bt_sk(sk)->parent = NULL; sock_put(sk); } EXPORT_SYMBOL(bt_accept_unlink); +static struct sock *bt_accept_get(struct sock *parent, struct sock *sk) +{ + struct bt_sock *bt = bt_sk(parent); + struct sock *next = NULL; + + /* accept_q is modified from child teardown paths too, so take a + * temporary reference before dropping the queue lock. + */ + spin_lock_bh(&bt->accept_q_lock); + + if (sk) { + if (bt_sk(sk)->parent != parent) + goto out; + + if (!list_is_last(&bt_sk(sk)->accept_q, &bt->accept_q)) { + next = &list_next_entry(bt_sk(sk), accept_q)->sk; + sock_hold(next); + } + } else if (!list_empty(&bt->accept_q)) { + next = &list_first_entry(&bt->accept_q, + struct bt_sock, accept_q)->sk; + sock_hold(next); + } + +out: + spin_unlock_bh(&bt->accept_q_lock); + return next; +} + struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) { - struct bt_sock *s, *n; - struct sock *sk; + struct sock *sk, *next; BT_DBG("parent %p", parent); restart: - list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { - sk = (struct sock *)s; - + for (sk = bt_accept_get(parent, NULL); sk; sk = next) { /* Prevent early freeing of sk due to unlink and sock_kill */ - sock_hold(sk); lock_sock(sk); /* Check sk has not already been unlinked via * bt_accept_unlink() due to serialisation caused by sk locking */ - if (!bt_sk(sk)->parent) { + if (bt_sk(sk)->parent != parent) { BT_DBG("sk %p, already unlinked", sk); release_sock(sk); sock_put(sk); - /* Restart the loop as sk is no longer in the list - * and also avoid a potential infinite loop because - * list_for_each_entry_safe() is not thread safe. - */ goto restart; } + next = bt_accept_get(parent, sk); + /* sk is safely in the parent list so reduce reference count */ sock_put(sk); @@ -310,6 +341,8 @@ restart: sock_graft(sk, newsock); release_sock(sk); + if (next) + sock_put(next); return sk; } @@ -518,18 +551,28 @@ EXPORT_SYMBOL(bt_sock_stream_recvmsg); static inline __poll_t bt_accept_poll(struct sock *parent) { - struct bt_sock *s, *n; + struct bt_sock *bt = bt_sk(parent); + struct bt_sock *s; struct sock *sk; + __poll_t mask = 0; + + spin_lock_bh(&bt->accept_q_lock); + list_for_each_entry(s, &bt->accept_q, accept_q) { + int state; - list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { sk = (struct sock *)s; - if (sk->sk_state == BT_CONNECTED || - (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) && - sk->sk_state == BT_CONNECT2)) - return EPOLLIN | EPOLLRDNORM; + state = READ_ONCE(sk->sk_state); + + if (state == BT_CONNECTED || + (test_bit(BT_SK_DEFER_SETUP, &bt->flags) && + state == BT_CONNECT2)) { + mask = EPOLLIN | EPOLLRDNORM; + break; + } } + spin_unlock_bh(&bt->accept_q_lock); - return 0; + return mask; } __poll_t bt_sock_poll(struct file *file, struct socket *sock, -- cgit v1.2.3 From e3ac0d9f1a205f33a43fba3b79ef74d2f604c78b Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Fri, 24 Apr 2026 22:24:29 +0300 Subject: Bluetooth: btmtk: accept too short WMT FUNC_CTRL events MT7925 (USB ID 0e8d:e025) on fw version 20260106153314 sends WMT FUNC_CTRL events that are missing the status field. Prior to commit 006b9943b982 ("Bluetooth: btmtk: validate WMT event SKB length before struct access") the status was read from out-of-bounds of SKB data, which usually would result to success with BTMTK_WMT_ON_UNDONE, although I don't know the intent here. The bounds check added in that commit returns with error instead, producing "Bluetooth: hci0: Failed to send wmt func ctrl (-22)" and makes the device unusable. Fix the regression by interpreting too short packet as status BTMTK_WMT_ON_UNDONE, which makes the device work normally again. Fixes: 634a4408c061 ("Bluetooth: btmtk: validate WMT event SKB length before struct access") Signed-off-by: Pauli Virtanen Tested-by: Mikhail Gavrilov # MT7922 (0489:e0e2) Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index f70c1b0f8990..a29f72216c34 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -719,8 +719,8 @@ static int btmtk_usb_hci_wmt_sync(struct hci_dev *hdev, case BTMTK_WMT_FUNC_CTRL: if (!skb_pull_data(data->evt_skb, sizeof(wmt_evt_funcc->status))) { - err = -EINVAL; - goto err_free_skb; + status = BTMTK_WMT_ON_UNDONE; + break; } wmt_evt_funcc = (struct btmtk_hci_wmt_evt_funcc *)wmt_evt; -- cgit v1.2.3 From 3374ef8cf99368a40f7efd51a2a375a4c5dc6f0d Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Mon, 11 May 2026 08:26:41 -0400 Subject: Bluetooth: L2CAP: ecred_reconfigure: send packed pdu, not stack pointer Commit 1c08108f3014 ("Bluetooth: L2CAP: Avoid -Wflex-array-member-not-at-end warnings") converted the on-stack request PDU in l2cap_ecred_reconfigure() from an explicit packed struct to DEFINE_RAW_FLEX(), but did not adjust the size and source-pointer arguments to l2cap_send_cmd(): - struct { - struct l2cap_ecred_reconf_req req; - __le16 scid; - } pdu; + DEFINE_RAW_FLEX(struct l2cap_ecred_reconf_req, pdu, scid, 1); ... l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_RECONF_REQ, sizeof(pdu), &pdu); After the conversion, DEFINE_RAW_FLEX() expands to declare an anonymous union pdu_u plus a local pointer "pdu" pointing at it. Therefore: - sizeof(pdu) is now sizeof(struct l2cap_ecred_reconf_req *) = 8 on 64-bit (4 on 32-bit), not the 6 bytes of (mtu, mps, scid[1]). - &pdu is the address of the local pointer's stack storage, not the address of the request payload. l2cap_send_cmd() forwards (data, count) to l2cap_build_cmd(), which calls skb_put_data(skb, data, count). The L2CAP_ECRED_RECONFIGURE_REQ packet body therefore contains 8 bytes copied from the kernel stack starting at &pdu -- the 8 bytes overlap the pdu pointer's value, leaking a kernel stack address to the paired Bluetooth peer. The intended (mtu, mps, scid) fields are not transmitted at all, so the peer rejects the request as malformed and the L2CAP_ECRED_RECONFIGURE feature itself has been broken for the local-side initiator since the introducing commit landed. The sibling site l2cap_ecred_conn_req() in the same commit was converted correctly (sizeof(*pdu) + len, pdu); only this site was missed. Restore the original semantics: pass the full flex-struct size via struct_size(pdu, scid, 1) and the pdu pointer (the struct address) as the source. Validated on a stock 7.0-based host kernel via the real call path: setsockopt(SOL_BLUETOOTH, BT_RCVMTU, ...) on a BT_CONNECTED L2CAP_MODE_EXT_FLOWCTL socket emits an L2CAP_ECRED_RECONFIGURE_REQ whose body is 8 bytes (the on-stack pdu local's value) rather than the expected 6. Three captures from fresh socket / fresh hciemu peer on the same host -- low bytes vary per call, high 0xffff confirms a kernel virtual address (KASLR-randomised stack slot, not a fixed string): RECONF_REQ body (ident=0x02 len=8): 42 fb 54 af 0e ca ff ff RECONF_REQ body (ident=0x02 len=8): 52 3d 2e af 0e ca ff ff RECONF_REQ body (ident=0x02 len=8): b2 fc 5b af 0e ca ff ff After this patch the body is 6 bytes carrying the expected little-endian (mtu, mps, scid). Cc: stable@vger.kernel.org Fixes: 1c08108f3014 ("Bluetooth: L2CAP: Avoid -Wflex-array-member-not-at-end warnings") Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 7701528f1167..fdccd62ccca8 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7274,7 +7274,7 @@ static void l2cap_ecred_reconfigure(struct l2cap_chan *chan) chan->ident = l2cap_get_ident(conn); l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_RECONF_REQ, - sizeof(pdu), &pdu); + struct_size(pdu, scid, 1), pdu); } int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu) -- cgit v1.2.3 From 375ba7484132662a4a8c7547d088fb6275c00282 Mon Sep 17 00:00:00 2001 From: Shuai Zhang Date: Mon, 11 May 2026 21:58:37 +0800 Subject: Bluetooth: hci_qca: Convert timeout from jiffies to ms Since the timer uses jiffies as its unit rather than ms, the timeout value must be converted from ms to jiffies when configuring the timer. Otherwise, the intended 8s timeout is incorrectly set to approximately 33s. To improve readability, embed msecs_to_jiffies() directly in the macro definitions and drop the _MS suffix from macros that now yield jiffies values: MEMDUMP_TIMEOUT, FW_DOWNLOAD_TIMEOUT, IBS_DISABLE_SSR_TIMEOUT, CMD_TRANS_TIMEOUT, and IBS_BTSOC_TX_IDLE_TIMEOUT. IBS_WAKE_RETRANS_TIMEOUT_MS and IBS_HOST_TX_IDLE_TIMEOUT_MS are intentionally left unchanged. Their values are stored in the struct fields wake_retrans and tx_idle_delay, which hold ms values at runtime and can be modified via debugfs. The msecs_to_jiffies() conversion happens at each call site against the field value, so it cannot be embedded in the macro. Wake timer depends on commit c347ca17d62a Cc: stable@vger.kernel.org Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory dump during SSR") Reviewed-by: Paul Menzel Acked-by: Bartosz Golaszewski Signed-off-by: Shuai Zhang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index cd1834246b47..ed280399bf47 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -48,13 +48,12 @@ #define HCI_MAX_IBS_SIZE 10 #define IBS_WAKE_RETRANS_TIMEOUT_MS 100 -#define IBS_BTSOC_TX_IDLE_TIMEOUT_MS 200 +#define IBS_BTSOC_TX_IDLE_TIMEOUT msecs_to_jiffies(200) #define IBS_HOST_TX_IDLE_TIMEOUT_MS 2000 -#define CMD_TRANS_TIMEOUT_MS 100 -#define MEMDUMP_TIMEOUT_MS 8000 -#define IBS_DISABLE_SSR_TIMEOUT_MS \ - (MEMDUMP_TIMEOUT_MS + FW_DOWNLOAD_TIMEOUT_MS) -#define FW_DOWNLOAD_TIMEOUT_MS 3000 +#define CMD_TRANS_TIMEOUT msecs_to_jiffies(100) +#define MEMDUMP_TIMEOUT msecs_to_jiffies(8000) +#define FW_DOWNLOAD_TIMEOUT msecs_to_jiffies(3000) +#define IBS_DISABLE_SSR_TIMEOUT (MEMDUMP_TIMEOUT + FW_DOWNLOAD_TIMEOUT) /* susclk rate */ #define SUSCLK_RATE_32KHZ 32768 @@ -1096,7 +1095,7 @@ static void qca_controller_memdump(struct work_struct *work) queue_delayed_work(qca->workqueue, &qca->ctrl_memdump_timeout, - msecs_to_jiffies(MEMDUMP_TIMEOUT_MS)); + MEMDUMP_TIMEOUT); skb_pull(skb, sizeof(qca_memdump->ram_dump_size)); qca_memdump->current_seq_no = 0; qca_memdump->received_dump = 0; @@ -1369,7 +1368,7 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate) if (hu->serdev) serdev_device_wait_until_sent(hu->serdev, - msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS)); + CMD_TRANS_TIMEOUT); /* Give the controller time to process the request */ switch (qca_soc_type(hu)) { @@ -1401,8 +1400,8 @@ static inline void host_set_baudrate(struct hci_uart *hu, unsigned int speed) static int qca_send_power_pulse(struct hci_uart *hu, bool on) { + int timeout = CMD_TRANS_TIMEOUT; int ret; - int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS); u8 cmd = on ? QCA_WCN3990_POWERON_PULSE : QCA_WCN3990_POWEROFF_PULSE; /* These power pulses are single byte command which are sent @@ -1607,7 +1606,7 @@ static void qca_wait_for_dump_collection(struct hci_dev *hdev) struct qca_data *qca = hu->priv; wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION, - TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS); + TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT); clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); } @@ -2591,7 +2590,7 @@ static void qca_serdev_remove(struct serdev_device *serdev) static void qca_serdev_shutdown(struct serdev_device *serdev) { int ret; - int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS); + int timeout = CMD_TRANS_TIMEOUT; struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); struct hci_uart *hu = &qcadev->serdev_hu; struct hci_dev *hdev = hu->hdev; @@ -2648,7 +2647,7 @@ static int __maybe_unused qca_suspend(struct device *dev) bool tx_pending = false; int ret = 0; u8 cmd; - u32 wait_timeout = 0; + unsigned long wait_timeout = 0; set_bit(QCA_SUSPENDING, &qca->flags); @@ -2669,15 +2668,15 @@ static int __maybe_unused qca_suspend(struct device *dev) if (test_bit(QCA_IBS_DISABLED, &qca->flags) || test_bit(QCA_SSR_TRIGGERED, &qca->flags)) { wait_timeout = test_bit(QCA_SSR_TRIGGERED, &qca->flags) ? - IBS_DISABLE_SSR_TIMEOUT_MS : - FW_DOWNLOAD_TIMEOUT_MS; + IBS_DISABLE_SSR_TIMEOUT : + FW_DOWNLOAD_TIMEOUT; /* QCA_IBS_DISABLED flag is set to true, During FW download * and during memory dump collection. It is reset to false, * After FW download complete. */ wait_on_bit_timeout(&qca->flags, QCA_IBS_DISABLED, - TASK_UNINTERRUPTIBLE, msecs_to_jiffies(wait_timeout)); + TASK_UNINTERRUPTIBLE, wait_timeout); if (test_bit(QCA_IBS_DISABLED, &qca->flags)) { bt_dev_err(hu->hdev, "SSR or FW download time out"); @@ -2729,7 +2728,7 @@ static int __maybe_unused qca_suspend(struct device *dev) if (tx_pending) { serdev_device_wait_until_sent(hu->serdev, - msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS)); + CMD_TRANS_TIMEOUT); serial_clock_vote(HCI_IBS_TX_VOTE_CLOCK_OFF, hu); } @@ -2738,7 +2737,7 @@ static int __maybe_unused qca_suspend(struct device *dev) */ ret = wait_event_interruptible_timeout(qca->suspend_wait_q, qca->rx_ibs_state == HCI_IBS_RX_ASLEEP, - msecs_to_jiffies(IBS_BTSOC_TX_IDLE_TIMEOUT_MS)); + IBS_BTSOC_TX_IDLE_TIMEOUT); if (ret == 0) { ret = -ETIMEDOUT; goto error; -- cgit v1.2.3 From 7f114497784661b887f1097c440221b18e2914e9 Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Wed, 13 May 2026 13:10:49 +0200 Subject: selftests: ovpn: reduce remaining ping flood counts Commit 201ba706318d ("selftests: ovpn: reduce ping count in test.sh") lowered the baseline traffic flood ping count to avoid flakes on slower CI instances, however some instances were left out. Apply the same limit to the remaining ovpn selftest flood pings that still request 500 packets. Fixes: 201ba706318d ("selftests: ovpn: reduce ping count in test.sh") Signed-off-by: Ralf Lici Signed-off-by: Antonio Quartulli --- tools/testing/selftests/net/ovpn/test-close-socket.sh | 2 +- tools/testing/selftests/net/ovpn/test-mark.sh | 6 +++--- tools/testing/selftests/net/ovpn/test.sh | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh index af1532b4d2da..ec9a51bbf3c9 100755 --- a/tools/testing/selftests/net/ovpn/test-close-socket.sh +++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh @@ -53,7 +53,7 @@ ovpn_run_ping_traffic() { for p in $(seq 1 ${OVPN_NUM_PEERS}); do ovpn_cmd_ok "send ping traffic to peer ${p}" \ - ip netns exec ovpn_peer0 ping -qfc 500 -w 3 \ + ip netns exec ovpn_peer0 ping -qfc 100 -w 3 \ 5.5.5.$((p + 1)) done } diff --git a/tools/testing/selftests/net/ovpn/test-mark.sh b/tools/testing/selftests/net/ovpn/test-mark.sh index 5a8f47554286..7c1d56e9c525 100755 --- a/tools/testing/selftests/net/ovpn/test-mark.sh +++ b/tools/testing/selftests/net/ovpn/test-mark.sh @@ -66,7 +66,7 @@ ovpn_mark_run_baseline_traffic() { for p in $(seq 1 3); do ovpn_cmd_ok "send baseline traffic to peer ${p}" \ - ip netns exec ovpn_peer0 ping -qfc 500 -w 3 \ + ip netns exec ovpn_peer0 ping -qfc 100 -w 3 \ 5.5.5.$((p + 1)) done } @@ -101,7 +101,7 @@ ovpn_mark_verify_drop_traffic() { local total_count for p in $(seq 1 3); do - if ping_output=$(ip netns exec ovpn_peer0 ping -qfc 500 -w 1 \ + if ping_output=$(ip netns exec ovpn_peer0 ping -qfc 100 -w 1 \ 5.5.5.$((p + 1)) 2>&1); then printf '%s\n' "expected ping to peer ${p} to fail \ after nft drop rule" @@ -144,7 +144,7 @@ ovpn_mark_verify_traffic_recovery() { sleep 1 for p in $(seq 1 3); do ovpn_cmd_ok "send recovery traffic to peer ${p}" \ - ip netns exec ovpn_peer0 ping -qfc 500 -w 3 \ + ip netns exec ovpn_peer0 ping -qfc 100 -w 3 \ 5.5.5.$((p + 1)) done } diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh index c06e3135fbef..9b5610837032 100755 --- a/tools/testing/selftests/net/ovpn/test.sh +++ b/tools/testing/selftests/net/ovpn/test.sh @@ -110,7 +110,7 @@ ovpn_run_basic_traffic() { ovpn_run_lan_traffic() { ovpn_cmd_ok "ping LAN behind peer1" \ - ip netns exec ovpn_peer0 ping -qfc 500 -w 3 "${OVPN_LAN_IP}" + ip netns exec ovpn_peer0 ping -qfc 100 -w 3 "${OVPN_LAN_IP}" } ovpn_run_float_mode() { @@ -127,7 +127,7 @@ ovpn_run_float_mode() { for p in $(seq 1 ${OVPN_NUM_PEERS}); do peer_ns="ovpn_peer${p}" ovpn_cmd_ok "ping tunnel after float peer ${p}" \ - ip netns exec "${peer_ns}" ping -qfc 500 -w 3 5.5.5.1 + ip netns exec "${peer_ns}" ping -qfc 100 -w 3 5.5.5.1 done } -- cgit v1.2.3 From 775d8d7ad02aa345e1588424a6a8b9ae49fb9012 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Wed, 13 May 2026 11:55:20 +0100 Subject: ovpn: tcp - use cached peer pointer in ovpn_tcp_close() ovpn_tcp_close() loads the ovpn_socket via rcu_dereference_sk_user_data() under rcu_read_lock(), takes a reference on sock->peer, caches the peer pointer in a local, and drops the read lock. It then passes sock->peer (rather than the cached local) to ovpn_peer_del(), re-dereferencing the ovpn_socket after the RCU read section has ended. Unlike ovpn_tcp_sendmsg(), which uses the same "load under RCU, use after unlock" pattern but is protected by lock_sock() held across the function, ovpn_tcp_close() runs without the socket lock: inet_release() invokes sk_prot->close() without taking lock_sock first. ovpn_socket_release() can therefore complete its kref_put -> detach -> synchronize_rcu -> kfree(sock) sequence concurrently, in the window after ovpn_tcp_close() drops rcu_read_lock() but before it dereferences sock->peer. The synchronize_rcu() in ovpn_socket_release() protects readers that use the dereferenced pointer inside the RCU read section, not those that escape the pointer to a local and use it afterwards. A reproducer follows the pattern of commit 94560267d6c4 ("ovpn: tcp - don't deref NULL sk_socket member after tcp_close()"): trigger a peer removal (keepalive expiration or netlink OVPN_CMD_DEL_PEER) at the same moment userspace closes the TCP fd. That commit fixed the detach-side of the same race window; this one fixes the close-side at a different victim. Tighten the entry block to read sock->peer exactly once into the cached peer local, and route all subsequent uses (the hold check, the ovpn_peer_del() call, and the prot->close() invocation) through that local. sock->peer is only ever written once in ovpn_socket_new() under lock_sock(), before rcu_assign_sk_user_data() publishes the ovpn_socket, and is never reassigned afterwards - but the previous multi-read pattern made that invariant implicit rather than explicit. The same multi-read shape exists in ovpn_tcp_recvmsg(), ovpn_tcp_sendmsg(), ovpn_tcp_data_ready() and ovpn_tcp_write_space(); those will be cleaned up via a dedicated helper in a follow-up net-next series. Fixes: 11851cbd60ea ("ovpn: implement TCP transport") Reviewed-by: Sabrina Dubroca Assisted-by: Claude:claude-opus-4-7 Signed-off-by: David Carlier Signed-off-by: Antonio Quartulli --- drivers/net/ovpn/tcp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c index 65054cc84be5..82809b016f0a 100644 --- a/drivers/net/ovpn/tcp.c +++ b/drivers/net/ovpn/tcp.c @@ -581,14 +581,19 @@ static void ovpn_tcp_close(struct sock *sk, long timeout) rcu_read_lock(); sock = rcu_dereference_sk_user_data(sk); - if (!sock || !sock->peer || !ovpn_peer_hold(sock->peer)) { + if (!sock) { rcu_read_unlock(); return; } + peer = sock->peer; + if (!peer || !ovpn_peer_hold(peer)) { + rcu_read_unlock(); + return; + } rcu_read_unlock(); - ovpn_peer_del(sock->peer, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT); + ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT); peer->tcp.sk_cb.prot->close(sk, timeout); ovpn_peer_put(peer); } -- cgit v1.2.3 From 1fef6614673ff0846d30acdeeaf3cf98bb5f6116 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Wed, 13 May 2026 11:55:21 +0100 Subject: ovpn: respect peer refcount in CMD_NEW_PEER error path ovpn_nl_peer_new_doit()'s error path calls ovpn_peer_release() directly rather than ovpn_peer_put(), bypassing the kref. The accompanying comment ("peer was not yet hashed, thus it is not used in any context") holds for UDP but not for TCP. For UDP, the ovpn_socket union uses the .ovpn arm and never points back at a peer; UDP encap_recv looks up peers via the not-yet-populated hashtables, so the new peer is unreachable until ovpn_peer_add() publishes it. For TCP, ovpn_socket_new() sets ovpn_sock->peer and ovpn_tcp_socket_attach() publishes ovpn_sock via rcu_assign_sk_user_data(). From that moment until ovpn_socket_release() detaches in the error path, the TCP fd is fully wired: userspace recvmsg / sendmsg / close / poll on the fd, as well as the strparser-driven ovpn_tcp_rcv() path, can reach the peer through sk_user_data -> ovpn_sock->peer and bump its refcount via ovpn_peer_hold(). ovpn_tcp_socket_wait_finish() (called inside ovpn_socket_release()) drains strparser and the tx work, but does not synchronize with userspace syscall callers that already hold a peer reference. If ovpn_nl_peer_modify() or ovpn_peer_add() returns an error while such a caller is in flight - notably an ovpn_tcp_recvmsg() blocked in __skb_recv_datagram() on peer->tcp.user_queue - the direct ovpn_peer_release() destroys the peer while the caller still holds the reference, and the eventual ovpn_peer_put() from that caller operates on freed memory. Replace the direct destructor call with ovpn_peer_put() so the kref correctly defers destruction until the last reference is dropped. In the common case where no concurrent user is present, behaviour is unchanged: the kref hits zero immediately and ovpn_peer_release_kref() runs the same destructor. With this conversion ovpn_peer_release() has no callers outside peer.c - ovpn_peer_release_kref() in the same translation unit is the only remaining user - so make it static and drop its declaration from peer.h. Fixes: 11851cbd60ea ("ovpn: implement TCP transport") Reviewed-by: Sabrina Dubroca Assisted-by: Claude:claude-opus-4-7 Signed-off-by: David Carlier Signed-off-by: Antonio Quartulli --- drivers/net/ovpn/netlink.c | 8 +++++--- drivers/net/ovpn/peer.c | 2 +- drivers/net/ovpn/peer.h | 1 - 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c index 291e2e5bb450..4c66c1ec497e 100644 --- a/drivers/net/ovpn/netlink.c +++ b/drivers/net/ovpn/netlink.c @@ -462,10 +462,12 @@ int ovpn_nl_peer_new_doit(struct sk_buff *skb, struct genl_info *info) sock_release: ovpn_socket_release(peer); peer_release: - /* release right away because peer was not yet hashed, thus it is not - * used in any context + /* For UDP, the peer is unreachable until added to the hashtables, so + * dropping the initial reference is enough. For TCP, the peer may be + * concurrently reachable via sk_user_data->peer until + * ovpn_socket_release() detaches; rely on the refcount. */ - ovpn_peer_release(peer); + ovpn_peer_put(peer); return ret; } diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c index c02dfab51a6e..fb10d1fea940 100644 --- a/drivers/net/ovpn/peer.c +++ b/drivers/net/ovpn/peer.c @@ -354,7 +354,7 @@ static void ovpn_peer_release_rcu(struct rcu_head *head) * ovpn_peer_release - release peer private members * @peer: the peer to release */ -void ovpn_peer_release(struct ovpn_peer *peer) +static void ovpn_peer_release(struct ovpn_peer *peer) { ovpn_crypto_state_release(&peer->crypto); spin_lock_bh(&peer->lock); diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h index 328401570cba..86c8cffada6d 100644 --- a/drivers/net/ovpn/peer.h +++ b/drivers/net/ovpn/peer.h @@ -127,7 +127,6 @@ static inline bool ovpn_peer_hold(struct ovpn_peer *peer) return kref_get_unless_zero(&peer->refcount); } -void ovpn_peer_release(struct ovpn_peer *peer); void ovpn_peer_release_kref(struct kref *kref); /** -- cgit v1.2.3 From 982422b11e6f95f766a8cd2c2b1cbdb77e234a61 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 17 Mar 2026 14:47:56 +0100 Subject: ovpn: fix race between deleting interface and adding new peer While deleting an existing ovpn interface, there is a very narrow window where adding a new peer via netlink may cause the netdevice to hang and prevent its unregistration. It may happen during ovpn_dellink(), when all existing peers are freed and the device is queued for deregistration, but a CMD_PEER_NEW message comes in adding a new peer that takes again a reference to the netdev. At this point there is no way to release the device because we are under the assumption that all peers were already released. Fix the race condition by releasing all peers in ndo_uninit(), when the netdevice has already been removed from the netdev list. Also ovpn_peer_add() has now an extra check that forces the function to bail out if the device reg_state is not REGISTERED. This way any incoming CMD_PEER_NEW racing with the interface deletion routine will simply stop before adding the peer. Note that the above check happens while holding the netdev_lock to prevent racing netdev state changes. ovpn_dellink() is now empty and can be removed. Reported-by: Hyunwoo Kim Closes: https://lore.kernel.org/netdev/aaVgJ16edTfQkYbx@v4bel/ Suggested-by: Sabrina Dubroca Fixes: 80747caef33d ("ovpn: introduce the ovpn_peer object") Reviewed-by: Sabrina Dubroca Signed-off-by: Antonio Quartulli --- drivers/net/ovpn/main.c | 12 ++---------- drivers/net/ovpn/peer.c | 21 ++++++++++++++++++--- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/net/ovpn/main.c b/drivers/net/ovpn/main.c index 2e0420febda0..9993c1dfe471 100644 --- a/drivers/net/ovpn/main.c +++ b/drivers/net/ovpn/main.c @@ -92,6 +92,8 @@ static void ovpn_net_uninit(struct net_device *dev) { struct ovpn_priv *ovpn = netdev_priv(dev); + disable_delayed_work_sync(&ovpn->keepalive_work); + ovpn_peers_free(ovpn, NULL, OVPN_DEL_PEER_REASON_TEARDOWN); gro_cells_destroy(&ovpn->gro_cells); } @@ -208,15 +210,6 @@ static int ovpn_newlink(struct net_device *dev, return register_netdevice(dev); } -static void ovpn_dellink(struct net_device *dev, struct list_head *head) -{ - struct ovpn_priv *ovpn = netdev_priv(dev); - - cancel_delayed_work_sync(&ovpn->keepalive_work); - ovpn_peers_free(ovpn, NULL, OVPN_DEL_PEER_REASON_TEARDOWN); - unregister_netdevice_queue(dev, head); -} - static int ovpn_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ovpn_priv *ovpn = netdev_priv(dev); @@ -235,7 +228,6 @@ static struct rtnl_link_ops ovpn_link_ops = { .policy = ovpn_policy, .maxtype = IFLA_OVPN_MAX, .newlink = ovpn_newlink, - .dellink = ovpn_dellink, .fill_info = ovpn_fill_info, }; diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c index fb10d1fea940..a09d61296425 100644 --- a/drivers/net/ovpn/peer.c +++ b/drivers/net/ovpn/peer.c @@ -1034,14 +1034,29 @@ static int ovpn_peer_add_p2p(struct ovpn_priv *ovpn, struct ovpn_peer *peer) */ int ovpn_peer_add(struct ovpn_priv *ovpn, struct ovpn_peer *peer) { + int ret = -ENODEV; + + /* Prevent adding new peers while destroying the ovpn interface. + * Failing to do so would end up holding the device reference + * endlessly hostage of the new peer object with no chance of + * release.. + */ + netdev_lock(ovpn->dev); + if (ovpn->dev->reg_state != NETREG_REGISTERED) + goto out; + switch (ovpn->mode) { case OVPN_MODE_MP: - return ovpn_peer_add_mp(ovpn, peer); + ret = ovpn_peer_add_mp(ovpn, peer); + break; case OVPN_MODE_P2P: - return ovpn_peer_add_p2p(ovpn, peer); + ret = ovpn_peer_add_p2p(ovpn, peer); + break; } +out: + netdev_unlock(ovpn->dev); - return -EOPNOTSUPP; + return ret; } /** -- cgit v1.2.3 From 7d9a7f1f96cd617ee9e75bb22217c709038e26b8 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 14 May 2026 21:14:18 +0800 Subject: smb/client: fix possible infinite loop and oob read in symlink_data() On 32-bit architectures, the infinite loop is as follows: len = p->ErrorDataLength == 0xfffffff8 u8 *next = p->ErrorContextData + len next == p On 32-bit architectures, the out-of-bounds read is as follows: len = p->ErrorDataLength == 0xfffffff0 u8 *next = p->ErrorContextData + len next == (u8 *)p - 8 Reported-by: ChenXiaoSong Fixes: 76894f3e2f71 ("cifs: improve symlink handling for smb2+") Cc: stable@vger.kernel.org Signed-off-by: Ye Bin Reviewed-by: ChenXiaoSong Signed-off-by: Steve French --- fs/smb/client/smb2file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c index b292aa94a593..6860eff31693 100644 --- a/fs/smb/client/smb2file.c +++ b/fs/smb/client/smb2file.c @@ -49,6 +49,9 @@ static struct smb2_symlink_err_rsp *symlink_data(const struct kvec *iov) __func__, le32_to_cpu(p->ErrorId)); len = ALIGN(le32_to_cpu(p->ErrorDataLength), 8); + if (len > end - ((u8 *)p + sizeof(*p))) + return ERR_PTR(-EINVAL); + p = (struct smb2_error_context_rsp *)(p->ErrorContextData + len); } } else if (le32_to_cpu(err->ByteCount) >= sizeof(*sym) && -- cgit v1.2.3 From a6ab75639e23169a741b0b2e12191fd8acb32c73 Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Thu, 14 May 2026 21:16:01 +0800 Subject: nvme-apple: Reset q->sq_tail during queue init Fixes a "duplicate tag error for tag 0" firmware crash during controller reset while setting up a queue on Apple A11 / T8015 caused by stale entries in the submission queue due to an invalid sq_tail offset after reset. Fixes: 04d8ecf37b5e ("nvme: apple: Add Apple A11 support") Cc: stable@vger.kernel.org Suggested-by: Yuriy Havrylyuk Reviewed-by: Sven Peter Signed-off-by: Nick Chan Signed-off-by: Keith Busch --- drivers/nvme/host/apple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 423c9c628e7b..c692fc73babf 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1009,6 +1009,7 @@ static void apple_nvme_init_queue(struct apple_nvme_queue *q) unsigned int depth = apple_nvme_queue_depth(q); struct apple_nvme *anv = queue_to_apple_nvme(q); + q->sq_tail = 0; q->cq_head = 0; q->cq_phase = 1; if (anv->hw->has_lsq_nvmmu) -- cgit v1.2.3 From ab26dfeba278b0efbcea012f1698cf524d9b5695 Mon Sep 17 00:00:00 2001 From: DaeMyung Kang Date: Wed, 13 May 2026 22:26:22 +0900 Subject: cifs: client: stage smb3_reconfigure() updates and restore ctx on failure smb3_reconfigure() moves strings out of cifs_sb->ctx before the multichannel update, so a later failure can leave the live context with NULL strings or options that do not match the session. Stage the new ctx separately, commit it only on success, and restore the snapshot on failure. Also make smb3_sync_session_ctx_passwords() all-or-nothing. Commit session passwords before channel updates so newly added channels authenticate with the staged credentials. Fixes: ef529f655a2c ("cifs: client: allow changing multichannel mount options on remount") Reported-by: RAJASI MANDAL Closes: https://lore.kernel.org/lkml/CAEY6_V1+dzW3OD5zqXhsWyXwrDTrg5tAMGZ1AJ7_GAuRE+aevA@mail.gmail.com/ Link: https://lore.kernel.org/lkml/xkr2dlvgibq5j6gkcxd3yhhnj4atgxw2uy4eug2pxm7wy7nbms@iq6cf5taa65v/ Reviewed-by: Henrique Carvalho Signed-off-by: DaeMyung Kang Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 161 ++++++++++++++++++++++++++++++--------------- 1 file changed, 108 insertions(+), 53 deletions(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index b63ec7ab6e51..4c8b1b9ade8b 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -736,7 +736,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, static int smb3_fs_context_parse_monolithic(struct fs_context *fc, void *data); static int smb3_get_tree(struct fs_context *fc); -static void smb3_sync_ses_chan_max(struct cifs_ses *ses, unsigned int max_channels); +static void smb3_sync_ses_chan_max(struct cifs_ses *ses, size_t max_channels); static int smb3_reconfigure(struct fs_context *fc); static const struct fs_context_operations smb3_fs_context_ops = { @@ -1010,25 +1010,34 @@ do { \ int smb3_sync_session_ctx_passwords(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) { + char *password = NULL, *password2 = NULL; + if (ses->password && cifs_sb->ctx->password && strcmp(ses->password, cifs_sb->ctx->password)) { - kfree_sensitive(cifs_sb->ctx->password); - cifs_sb->ctx->password = kstrdup(ses->password, GFP_KERNEL); - if (!cifs_sb->ctx->password) + password = kstrdup(ses->password, GFP_KERNEL); + if (!password) return -ENOMEM; } if (ses->password2 && cifs_sb->ctx->password2 && strcmp(ses->password2, cifs_sb->ctx->password2)) { - kfree_sensitive(cifs_sb->ctx->password2); - cifs_sb->ctx->password2 = kstrdup(ses->password2, GFP_KERNEL); - if (!cifs_sb->ctx->password2) { - kfree_sensitive(cifs_sb->ctx->password); - cifs_sb->ctx->password = NULL; + password2 = kstrdup(ses->password2, GFP_KERNEL); + if (!password2) { + kfree_sensitive(password); return -ENOMEM; } } + + if (password) { + kfree_sensitive(cifs_sb->ctx->password); + cifs_sb->ctx->password = password; + } + if (password2) { + kfree_sensitive(cifs_sb->ctx->password2); + cifs_sb->ctx->password2 = password2; + } + return 0; } @@ -1041,7 +1050,7 @@ int smb3_sync_session_ctx_passwords(struct cifs_sb_info *cifs_sb, struct cifs_se * with the session's channel lock. This should be called whenever the maximum * allowed channels for a session changes (e.g., after a remount or reconfigure). */ -static void smb3_sync_ses_chan_max(struct cifs_ses *ses, unsigned int max_channels) +static void smb3_sync_ses_chan_max(struct cifs_ses *ses, size_t max_channels) { spin_lock(&ses->chan_lock); ses->chan_max = max_channels; @@ -1051,12 +1060,15 @@ static void smb3_sync_ses_chan_max(struct cifs_ses *ses, unsigned int max_channe static int smb3_reconfigure(struct fs_context *fc) { struct smb3_fs_context *ctx = smb3_fc2context(fc); + struct smb3_fs_context *new_ctx = NULL; + struct smb3_fs_context *old_ctx = NULL; struct dentry *root = fc->root; struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb); struct cifs_ses *ses = cifs_sb_master_tcon(cifs_sb)->ses; unsigned int rsize = ctx->rsize, wsize = ctx->wsize; char *new_password = NULL, *new_password2 = NULL; bool need_recon = false; + bool need_mchan_update; int rc; if (ses->expired_pwd) @@ -1066,6 +1078,16 @@ static int smb3_reconfigure(struct fs_context *fc) if (rc) return rc; + old_ctx = kzalloc_obj(*old_ctx); + if (!old_ctx) + return -ENOMEM; + + rc = smb3_fs_context_dup(old_ctx, cifs_sb->ctx); + if (rc) { + kfree(old_ctx); + return rc; + } + /* * We can not change UNC/username/password/domainname/ * workstation_name/nodename/iocharset @@ -1075,16 +1097,22 @@ static int smb3_reconfigure(struct fs_context *fc) STEAL_STRING(cifs_sb, ctx, UNC); STEAL_STRING(cifs_sb, ctx, source); STEAL_STRING(cifs_sb, ctx, username); + STEAL_STRING(cifs_sb, ctx, domainname); + STEAL_STRING(cifs_sb, ctx, nodename); + STEAL_STRING(cifs_sb, ctx, iocharset); - if (need_recon == false) + if (!need_recon) { STEAL_STRING_SENSITIVE(cifs_sb, ctx, password); - else { + } else { if (ctx->password) { new_password = kstrdup(ctx->password, GFP_KERNEL); - if (!new_password) - return -ENOMEM; - } else + if (!new_password) { + rc = -ENOMEM; + goto restore_ctx; + } + } else { STEAL_STRING_SENSITIVE(cifs_sb, ctx, password); + } } /* @@ -1094,11 +1122,29 @@ static int smb3_reconfigure(struct fs_context *fc) if (ctx->password2) { new_password2 = kstrdup(ctx->password2, GFP_KERNEL); if (!new_password2) { - kfree_sensitive(new_password); - return -ENOMEM; + rc = -ENOMEM; + goto restore_ctx; } - } else + } else { STEAL_STRING_SENSITIVE(cifs_sb, ctx, password2); + } + + /* if rsize or wsize not passed in on remount, use previous values */ + ctx->rsize = rsize ? CIFS_ALIGN_RSIZE(fc, rsize) : cifs_sb->ctx->rsize; + ctx->wsize = wsize ? CIFS_ALIGN_WSIZE(fc, wsize) : cifs_sb->ctx->wsize; + + new_ctx = kzalloc_obj(*new_ctx); + if (!new_ctx) { + rc = -ENOMEM; + goto restore_ctx; + } + + rc = smb3_fs_context_dup(new_ctx, ctx); + if (rc) + goto restore_ctx; + + need_mchan_update = ctx->multichannel != cifs_sb->ctx->multichannel || + ctx->max_channels != cifs_sb->ctx->max_channels; /* * we may update the passwords in the ses struct below. Make sure we do @@ -1109,54 +1155,55 @@ static int smb3_reconfigure(struct fs_context *fc) /* * smb2_reconnect may swap password and password2 in case session setup * failed. First get ctx passwords in sync with ses passwords. It should - * be okay to do this even if this function were to return an error at a - * later stage + * be done before committing new passwords. */ rc = smb3_sync_session_ctx_passwords(cifs_sb, ses); if (rc) { mutex_unlock(&ses->session_mutex); - kfree_sensitive(new_password); - kfree_sensitive(new_password2); - return rc; + goto cleanup_new_ctx; + } + + /* + * If multichannel or max_channels has changed, update the session's channels accordingly. + * This may add or remove channels to match the new configuration. + */ + if (need_mchan_update) { + /* Prevent concurrent scaling operations */ + spin_lock(&ses->ses_lock); + if (ses->flags & CIFS_SES_FLAG_SCALE_CHANNELS) { + spin_unlock(&ses->ses_lock); + mutex_unlock(&ses->session_mutex); + rc = -EINVAL; + goto cleanup_new_ctx; + } + ses->flags |= CIFS_SES_FLAG_SCALE_CHANNELS; + spin_unlock(&ses->ses_lock); } /* - * now that allocations for passwords are done, commit them + * Commit session passwords before any channel work so newly added + * channels authenticate with the new credentials. */ if (new_password) { kfree_sensitive(ses->password); ses->password = new_password; + new_password = NULL; } if (new_password2) { kfree_sensitive(ses->password2); ses->password2 = new_password2; + new_password2 = NULL; } - /* - * If multichannel or max_channels has changed, update the session's channels accordingly. - * This may add or remove channels to match the new configuration. - */ - if ((ctx->multichannel != cifs_sb->ctx->multichannel) || - (ctx->max_channels != cifs_sb->ctx->max_channels)) { - + if (need_mchan_update) { /* Synchronize ses->chan_max with the new mount context */ smb3_sync_ses_chan_max(ses, ctx->max_channels); - /* Now update the session's channels to match the new configuration */ - /* Prevent concurrent scaling operations */ - spin_lock(&ses->ses_lock); - if (ses->flags & CIFS_SES_FLAG_SCALE_CHANNELS) { - spin_unlock(&ses->ses_lock); - mutex_unlock(&ses->session_mutex); - return -EINVAL; - } - ses->flags |= CIFS_SES_FLAG_SCALE_CHANNELS; - spin_unlock(&ses->ses_lock); mutex_unlock(&ses->session_mutex); - rc = smb3_update_ses_channels(ses, ses->server, - false /* from_reconnect */, - false /* disable_mchan */); + smb3_update_ses_channels(ses, ses->server, + false /* from_reconnect */, + false /* disable_mchan */); /* Clear scaling flag after operation */ spin_lock(&ses->ses_lock); @@ -1166,22 +1213,30 @@ static int smb3_reconfigure(struct fs_context *fc) mutex_unlock(&ses->session_mutex); } - STEAL_STRING(cifs_sb, ctx, domainname); - STEAL_STRING(cifs_sb, ctx, nodename); - STEAL_STRING(cifs_sb, ctx, iocharset); - - /* if rsize or wsize not passed in on remount, use previous values */ - ctx->rsize = rsize ? CIFS_ALIGN_RSIZE(fc, rsize) : cifs_sb->ctx->rsize; - ctx->wsize = wsize ? CIFS_ALIGN_WSIZE(fc, wsize) : cifs_sb->ctx->wsize; - smb3_cleanup_fs_context_contents(cifs_sb->ctx); - rc = smb3_fs_context_dup(cifs_sb->ctx, ctx); + memcpy(cifs_sb->ctx, new_ctx, sizeof(*new_ctx)); + kfree(new_ctx); + new_ctx = NULL; + smb3_cleanup_fs_context(old_ctx); + old_ctx = NULL; smb3_update_mnt_flags(cifs_sb); #ifdef CONFIG_CIFS_DFS_UPCALL if (!rc) rc = dfs_cache_remount_fs(cifs_sb); #endif + return rc; + +cleanup_new_ctx: + smb3_cleanup_fs_context_contents(new_ctx); +restore_ctx: + kfree(new_ctx); + kfree_sensitive(new_password); + kfree_sensitive(new_password2); + smb3_cleanup_fs_context_contents(cifs_sb->ctx); + memcpy(cifs_sb->ctx, old_ctx, sizeof(*old_ctx)); + kfree(old_ctx); + return rc; } -- cgit v1.2.3 From 593980175389a05793f6060aa20e626330960395 Mon Sep 17 00:00:00 2001 From: Guannan Wang Date: Thu, 14 May 2026 15:44:54 +0800 Subject: bpf: Use array_map_meta_equal for percpu array inner map replacement percpu_array_map_ops.map_meta_equal points to the generic bpf_map_meta_equal(), which does not compare max_entries. When a percpu array serves as an inner map, replacing it with one that has fewer max_entries bypasses the check. Since percpu_array_map_gen_lookup() inlines the original template's index_mask as a JIT immediate, a lookup on the replacement map can access pptrs[] out of bounds. Point percpu_array_map_ops.map_meta_equal to array_map_meta_equal(), which already enforces the max_entries equality check. Add a selftest to verify that replacing a percpu array inner map with a differently-sized one is rejected. Fixes: db69718b8efa ("bpf: inline bpf_map_lookup_elem() for PERCPU_ARRAY maps") Signed-off-by: Guannan Wang Acked-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20260514074454.77491-1-wgnbuaa@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/arraymap.c | 2 +- .../bpf/prog_tests/percpu_array_inner_map.c | 57 ++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/percpu_array_inner_map.c diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 5e25e0353509..dfb2110ab733 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -827,7 +827,7 @@ const struct bpf_map_ops array_map_ops = { }; const struct bpf_map_ops percpu_array_map_ops = { - .map_meta_equal = bpf_map_meta_equal, + .map_meta_equal = array_map_meta_equal, .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = array_map_free, diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_array_inner_map.c b/tools/testing/selftests/bpf/prog_tests/percpu_array_inner_map.c new file mode 100644 index 000000000000..2a8b2381306b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/percpu_array_inner_map.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +/* + * Test that replacing an inner percpu array map with one that has different + * max_entries is rejected. percpu_array_map_gen_lookup() inlines the + * template's index_mask, so allowing a smaller replacement would cause OOB. + */ +void test_percpu_array_inner_map(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts); + int outer_fd, tmpl_fd, good_fd, bad_fd, err; + int zero = 0; + + /* Create template: percpu array with 8 entries */ + tmpl_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, "tmpl", + sizeof(int), sizeof(long), 8, NULL); + if (!ASSERT_OK_FD(tmpl_fd, "create_tmpl")) + return; + + /* Create outer array-of-maps using template */ + opts.inner_map_fd = tmpl_fd; + outer_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer", + sizeof(int), sizeof(int), 1, &opts); + if (!ASSERT_OK_FD(outer_fd, "create_outer")) + goto close_tmpl; + + /* Insert template as initial inner map */ + err = bpf_map_update_elem(outer_fd, &zero, &tmpl_fd, 0); + if (!ASSERT_OK(err, "insert_tmpl")) + goto close_outer; + + /* Replacement with same max_entries should succeed */ + good_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, "good", + sizeof(int), sizeof(long), 8, NULL); + if (!ASSERT_OK_FD(good_fd, "create_good")) + goto close_outer; + + err = bpf_map_update_elem(outer_fd, &zero, &good_fd, 0); + ASSERT_OK(err, "replace_same_max_entries"); + close(good_fd); + + /* Replacement with fewer max_entries must fail */ + bad_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, "bad", + sizeof(int), sizeof(long), 2, NULL); + if (!ASSERT_OK_FD(bad_fd, "create_bad")) + goto close_outer; + + err = bpf_map_update_elem(outer_fd, &zero, &bad_fd, 0); + ASSERT_ERR(err, "replace_smaller_max_entries"); + close(bad_fd); + +close_outer: + close(outer_fd); +close_tmpl: + close(tmpl_fd); +} -- cgit v1.2.3 From 31e62c2ebbfdc3fe3dbdf5e02c92a9dc67087a3a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 13 May 2026 11:37:18 -0700 Subject: ptrace: slightly saner 'get_dumpable()' logic The 'dumpability' of a task is fundamentally about the memory image of the task - the concept comes from whether it can core dump or not - and makes no sense when you don't have an associated mm. And almost all users do in fact use it only for the case where the task has a mm pointer. But we have one odd special case: ptrace_may_access() uses 'dumpable' to check various other things entirely independently of the MM (typically explicitly using flags like PTRACE_MODE_READ_FSCREDS). Including for threads that no longer have a VM (and maybe never did, like most kernel threads). It's not what this flag was designed for, but it is what it is. The ptrace code does check that the uid/gid matches, so you do have to be uid-0 to see kernel thread details, but this means that the traditional "drop capabilities" model doesn't make any difference for this all. Make it all make a *bit* more sense by saying that if you don't have a MM pointer, we'll use a cached "last dumpability" flag if the thread ever had a MM (it will be zero for kernel threads since it is never set), and require a proper CAP_SYS_PTRACE capability to override. Reported-by: Qualys Security Advisory Cc: Oleg Nesterov Cc: Kees Cook Signed-off-by: Linus Torvalds --- include/linux/sched.h | 3 +++ kernel/exit.c | 1 + kernel/ptrace.c | 22 ++++++++++++++++------ 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 368c7b4d7cb5..ee06cba5c6f5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1002,6 +1002,9 @@ struct task_struct { unsigned sched_rt_mutex:1; #endif + /* Save user-dumpable when mm goes away */ + unsigned user_dumpable:1; + /* Bit to tell TOMOYO we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; diff --git a/kernel/exit.c b/kernel/exit.c index 9a909993ab1d..f50d73c272d6 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -571,6 +571,7 @@ static void exit_mm(void) */ smp_mb__after_spinlock(); local_irq_disable(); + current->user_dumpable = (get_dumpable(mm) == SUID_DUMP_USER); current->mm = NULL; membarrier_update_current_mm(NULL); enter_lazy_tlb(mm, current); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 68c17daef8d4..130043bfc209 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -272,11 +272,24 @@ static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode) return ns_capable(ns, CAP_SYS_PTRACE); } +static bool task_still_dumpable(struct task_struct *task, unsigned int mode) +{ + struct mm_struct *mm = task->mm; + if (mm) { + if (get_dumpable(mm) == SUID_DUMP_USER) + return true; + return ptrace_has_cap(mm->user_ns, mode); + } + + if (task->user_dumpable) + return true; + return ptrace_has_cap(&init_user_ns, mode); +} + /* Returns 0 on success, -errno on denial. */ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; - struct mm_struct *mm; kuid_t caller_uid; kgid_t caller_gid; @@ -337,11 +350,8 @@ ok: * Pairs with a write barrier in commit_creds(). */ smp_rmb(); - mm = task->mm; - if (mm && - ((get_dumpable(mm) != SUID_DUMP_USER) && - !ptrace_has_cap(mm->user_ns, mode))) - return -EPERM; + if (!task_still_dumpable(task, mode)) + return -EPERM; return security_ptrace_access_check(task, mode); } -- cgit v1.2.3 From 1854082fe0ddb81bc93d1f8e8a00554217fd09d1 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Fri, 8 May 2026 21:19:58 +0100 Subject: phy: apple: atc: Fix typec switch/mux leak on unbind atcphy_probe_switch() and atcphy_probe_mux() discard the pointers returned by typec_switch_register() and typec_mux_register(). The platform driver has no .remove callback, so when the driver unbinds (e.g. via sysfs unbind) neither typec_switch_unregister() nor typec_mux_unregister() is called. The framework reference taken in typec_switch_register() (device_initialize() + device_add() in drivers/usb/typec/mux.c) is therefore never dropped and the typec_switch_dev / typec_mux_dev objects stay live forever, with their sysfs entries under the typec_mux class also left behind. A subsequent rebind cannot recreate them with the same fwnode-derived name. Save the registered handles and unregister them through devm_add_action_or_reset() so framework registration is torn down in step with the driver's other devm-managed state. While here, drop struct apple_atcphy::sw and ::mux: they were declared with the consumer-side types (typec_switch *, typec_mux *) instead of the provider-side types and were never assigned. Scope of the fix ================ This patch fixes the registration leak only. It does not close the use-after-free window that arises when a consumer that obtained a reference via fwnode_typec_switch_get() / fwnode_typec_mux_get() outlives the provider unbind: such consumers keep the underlying typec_switch_dev / typec_mux_dev alive past device_unregister(), and a later typec_switch_set() / typec_mux_set() still invokes the registered atcphy_sw_set() / atcphy_mux_set(), which dereferences the freed apple_atcphy through typec_{switch,mux}_get_drvdata(). On Apple Silicon the relevant consumers are the typec port and the cd321x controller registered by drivers/usb/typec/tipd/core.c. Cable plug / orientation events and alt-mode transitions trigger the .set callbacks via: tps6598x_interrupt() drivers/usb/typec/tipd/core.c tps6598x_handle_plug_event() tps6598x_connect()/_disconnect() typec_set_orientation() drivers/usb/typec/class.c typec_switch_set(port->sw) drivers/usb/typec/mux.c atcphy_sw_set() drivers/phy/apple/atc.c cd321x_update_work() drivers/usb/typec/tipd/core.c cd321x_typec_update_mode() typec_mux_set(cd321x->mux) drivers/usb/typec/mux.c atcphy_mux_set() drivers/phy/apple/atc.c Closing that window requires framework support for invalidating consumer-held references on provider unbind. The same consumer-survives-provider pattern has been discussed for the PHY framework [1] and is out of scope here. [1] https://lore.kernel.org/linux-phy/aZejMSJ9qqRWb2pX@google.com/ Fixes: 8e98ca1e74db ("phy: apple: Add Apple Type-C PHY") Signed-off-by: David Carlier Reviewed-by: Vladimir Oltean Tested-by: Joshua Peisach Link: https://lkml.kernel.org/r/6ec1ed08328340db42655287afd5fa4067316b11.camel@perches.com Link: https://patch.msgid.link/20260508201958.30060-1-devnexen@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/apple/atc.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/phy/apple/atc.c b/drivers/phy/apple/atc.c index e9d106f135c5..4156fabad742 100644 --- a/drivers/phy/apple/atc.c +++ b/drivers/phy/apple/atc.c @@ -628,9 +628,6 @@ struct apple_atcphy { struct reset_controller_dev rcdev; - struct typec_switch *sw; - struct typec_mux *mux; - struct mutex lock; }; @@ -2066,15 +2063,25 @@ static int atcphy_sw_set(struct typec_switch_dev *sw, enum typec_orientation ori return 0; } +static void atcphy_typec_switch_unregister(void *data) +{ + typec_switch_unregister(data); +} + static int atcphy_probe_switch(struct apple_atcphy *atcphy) { + struct typec_switch_dev *sw; struct typec_switch_desc sw_desc = { .drvdata = atcphy, .fwnode = atcphy->dev->fwnode, .set = atcphy_sw_set, }; - return PTR_ERR_OR_ZERO(typec_switch_register(atcphy->dev, &sw_desc)); + sw = typec_switch_register(atcphy->dev, &sw_desc); + if (IS_ERR(sw)) + return PTR_ERR(sw); + + return devm_add_action_or_reset(atcphy->dev, atcphy_typec_switch_unregister, sw); } static int atcphy_mux_set(struct typec_mux_dev *mux, struct typec_mux_state *state) @@ -2146,15 +2153,25 @@ static int atcphy_mux_set(struct typec_mux_dev *mux, struct typec_mux_state *sta return atcphy_configure(atcphy, target_mode); } +static void atcphy_typec_mux_unregister(void *data) +{ + typec_mux_unregister(data); +} + static int atcphy_probe_mux(struct apple_atcphy *atcphy) { + struct typec_mux_dev *mux; struct typec_mux_desc mux_desc = { .drvdata = atcphy, .fwnode = atcphy->dev->fwnode, .set = atcphy_mux_set, }; - return PTR_ERR_OR_ZERO(typec_mux_register(atcphy->dev, &mux_desc)); + mux = typec_mux_register(atcphy->dev, &mux_desc); + if (IS_ERR(mux)) + return PTR_ERR(mux); + + return devm_add_action_or_reset(atcphy->dev, atcphy_typec_mux_unregister, mux); } static int atcphy_load_tunables(struct apple_atcphy *atcphy) -- cgit v1.2.3 From 81a874233c305d29e37fdb70b691ff4254294c0b Mon Sep 17 00:00:00 2001 From: Jeremy Erazo Date: Thu, 14 May 2026 12:03:34 +0000 Subject: smb: client: avoid integer overflow in SMB2 READ length check SMB2 READ response validation in cifs_readv_receive() and handle_read_data() checks data_offset + data_len against the received buffer length. Both values are attacker-controlled fields from the server response and are stored as unsigned int, so the addition can wrap before the bounds check: fs/smb/client/transport.c:1259 if (!use_rdma_mr && (data_offset + data_len > buflen)) fs/smb/client/smb2ops.c:4839 else if (buf_len >= data_offset + data_len) A malicious SMB server can use this to bypass validation. In the non-encrypted receive path the client attempts an oversized socket read and stalls for the SMB response timeout (180 seconds) before reconnecting. In the SMB3 encrypted path, runtime testing shows the malformed length can reach copy_to_iter() in handle_read_data() with attacker-controlled size, where usercopy hardening stops the oversized copy before bytes reach userspace. Guard both call sites with check_add_overflow(), which is already used elsewhere in this subsystem (smb2pdu.c). On overflow, treat the response as malformed and reject with -EIO. Signed-off-by: Jeremy Erazo Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 4 +++- fs/smb/client/transport.c | 15 +++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index e6cb9b144530..3738204984f5 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4721,6 +4721,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, { unsigned int data_offset; unsigned int data_len; + unsigned int end_off; unsigned int cur_off; unsigned int cur_page_idx; unsigned int pad_len; @@ -4836,7 +4837,8 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, } rdata->got_bytes = buffer_len; - } else if (buf_len >= data_offset + data_len) { + } else if (!check_add_overflow(data_offset, data_len, &end_off) && + buf_len >= end_off) { /* read response payload is in buf */ WARN_ONCE(buffer, "read data can be either in buf or in buffer"); copied = copy_to_iter(buf + data_offset, data_len, &rdata->subreq.io_iter); diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 05f8099047e1..fdf4e50c27ce 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1158,7 +1158,7 @@ int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) { int length, len; - unsigned int data_offset, data_len; + unsigned int data_offset, data_len, end_off; struct cifs_io_subrequest *rdata = mid->callback_data; char *buf = server->smallbuf; unsigned int buflen = server->pdu_size; @@ -1256,11 +1256,14 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) use_rdma_mr = rdata->mr; #endif data_len = server->ops->read_data_length(buf, use_rdma_mr); - if (!use_rdma_mr && (data_offset + data_len > buflen)) { - /* data_len is corrupt -- discard frame */ - rdata->result = smb_EIO2(smb_eio_trace_read_rsp_malformed, - data_offset + data_len, buflen); - return cifs_readv_discard(server, mid); + if (!use_rdma_mr) { + if (check_add_overflow(data_offset, data_len, &end_off) || + end_off > buflen) { + /* data_len is corrupt -- discard frame */ + rdata->result = smb_EIO2(smb_eio_trace_read_rsp_malformed, + end_off, buflen); + return cifs_readv_discard(server, mid); + } } #ifdef CONFIG_CIFS_SMB_DIRECT -- cgit v1.2.3 From 905c559e51497b8bfdbb68df8be56d2f70f0de8e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 14 Mar 2026 14:24:56 +0100 Subject: gcc-plugins: Always define CONST_CAST_GIMPLE and CONST_CAST_TREE For gcc-16, the CONST_CAST macro family was removed. Add back what we were using in gcc-common.h, as they are simple wrappers. See GCC commits: c3d96ff9e916c02584aa081f03ab999292efbb50 458c7926d48959abcb2c1adaa22458e27459a551 Suggested-by: Ingo Saitz Link: https://lore.kernel.org/lkml/ab6OKoay0OWkywjK@spatz.zoo Fixes: 6b90bd4ba40b ("GCC plugin infrastructure") Tested-by: Ivan Bulatovic Tested-by: Christopher Cradock Signed-off-by: Kees Cook --- scripts/gcc-plugins/gcc-common.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h index 8f1b3500f8e2..abb1964c44d4 100644 --- a/scripts/gcc-plugins/gcc-common.h +++ b/scripts/gcc-plugins/gcc-common.h @@ -309,7 +309,9 @@ typedef const gimple *const_gimple_ptr; #define gimple gimple_ptr #define const_gimple const_gimple_ptr #undef CONST_CAST_GIMPLE -#define CONST_CAST_GIMPLE(X) CONST_CAST(gimple, (X)) +#define CONST_CAST_GIMPLE(X) const_cast((X)) +#undef CONST_CAST_TREE +#define CONST_CAST_TREE(X) const_cast((X)) /* gimple related */ static inline gimple gimple_build_assign_with_ops(enum tree_code subcode, tree lhs, tree op1, tree op2 MEM_STAT_DECL) -- cgit v1.2.3 From 28e03f78e69cf6628b81f24777799778528a84c1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 5 May 2026 12:24:17 +0200 Subject: x86/xen: Fix xen_e820_swap_entry_with_ram() When swapping a not page-aligned E820 map entry with RAM, the start address of the modified entry is calculated wrong (the offset into the page is subtracted instead of being added to the page address). Fixes: be35d91c8880 ("xen: tolerate ACPI NVS memory overlapping with Xen allocated memory") Reported-by: Jan Beulich Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Message-ID: <20260505102417.208138-1-jgross@suse.com> --- arch/x86/xen/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index bb95a05259b8..41251d4cf953 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -655,7 +655,7 @@ static void __init xen_e820_swap_entry_with_ram(struct e820_entry *swap_entry) /* Fill new entry (keep size and page offset). */ entry->type = swap_entry->type; entry->addr = entry_end - swap_size + - swap_addr - swap_entry->addr; + swap_entry->addr - swap_addr; entry->size = swap_entry->size; /* Convert old entry to RAM, align to pages. */ -- cgit v1.2.3 From 4594437880ce347ac8438758fd91543f70da1aa9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 8 May 2026 16:39:33 +0200 Subject: x86/xen: Tolerate nested XEN_LAZY_MMU entering/leaving MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the support of nested lazy mmu sections it can happen that arch_enter_lazy_mmu_mode() is being called twice without a call of arch_leave_lazy_mmu_mode() in between, as the lazy_mmu_*() helpers are not disabling preemption when checking for nested lazy mmu sections. This is a problem when running as a Xen PV guest, as xen_enter_lazy_mmu() and xen_leave_lazy_mmu() don't tolerate this case. Fix that in xen_enter_lazy_mmu() and xen_leave_lazy_mmu() in order not to hurt all other lazy mmu mode users. Fixes: 291b3abed657 ("x86/xen: use lazy_mmu_state when context-switching") Tested-by: Marek Marczykowski-Górecki Signed-off-by: Juergen Gross Message-ID: <20260508143933.493013-1-jgross@suse.com> --- arch/x86/xen/mmu_pv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index c80d0058efd1..3eee5f84f8a7 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2145,7 +2145,10 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) static void xen_enter_lazy_mmu(void) { - enter_lazy(XEN_LAZY_MMU); + preempt_disable(); + if (xen_get_lazy_mode() != XEN_LAZY_MMU) + enter_lazy(XEN_LAZY_MMU); + preempt_enable(); } static void xen_flush_lazy_mmu(void) @@ -2182,7 +2185,8 @@ static void xen_leave_lazy_mmu(void) { preempt_disable(); xen_mc_flush(); - leave_lazy(XEN_LAZY_MMU); + if (xen_get_lazy_mode() != XEN_LAZY_NONE) + leave_lazy(XEN_LAZY_MMU); preempt_enable(); } -- cgit v1.2.3 From 9cd3f16c320bfdadd4509358122368deb56a5741 Mon Sep 17 00:00:00 2001 From: Ruide Cao Date: Wed, 13 May 2026 11:58:15 +0800 Subject: batman-adv: fix fragment reassembly length accounting batman-adv keeps a running payload length for queued fragments and uses it to validate a fragment chain before reassembly. That accounting currently allows the accumulated fragment length to be truncated during updates. As a result, malformed fragment chains can bypass the intended validation and drive reassembly with inconsistent length state, leading to a local denial of service. Fix the accounting by storing the accumulated length in a length-typed field and rejecting update overflows before the existing validation logic runs. The fix was verified against the original reproducer and against valid fragment reassembly paths. Fixes: 610bfc6bc99b ("batman-adv: Receive fragmented packets and merge") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Ruide Cao Tested-by: Ren Wei Signed-off-by: Ren Wei Signed-off-by: Sven Eckelmann --- net/batman-adv/fragmentation.c | 23 +++++++++++++++++------ net/batman-adv/types.h | 2 +- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index f4e45cc25816..1152c2ce0c1e 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -80,9 +81,9 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node, * * Return: the maximum size of payload that can be fragmented. */ -static int batadv_frag_size_limit(void) +static size_t batadv_frag_size_limit(void) { - int limit = BATADV_FRAG_MAX_FRAG_SIZE; + size_t limit = BATADV_FRAG_MAX_FRAG_SIZE; limit -= sizeof(struct batadv_frag_packet); limit *= BATADV_FRAG_MAX_FRAGMENTS; @@ -143,7 +144,9 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, struct batadv_frag_packet *frag_packet; u8 bucket; u16 seqno, hdr_size = sizeof(struct batadv_frag_packet); + bool overflow = false; bool ret = false; + size_t data_len; /* Linearize packet to avoid linearizing 16 packets in a row when doing * the later merge. Non-linear merge should be added to remove this @@ -153,6 +156,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, goto err; frag_packet = (struct batadv_frag_packet *)skb->data; + data_len = skb->len - hdr_size; seqno = ntohs(frag_packet->seqno); bucket = seqno % BATADV_FRAG_BUFFER_COUNT; @@ -171,7 +175,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, spin_lock_bh(&chain->lock); if (batadv_frag_init_chain(chain, seqno)) { hlist_add_head(&frag_entry_new->list, &chain->fragment_list); - chain->size = skb->len - hdr_size; + chain->size = data_len; chain->timestamp = jiffies; chain->total_size = ntohs(frag_packet->total_size); ret = true; @@ -188,7 +192,11 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, if (frag_entry_curr->no < frag_entry_new->no) { hlist_add_before(&frag_entry_new->list, &frag_entry_curr->list); - chain->size += skb->len - hdr_size; + + if (check_add_overflow(chain->size, data_len, + &chain->size)) + overflow = true; + chain->timestamp = jiffies; ret = true; goto out; @@ -201,13 +209,16 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, /* Reached the end of the list, so insert after 'frag_entry_last'. */ if (likely(frag_entry_last)) { hlist_add_behind(&frag_entry_new->list, &frag_entry_last->list); - chain->size += skb->len - hdr_size; + + if (check_add_overflow(chain->size, data_len, &chain->size)) + overflow = true; + chain->timestamp = jiffies; ret = true; } out: - if (chain->size > batadv_frag_size_limit() || + if (overflow || chain->size > batadv_frag_size_limit() || chain->total_size != ntohs(frag_packet->total_size) || chain->total_size > batadv_frag_size_limit()) { /* Clear chain if total size of either the list or the packet diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 739439e2b235..c8c3e8064f00 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -301,7 +301,7 @@ struct batadv_frag_table_entry { u16 seqno; /** @size: accumulated size of packets in list */ - u16 size; + size_t size; /** @total_size: expected size of the assembled packet */ u16 total_size; -- cgit v1.2.3 From a340a51ed801eab7bb454150c226323b865263cc Mon Sep 17 00:00:00 2001 From: Ruijie Li Date: Thu, 14 May 2026 16:13:25 +0800 Subject: batman-adv: clear current gateway during teardown batadv_gw_node_free() removes the gateway list entries during mesh teardown, but it does not clear the currently selected gateway. This leaves stale gateway state behind across cleanup and can break a later mesh recreation. Clear bat_priv->gw.curr_gw before walking the gateway list so the selected gateway reference is dropped as part of teardown. Fixes: 2265c1410864 ("batman-adv: gateway election code refactoring") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Ruijie Li Signed-off-by: Zhanpeng Li Signed-off-by: Ren Wei Signed-off-by: Sven Eckelmann --- net/batman-adv/gateway_client.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 51e9c081a2a4..a9d0346e8332 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -478,10 +478,14 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, */ void batadv_gw_node_free(struct batadv_priv *bat_priv) { + struct batadv_gw_node *curr_gw; struct batadv_gw_node *gw_node; struct hlist_node *node_tmp; spin_lock_bh(&bat_priv->gw.list_lock); + curr_gw = rcu_replace_pointer(bat_priv->gw.curr_gw, NULL, true); + batadv_gw_node_put(curr_gw); + hlist_for_each_entry_safe(gw_node, node_tmp, &bat_priv->gw.gateway_list, list) { hlist_del_init_rcu(&gw_node->list); -- cgit v1.2.3 From 05f2a68b407a6817fe141dd64972c6ab8725312d Mon Sep 17 00:00:00 2001 From: Matt Evans Date: Mon, 11 May 2026 07:58:23 -0700 Subject: vfio/pci: Set up BAR resources and maps in vfio_pci_core_enable() Previously BAR resource requests and the corresponding pci_iomap() were performed on-demand and without synchronisation, which was racy. Rather than add synchronisation, it's simplest to address this by doing both activities from vfio_pci_core_enable(). The resource allocation and/or pci_iomap() can still fail; their status is tracked and existing calls to vfio_pci_core_setup_barmap() will fail in a similar way to before. This keeps the point of failure as observed by userspace the same, i.e. failures to request/map unused BARs are benign. Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver") Signed-off-by: Matt Evans Link: https://lore.kernel.org/r/20260511145829.2993601-2-mattev@meta.com [ERR_PTR -> IOMEM_ERR_PTR per lkp report] Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_core.c | 37 ++++++++++++++++++++++++++++++++++++- drivers/vfio/pci/vfio_pci_rdwr.c | 26 +++++++------------------- 2 files changed, 43 insertions(+), 20 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 3f8d093aacf8..050e7542952e 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -482,6 +482,40 @@ static int vfio_pci_core_runtime_resume(struct device *dev) } #endif /* CONFIG_PM */ +/* + * Eager-request BAR resources, and iomap them. Soft failures are + * allowed, and consumers must check the barmap before use in order to + * give compatible user-visible behaviour with the previous on-demand + * allocation method. + */ +static void vfio_pci_core_map_bars(struct vfio_pci_core_device *vdev) +{ + struct pci_dev *pdev = vdev->pdev; + int i; + + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + int bar = i + PCI_STD_RESOURCES; + + vdev->barmap[bar] = IOMEM_ERR_PTR(-ENODEV); + + if (!pci_resource_len(pdev, i)) + continue; + + if (pci_request_selected_regions(pdev, 1 << bar, "vfio")) { + pci_dbg(pdev, "Failed to reserve region %d\n", bar); + vdev->barmap[bar] = IOMEM_ERR_PTR(-EBUSY); + continue; + } + + vdev->barmap[bar] = pci_iomap(pdev, bar, 0); + if (!vdev->barmap[bar]) { + pci_dbg(pdev, "Failed to iomap region %d\n", bar); + pci_release_selected_regions(pdev, 1 << bar); + vdev->barmap[bar] = IOMEM_ERR_PTR(-ENOMEM); + } + } +} + /* * The pci-driver core runtime PM routines always save the device state * before going into suspended state. If the device is going into low power @@ -568,6 +602,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev) if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev)) vdev->has_vga = true; + vfio_pci_core_map_bars(vdev); return 0; @@ -648,7 +683,7 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev) for (i = 0; i < PCI_STD_NUM_BARS; i++) { bar = i + PCI_STD_RESOURCES; - if (!vdev->barmap[bar]) + if (IS_ERR_OR_NULL(vdev->barmap[bar])) continue; pci_iounmap(pdev, vdev->barmap[bar]); pci_release_selected_regions(pdev, 1 << bar); diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 4251ee03e146..3bfbb879a005 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -198,27 +198,15 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, } EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw); +/* + * The barmap is set up in vfio_pci_core_enable(). Callers use this + * function to check that the BAR resources are requested or that the + * pci_iomap() was done. + */ int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar) { - struct pci_dev *pdev = vdev->pdev; - int ret; - void __iomem *io; - - if (vdev->barmap[bar]) - return 0; - - ret = pci_request_selected_regions(pdev, 1 << bar, "vfio"); - if (ret) - return ret; - - io = pci_iomap(pdev, bar, 0); - if (!io) { - pci_release_selected_regions(pdev, 1 << bar); - return -ENOMEM; - } - - vdev->barmap[bar] = io; - + if (IS_ERR(vdev->barmap[bar])) + return PTR_ERR(vdev->barmap[bar]); return 0; } EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap); -- cgit v1.2.3 From 702809dabdecca807bdd50cfdcc1c980feb2ba62 Mon Sep 17 00:00:00 2001 From: Matt Evans Date: Mon, 11 May 2026 07:58:24 -0700 Subject: vfio/pci: Check BAR resources before exporting a DMABUF A DMABUF exports access to BAR resources and, although they are requested at startup time, we need to ensure they really were reserved before exporting. Otherwise, it's possible to access unreserved resources through the export. Add a check to the DMABUF-creation path. Fixes: 5d74781ebc86c ("vfio/pci: Add dma-buf export support for MMIO regions") Signed-off-by: Matt Evans Link: https://lore.kernel.org/r/20260511145829.2993601-3-mattev@meta.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_dmabuf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c index fdc22e8b4656..1a177ce7de54 100644 --- a/drivers/vfio/pci/vfio_pci_dmabuf.c +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c @@ -244,9 +244,11 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags, return -EINVAL; /* - * For PCI the region_index is the BAR number like everything else. + * For PCI the region_index is the BAR number like everything + * else. Check that PCI resources have been claimed for it. */ - if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX) + if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX || + vfio_pci_core_setup_barmap(vdev, get_dma_buf.region_index)) return -ENODEV; dma_ranges = memdup_array_user(&arg->dma_ranges, get_dma_buf.nr_ranges, -- cgit v1.2.3 From 2d8826a2d3657cea66fb0370f9e521575a673871 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 13 May 2026 09:01:34 +0200 Subject: batman-adv: dat: handle forward allocation error batadv_dat_forward_data() calls pskb_copy_for_clone() to duplicate an skb for each DHT candidate, but does not check the return value before passing it to batadv_send_skb_prepare_unicast_4addr(). That function dereferences the skb unconditionally, so a failed allocation triggers a NULL pointer dereference. Skip forwarding to the current DHT candidate on allocation failure. Cc: stable@kernel.org Fixes: 785ea1144182 ("batman-adv: Distributed ARP Table - create DHT helper functions") Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Reviewed-by: Yuan Tan Signed-off-by: Sven Eckelmann --- net/batman-adv/distributed-arp-table.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 3efc4cf50b46..0a8bd95e2f99 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -696,6 +696,9 @@ static bool batadv_dat_forward_data(struct batadv_priv *bat_priv, goto free_orig; tmp_skb = pskb_copy_for_clone(skb, GFP_ATOMIC); + if (!tmp_skb) + goto free_neigh; + if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb, cand[i].orig_node, packet_subtype)) { -- cgit v1.2.3 From 6c65cf23d4c6170fcf5714c32aa64689718cb142 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 13 May 2026 09:01:35 +0200 Subject: batman-adv: tp_meter: avoid use of uninit sender vars batadv_tp_recv_ack() and batadv_tp_stop() are only valid for tp_vars in the BATADV_TP_SENDER role. When called with a BATADV_TP_RECEIVER role, it proceeds to read sender-only members that were never initialized, leading to undefined behavior. This can be triggered when a node that is currently acting as a receiver in an ongoing tp_meter session receives a malicious ACK packet. Guard against this by checking tp_vars->role immediately after the lookup and bailing out if it is not BATADV_TP_SENDER, before any of those members are accessed. Cc: stable@kernel.org Fixes: 33a3bb4a3345 ("batman-adv: throughput meter implementation") Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Reviewed-by: Yuan Tan Signed-off-by: Sven Eckelmann --- net/batman-adv/tp_meter.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index ca6c3f6374bc..a3593d104caa 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -664,6 +664,9 @@ static void batadv_tp_recv_ack(struct batadv_priv *bat_priv, if (unlikely(!tp_vars)) return; + if (unlikely(tp_vars->role != BATADV_TP_SENDER)) + goto out; + if (unlikely(atomic_read(&tp_vars->sending) == 0)) goto out; @@ -1101,12 +1104,16 @@ void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst, if (!tp_vars) { batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Meter: trying to interrupt an already over connection\n"); - goto out; + goto out_put_orig_node; } + if (unlikely(tp_vars->role != BATADV_TP_SENDER)) + goto out_put_tp_vars; + batadv_tp_sender_shutdown(tp_vars, return_value); +out_put_tp_vars: batadv_tp_vars_put(tp_vars); -out: +out_put_orig_node: batadv_orig_node_put(orig_node); } -- cgit v1.2.3 From c207f1d785044667f87cc8c72355e33f3981f2d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 13 May 2026 19:50:02 +0100 Subject: smbdirect: Fix error cleanup in smbdirect_map_sges_from_iter() Fix smbdirect_map_sges_from_iter() to use pre-decrement, not post-decrement so that it cleans up the correct slots. Fixes: e5fbdde43017 ("cifs: Add a function to build an RDMA SGE list from an iterator") Closes: https://sashiko.dev/#/patchset/20260326104544.509518-1-dhowells%40redhat.com Signed-off-by: David Howells Reviewed-by: Stefan Metzmacher cc: Paulo Alcantara cc: Tom Talpey cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/smbdirect/connection.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/smbdirect/connection.c b/fs/smb/smbdirect/connection.c index fe9912e53da6..8adf58097534 100644 --- a/fs/smb/smbdirect/connection.c +++ b/fs/smb/smbdirect/connection.c @@ -2168,7 +2168,7 @@ static ssize_t smbdirect_map_sges_from_iter(struct iov_iter *iter, size_t len, if (ret < 0) { while (state->num_sge > before) { - struct ib_sge *sge = &state->sge[state->num_sge--]; + struct ib_sge *sge = &state->sge[--state->num_sge]; ib_dma_unmap_page(state->device, sge->addr, -- cgit v1.2.3 From 0ce1bc9e46ecabe84772bb561e373c0d9876d6f2 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Wed, 13 May 2026 13:53:24 -0400 Subject: RDMA/siw: Reject MPA FPDU length underflow before signed receive math A malicious connected siw peer can send an iWARP FPDU whose MPA length field (c_hdr->mpa_len, 16 bit big-endian, peer-controlled) is smaller than the fixed DDP/RDMAP header for the announced opcode. Soft-iWARP parses the full header in siw_get_hdr() based on iwarp_pktinfo[opcode] .hdr_len, but never compares mpa_len against that header length. siw_tcp_rx_data() then derives srx->fpdu_part_rem = be16_to_cpu(mpa_len) - fpdu_part_rcvd + MPA_HDR_SIZE; where fpdu_part_rcvd equals iwarp_pktinfo[opcode].hdr_len at this point. For a tagged WRITE (hdr_len 16, MPA_HDR_SIZE 2) the smallest on-wire mpa_len of 0 yields fpdu_part_rem = -14, and any mpa_len below hdr_len - MPA_HDR_SIZE underflows to a negative int. The signed value then flows into siw_proc_write()/siw_proc_rresp() as bytes = min(srx->fpdu_part_rem, srx->skb_new); is handed to siw_check_mem() as an int len (whose interval check addr + len > mem->va + mem->len is satisfied for a valid base when len is negative), and reaches siw_rx_data() -> siw_rx_kva() / siw_rx_umem() -> skb_copy_bits() as a signed copy length. The header copy branch in skb_copy_bits() promotes that to size_t, producing a multi-gigabyte read. KASAN under a KUnit harness that drives the real kernel TCP receive path -- a loopback AF_INET socketpair, the malformed FPDU written via kernel_sendmsg, sk_data_ready firing in softirq, tcp_read_sock dispatching to siw_tcp_rx_data -- reports: BUG: KASAN: use-after-free in skb_copy_bits+0x284/0x480 Read of size 4294967295 at addr ffff888... Call Trace: skb_copy_bits siw_rx_kva siw_rx_data siw_check_mem siw_proc_write siw_tcp_rx_data __tcp_read_sock siw_qp_llp_data_ready tcp_data_ready tcp_data_queue Add the missing invariant at the earliest point where the peer header is fully assembled. iwarp_pktinfo[*].hdr_len - MPA_HDR_SIZE is exactly the value the siw transmitter uses as the minimum mpa_len for each opcode (drivers/infiniband/sw/siw/siw_qp.c:33), so this matches the protocol contract. Out-of-range FPDUs terminate the connection with TERM_ERROR_LAYER_LLP / LLP_ETYPE_MPA / LLP_ECODE_FPDU_START -- which is RFC 5044 Section 8 error code 3 ("Marker and ULPDU Length fields do not agree on the start of an FPDU"), the correct framing-error class for this inconsistency. Fixes: 8b6a361b8c48 ("rdma/siw: receive path") Link: https://patch.msgid.link/r/20260513175325.2042630-2-michael.bommarito@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Michael Bommarito Assisted-by: Claude:claude-opus-4-7 Acked-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_qp_rx.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index e8a88b378d51..34d03584160c 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -1081,6 +1081,21 @@ static int siw_get_hdr(struct siw_rx_stream *srx) return -EAGAIN; } + /* + * Peer-controlled mpa_len must not underflow srx->fpdu_part_rem + * in siw_tcp_rx_data(); a negative value flows as a signed copy + * length into siw_check_mem() and skb_copy_bits(). + */ + if (unlikely(be16_to_cpu(c_hdr->mpa_len) + MPA_HDR_SIZE < + iwarp_pktinfo[opcode].hdr_len)) { + pr_warn_ratelimited("siw: short mpa_len %u for opcode %u (hdr_len %u)\n", + be16_to_cpu(c_hdr->mpa_len), opcode, + iwarp_pktinfo[opcode].hdr_len); + siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_LLP, + LLP_ETYPE_MPA, LLP_ECODE_FPDU_START, 0); + return -EINVAL; + } + /* * DDP/RDMAP header receive completed. Check if the current * DDP segment starts a new RDMAP message or continues a previously -- cgit v1.2.3 From 4a9b16541ad3faf8bccb398532bf3f8b6bbf1188 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Wed, 13 May 2026 14:05:06 -0400 Subject: lsm: hold cred_guard_mutex for lsm_set_self_attr() Just as proc_pid_attr_write() already does before calling the LSM hook. This only matters for SELinux and AppArmor which check whether the process is being ptraced and if so, whether to allow the transition. Cc: stable@vger.kernel.org Signed-off-by: Stephen Smalley Acked-by: Casey Schaufler Signed-off-by: Paul Moore --- security/lsm_syscalls.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/security/lsm_syscalls.c b/security/lsm_syscalls.c index 5648b1f0ce9c..08a017669c02 100644 --- a/security/lsm_syscalls.c +++ b/security/lsm_syscalls.c @@ -57,7 +57,14 @@ u64 lsm_name_to_attr(const char *name) SYSCALL_DEFINE4(lsm_set_self_attr, unsigned int, attr, struct lsm_ctx __user *, ctx, u32, size, u32, flags) { - return security_setselfattr(attr, ctx, size, flags); + int rc; + + rc = mutex_lock_interruptible(¤t->signal->cred_guard_mutex); + if (rc < 0) + return rc; + rc = security_setselfattr(attr, ctx, size, flags); + mutex_unlock(¤t->signal->cred_guard_mutex); + return rc; } /** -- cgit v1.2.3 From aa13e4b120f9cf238ad141d8419851f3a7a3fb5f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 10 May 2026 13:23:40 -0700 Subject: perf trace: Sync linux/socket.h with the kernel source To pick up changes from: c66e0f453d1afa82 ("net: use ktime_t in struct scm_timestamping_internal") This would be used to beautify networking syscall arguments and not to affect builds of other tools (e.g. objtool). Please see tools/include/uapi/README. Reviewed-by: Ian Rogers Cc: netdev@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/perf/trace/beauty/include/linux/socket.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index ec715ad4bf25..ec4a0a025793 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -415,7 +415,7 @@ struct __kernel_timespec; struct old_timespec32; struct scm_timestamping_internal { - struct timespec64 ts[3]; + ktime_t ts[3]; }; extern void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss); -- cgit v1.2.3 From b30e1493e3e27b6795244a472f0bbd07d0dc58fd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 10 May 2026 13:23:41 -0700 Subject: perf trace: Sync uapi/linux/fs.h with the kernel source To pick up changes from: 1f662195dbc07a66 ("fs: add generic FS_IOC_SHUTDOWN definitions") This would be used to beautify filesystem syscall arguments and not to affect builds of other tools (e.g. objtool). Please see tools/include/uapi/README. Reviewed-by: Ian Rogers Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/perf/trace/beauty/include/uapi/linux/fs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 70b2b661f42c..13f71202845e 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -657,4 +657,16 @@ struct procmap_query { __u64 build_id_addr; /* in */ }; +/* + * Shutdown the filesystem. + */ +#define FS_IOC_SHUTDOWN _IOR('X', 125, __u32) + +/* + * Flags for FS_IOC_SHUTDOWN + */ +#define FS_SHUTDOWN_FLAGS_DEFAULT 0x0 +#define FS_SHUTDOWN_FLAGS_LOGFLUSH 0x1 /* flush log but not data*/ +#define FS_SHUTDOWN_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ + #endif /* _UAPI_LINUX_FS_H */ -- cgit v1.2.3 From ca706027b5bdb37337e1b99752134d592f42f0ea Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 10 May 2026 13:23:42 -0700 Subject: perf trace: Sync uapi/linux/mount.h with the kernel source To pick up changes from: 5e8969bd19271241 ("mount: add FSMOUNT_NAMESPACE") This would be used to beautify mount syscall arguments and not to affect builds of other tools (e.g. objtool). Please see tools/include/uapi/README. Reviewed-by: Ian Rogers Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/perf/trace/beauty/include/uapi/linux/mount.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h index d9d86598d100..2204708dbf7a 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/mount.h +++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h @@ -110,6 +110,7 @@ enum fsconfig_command { * fsmount() flags. */ #define FSMOUNT_CLOEXEC 0x00000001 +#define FSMOUNT_NAMESPACE 0x00000002 /* Create the mount in a new mount namespace */ /* * Mount attributes. -- cgit v1.2.3 From ad2cd6f9def4899591a75a96f71752e3aadb7579 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 10 May 2026 13:23:43 -0700 Subject: perf trace: Sync uapi/linux/sched.h with the kernel source To pick up changes from: 9d4e752a24f740b3 ("namespace: allow creating empty mount namespaces") c8134b5f13ae959d ("pidfd: add CLONE_PIDFD_AUTOKILL") 24baca56fafc33d4 ("clone: add CLONE_NNP") 12ae2c81b21cfaa1 ("clone: add CLONE_AUTOREAP") 2e7af192697ef2a7 ("sched/deadline: Add reporting of runtime left & ...") This would be used to beautify scheduler syscall arguments and not to affect builds of other tools (e.g. objtool). Please see tools/include/uapi/README. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim --- tools/perf/trace/beauty/include/uapi/linux/sched.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/perf/trace/beauty/include/uapi/linux/sched.h b/tools/perf/trace/beauty/include/uapi/linux/sched.h index 359a14cc76a4..33a4624285cd 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/sched.h +++ b/tools/perf/trace/beauty/include/uapi/linux/sched.h @@ -34,8 +34,12 @@ #define CLONE_IO 0x80000000 /* Clone io context */ /* Flags for the clone3() syscall. */ -#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */ -#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */ +#define CLONE_CLEAR_SIGHAND (1ULL << 32) /* Clear any signal handler and reset to SIG_DFL. */ +#define CLONE_INTO_CGROUP (1ULL << 33) /* Clone into a specific cgroup given the right permissions. */ +#define CLONE_AUTOREAP (1ULL << 34) /* Auto-reap child on exit. */ +#define CLONE_NNP (1ULL << 35) /* Set no_new_privs on child. */ +#define CLONE_PIDFD_AUTOKILL (1ULL << 36) /* Kill child when clone pidfd closes. */ +#define CLONE_EMPTY_MNTNS (1ULL << 37) /* Create an empty mount namespace. */ /* * cloning flags intersect with CSIGNAL so can be used with unshare and clone3 @@ -43,6 +47,12 @@ */ #define CLONE_NEWTIME 0x00000080 /* New time namespace */ +/* + * unshare flags share the bit space with clone flags but only apply to the + * unshare syscall: + */ +#define UNSHARE_EMPTY_MNTNS 0x00100000 /* Unshare an empty mount namespace. */ + #ifndef __ASSEMBLY__ /** * struct clone_args - arguments for the clone3 syscall @@ -146,4 +156,7 @@ struct clone_args { SCHED_FLAG_KEEP_ALL | \ SCHED_FLAG_UTIL_CLAMP) +/* Only for sched_getattr() own flag param, if task is SCHED_DEADLINE */ +#define SCHED_GETATTR_FLAG_DL_DYNAMIC 0x01 + #endif /* _UAPI_LINUX_SCHED_H */ -- cgit v1.2.3 From be81aed3f7492caa522493f7c67b9c4d3c8924a6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 10 May 2026 13:23:44 -0700 Subject: perf build: Add make check-headers target Don't print header differences during the perf build as it's noisy. Mostly people won't care and find it annoying. As it's to improve perf trace beautifier to catch up new changes mostly in UAPIs, we can make it a separate build target and call it occasionally. Make it and build-test related targets phony. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim --- tools/perf/Makefile | 5 ++++- tools/perf/Makefile.perf | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 816d5d84816b..5b713837eede 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -111,6 +111,9 @@ build-test: build-test-tarball: @$(MAKE) -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory out +check-headers: + @./check-headers.sh + # # All other targets get passed through: # @@ -118,4 +121,4 @@ build-test-tarball: $(print_msg) $(make) -.PHONY: tags TAGS FORCE Makefile +.PHONY: tags TAGS FORCE Makefile build-test build-test-tarball check-headers diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index cee19c923c06..585637fc934f 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -285,7 +285,6 @@ goals := $(filter-out all sub-make, $(MAKECMDGOALS)) $(goals) all: sub-make sub-make: fixdep - @./check-headers.sh $(Q)$(MAKE) FIXDEP_BUILT=1 -f Makefile.perf $(goals) else # force_fixdep -- cgit v1.2.3 From 552636b9317c8a843dd4496d77e56976ab48c76b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 10 May 2026 13:23:45 -0700 Subject: perf trace: Add beautifier script for fsmount flags And move the existing one to fsmount_attr.sh to be more precise. Now the fsmount_flags[] is generated from the mount.h like below. The ilog2() + 1 is an existing pattern to handle bit flags. $ cat tools/perf/trace/beauty/generated/fsmount_arrays.c static const char *fsmount_flags[] = { [ilog2(0x00000001) + 1] = "CLOEXEC", [ilog2(0x00000002) + 1] = "NAMESPACE", }; It was found by Sashiko during the review. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim --- tools/perf/Makefile.perf | 8 ++++++++ tools/perf/builtin-trace.c | 9 +++------ tools/perf/trace/beauty/beauty.h | 3 +++ tools/perf/trace/beauty/fsmount.c | 18 +++++++++++++++++- tools/perf/trace/beauty/fsmount.sh | 11 +++-------- tools/perf/trace/beauty/fsmount_attr.sh | 22 ++++++++++++++++++++++ 6 files changed, 56 insertions(+), 15 deletions(-) create mode 100644 tools/perf/trace/beauty/fsmount_attr.sh diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 585637fc934f..76b35ac19acb 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -564,6 +564,12 @@ fsmount_tbls := $(srctree)/tools/perf/trace/beauty/fsmount.sh $(fsmount_arrays): $(beauty_uapi_linux_dir)/mount.h $(fsmount_tbls) $(Q)$(SHELL) '$(fsmount_tbls)' $(beauty_uapi_linux_dir) > $@ +fsmount_attr_arrays := $(beauty_outdir)/fsmount_attr_arrays.c +fsmount_attr_tbls := $(srctree)/tools/perf/trace/beauty/fsmount_attr.sh + +$(fsmount_attr_arrays): $(beauty_uapi_linux_dir)/mount.h $(fsmount_attr_tbls) + $(Q)$(SHELL) '$(fsmount_attr_tbls)' $(beauty_uapi_linux_dir) > $@ + fspick_arrays := $(beauty_outdir)/fspick_arrays.c fspick_tbls := $(srctree)/tools/perf/trace/beauty/fspick.sh @@ -854,6 +860,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE archheaders \ $(fadvise_advice_array) \ $(fsconfig_arrays) \ $(fsmount_arrays) \ + $(fsmount_attr_arrays) \ $(fspick_arrays) \ $(pkey_alloc_access_rights_array) \ $(sndrv_pcm_ioctl_array) \ @@ -1301,6 +1308,7 @@ clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $( $(OUTPUT)$(fadvise_advice_array) \ $(OUTPUT)$(fsconfig_arrays) \ $(OUTPUT)$(fsmount_arrays) \ + $(OUTPUT)$(fsmount_attr_arrays) \ $(OUTPUT)$(fspick_arrays) \ $(OUTPUT)$(madvise_behavior_array) \ $(OUTPUT)$(mmap_flags_array) \ diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e58c49d047a2..48615ddccd93 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -771,11 +771,6 @@ static const char *bpf_cmd[] = { }; static DEFINE_STRARRAY(bpf_cmd, "BPF_"); -static const char *fsmount_flags[] = { - [1] = "CLOEXEC", -}; -static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_"); - #include "trace/beauty/generated/fsconfig_arrays.c" static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_"); @@ -1202,7 +1197,9 @@ static const struct syscall_fmt syscall_fmts[] = { { .name = "fsconfig", .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, }, { .name = "fsmount", - .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags), + .arg = { [1] = { .scnprintf = SCA_FSMOUNT_FLAGS, /* fsmount_flags */ + .strtoul = STUL_STRARRAYS, + .show_zero = true, }, [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, }, { .name = "fspick", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 0a07ad158f87..a90c35fa5c12 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -179,6 +179,9 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg); #define SCA_FLOCK syscall_arg__scnprintf_flock +size_t syscall_arg__scnprintf_fsmount_flags(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_FSMOUNT_FLAGS syscall_arg__scnprintf_fsmount_flags + size_t syscall_arg__scnprintf_fsmount_attr_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_FSMOUNT_ATTR_FLAGS syscall_arg__scnprintf_fsmount_attr_flags diff --git a/tools/perf/trace/beauty/fsmount.c b/tools/perf/trace/beauty/fsmount.c index 28c2c16fc1a8..179e649fc72a 100644 --- a/tools/perf/trace/beauty/fsmount.c +++ b/tools/perf/trace/beauty/fsmount.c @@ -16,9 +16,25 @@ #define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ #endif -static size_t fsmount__scnprintf_attr_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) + +static size_t fsmount__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) { #include "trace/beauty/generated/fsmount_arrays.c" + static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_"); + + return strarray__scnprintf_flags(&strarray__fsmount_flags, bf, size, show_prefix, flags); +} + +size_t syscall_arg__scnprintf_fsmount_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long flags = arg->val; + + return fsmount__scnprintf_flags(flags, bf, size, arg->show_string_prefix); +} + +static size_t fsmount__scnprintf_attr_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) +{ +#include "trace/beauty/generated/fsmount_attr_arrays.c" static DEFINE_STRARRAY(fsmount_attr_flags, "MOUNT_ATTR_"); size_t printed = 0; diff --git a/tools/perf/trace/beauty/fsmount.sh b/tools/perf/trace/beauty/fsmount.sh index 6b67a54cdeee..6d1e80bc15e4 100755 --- a/tools/perf/trace/beauty/fsmount.sh +++ b/tools/perf/trace/beauty/fsmount.sh @@ -9,14 +9,9 @@ fi linux_mount=${beauty_uapi_linux_dir}/mount.h -# Remove MOUNT_ATTR_RELATIME as it is zeros, handle it a special way in the beautifier -# Only handle MOUNT_ATTR_ followed by a capital letter/num as __ is special case -# for things like MOUNT_ATTR__ATIME that is a mask for the possible ATIME handling -# bits. Special case it as well in the beautifier - -printf "static const char *fsmount_attr_flags[] = {\n" -regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOUNT_ATTR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' -grep -E $regex ${linux_mount} | grep -v MOUNT_ATTR_RELATIME | \ +printf "static const char *fsmount_flags[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+FSMOUNT_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' +grep -E $regex ${linux_mount} | \ sed -r "s/$regex/\2 \1/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n" printf "};\n" diff --git a/tools/perf/trace/beauty/fsmount_attr.sh b/tools/perf/trace/beauty/fsmount_attr.sh new file mode 100644 index 000000000000..6b67a54cdeee --- /dev/null +++ b/tools/perf/trace/beauty/fsmount_attr.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +if [ $# -ne 1 ] ; then + beauty_uapi_linux_dir=tools/perf/trace/beauty/include/uapi/linux/ +else + beauty_uapi_linux_dir=$1 +fi + +linux_mount=${beauty_uapi_linux_dir}/mount.h + +# Remove MOUNT_ATTR_RELATIME as it is zeros, handle it a special way in the beautifier +# Only handle MOUNT_ATTR_ followed by a capital letter/num as __ is special case +# for things like MOUNT_ATTR__ATIME that is a mask for the possible ATIME handling +# bits. Special case it as well in the beautifier + +printf "static const char *fsmount_attr_flags[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOUNT_ATTR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' +grep -E $regex ${linux_mount} | grep -v MOUNT_ATTR_RELATIME | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n" +printf "};\n" -- cgit v1.2.3 From 5a433107fab621f4e7379ccba6e52b5b1601046c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 10 May 2026 13:23:46 -0700 Subject: perf trace: Update beautifier script for clone flags According to the change in the sched.h, update the script to generate the flags array like below. Note that '+1' is needed to detect bitmask pattern at index 0. $ cat tools/perf/trace/beauty/generated/clone_flags_array.c static const char *clone_flags[] = { [ilog2(0x00000100) + 1] = "VM", [ilog2(0x00000200) + 1] = "FS", [ilog2(0x00000400) + 1] = "FILES", [ilog2(0x00000800) + 1] = "SIGHAND", [ilog2(0x00001000) + 1] = "PIDFD", [ilog2(0x00002000) + 1] = "PTRACE", [ilog2(0x00004000) + 1] = "VFORK", [ilog2(0x00008000) + 1] = "PARENT", [ilog2(0x00010000) + 1] = "THREAD", [ilog2(0x00020000) + 1] = "NEWNS", [ilog2(0x00040000) + 1] = "SYSVSEM", [ilog2(0x00080000) + 1] = "SETTLS", [ilog2(0x00100000) + 1] = "PARENT_SETTID", [ilog2(0x00200000) + 1] = "CHILD_CLEARTID", [ilog2(0x00400000) + 1] = "DETACHED", [ilog2(0x00800000) + 1] = "UNTRACED", [ilog2(0x01000000) + 1] = "CHILD_SETTID", [ilog2(0x02000000) + 1] = "NEWCGROUP", [ilog2(0x04000000) + 1] = "NEWUTS", [ilog2(0x08000000) + 1] = "NEWIPC", [ilog2(0x10000000) + 1] = "NEWUSER", [ilog2(0x20000000) + 1] = "NEWPID", [ilog2(0x40000000) + 1] = "NEWNET", [ilog2(0x80000000) + 1] = "IO", [ilog2(0x00000080) + 1] = "NEWTIME", [32 + 1] = "CLEAR_SIGHAND", [33 + 1] = "INTO_CGROUP", [34 + 1] = "AUTOREAP", [35 + 1] = "NNP", [36 + 1] = "PIDFD_AUTOKILL", [37 + 1] = "EMPTY_MNTNS", }; This was found by Sashiko during review. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim --- tools/perf/trace/beauty/clone.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/trace/beauty/clone.sh b/tools/perf/trace/beauty/clone.sh index 18b6c0d75693..98cb1f8d4a6f 100755 --- a/tools/perf/trace/beauty/clone.sh +++ b/tools/perf/trace/beauty/clone.sh @@ -14,4 +14,8 @@ regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+CLONE_([^_]+[[:alnum:]_]+)[[: grep -E $regex ${linux_sched} | \ sed -r "s/$regex/\2 \1/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+CLONE_([^_]+[[:alnum:]_]+)[[:space:]]+\(1ULL[[:space:]]*<<[[:space:]]*([[:digit:]]+)\)[[:space:]]*.*' +grep -E $regex ${linux_sched} | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[%s + 1] = \"%s\",\n" printf "};\n" -- cgit v1.2.3 From 0c0dddc07d272a8d25922e48041e8e4d2434df7e Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Wed, 13 May 2026 15:26:10 +0200 Subject: ovpn: disable BHs when updating device stats ovpn updates dev->dstats from both process and softirq contexts. In particular, TCP paths may run from socket callbacks, workqueues or strparser work, while UDP receive and ovpn's ndo_start_xmit path may update the same per-device dstats from BH context. Add ovpn device drop-stat helpers that disable BHs around dev_dstats_rx_dropped() and dev_dstats_tx_dropped(), and use them for drop accounting. The successful RX dev_dstats_rx_add() update is already covered by the BH-disabled section around gro_cells_receive(). For the successful TCP TX dev_dstats_tx_add() update, replace the existing preempt-disabled section with a BH-disabled one. Fixes: 11851cbd60ea ("ovpn: implement TCP transport") Signed-off-by: Ralf Lici Signed-off-by: Antonio Quartulli --- drivers/net/ovpn/io.c | 12 ++++++------ drivers/net/ovpn/stats.h | 16 ++++++++++++++++ drivers/net/ovpn/tcp.c | 10 +++++----- drivers/net/ovpn/udp.c | 2 +- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c index 22c555dd962e..a6b777a9c2d9 100644 --- a/drivers/net/ovpn/io.c +++ b/drivers/net/ovpn/io.c @@ -201,7 +201,7 @@ void ovpn_decrypt_post(void *data, int ret) skb = NULL; drop: if (unlikely(skb)) - dev_dstats_rx_dropped(peer->ovpn->dev); + ovpn_dev_dstats_rx_dropped(peer->ovpn->dev); kfree_skb(skb); drop_nocount: if (likely(peer)) @@ -225,7 +225,7 @@ void ovpn_recv(struct ovpn_peer *peer, struct sk_buff *skb) net_info_ratelimited("%s: no available key for peer %u, key-id: %u\n", netdev_name(peer->ovpn->dev), peer->id, key_id); - dev_dstats_rx_dropped(peer->ovpn->dev); + ovpn_dev_dstats_rx_dropped(peer->ovpn->dev); kfree_skb(skb); ovpn_peer_put(peer); return; @@ -301,7 +301,7 @@ err_unlock: rcu_read_unlock(); err: if (unlikely(skb)) - dev_dstats_tx_dropped(peer->ovpn->dev); + ovpn_dev_dstats_tx_dropped(peer->ovpn->dev); if (likely(peer)) ovpn_peer_put(peer); if (likely(ks)) @@ -343,7 +343,7 @@ static void ovpn_send(struct ovpn_priv *ovpn, struct sk_buff *skb, */ skb_list_walk_safe(skb, curr, next) { if (unlikely(!ovpn_encrypt_one(peer, curr))) { - dev_dstats_tx_dropped(ovpn->dev); + ovpn_dev_dstats_tx_dropped(ovpn->dev); kfree_skb(curr); } } @@ -414,7 +414,7 @@ netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(!curr)) { net_err_ratelimited("%s: skb_share_check failed for payload packet\n", netdev_name(dev)); - dev_dstats_tx_dropped(ovpn->dev); + ovpn_dev_dstats_tx_dropped(ovpn->dev); continue; } @@ -440,7 +440,7 @@ netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev) drop: ovpn_peer_put(peer); drop_no_peer: - dev_dstats_tx_dropped(ovpn->dev); + ovpn_dev_dstats_tx_dropped(ovpn->dev); skb_tx_error(skb); kfree_skb_list(skb); return NETDEV_TX_OK; diff --git a/drivers/net/ovpn/stats.h b/drivers/net/ovpn/stats.h index 53433d8b6c33..3a45b97c0056 100644 --- a/drivers/net/ovpn/stats.h +++ b/drivers/net/ovpn/stats.h @@ -11,6 +11,8 @@ #ifndef _NET_OVPN_OVPNSTATS_H_ #define _NET_OVPN_OVPNSTATS_H_ +#include + /* one stat */ struct ovpn_peer_stat { atomic64_t bytes; @@ -44,4 +46,18 @@ static inline void ovpn_peer_stats_increment_tx(struct ovpn_peer_stats *stats, ovpn_peer_stats_increment(&stats->tx, n); } +static inline void ovpn_dev_dstats_tx_dropped(struct net_device *dev) +{ + local_bh_disable(); + dev_dstats_tx_dropped(dev); + local_bh_enable(); +} + +static inline void ovpn_dev_dstats_rx_dropped(struct net_device *dev) +{ + local_bh_disable(); + dev_dstats_rx_dropped(dev); + local_bh_enable(); +} + #endif /* _NET_OVPN_OVPNSTATS_H_ */ diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c index 82809b016f0a..433bd07a4f1b 100644 --- a/drivers/net/ovpn/tcp.c +++ b/drivers/net/ovpn/tcp.c @@ -152,7 +152,7 @@ err: if (WARN_ON(!ovpn_peer_hold(peer))) goto err_nopeer; schedule_work(&peer->tcp.defer_del_work); - dev_dstats_rx_dropped(peer->ovpn->dev); + ovpn_dev_dstats_rx_dropped(peer->ovpn->dev); err_nopeer: kfree_skb(skb); } @@ -298,9 +298,9 @@ static void ovpn_tcp_send_sock(struct ovpn_peer *peer, struct sock *sk) } while (peer->tcp.out_msg.len > 0); if (!peer->tcp.out_msg.len) { - preempt_disable(); + local_bh_disable(); dev_dstats_tx_add(peer->ovpn->dev, skb->len); - preempt_enable(); + local_bh_enable(); } kfree_skb(peer->tcp.out_msg.skb); @@ -331,7 +331,7 @@ static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sock *sk, ovpn_tcp_send_sock(peer, sk); if (peer->tcp.out_msg.skb) { - dev_dstats_tx_dropped(peer->ovpn->dev); + ovpn_dev_dstats_tx_dropped(peer->ovpn->dev); kfree_skb(skb); return; } @@ -353,7 +353,7 @@ void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct sock *sk, if (sock_owned_by_user(sk)) { if (skb_queue_len(&peer->tcp.out_queue) >= READ_ONCE(net_hotdata.max_backlog)) { - dev_dstats_tx_dropped(peer->ovpn->dev); + ovpn_dev_dstats_tx_dropped(peer->ovpn->dev); kfree_skb(skb); goto unlock; } diff --git a/drivers/net/ovpn/udp.c b/drivers/net/ovpn/udp.c index 059e896b4a2f..8811aa9eedeb 100644 --- a/drivers/net/ovpn/udp.c +++ b/drivers/net/ovpn/udp.c @@ -125,7 +125,7 @@ static int ovpn_udp_encap_recv(struct sock *sk, struct sk_buff *skb) return 0; drop: - dev_dstats_rx_dropped(ovpn->dev); + ovpn_dev_dstats_rx_dropped(ovpn->dev); drop_noovpn: kfree_skb(skb); return 0; -- cgit v1.2.3 From 51f57607e30bee282a1d40845f89a311cbb26481 Mon Sep 17 00:00:00 2001 From: Chen-Shi-Hong Date: Thu, 14 May 2026 23:39:13 +0800 Subject: docs: hwmon: sy7636a: fix temperature sysfs attribute name The hwmon sysfs naming convention uses temp[1-*]_input for temperature channels. Documentation/hwmon/sy7636a-hwmon.rst currently documents temp0_input, while the driver uses the standard hwmon temperature channel interface. Update the documentation to use temp1_input. Signed-off-by: Chen-Shi-Hong Link: https://lore.kernel.org/r/20260514154108.1937-1-eric039eric@gmail.com Signed-off-by: Guenter Roeck --- Documentation/hwmon/sy7636a-hwmon.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/hwmon/sy7636a-hwmon.rst b/Documentation/hwmon/sy7636a-hwmon.rst index 0143ce0e5db7..03d866aba6e8 100644 --- a/Documentation/hwmon/sy7636a-hwmon.rst +++ b/Documentation/hwmon/sy7636a-hwmon.rst @@ -22,5 +22,5 @@ The following sensors are supported sysfs-Interface --------------- -temp0_input +temp1_input - Temperature of external NTC (milli-degree C) -- cgit v1.2.3 From d2bfdbb69cf87676981b1043010b6224d84c6d3a Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Tue, 12 May 2026 22:28:07 +0800 Subject: rds_tcp: close NULL deref window in rds_tcp_set_callbacks rds_tcp_set_callbacks() links a new rds_tcp_connection onto rds_tcp_tc_list under rds_tcp_tc_list_lock. It releases the lock, then assigns tc->t_sock = sock outside the lock. rds_tcp_tc_info() and rds6_tcp_tc_info() walk rds_tcp_tc_list under the same lock. Both dereference tc->t_sock->sk without a NULL check. A reader can acquire rds_tcp_tc_list_lock between the writer's spin_unlock and the t_sock store. It then sees a list entry whose t_sock is NULL. The dereference of tc->t_sock->sk is a NULL access. Move tc->t_sock = sock inside rds_tcp_tc_list_lock, before list_add_tail. A reader holding the lock then observes the linkage and the t_sock store together. The restore path is safe. rds_tcp_restore_callbacks() does list_del_init inside the lock. The matching tc->t_sock = NULL after unlink is harmless to readers holding the lock. Fixes: 70041088e3b9 ("RDS: Add TCP transport to RDS") Suggested-by: Simon Horman Signed-off-by: Maoyi Xie Reviewed-by: Allison Henderson Link: https://patch.msgid.link/20260512142807.1855619-1-maoyi.xie@ntu.edu.sg Signed-off-by: Jakub Kicinski --- net/rds/tcp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 654e23d13e3d..5830b31a1f37 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -198,8 +198,13 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc); write_lock_bh(&sock->sk->sk_callback_lock); - /* done under the callback_lock to serialize with write_space */ + /* done under the callback_lock to serialize with write_space. + * Set t_sock inside rds_tcp_tc_list_lock so readers walking + * rds_tcp_tc_list under the same lock cannot observe an + * entry whose t_sock is NULL. + */ spin_lock(&rds_tcp_tc_list_lock); + tc->t_sock = sock; list_add_tail(&tc->t_list_item, &rds_tcp_tc_list); #if IS_ENABLED(CONFIG_IPV6) rds6_tcp_tc_count++; @@ -211,8 +216,6 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) /* accepted sockets need our listen data ready undone */ if (sock->sk->sk_data_ready == rds_tcp_listen_data_ready) sock->sk->sk_data_ready = sock->sk->sk_user_data; - - tc->t_sock = sock; if (!tc->t_rtn) tc->t_rtn = net_generic(sock_net(sock->sk), rds_tcp_netid); tc->t_cpath = cp; -- cgit v1.2.3 From 7d260c5d2d89eb2c8c528d54b576b3aae3e20231 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 13 May 2026 12:22:26 +0100 Subject: net/mlx5e: Fix use-after-free in mlx5e_tx_reporter_timeout_recover mlx5e_tx_reporter_timeout_recover() accesses sq->netdev after mlx5e_safe_reopen_channels() has torn down and freed the channel (and its embedded SQs). Replace the three sq->netdev references with priv->netdev which is safe because priv outlives channel teardown. The netdev_err() call already used priv->netdev for this reason; make the trylock/unlock and health_channel_eq_recover calls consistent. This fixes the following KASAN splat: BUG: KASAN: use-after-free in mlx5e_tx_reporter_timeout_recover+0x1dd/0x360 [mlx5_core] Read of size 8 at addr ffff889860ed0b28 by task kworker/u113:2/5277 Call Trace: mlx5e_tx_reporter_timeout_recover+0x1dd/0x360 [mlx5_core] devlink_health_reporter_recover+0xa2/0x150 devlink_health_report+0x254/0x7c0 mlx5e_reporter_tx_timeout+0x297/0x380 [mlx5_core] mlx5e_tx_timeout_work+0x109/0x170 [mlx5_core] process_one_work+0x677/0xf20 worker_thread+0x51f/0xd90 kthread+0x3a5/0x810 ret_from_fork+0x208/0x400 ret_from_fork_asm+0x1a/0x30 Fixes: 83ac0304a2d7 ("net/mlx5e: Fix deadlocks between devlink and netdev instance locks") Cc: stable@vger.kernel.org Reviewed-by: Cosmin Ratiu Reviewed-by: Tariq Toukan Signed-off-by: Matt Fleming Link: https://patch.msgid.link/20260513112226.140512-1-matt@readmodwrite.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index afdeb1b3d425..8409ae73768f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -160,13 +160,13 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx) * channels are being closed for other reason and this work is not * relevant anymore. */ - while (!netdev_trylock(sq->netdev)) { + while (!netdev_trylock(priv->netdev)) { if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)) return 0; msleep(20); } - err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats); + err = mlx5e_health_channel_eq_recover(priv->netdev, eq, sq->cq.ch_stats); if (!err) { to_ctx->status = 0; /* this sq recovered */ goto out; @@ -186,7 +186,7 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx) "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", err); out: - netdev_unlock(sq->netdev); + netdev_unlock(priv->netdev); return err; } -- cgit v1.2.3 From f84eca5817390257cef78013d0112481c503b4a3 Mon Sep 17 00:00:00 2001 From: William Bowling Date: Wed, 13 May 2026 04:16:35 +0000 Subject: net: skbuff: preserve shared-frag marker during coalescing skb_try_coalesce() can attach paged frags from @from to @to. If @from has SKBFL_SHARED_FRAG set, the resulting @to skb can contain the same externally-owned or page-cache-backed frags, but the shared-frag marker is currently lost. That breaks the invariant relied on by later in-place writers. In particular, ESP input checks skb_has_shared_frag() before deciding whether an uncloned nonlinear skb can skip skb_cow_data(). If TCP receive coalescing has moved shared frags into an unmarked skb, ESP can see skb_has_shared_frag() as false and decrypt in place over page-cache backed frags. Propagate SKBFL_SHARED_FRAG when skb_try_coalesce() transfers paged frags. The tailroom copy path does not need the marker because it copies bytes into @to's linear data rather than transferring frag descriptors. Fixes: cef401de7be8 ("net: fix possible wrong checksum generation") Fixes: f4c50a4034e6 ("xfrm: esp: avoid in-place decrypt on shared skb frags") Signed-off-by: William Bowling Reviewed-by: Eric Dumazet Tested-by: Jiayuan Chen Link: https://patch.msgid.link/20260513041635.1289541-1-vakzz@zellic.io Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7dad68e3b518..9c4e8d331d6d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -6200,6 +6200,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, from_shinfo->frags, from_shinfo->nr_frags * sizeof(skb_frag_t)); to_shinfo->nr_frags += from_shinfo->nr_frags; + if (from_shinfo->nr_frags) + to_shinfo->flags |= from_shinfo->flags & SKBFL_SHARED_FRAG; if (!skb_cloned(from)) from_shinfo->nr_frags = 0; -- cgit v1.2.3 From e8fb3de2a8effcaf62bec2c56b93d8bb480371d1 Mon Sep 17 00:00:00 2001 From: Dawei Feng Date: Wed, 13 May 2026 23:13:20 +0800 Subject: octeontx2-pf: fix double free in rvu_rep_rsrc_init() rvu_rep_rsrc_init() allocates queue memory before calling otx2_init_hw_resources(). When hardware resource setup fails, otx2_init_hw_resources() already unwinds the partially initialized SQ, CQ, and aura state before returning an error. The representor error path then calls otx2_free_hw_resources() again and can free the same resources a second time. Fix this by splitting the cleanup labels so that a failure from otx2_init_hw_resources() only releases queue memory. Keep the otx2_free_hw_resources() call for failures that happen after hardware resource initialization completed successfully. The bug was first flagged by an experimental analysis tool we are developing for kernel memory-management bugs while analyzing v6.13-rc1. The tool is still under development and is not yet publicly available. Manual inspection confirms that the bug is still present in v7.1-rc3. Runtime validation was not performed because reproducing this path requires OcteonTX2 representor hardware. Fixes: 3937b7308d4f ("octeontx2-pf: Create representor netdev") Cc: stable@vger.kernel.org # v6.13+ Signed-off-by: Zilin Guan Signed-off-by: Dawei Feng Reviewed-by: Geetha sowjanya Link: https://patch.msgid.link/20260513151320.213260-1-dawei.feng@seu.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/rep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c index 94f155ffb17f..0f5d5642d3f7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c @@ -609,7 +609,7 @@ static int rvu_rep_rsrc_init(struct otx2_nic *priv) err = otx2_init_hw_resources(priv); if (err) - goto err_free_rsrc; + goto err_free_mem; /* Set maximum frame size allowed in HW */ err = otx2_hw_set_mtu(priv, priv->hw.max_mtu); @@ -621,6 +621,7 @@ static int rvu_rep_rsrc_init(struct otx2_nic *priv) err_free_rsrc: otx2_free_hw_resources(priv); +err_free_mem: otx2_free_queue_mem(qset); return err; } -- cgit v1.2.3 From f508262ae9f21fe0e6c0749948b9dc7dd5a62a70 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 13 May 2026 08:58:25 -0400 Subject: tls: Preserve sk_err across recvmsg() when data has been copied The sk_err check in tls_rx_rec_wait() consumes the error via sock_error(), which clears sk_err atomically. When the caller (tls_sw_recvmsg, tls_sw_splice_read, or tls_sw_read_sock) already has bytes copied to userspace, it returns those bytes and discards the error from this call. sk_err is now zero on the socket, so the next read syscall observes only RCV_SHUTDOWN and reports a clean EOF instead of the actual error (typically -ECONNRESET). The race is reachable when tls_read_flush_backlog()'s periodic sk_flush_backlog() triggers tcp_reset() in the middle of a multi-record read. Pass a has_copied flag to tls_rx_rec_wait(). When has_copied is false, consume sk_err via sock_error() as before. When has_copied is true, report the error from READ_ONCE() but leave sk_err set: the caller returns the byte count and discards the err from this call, and the next read syscall surfaces the preserved sk_err. This mirrors the tcp_recvmsg() preserve-and-surface pattern. The decrypt-abort path is unaffected: tls_err_abort() raises sk_err to EBADMSG after tls_rx_rec_wait() returns, and nothing on the caller's return path consumes it, so the EBADMSG surfaces on the next read. tls_sw_splice_read() passes has_copied=false: it processes one record per call, so no bytes have been copied within the function when tls_rx_rec_wait() runs. A reset that arrives between iterations of splice_direct_to_actor() (the sendfile() path) is still consumed by sock_error() in the later call, and the outer loop returns the prior iterations' byte count and drops the error. tcp_splice_read() exhibits the same pattern at the iteration boundary; addressing it belongs at the splice_direct_to_actor() layer and is out of scope here. Fixes: c46b01839f7a ("tls: rx: periodically flush socket backlog") Suggested-by: Jakub Kicinski Signed-off-by: Chuck Lever Link: https://patch.msgid.link/20260513125825.205189-1-cel@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 3bfdaf5e64f5..964ebc268ee4 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1366,9 +1366,14 @@ unlock: mutex_unlock(&tls_ctx->tx_lock); } +/* When has_copied is true the caller has already moved bytes to + * userspace. Report sk_err but leave it set so the next read + * surfaces it instead of a spurious EOF, otherwise sk_err is + * consumed via sock_error(). + */ static int tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, - bool released) + bool released, bool has_copied) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); @@ -1386,8 +1391,11 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, if (!sk_psock_queue_empty(psock)) return 0; - if (sk->sk_err) + if (sk->sk_err) { + if (has_copied) + return -READ_ONCE(sk->sk_err); return sock_error(sk); + } if (ret < 0) return ret; @@ -1423,7 +1431,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, } if (unlikely(!tls_strp_msg_load(&ctx->strp, released))) - return tls_rx_rec_wait(sk, psock, nonblock, false); + return tls_rx_rec_wait(sk, psock, nonblock, false, has_copied); return 1; } @@ -2110,7 +2118,7 @@ int tls_sw_recvmsg(struct sock *sk, int to_decrypt, chunk; err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT, - released); + released, !!(decrypted + copied)); if (err <= 0) { if (psock) { chunk = sk_msg_recvmsg(sk, psock, msg, len, @@ -2297,7 +2305,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct tls_decrypt_arg darg; err = tls_rx_rec_wait(sk, NULL, flags & SPLICE_F_NONBLOCK, - true); + true, false); if (err <= 0) goto splice_read_end; @@ -2383,7 +2391,7 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, } else { struct tls_decrypt_arg darg; - err = tls_rx_rec_wait(sk, NULL, true, released); + err = tls_rx_rec_wait(sk, NULL, true, released, !!copied); if (err <= 0) goto read_sock_end; -- cgit v1.2.3 From b96fe527935b0671194bc436d7d78d3b0f87b2e1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 13 May 2026 18:26:12 +0200 Subject: ASoC: cs35l56: Drop malformed default N from Kconfig First of all, it has to be 'default n' (small letter n), otherwise it looks for CONFIG_N which is absent and in case of appearance will enable something unrelated. Second and most important is that 'n' *is* the default 'default' already. Hence just drop malformed line. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20260513162612.365729-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index cf94a1c756e0..269c31ce0814 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -898,7 +898,6 @@ menu "CS35L56 driver options" config SND_SOC_CS35L56_CAL_DEBUGFS bool "CS35L56 create debugfs for factory calibration" - default N depends on DEBUG_FS select SND_SOC_CS35L56_CAL_DEBUGFS_COMMON help @@ -909,7 +908,6 @@ config SND_SOC_CS35L56_CAL_DEBUGFS config SND_SOC_CS35L56_CAL_SET_CTRL bool "CS35L56 ALSA control to restore factory calibration" - default N select SND_SOC_CS35L56_CAL_DEBUGFS_COMMON help Allow restoring factory calibration data through an ALSA @@ -923,7 +921,6 @@ config SND_SOC_CS35L56_CAL_SET_CTRL config SND_SOC_CS35L56_CAL_PERFORM_CTRL bool "CS35L56 ALSA control to perform factory calibration" - default N select SND_SOC_CS35L56_CAL_DEBUGFS_COMMON help Allow performing factory calibration data through an ALSA -- cgit v1.2.3 From c9d08c8c4c5006d71b3c3c3c0dc41ebc46931951 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 13 May 2026 09:27:37 +0300 Subject: net/mlx5e: Don't leak RSS context in case of error If mlx5e_rx_res_rss_set_rxfh() fails during mlx5e_create_rxfh_context(), the RSS context is not cleaned up. This leaves a stale entry in 'res->rss[rss_idx]' that occupies a context slot. Destroy the RSS context before returning the error. Fixes: 6c2509d44636 ("net/mlx5e: Add error flow for ethtool -X command") Signed-off-by: Gal Pressman Reviewed-by: Nimrod Oren Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260513062737.333259-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index bb61e2179078..99a0034b9b20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1574,8 +1574,11 @@ static int mlx5e_create_rxfh_context(struct net_device *dev, rxfh->indir, rxfh->key, hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc, rxfh->input_xfrm == RXH_XFRM_NO_CHANGE ? NULL : &symmetric); - if (err) + if (err) { + WARN_ON(mlx5e_rx_res_rss_destroy(priv->rx_res, + rxfh->rss_context)); goto unlock; + } mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rxfh->rss_context, ethtool_rxfh_context_indir(ctx), -- cgit v1.2.3 From 8d0a5af8b1ba598e7340761729801624e7a9330e Mon Sep 17 00:00:00 2001 From: Jeroen Massar Date: Wed, 13 May 2026 09:33:02 +0300 Subject: net/mlx5: Do not restore destination-less TC rules After IPsec policy/state TX rules are added, any TC flow rule, which forwards packets to uplink, is modified to forward to IPsec TX tables. As these tables are destroyed dynamically, whenever there is no reference to them, the destinations of this kind of rules must be restored to uplink, unless there is no destination for that rule. The flow rules FLOW_ACTION_ACCEPT, DROP, TRAP, GOTO and SAMPLE do not have a destination port, and thus out_count = 0. At cleanup time of the rules in mlx5_esw_ipsec_modify_flow_dests we call mlx5_eswitch_restore_ipsec_rule but as the above types do not have a destination we get an underflow of out_count, as the port is passed, which is esw_attr->out_count - 1. This change avoids calling mlx5_eswitch_restore_ipsec_rule when there are no output destinations and thus avoids the underflow. Fixes: d1569537a837 ("net/mlx5e: Modify and restore TC rules for IPSec TX rules") Signed-off-by: Jeroen Massar Reviewed-by: Jianbo Liu Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260513063302.333761-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 3cfe743610d3..ab50d2c734ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -142,7 +142,8 @@ static int mlx5_esw_ipsec_modify_flow_dests(struct mlx5_eswitch *esw, attr = flow->attr; esw_attr = attr->esw_attr; - if (esw_attr->out_count - esw_attr->split_count > 1) + if (!esw_attr->out_count || + esw_attr->out_count - esw_attr->split_count > 1) return 0; err = mlx5_eswitch_restore_ipsec_rule(esw, flow->rule[0], esw_attr, -- cgit v1.2.3 From c6df9a65cbb0fe7808a4b2872095f4c849b3196a Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Wed, 13 May 2026 09:36:40 +0300 Subject: net/mlx5: Skip disabled vports when setting max TX speed When setting vports max TX speed during LAG activation or bond state changes, the code iterates over all eswitch vports. However, some vports may not be enabled yet. Skip vports that are not enabled to avoid sending FW commands for uninitialized vports. Save the LAG aggregated speed in the vport struct so it can be applied when the vport is enabled later. Fixes: 50f1d188c580 ("net/mlx5: Propagate LAG effective max_tx_speed to vports") Signed-off-by: Or Har-Toov Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260513063640.334132-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 21 +++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 5 +++++ 3 files changed, 27 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 123c96716a54..7c8311f41232 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -908,6 +908,24 @@ static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport esw_vport_cleanup_acl(esw, vport); } +static void mlx5_esw_vport_set_max_tx_speed(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int ret; + + if (!MLX5_CAP_ESW(esw->dev, esw_vport_state_max_tx_speed)) + return; + + ret = mlx5_modify_vport_max_tx_speed(esw->dev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + vport->vport, true, + vport->agg_max_tx_speed); + if (ret) + mlx5_core_dbg(esw->dev, + "Failed to set vport %d speed %d, err=%d\n", + vport->vport, vport->agg_max_tx_speed, ret); +} + int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, enum mlx5_eswitch_vport_event enabled_events) { @@ -948,6 +966,9 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, esw->enabled_vports++; esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); + + if (vport->agg_max_tx_speed) + mlx5_esw_vport_set_max_tx_speed(esw, vport); done: mutex_unlock(&esw->state_lock); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 5128f5020dae..e9cf7c592ce9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -247,6 +247,7 @@ struct mlx5_vport { enum mlx5_eswitch_vport_event enabled_events; int index; struct mlx5_devlink_port *dl_port; + u32 agg_max_tx_speed; }; struct mlx5_esw_indir_table; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 449e4bd86c06..f8e70ac5a85b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1274,6 +1274,11 @@ static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev, if (vport->vport == MLX5_VPORT_UPLINK) continue; + vport->agg_max_tx_speed = speed; + + if (!vport->enabled) + continue; + ret = mlx5_modify_vport_max_tx_speed(mdev, op_mod, vport->vport, true, speed); if (ret) -- cgit v1.2.3 From 5db89c99566fc4728cc92e941d8e1975711e24b5 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Wed, 13 May 2026 21:37:39 -0400 Subject: net: ifb: report ethtool stats over num_tx_queues ifb_dev_init() allocates dp->tx_private to dev->num_tx_queues entries via kzalloc_objs(*txp, dev->num_tx_queues). Both IFB per-queue RX and TX stats live in those entries: ifb_xmit() updates txp->rx_stats using the skb queue mapping, ifb_ri_tasklet() updates txp->tx_stats, and ifb_stats64() aggregates both over dev->num_tx_queues. The ethtool stats callbacks instead size and walk the per-queue stats with dev->real_num_rx_queues and dev->real_num_tx_queues. With an asymmetric device where the RX queue count exceeds the TX queue count, for example: ip link add name ifb10 numtxqueues 1 numrxqueues 8 type ifb ethtool -S ifb10 ifb_get_ethtool_stats() indexes past the tx_private allocation and copies adjacent slab data through ETHTOOL_GSTATS. Use dev->num_tx_queues consistently for the stats strings, the stats count, and the stats data walks. This reports one RX stats group and one TX stats group for each backing ifb_q_private entry, which is the queue set IFB can actually populate. Reproduced under UML+KASAN at v7.1-rc2: BUG: KASAN: slab-out-of-bounds in ifb_fill_stats_data+0x3c/0xae Read of size 8 at addr 0000000062dbd228 by task ethtool/36 ifb_fill_stats_data+0x3c/0xae ifb_get_ethtool_stats+0xc0/0x129 __dev_ethtool+0x1ca5/0x363c dev_ethtool+0x123/0x1b3 dev_ioctl+0x56c/0x744 sock_do_ioctl+0x15f/0x1b2 sock_ioctl+0x4d5/0x50a sys_ioctl+0xd8b/0xde9 With the patch applied, the same UML+KASAN repro is silent and ethtool -S ifb10 reports only the stats backed by the single allocated tx_private entry. Fixes: a21ee5b2fcb8 ("net: ifb: support ethtools stats") Cc: stable@vger.kernel.org Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260514013739.3549624-1-michael.bommarito@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ifb.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 5407d2ed71b3..43aa1bfd41cf 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -211,12 +211,12 @@ static void ifb_get_strings(struct net_device *dev, u32 stringset, u8 *buf) switch (stringset) { case ETH_SS_STATS: - for (i = 0; i < dev->real_num_rx_queues; i++) + for (i = 0; i < dev->num_tx_queues; i++) for (j = 0; j < IFB_Q_STATS_LEN; j++) ethtool_sprintf(&p, "rx_queue_%u_%.18s", i, ifb_q_stats_desc[j].desc); - for (i = 0; i < dev->real_num_tx_queues; i++) + for (i = 0; i < dev->num_tx_queues; i++) for (j = 0; j < IFB_Q_STATS_LEN; j++) ethtool_sprintf(&p, "tx_queue_%u_%.18s", i, ifb_q_stats_desc[j].desc); @@ -229,8 +229,7 @@ static int ifb_get_sset_count(struct net_device *dev, int sset) { switch (sset) { case ETH_SS_STATS: - return IFB_Q_STATS_LEN * (dev->real_num_rx_queues + - dev->real_num_tx_queues); + return IFB_Q_STATS_LEN * dev->num_tx_queues * 2; default: return -EOPNOTSUPP; } @@ -262,12 +261,12 @@ static void ifb_get_ethtool_stats(struct net_device *dev, struct ifb_q_private *txp; int i; - for (i = 0; i < dev->real_num_rx_queues; i++) { + for (i = 0; i < dev->num_tx_queues; i++) { txp = dp->tx_private + i; ifb_fill_stats_data(&data, &txp->rx_stats); } - for (i = 0; i < dev->real_num_tx_queues; i++) { + for (i = 0; i < dev->num_tx_queues; i++) { txp = dp->tx_private + i; ifb_fill_stats_data(&data, &txp->tx_stats); } -- cgit v1.2.3 From c996a4418dd4ee45cd086586c04a1103e8160308 Mon Sep 17 00:00:00 2001 From: Ingyu Jang Date: Fri, 15 May 2026 03:52:15 +0900 Subject: ASoC: ti: omap-dmic: Fix IS_ERR() vs NULL check bug in omap_dmic_select_fclk() clk_get_parent() returns NULL when the clock has no parent (or when the input clk is NULL); it never returns an ERR_PTR. The current IS_ERR(mux) check therefore never triggers - a NULL return falls through silently to clk_set_parent(NULL, parent_clk), which simply fails with -EINVAL. Use a NULL check so the dedicated error path runs and the prior clk_get() reference is released via clk_put(). Signed-off-by: Ingyu Jang Acked-by: Sen Wang Link: https://patch.msgid.link/20260514185215.3753998-1-ingyujang25@korea.ac.kr Signed-off-by: Mark Brown --- sound/soc/ti/omap-dmic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/ti/omap-dmic.c b/sound/soc/ti/omap-dmic.c index fb92bb88eb5c..f6c393c9489d 100644 --- a/sound/soc/ti/omap-dmic.c +++ b/sound/soc/ti/omap-dmic.c @@ -328,7 +328,7 @@ static int omap_dmic_select_fclk(struct omap_dmic *dmic, int clk_id, } mux = clk_get_parent(dmic->fclk); - if (IS_ERR(mux)) { + if (!mux) { dev_err(dmic->dev, "can't get fck mux parent\n"); clk_put(parent_clk); return -ENODEV; -- cgit v1.2.3 From 9c0f5bbff146f09f449dd528addeabfb68aef997 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Thu, 14 May 2026 16:18:54 +0100 Subject: ASoC: cs35l56: Log SoundWire status updates only on changes The SoundWire slave update_status() callback can be invoked when the status has not changed. To prevent large amounts of log noise with debug enabled, log them only when the status changes. This also helps with understanding them, because they now log an actual change in state. Signed-off-by: Simon Trimmer Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260514151854.695145-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-sdw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c index 9dc47fec1ea0..d344217de7aa 100644 --- a/sound/soc/codecs/cs35l56-sdw.c +++ b/sound/soc/codecs/cs35l56-sdw.c @@ -385,18 +385,19 @@ static int cs35l56_sdw_update_status(struct sdw_slave *peripheral, switch (status) { case SDW_SLAVE_ATTACHED: - dev_dbg(cs35l56->base.dev, "%s: ATTACHED\n", __func__); cs35l56->sdw_in_clock_stop_1 = false; if (cs35l56->sdw_attached) break; + dev_dbg(cs35l56->base.dev, "%s: ATTACHED\n", __func__); if (!cs35l56->base.init_done || cs35l56->soft_resetting) cs35l56_sdw_init(peripheral); cs35l56->sdw_attached = true; break; case SDW_SLAVE_UNATTACHED: - dev_dbg(cs35l56->base.dev, "%s: UNATTACHED\n", __func__); + if (cs35l56->sdw_attached) + dev_dbg(cs35l56->base.dev, "%s: UNATTACHED\n", __func__); cs35l56->sdw_attached = false; break; default: -- cgit v1.2.3 From 6ea68a8dc7d2711504d944811981a5304af7d7a9 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 11 May 2026 12:53:17 -0500 Subject: scsi: sd: Fix return code handling in sd_spinup_disk() As found by smatch-ci, scsi_execute_cmd() can return negative or positve values so we should use a int instead of unsigned int. Fixes: b4d0c33a32c3 ("scsi: sd: Fix sshdr use in sd_spinup_disk") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-scsi/agFbI7E6JQwd3wGW@stanley.mountain/T/#u Signed-off-by: Mike Christie Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20260511175317.114007-1-michael.christie@oracle.com Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index adc3fa55ca2c..599e75f33334 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2476,8 +2476,7 @@ sd_spinup_disk(struct scsi_disk *sdkp) { static const u8 cmd[10] = { TEST_UNIT_READY }; unsigned long spintime_expire = 0; - int spintime, sense_valid = 0; - unsigned int the_result; + int the_result, spintime, sense_valid = 0; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { /* Do not retry Medium Not Present */ -- cgit v1.2.3 From b52a8d52c3125ec9a93106ed816582368de34426 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 19 Apr 2026 17:04:20 -0400 Subject: scsi: isci: Fix use-after-free in device removal path The ISCI completion tasklet is initialized in isci_host_alloc() (drivers/scsi/isci/init.c:496) and scheduled from both MSI-X and legacy interrupt handlers (drivers/scsi/isci/host.c:223,613). isci_host_deinit() stops the controller and waits for stop completion, but it never kills completion_tasklet before teardown continues. A top-of-function tasklet_kill() is not sufficient here: interrupts are only disabled when isci_host_stop_complete() runs, so until wait_for_stop() returns the IRQ handlers can still requeue the tasklet. The tasklet callback also re-enables interrupts after draining completions, so killing the tasklet before the source is quiesced leaves the same race open. Once wait_for_stop() returns, no further IRQ-driven scheduling can occur. Kill completion_tasklet there so teardown cannot race a queued tasklet running on a dead ihost. On remove or unload, the stale callback can otherwise dereference ihost and touch ihost->smu_registers after the host lifetime ends. A UML + KASAN analogue reproduced the failure class both with no tasklet_kill() and with tasklet_kill() placed before source quiesce, and stayed clean once the kill happened after quiescing the scheduling source. This mirrors commit f6ab594672d4 ("scsi: aic94xx: fix use-after-free in device removal path"), but ISCI needs the kill after wait_for_stop(). Fixes: 6f231dda6808 ("isci: Intel(R) C600 Series Chipset Storage Control Unit Driver") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Assisted-by: Codex:gpt-5-4 Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260419210420.2134639-1-michael.bommarito@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/isci/host.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c index 6d2f4c831df7..ff199bab5d1a 100644 --- a/drivers/scsi/isci/host.c +++ b/drivers/scsi/isci/host.c @@ -1252,6 +1252,9 @@ void isci_host_deinit(struct isci_host *ihost) wait_for_stop(ihost); + /* No further IRQ-driven scheduling can happen past wait_for_stop(). */ + tasklet_kill(&ihost->completion_tasklet); + /* phy stop is after controller stop to allow port and device to * go idle before shutting down the phys, but the expectation is * that i/o has been shut off well before we reach this -- cgit v1.2.3 From 0d435a7ebcd4e97e47673c1ab6fb27f973a053ec Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Wed, 13 May 2026 21:08:52 +0200 Subject: ASoC: codecs: fs210x: fix possible buffer overflow In fs210x_effect_scene_info(), a string was copied like this: strscpy(DST, SRC, strlen(SRC) + 1); A buffer overflow would happen if strlen(SRC) >= sizeof(DST). Actually, strscpy() must be used this way: strscpy(DST, SRC, sizeof(DST)); strscpy(DST, SRC); // defaults to sizeof(DST) Fixes: 756117701779 ("ASoC: codecs: Add FourSemi FS2104/5S audio amplifier driver") Signed-off-by: Alexander A. Klimov Link: https://patch.msgid.link/20260513190852.196723-2-grandmaster@al2klimov.de Signed-off-by: Mark Brown --- sound/soc/codecs/fs210x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/fs210x.c b/sound/soc/codecs/fs210x.c index e6195b71adad..eda716f817b5 100644 --- a/sound/soc/codecs/fs210x.c +++ b/sound/soc/codecs/fs210x.c @@ -968,7 +968,7 @@ static int fs210x_effect_scene_info(struct snd_kcontrol *kcontrol, if (scene->name) name = scene->name; - strscpy(uinfo->value.enumerated.name, name, strlen(name) + 1); + strscpy(uinfo->value.enumerated.name, name); return 0; } -- cgit v1.2.3 From b71cb088b2e3427924a470fc43e7aedb8a40d2e3 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Fri, 24 Apr 2026 09:39:23 +0800 Subject: scsi: target: tcm_loop: Fix NULL ptr dereference The TCM_LOOP LUN creation process calls device_register() to create the device, which in turn invokes tcm_loop_driver_probe() registered with the TCM_LOOP bus to create and register the scsi_host. However, if the scsi_host memory allocation fails or scsi_add_host() fails, the device_register() process still returns success. Subsequently, when the user binds the LUN to a specific backend device, it accesses the NULL or freed scsi_host. Crash Call Trace: RIP: 0010:scsi_is_host_device+0x7/0x20 scsi_alloc_target+0x32/0x2c0 __scsi_add_device+0x41/0xf0 scsi_add_device+0xd/0x30 tcm_loop_port_link+0x25/0x50 [tcm_loop] target_fabric_port_link+0x9c/0xb0 [target_core_mod] ... This issue is fixed by: 1. Setting the tcm_loop_hba's scsi_host to NULL, if scsi_add_host() fails. 2. Checking the tcm_loop_hba's scsi_host after device_register(). 3. Checking the tcm_loop_hba's scsi_host in tcm_loop_driver_remove(). Fixes: 3703b2c5d041 ("[SCSI] tcm_loop: Add multi-fabric Linux/SCSI LLD fabric module") Signed-off-by: Guixin Liu Reviewed-by: Mike Christie Link: https://patch.msgid.link/20260424013923.25998-1-kanie@linux.alibaba.com Signed-off-by: Martin K. Petersen --- drivers/target/loopback/tcm_loop.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index a25fd826b542..110297345751 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -393,6 +393,7 @@ static int tcm_loop_driver_probe(struct device *dev) if (error) { pr_err("%s: scsi_add_host failed\n", __func__); scsi_host_put(sh); + tl_hba->sh = NULL; return -ENODEV; } return 0; @@ -406,8 +407,10 @@ static void tcm_loop_driver_remove(struct device *dev) tl_hba = to_tcm_loop_hba(dev); sh = tl_hba->sh; - scsi_remove_host(sh); - scsi_host_put(sh); + if (sh) { + scsi_remove_host(sh); + scsi_host_put(sh); + } } static void tcm_loop_release_adapter(struct device *dev) @@ -436,6 +439,11 @@ static int tcm_loop_setup_hba_bus(struct tcm_loop_hba *tl_hba, int tcm_loop_host return -ENODEV; } + if (!tl_hba->sh) { + device_unregister(&tl_hba->dev); + return -ENODEV; + } + return 0; } -- cgit v1.2.3 From 6fc7e8a3b8115294f60f5c89de27330bf1b9c98e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 May 2026 13:46:13 -0300 Subject: iommu: Fix loss of errno on map failure for classic ops A typo, likely from a rebase, inverted the condition and caused errors to be lost. Fix it to be "if (ret)". This was breaking iommu_create_device_direct_mappings() on drivers that don't use iommupt and don't fully set up their domain in alloc_pages() (i.e., SMMUv2). In this case the first call of iommu_create_device_direct_mappings() should fail due to the incompletely initialized domain. Since it wrongly returns success, the second call to iommu_create_device_direct_mappings() doesn't happen and IOMMU_RESV_DIRECT is never set up. Cc: stable@vger.kernel.org Fixes: d6c65b0fd621 ("iommupt: Avoid rewalking during map") Reported-by: Josua Mayer Closes: https://lore.kernel.org/all/321c2e57-6a17-4aef-ba42-d2ebd577e472@solid-run.com/ Signed-off-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Reviewed-by: Samiullah Khawaja Reviewed-by: Mostafa Saleh Tested-by: Josua Mayer Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index e7bd28cc77ee..05f78cfd1f1d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2709,7 +2709,7 @@ int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, return 0; } ret = __iommu_map_domain_pgtbl(domain, iova, paddr, size, prot, gfp); - if (!ret) + if (ret) return ret; trace_map(iova, paddr, size); -- cgit v1.2.3 From b948a87228482235afbaf5f4d8037860b5c470fd Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 May 2026 13:46:14 -0300 Subject: iommu: Fix up map/unmap debugging for iommupt domains Sashiko noticed a few issues in this path, and a few more were found on review. Tidy them up further. These are intertwined because the debug code depends on some of the WARN_ONs to function right: Lift into iommu_map_nosync(): - The might_sleep_if() - 0 pgsize_bitmap WARN_ON - Promote the illegal domain->type to a WARN_ON - WARN_ON for illegal gfp flags Then remove the return 0 since it is now safe to call iommu_debug_map(). Lift into __iommu_unmap(): - 0 pgsize_bitmap WARN_ON - Promote the illegal domain->type to a WARN_ON - iommu_debug_unmap_begin() This now pairs with the unconditional iommu_debug_map() on the mapping side. Thus iommu debugging now works for iommupt along with some of the other debugging features. Fixes: 99fb8afa16ad ("iommupt: Directly call iommupt's unmap_range()") Fixes: d6c65b0fd621 ("iommupt: Avoid rewalking during map") Signed-off-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Reviewed-by: Samiullah Khawaja Reviewed-by: Mostafa Saleh Tested-by: Josua Mayer Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 05f78cfd1f1d..c21d1e3c4876 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2623,19 +2623,9 @@ static int __iommu_map_domain_pgtbl(struct iommu_domain *domain, size_t orig_size = size; int ret = 0; - might_sleep_if(gfpflags_allow_blocking(gfp)); - - if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) - return -EINVAL; - - if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL)) + if (WARN_ON(!ops->map_pages)) return -ENODEV; - /* Discourage passing strange GFP flags */ - if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | - __GFP_HIGHMEM))) - return -EINVAL; - /* find out the minimum page size supported */ min_pagesz = 1 << __ffs(domain->pgsize_bitmap); @@ -2697,6 +2687,15 @@ int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, struct pt_iommu *pt = iommupt_from_domain(domain); int ret; + might_sleep_if(gfpflags_allow_blocking(gfp)); + + /* Discourage passing strange GFP flags or illegal domains */ + if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING) || + !domain->pgsize_bitmap || + (gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | + __GFP_HIGHMEM)))) + return -EINVAL; + if (pt) { size_t mapped = 0; @@ -2706,11 +2705,12 @@ int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, iommu_unmap(domain, iova, mapped); return ret; } - return 0; + } else { + ret = __iommu_map_domain_pgtbl(domain, iova, paddr, size, prot, + gfp); + if (ret) + return ret; } - ret = __iommu_map_domain_pgtbl(domain, iova, paddr, size, prot, gfp); - if (ret) - return ret; trace_map(iova, paddr, size); iommu_debug_map(domain, paddr, size); @@ -2742,10 +2742,7 @@ __iommu_unmap_domain_pgtbl(struct iommu_domain *domain, unsigned long iova, size_t unmapped_page, unmapped = 0; unsigned int min_pagesz; - if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) - return 0; - - if (WARN_ON(!ops->unmap_pages || domain->pgsize_bitmap == 0UL)) + if (WARN_ON(!ops->unmap_pages)) return 0; /* find out the minimum page size supported */ @@ -2764,8 +2761,6 @@ __iommu_unmap_domain_pgtbl(struct iommu_domain *domain, unsigned long iova, pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size); - iommu_debug_unmap_begin(domain, iova, size); - /* * Keep iterating until we either unmap 'size' bytes (or more) * or we hit an area that isn't mapped. @@ -2801,6 +2796,12 @@ static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova, struct pt_iommu *pt = iommupt_from_domain(domain); size_t unmapped; + if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING) || + !domain->pgsize_bitmap)) + return 0; + + iommu_debug_unmap_begin(domain, iova, size); + if (pt) unmapped = pt->ops->unmap_range(pt, iova, size, iotlb_gather); else -- cgit v1.2.3 From 0735c54804c709d1b292f3b6947cfb560b2ce552 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 May 2026 13:46:15 -0300 Subject: iommu: Handle unmap error when iommu_debug is enabled Sashiko noticed a latent bug where the map error flow called iommu_unmap() which calls iommu_debug_unmap_begin()/iommu_debug_unmap_end() however since this is an error path the map flow never actually established the original iommu_debug_map() it will malfunction. Lift the unmap error handling into iommu_map_nosync() and reorder it so the trace_map()/iommu_debug_map() records the partial mapping and then immediately unmaps it. This avoid creating the unbalanced tracking and provides saner tracing instead of a unmap unmatched to any map. Fixes: ccc21213f013 ("iommu: Add calls for IOMMU_DEBUG_PAGEALLOC") Signed-off-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Reviewed-by: Samiullah Khawaja Reviewed-by: Mostafa Saleh Tested-by: Josua Mayer Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 49 +++++++++++++++++++------------------------------ 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index c21d1e3c4876..d1a9e713d3a0 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2615,12 +2615,11 @@ out_set_count: static int __iommu_map_domain_pgtbl(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, - size_t size, int prot, gfp_t gfp) + size_t size, int prot, gfp_t gfp, + size_t *mapped) { const struct iommu_domain_ops *ops = domain->ops; - unsigned long orig_iova = iova; unsigned int min_pagesz; - size_t orig_size = size; int ret = 0; if (WARN_ON(!ops->map_pages)) @@ -2643,31 +2642,25 @@ static int __iommu_map_domain_pgtbl(struct iommu_domain *domain, pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); while (size) { - size_t pgsize, count, mapped = 0; + size_t pgsize, count, op_mapped = 0; pgsize = iommu_pgsize(domain, iova, paddr, size, &count); pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", iova, &paddr, pgsize, count); ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, - gfp, &mapped); + gfp, &op_mapped); /* * Some pages may have been mapped, even if an error occurred, * so we should account for those so they can be unmapped. */ - size -= mapped; - + *mapped += op_mapped; if (ret) - break; - - iova += mapped; - paddr += mapped; - } + return ret; - /* unroll mapping in case something went wrong */ - if (ret) { - iommu_unmap(domain, orig_iova, orig_size - size); - return ret; + size -= op_mapped; + iova += op_mapped; + paddr += op_mapped; } return 0; } @@ -2685,6 +2678,7 @@ int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { struct pt_iommu *pt = iommupt_from_domain(domain); + size_t mapped = 0; int ret; might_sleep_if(gfpflags_allow_blocking(gfp)); @@ -2696,24 +2690,19 @@ int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, __GFP_HIGHMEM)))) return -EINVAL; - if (pt) { - size_t mapped = 0; - + if (pt) ret = pt->ops->map_range(pt, iova, paddr, size, prot, gfp, &mapped); - if (ret) { - iommu_unmap(domain, iova, mapped); - return ret; - } - } else { + else ret = __iommu_map_domain_pgtbl(domain, iova, paddr, size, prot, - gfp); - if (ret) - return ret; - } + gfp, &mapped); - trace_map(iova, paddr, size); - iommu_debug_map(domain, paddr, size); + trace_map(iova, paddr, mapped); + iommu_debug_map(domain, paddr, mapped); + if (ret) { + iommu_unmap(domain, iova, mapped); + return ret; + } return 0; } -- cgit v1.2.3 From 8ef3f77c440005c7f04229a75976bfc078364247 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 May 2026 13:46:16 -0300 Subject: iommupt: Check for missing PAGE_SIZE in the pgsize_bitmap Sashiko pointed out that the driver could drop PAGE_SIZE from the pgsize_bitmap. That is technically allowed but nothing does it, and such an iommu_domain would not be used with the DMA API today. Still, it is against the design and it is trivial to fix up. Lift the PT_WARN_ON to the if branch and just skip the fast path. Fixes: dcd6a011a8d5 ("iommupt: Add map_pages op") Signed-off-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Reviewed-by: Samiullah Khawaja Tested-by: Josua Mayer Signed-off-by: Joerg Roedel --- drivers/iommu/generic_pt/iommu_pt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h index 19b6daf88f2a..4877b05291c9 100644 --- a/drivers/iommu/generic_pt/iommu_pt.h +++ b/drivers/iommu/generic_pt/iommu_pt.h @@ -920,8 +920,8 @@ static int NS(map_range)(struct pt_iommu *iommu_table, dma_addr_t iova, return ret; /* Calculate target page size and level for the leaves */ - if (pt_has_system_page_size(common) && len == PAGE_SIZE) { - PT_WARN_ON(!(pgsize_bitmap & PAGE_SIZE)); + if (pt_has_system_page_size(common) && len == PAGE_SIZE && + likely(pgsize_bitmap & PAGE_SIZE)) { if (log2_mod(iova | paddr, PAGE_SHIFT)) return -ENXIO; map.leaf_pgsize_lg2 = PAGE_SHIFT; -- cgit v1.2.3 From 58829512ad461af8f35941069c209941e3a97b65 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 May 2026 13:46:17 -0300 Subject: iommupt: Fix the end_index calculation in __map_range_leaf() Sashiko noticed a mismatch of units in this math: num_leaves is actually the number of leaf *entries* (so a 16-item contiguous leaf is one num_leaves), while index is in items. The mismatch in maths causes __map_range_leaf() to exit early instead of efficiently filling a larger range of contiguous PTEs. The early exit is caught by the functions above and then __map_range_leaf() is re-invoked, so there is no functional issue. Correct the misuse of units by adjusting num_leaves with the leaf size and avoid the performance cost of looping externally. There are also some mismatched types for num_leaves; simplify things to remove the duplicated calculations. Fixes: d6c65b0fd621 ("iommupt: Avoid rewalking during map") Signed-off-by: Jason Gunthorpe Reviewed-by: Samiullah Khawaja Reviewd-by: Pranjal Shrivastava Tested-by: Josua Mayer Signed-off-by: Joerg Roedel --- drivers/iommu/generic_pt/iommu_pt.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h index 4877b05291c9..dc91fb4e2f61 100644 --- a/drivers/iommu/generic_pt/iommu_pt.h +++ b/drivers/iommu/generic_pt/iommu_pt.h @@ -534,10 +534,12 @@ static int __map_range_leaf(struct pt_range *range, void *arg, struct pt_state pts = pt_init(range, level, table); struct pt_iommu_map_args *map = arg; unsigned int leaf_pgsize_lg2 = map->leaf_pgsize_lg2; + unsigned int leaves_avail; unsigned int start_index; pt_oaddr_t oa = map->oa; - unsigned int num_leaves; + pt_vaddr_t num_leaves; unsigned int orig_end; + unsigned int step_lg2; pt_vaddr_t last_va; unsigned int step; bool need_contig; @@ -546,21 +548,25 @@ static int __map_range_leaf(struct pt_range *range, void *arg, PT_WARN_ON(map->leaf_level != level); PT_WARN_ON(!pt_can_have_leaf(&pts)); - step = log2_to_int_t(unsigned int, - leaf_pgsize_lg2 - pt_table_item_lg2sz(&pts)); - need_contig = leaf_pgsize_lg2 != pt_table_item_lg2sz(&pts); + step_lg2 = leaf_pgsize_lg2 - pt_table_item_lg2sz(&pts); + step = log2_to_int_t(unsigned int, step_lg2); + need_contig = step_lg2 != 0; _pt_iter_first(&pts); start_index = pts.index; orig_end = pts.end_index; - if (pts.index + map->num_leaves < pts.end_index) { + leaves_avail = + log2_div_t(unsigned int, pts.end_index - pts.index, step_lg2); + if (map->num_leaves <= leaves_avail) { /* Need to stop in the middle of the table to change sizes */ - pts.end_index = pts.index + map->num_leaves; + pts.end_index = pts.index + log2_mul(map->num_leaves, step_lg2); num_leaves = 0; } else { - num_leaves = map->num_leaves - (pts.end_index - pts.index); + num_leaves = map->num_leaves - leaves_avail; } + PT_WARN_ON( + log2_mod_t(unsigned int, pts.end_index - pts.index, step_lg2)); do { pts.type = pt_load_entry_raw(&pts); if (pts.type != PT_ENTRY_EMPTY || need_contig) { -- cgit v1.2.3 From 1bb54043ff309795c90ccadd8a6e6b13ac40ec4e Mon Sep 17 00:00:00 2001 From: Tomasz Jeznach Date: Tue, 12 May 2026 10:37:44 -0700 Subject: MAINTAINERS: update Tomasz Jeznach's email address Switch from the previous work address to a linux.dev account, as the work address is no longer actively monitored. Signed-off-by: Tomasz Jeznach Signed-off-by: Joerg Roedel --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index eec4a740f7ca..bd6a72f29d9c 100644 --- a/.mailmap +++ b/.mailmap @@ -857,6 +857,7 @@ Tobias Klauser Tobias Klauser Tobias Klauser Todor Tomov +Tomasz Jeznach Tony Luck Trilok Soni TripleX Chung diff --git a/MAINTAINERS b/MAINTAINERS index b2040011a386..55c6ab1a974a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22944,7 +22944,7 @@ N: riscv K: riscv RISC-V IOMMU -M: Tomasz Jeznach +M: Tomasz Jeznach L: iommu@lists.linux.dev L: linux-riscv@lists.infradead.org S: Maintained -- cgit v1.2.3 From c0e4fffc0f474b7ed10adee4ab2bc1a66d36fc72 Mon Sep 17 00:00:00 2001 From: Robertus Diawan Chris Date: Fri, 8 May 2026 10:39:14 +0700 Subject: ALSA: scarlett2: Add missing error check when initialise Autogain Status When initialise new control with scarlett2_add_new_ctl() function for Autogain Status, scarlett2_add_new_ctl() might throw an error. So, add error check after initialise new control for Autogain Status. This is reported by Coverity Scan with CID 1598781 as UNUSED_VALUE. Fixes: 0a995e38dc44 ("ALSA: scarlett2: Add support for software-controllable input gain") Signed-off-by: Robertus Diawan Chris Link: https://patch.msgid.link/20260508033914.111596-1-robertusdchris@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/mixer_scarlett2.c b/sound/usb/mixer_scarlett2.c index 8eaa96222759..0f83f8981213 100644 --- a/sound/usb/mixer_scarlett2.c +++ b/sound/usb/mixer_scarlett2.c @@ -6707,6 +6707,8 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer) err = scarlett2_add_new_ctl( mixer, &scarlett2_autogain_status_ctl, i, 1, s, &private->autogain_status_ctls[i]); + if (err < 0) + return err; } /* Add autogain target controls */ -- cgit v1.2.3 From 2149c011510cbdcf183a13b26756e4a02071f0f2 Mon Sep 17 00:00:00 2001 From: Lianqin Hu Date: Fri, 8 May 2026 12:49:34 +0000 Subject: ALSA: usb-audio: Add iface reset and delay quirk for TTGK Technology USB-C Audio Setting up the interface when suspended/resumeing fail on this card. Adding a reset and delay quirk will eliminate this problem. usb 1-1: new full-speed USB device number 2 using xhci-hcd usb 1-1: New USB device found, idVendor=3302, idProduct=17c2 usb 1-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 1-1: Product: USB-C Audio usb 1-1: Manufacturer: TTGK Technology usb 1-1: SerialNumber: 170120210706 Signed-off-by: Lianqin Hu Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/TYUPR06MB621720E4E8F99A42E162FD51D23D2@TYUPR06MB6217.apcprd06.prod.outlook.com --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 17983d9774f8..31cbe383ae65 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2479,6 +2479,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x3255, 0x0000, /* Luxman D-10X */ QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY), + DEVICE_FLG(0x3302, 0x17c2, /* TTGK Technology USB-C Audio */ + QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), DEVICE_FLG(0x339b, 0x3a07, /* Synaptics HONOR USB-C HEADSET */ QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE), DEVICE_FLG(0x3443, 0x930d, /* NexiGo N930W 60fps Webcam */ -- cgit v1.2.3 From dd074f04e04648d89d9d10ae9846cd057c97b385 Mon Sep 17 00:00:00 2001 From: Nicholas Bonello Date: Fri, 8 May 2026 18:55:07 -0400 Subject: ALSA: hda/realtek: Fix Legion 7 16ITHG6 speaker amp binding The Lenovo Legion 7 16ITHG6 uses codec SSID 17aa:3855, but its PCI SSID is 17aa:3811. The latter is now also used by the Legion S7 15IMH05 quirk, which is matched before codec SSID fallback and incorrectly routes Legion 7 16ITHG6 machines to ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS. That fixup does not bind the CLSA0101 CS35L41 companion amplifiers, making the built-in speakers silent even though playback appears to be active. Add a codec SSID quirk for 17aa:3855 before the conflicting PCI SSID quirk so that the Legion 7 16ITHG6 uses ALC287_FIXUP_LEGION_16ITHG6. This restores CS35L41 firmware loading and binds both speaker amplifiers. Fixes: 67f4c61a73e9 ("ALSA: hda/realtek: Add quirk for Legion S7 15IMH") Cc: stable@vger.kernel.org Tested-by: Nicholas Bonello Assisted-by: Codex:GPT-5 Signed-off-by: Nicholas Bonello Link: https://patch.msgid.link/20260508225507.47667-1-hadobedo@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 55bb98e2e55a..4e2c8401c404 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7675,11 +7675,12 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3801, "Lenovo Yoga9 14IAP7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), HDA_CODEC_QUIRK(0x17aa, 0x3802, "DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga Pro 9 14IRP8", ALC287_FIXUP_TAS2781_I2C), - /* Yoga Pro 9 16IMH9 shares PCI SSID 17aa:3811 with Legion S7 15IMH05; - * use codec SSID to distinguish them + /* Yoga Pro 9 16IMH9 and Legion 7 16ITHG6 share PCI SSID 17aa:3811 + * with Legion S7 15IMH05; use codec SSID to distinguish them */ HDA_CODEC_QUIRK(0x17aa, 0x38d5, "Lenovo Yoga Pro 9 16IMH9", ALC287_FIXUP_TAS2781_I2C), HDA_CODEC_QUIRK(0x17aa, 0x38d6, "Lenovo Yoga Pro 9 16IMH9", ALC287_FIXUP_TAS2781_I2C), + HDA_CODEC_QUIRK(0x17aa, 0x3855, "Legion 7 16ITHG6", ALC287_FIXUP_LEGION_16ITHG6), SND_PCI_QUIRK(0x17aa, 0x3811, "Legion S7 15IMH05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7), -- cgit v1.2.3 From 0a9c56dd387605d17dabeedd9fdd2c4c1d0bab7b Mon Sep 17 00:00:00 2001 From: Myeonghun Pak Date: Wed, 13 May 2026 15:57:00 +0900 Subject: drm/loongson: Use managed KMS polling lsdc_pci_probe() initializes KMS polling before setting up vblank support, requesting the IRQ and registering the DRM device. If any of those later steps fails, probe returns without finalizing polling. The driver also never finalizes polling on regular removal. Use drmm_kms_helper_poll_init() so polling is tied to the DRM device lifetime and automatically finalized on probe failure and device removal. This issue was identified during our ongoing static-analysis research while reviewing kernel code. Fixes: f39db26c5428 ("drm: Add kms driver for loongson display controller") Cc: stable@vger.kernel.org Co-developed-by: Ijae Kim Signed-off-by: Ijae Kim Reviewed-by: Thomas Zimmermann Acked-by: Jianmin Lv Reviewed-by: Huacai Chen Signed-off-by: Myeonghun Pak Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/20260513065706.23803-1-mhun512@gmail.com --- drivers/gpu/drm/loongson/lsdc_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/loongson/lsdc_drv.c b/drivers/gpu/drm/loongson/lsdc_drv.c index 1ece1ea42f78..34405073c4d4 100644 --- a/drivers/gpu/drm/loongson/lsdc_drv.c +++ b/drivers/gpu/drm/loongson/lsdc_drv.c @@ -293,7 +293,7 @@ static int lsdc_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) vga_client_register(pdev, lsdc_vga_set_decode); - drm_kms_helper_poll_init(ddev); + drmm_kms_helper_poll_init(ddev); if (loongson_vblank) { ret = drm_vblank_init(ddev, descp->num_of_crtc); -- cgit v1.2.3 From 814b2c9b30e56074e11fc0a6e5419b3fee0639bc Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Mon, 11 May 2026 01:36:37 -0300 Subject: ALSA: usb-audio: qcom: Check offload mapping failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit uaudio_transfer_buffer_setup() calls dma_get_sgtable() and then passes the sg_table to uaudio_iommu_map_xfer_buf() without checking whether sg table construction succeeded. If dma_get_sgtable() fails, the sg_table contents are not valid. uaudio_iommu_map_pa() also ignores iommu_map() failures for the event and transfer rings and still returns the allocated IOVA to the QMI response. That can expose an unmapped IOVA to the audio DSP. For transfer rings, the failed mapping also leaves the IOVA allocator state marked in use. Check both operations. Free the coherent transfer buffer when sg table construction fails, free the sg table when transfer-buffer IOMMU mapping fails, and release the transfer-ring IOVA if iommu_map() fails. Also return the existing event-ring IOVA when the event ring is already mapped, matching the pre-split helper behavior. Fixes: 326bbc348298 ("ALSA: usb-audio: qcom: Introduce QC USB SND offloading support") Fixes: 44499ecb4f28 ("ALSA: usb: qcom: Fix false-positive address space check") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260511-alsa-usb-qcom-offload-map-errors-v1-1-6502695e58bc@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/qcom/qc_audio_offload.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/sound/usb/qcom/qc_audio_offload.c b/sound/usb/qcom/qc_audio_offload.c index 5f993b88448c..a0009503b2c5 100644 --- a/sound/usb/qcom/qc_audio_offload.c +++ b/sound/usb/qcom/qc_audio_offload.c @@ -565,6 +565,7 @@ static unsigned long uaudio_iommu_map_pa(enum mem_type mtype, bool dma_coherent, unsigned long iova = 0; bool map = true; int prot = uaudio_iommu_map_prot(dma_coherent); + int ret; switch (mtype) { case MEM_EVENT_RING: @@ -582,10 +583,24 @@ static unsigned long uaudio_iommu_map_pa(enum mem_type mtype, bool dma_coherent, dev_err(uaudio_qdev->data->dev, "unknown mem type %d\n", mtype); } - if (!iova || !map) + if (!iova) return 0; - iommu_map(uaudio_qdev->data->domain, iova, pa, size, prot, GFP_KERNEL); + if (!map) + return iova; + + ret = iommu_map(uaudio_qdev->data->domain, iova, pa, size, prot, + GFP_KERNEL); + if (ret) { + dev_err(uaudio_qdev->data->dev, + "failed to map %zu bytes at iova 0x%08lx: %d\n", + size, iova, ret); + if (mtype == MEM_XFER_RING) + uaudio_put_iova(iova, size, + &uaudio_qdev->xfer_ring_list, + &uaudio_qdev->xfer_ring_iova_size); + return 0; + } return iova; } @@ -1054,15 +1069,17 @@ static int uaudio_transfer_buffer_setup(struct snd_usb_substream *subs, if (!xfer_buf) return -ENOMEM; - dma_get_sgtable(subs->dev->bus->sysdev, &xfer_buf_sgt, xfer_buf, - xfer_buf_dma, len); + ret = dma_get_sgtable(subs->dev->bus->sysdev, &xfer_buf_sgt, xfer_buf, + xfer_buf_dma, len); + if (ret) + goto free_xfer_buf; /* map the physical buffer into sysdev as well */ xfer_buf_dma_sysdev = uaudio_iommu_map_xfer_buf(dma_coherent, len, &xfer_buf_sgt); if (!xfer_buf_dma_sysdev) { ret = -ENOMEM; - goto unmap_sync; + goto free_sgt; } mem_info->dma = xfer_buf_dma; @@ -1073,7 +1090,9 @@ static int uaudio_transfer_buffer_setup(struct snd_usb_substream *subs, return 0; -unmap_sync: +free_sgt: + sg_free_table(&xfer_buf_sgt); +free_xfer_buf: usb_free_coherent(subs->dev, len, xfer_buf, xfer_buf_dma); return ret; -- cgit v1.2.3 From 74e8409821ac8cda70bf23eb593f2c7f6e3b5a2f Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 11 May 2026 11:41:48 +0100 Subject: ALSA: doc: cs35l56: Update path to HDA driver source The HDA drivers were moved to sound/hda/... so update a Documentation reference that still pointed to the old location. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260511104148.36382-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- Documentation/sound/codecs/cs35l56.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/sound/codecs/cs35l56.rst b/Documentation/sound/codecs/cs35l56.rst index d5363b08f515..b3f8c1c23851 100644 --- a/Documentation/sound/codecs/cs35l56.rst +++ b/Documentation/sound/codecs/cs35l56.rst @@ -40,7 +40,7 @@ There are two drivers in the kernel *For systems using SoundWire*: sound/soc/codecs/cs35l56.c and associated files -*For systems using HDA*: sound/pci/hda/cs35l56_hda.c +*For systems using HDA*: sound/hda/codecs/side-codecs/cs35l56_hda.c Firmware ======== -- cgit v1.2.3 From d02d2d51a50d1bbf44a50eda094aa2b10fecf023 Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Mon, 11 May 2026 15:15:58 -0300 Subject: ALSA: hda/realtek: Limit mic boost on Positivo DN50E The internal mic boost on the Positivo DN50E is too high. Fix this by applying the ALC269_FIXUP_LIMIT_INT_MIC_BOOST fixup to the machine to limit the gain. Signed-off-by: Edson Juliano Drosdeck Link: https://patch.msgid.link/20260511181558.670563-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 4e2c8401c404..3323793cdc14 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7830,6 +7830,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1e39, 0xca14, "MEDION NM14LNL", ALC233_FIXUP_MEDION_MTL_SPK), + SND_PCI_QUIRK(0x1e50, 0x7007, "Positivo DN50E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1ee7, 0x2078, "HONOR BRB-X M1010", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1ee7, 0x2081, "HONOR MRB-XXX M1020", ALC256_FIXUP_HONOR_MRB_XXX_M1020_AUDIO), SND_PCI_QUIRK(0x1f4c, 0xe001, "Minisforum V3 (SE)", ALC245_FIXUP_BASS_HP_DAC), -- cgit v1.2.3 From 67c73815220784074ff13ec07df955911caf1b73 Mon Sep 17 00:00:00 2001 From: Daniel Schaefer Date: Wed, 13 May 2026 23:55:13 +0800 Subject: ALSA: hda/realtek: fix mic boost on Framework PTL In addition to the mic jack fix, also need to avoid boosting the internal mic too much, otherwise >50% input volume clips a lot. Also add a second SSID. We have one for the classic chassis/speaker and one for the new Pro chassis/speaker. To: Jaroslav Kysela To: Takashi Iwai To: linux-sound@vger.kernel.org Cc: Dustin L. Howett Cc: linux@frame.work Signed-off-by: Daniel Schaefer Link: https://patch.msgid.link/20260513155513.11683-1-dhs@frame.work Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 3323793cdc14..ef1eccda70df 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -4095,6 +4095,7 @@ enum { ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED, ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED, ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE, + ALC295_FIXUP_FRAMEWORK_LAPTOP_LIMIT_INT_MIC_BOOST, ALC287_FIXUP_LEGION_16ITHG6, ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK, ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN, @@ -6346,6 +6347,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC }, + [ALC295_FIXUP_FRAMEWORK_LAPTOP_LIMIT_INT_MIC_BOOST] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_limit_int_mic_boost, + .chained = true, + .chain_id = ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE, + }, [ALC287_FIXUP_LEGION_16ITHG6] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_legion_16ithg6_speakers, @@ -7856,7 +7863,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0xf111, 0x0009, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x000b, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x000c, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0xf111, 0x000f, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0xf111, 0x000f, "Framework Laptop 13 Pro PTL", ALC295_FIXUP_FRAMEWORK_LAPTOP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0xf111, 0x010f, "Framework Laptop 13 PTL", ALC295_FIXUP_FRAMEWORK_LAPTOP_LIMIT_INT_MIC_BOOST), #if 0 /* Below is a quirk table taken from the old code. -- cgit v1.2.3 From 2891bb13ef158281736b6314b1ceaef6d08d57f4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 13 May 2026 18:27:58 +0200 Subject: ALSA: hda/cs35l56: Drop malformed default N from Kconfig First of all, it has to be 'default n' (small letter n), otherwise it looks for CONFIG_N which is absent and in case of appearance will enable something unrelated. Second and most important is that 'n' *is* the default 'default' already. Hence just drop malformed line. Signed-off-by: Andy Shevchenko Reviewed-by: Richard Fitzgerald Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260513162758.365972-1-andriy.shevchenko@linux.intel.com --- sound/hda/codecs/side-codecs/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/hda/codecs/side-codecs/Kconfig b/sound/hda/codecs/side-codecs/Kconfig index fc5651e555e3..e51964c0a091 100644 --- a/sound/hda/codecs/side-codecs/Kconfig +++ b/sound/hda/codecs/side-codecs/Kconfig @@ -94,7 +94,6 @@ menu "CS35L56 driver options" config SND_HDA_SCODEC_CS35L56_CAL_DEBUGFS bool "CS35L56 create debugfs for factory calibration" - default N depends on DEBUG_FS select SND_SOC_CS35L56_CAL_DEBUGFS_COMMON help -- cgit v1.2.3 From fd87b510f5f543125ecf51e7c706a9f4bc3352be Mon Sep 17 00:00:00 2001 From: Markus Kramer Date: Thu, 14 May 2026 00:28:18 +0200 Subject: ALSA: hda/realtek: Add quirk for Samsung Galaxy Book5 360 headphone The Samsung Galaxy Book5 360 (NP750QHA, PCI subsystem ID 0x144d:0xc902) has severe audio distortion on the 3.5mm headphone jack. Applying ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET corrects the output path configuration, consistent with fixes already applied to other Samsung Galaxy Book models using the same ALC256 codec. Cc: stable@vger.kernel.org Link: https://github.com/thesofproject/linux/issues/5648 Signed-off-by: Markus Kramer Link: https://patch.msgid.link/20260513222818.14351-1-linux@markus-kramer.de Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index ef1eccda70df..0c501e9fdc27 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7511,6 +7511,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc870, "Samsung Galaxy Book2 Pro (NP950XED)", ALC298_FIXUP_SAMSUNG_AMP_V2_2_AMPS), SND_PCI_QUIRK(0x144d, 0xc872, "Samsung Galaxy Book2 Pro (NP950XEE)", ALC298_FIXUP_SAMSUNG_AMP_V2_2_AMPS), SND_PCI_QUIRK(0x144d, 0xc886, "Samsung Galaxy Book3 Pro (NP964XFG)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS), + SND_PCI_QUIRK(0x144d, 0xc902, "Samsung Galaxy Book5 360 (NP750QHA)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xc1ca, "Samsung Galaxy Book3 Pro 360 (NP960QFG)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS), SND_PCI_QUIRK(0x144d, 0xc1cb, "Samsung Galaxy Book3 Pro 360 (NP965QFG)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS), SND_PCI_QUIRK(0x144d, 0xc1cc, "Samsung Galaxy Book3 Ultra (NT960XFH)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS), -- cgit v1.2.3 From 9921941929ab014038c357b30567d93d20393a94 Mon Sep 17 00:00:00 2001 From: Quan Sun <2022090917019@std.uestc.edu.cn> Date: Thu, 14 May 2026 21:22:45 +0800 Subject: ALSA: hda: Fix NULL pointer dereference in snd_hda_ctl_add() snd_hda_ctl_add() dereferences kctl->id.subdevice without checking whether kctl is NULL. Multiple callers in sound/hda/codecs/ca0132.c pass the return value of snd_ctl_new1() directly to snd_hda_ctl_add() without a NULL check: return snd_hda_ctl_add(codec, nid, snd_ctl_new1(&knew, codec)); snd_ctl_new1() returns NULL when the underlying snd_ctl_new() fails on memory allocation (kzalloc_flex),which can occur under memory pressure or via fault injection. Add a NULL check at the entry of snd_hda_ctl_add(), matching the pattern already used by snd_ctl_add_replace() at the same call path (sound/core/control.c:515). Return -EINVAL to let callers handle the error gracefully. Fixes: 44f0c9782cc6 ("ALSA: hda/ca0132: Add tuning controls") Signed-off-by: Quan Sun <2022090917019@std.uestc.edu.cn> Link: https://patch.msgid.link/20260514132245.3062884-1-2022090917019@std.uestc.edu.cn Signed-off-by: Takashi Iwai --- sound/hda/common/codec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/hda/common/codec.c b/sound/hda/common/codec.c index c2af2511a831..81f266b9b850 100644 --- a/sound/hda/common/codec.c +++ b/sound/hda/common/codec.c @@ -1699,6 +1699,9 @@ int snd_hda_ctl_add(struct hda_codec *codec, hda_nid_t nid, unsigned short flags = 0; struct hda_nid_item *item; + if (!kctl) + return -EINVAL; + if (kctl->id.subdevice & HDA_SUBDEV_AMP_FLAG) { flags |= HDA_NID_ITEM_AMP; if (nid == 0) -- cgit v1.2.3 From 83dca2530fb3ba63f47bad339d890bc30aa06ab5 Mon Sep 17 00:00:00 2001 From: Jackie Dong Date: Thu, 14 May 2026 23:39:40 +0800 Subject: ALSA: hda/realtek: ALC269 fixup for Lenovo Yoga Pro 7 15ASH111 audio Volume control for the speakers on the Lenovo Yoga Pro 7 15ASH11 laptop doesn't work. The DAC routing is the same as on the ThinkPad X1 Gen7 function, so reuse the alc285_fixup_thinkpad_x1_gen7 to get it working. Signed-off-by: Jackie Dong Link: https://patch.msgid.link/20260514153940.7320-1-xy-jackie@139.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 0c501e9fdc27..9946ae185f4a 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7762,6 +7762,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x38df, "Y990 YG DUAL", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38f9, "Thinkbook 16P Gen5", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x38fa, "Thinkbook 16P Gen5", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x38fc, "Lenovo Yoga Pro 7 15ASH11", ALC245_FIXUP_BASS_HP_DAC), SND_PCI_QUIRK(0x17aa, 0x38fd, "ThinkBook plus Gen5 Hybrid", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x390d, "Lenovo Yoga Pro 7 14ASP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), -- cgit v1.2.3 From 7d1051ad68df3d584b5f24bfa1fb19f3a24db278 Mon Sep 17 00:00:00 2001 From: Adrien Burnett Date: Thu, 14 May 2026 18:59:05 +0200 Subject: ALSA: hda/realtek: Add mute LED quirk for HP Pavilion Laptop 16-ag0xxx Add a SND_PCI_QUIRK entry for the HP Pavilion Laptop 16-ag0xxx (subsystem 0x103c:0x8cbc, Realtek ALC245). The ALC245_FIXUP_HP_X360_MUTE_LEDS fixup is already used by the neighbouring HP Pavilion Aero Laptop 13-bg0xxx (0x103c:0x8cbd); it chains the master-mute COEF handler with the GPIO mic-mute LED handler, which is what this machine needs. Tested on the affected hardware: both the mute and mic-mute key LEDs respond correctly to the keyboard hotkeys after this change. Cc: Signed-off-by: Adrien Burnett Link: https://patch.msgid.link/20260514165905.21175-1-an.arctic.pigeon@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 9946ae185f4a..2f5d13032fd7 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7200,6 +7200,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8caf, "HP Elite mt645 G8 Mobile Thin Client", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8cbc, "HP Pavilion Laptop 16-ag0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS), SND_PCI_QUIRK(0x103c, 0x8cbd, "HP Pavilion Aero Laptop 13-bg0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS), SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), SND_PCI_QUIRK(0x103c, 0x8cde, "HP OmniBook Ultra Flip Laptop 14t", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), -- cgit v1.2.3 From bc62216dc8e221e3781afa14430f45208bfa9af9 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 13 May 2026 09:01:36 +0200 Subject: batman-adv: frag: disallow unicast fragment in fragment batadv_frag_skb_buffer() is called by batadv_batman_skb_recv() when a BATADV_UNICAST_FRAG packet is received. Once all fragments are collected and the packet is reassembled, batadv_recv_frag_packet() calls batadv_batman_skb_recv() again to process the defragmented payload. A malicious sender can craft a BATADV_UNICAST_FRAG packet whose reassembled payload is itself a BATADV_UNICAST_FRAG packet (matryoshka-style nesting). Each nesting level recurses through batadv_batman_skb_recv() without bound, growing the kernel stack until it is exhausted. Since refragmentation or fragments in fragments are not actually allowed, discard all packets which are still BATADV_UNICAST_FRAG packets after the defragmentation process. Cc: stable@kernel.org Fixes: 610bfc6bc99b ("batman-adv: Receive fragmented packets and merge") Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Reviewed-by: Yuan Tan Signed-off-by: Sven Eckelmann --- net/batman-adv/fragmentation.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 1152c2ce0c1e..4a594aa2ebf6 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -304,6 +304,31 @@ free: return skb_out; } +/** + * batadv_skb_is_frag() - check if newly merged skb is gain a unicast packet + * @skb: newly merged skb + * + * Return: if newly skb is of type BATADV_UNICAST_FRAG + */ +static bool batadv_skb_is_frag(struct sk_buff *skb) +{ + struct batadv_ogm_packet *batadv_ogm_packet; + + /* packet should hold at least type and version */ + if (unlikely(!pskb_may_pull(skb, 2))) + return false; + + batadv_ogm_packet = (struct batadv_ogm_packet *)skb->data; + + if (batadv_ogm_packet->version != BATADV_COMPAT_VERSION) + return false; + + if (batadv_ogm_packet->packet_type != BATADV_UNICAST_FRAG) + return false; + + return true; +} + /** * batadv_frag_skb_buffer() - buffer fragment for later merge * @skb: skb to buffer @@ -337,6 +362,16 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb, if (!skb_out) goto out_err; + /* fragment in fragment is not allowed. otherwise it is possible + * to exhaust the stack when receiving a matryoshka-style + * "fragments in a fragment packet" + */ + if (batadv_skb_is_frag(skb_out)) { + kfree_skb(skb_out); + skb_out = NULL; + goto out_err; + } + out: ret = true; out_err: -- cgit v1.2.3 From d5487249a81ea658717614009c8f46acc5b7101a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 13 May 2026 10:43:54 +0200 Subject: batman-adv: tp_meter: directly shut down timer on cleanup batadv_tp_sender_cleanup() was calling timer_delete_sync() followed by timer_delete() to guard against the timer handler re-arming itself between the two calls. This double-deletion hack relied on the sending status being set to 0 to suppress re-arming. Replace both calls with a single timer_shutdown_sync(). This function both waits for any running timer callback to complete (like timer_delete_sync()) and permanently disarms the timer so it cannot be re-armed afterwards, making re-arming prevention unconditional and self-documenting. The re-arming property is also required because otherwise: 1. context 0 (batadv_tp_recv_ack()) checks in batadv_tp_reset_sender_timer() if sending is still 1 -> it is 2. context 1 changes in batadv_tp_sender_shutdown() sending to 0 and in this process forces the kthread to stop timer in batadv_tp_sender_cleanup() 3. context 0 continues in batadv_tp_reset_sender_timer() and rearms the timer -> but the reference for it is already gone Cc: stable@kernel.org Fixes: 33a3bb4a3345 ("batman-adv: throughput meter implementation") Signed-off-by: Sven Eckelmann --- net/batman-adv/tp_meter.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index a3593d104caa..1fd1526059d8 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -401,13 +401,7 @@ static void batadv_tp_sender_cleanup(struct batadv_tp_vars *tp_vars) batadv_tp_list_detach(tp_vars); /* kill the timer and remove its reference */ - timer_delete_sync(&tp_vars->timer); - /* the worker might have rearmed itself therefore we kill it again. Note - * that if the worker should run again before invoking the following - * timer_delete(), it would not re-arm itself once again because the status - * is OFF now - */ - timer_delete(&tp_vars->timer); + timer_shutdown_sync(&tp_vars->timer); batadv_tp_vars_put(tp_vars); } -- cgit v1.2.3 From 6fd9f6e870ea285f05102e8e00e6a7f4495a9a02 Mon Sep 17 00:00:00 2001 From: Matt DeVillier Date: Thu, 7 May 2026 09:58:41 -0500 Subject: ALSA: hda/ca0132: Disable auto-detect on manual output select Commit 778031e1658d ("ALSA: hda/ca0132: Set HP/Speaker auto-detect default from headphone pin verb") enables HP/Speaker auto-detect by default when the headphone pin supports presence detect. With auto-detect enabled, ca0132_select_out() and ca0132_alt_select_out() choose the output from jack presence instead of the manual HP/Speaker selection. This means selecting speaker output while headphones are plugged in updates the control state, but audio still routes to the headphones. Treat an explicit manual output selection as a request to leave auto-detect mode. Clear the HP/Speaker auto-detect switch before applying the manual selection, and notify userspace so the auto-detect control state is updated in mixers. Do this for both the normal HP/Speaker Playback Switch and the alternate Output Select control used by desktop cards. This keeps auto-detect enabled by default for devices with jack presence detection, while preserving the expected behavior that a manual output choice takes effect immediately. Fixes: 778031e1658d ("ALSA: hda/ca0132: Set HP/Speaker auto-detect default from headphone pin verb") Signed-off-by: Matt DeVillier Link: https://lore.kernel.org/CAFTm+6AfeXKf=b2frG4xC5yC4jjM9TkD6c8+dOWWFw6BDjDESw@mail.gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/ca0132.c | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/sound/hda/codecs/ca0132.c b/sound/hda/codecs/ca0132.c index ad533b04ab29..be565ffaade0 100644 --- a/sound/hda/codecs/ca0132.c +++ b/sound/hda/codecs/ca0132.c @@ -5498,6 +5498,30 @@ static int zxr_headphone_gain_set(struct hda_codec *codec, long val) return 0; } +/* + * Manual output selection (HP/Speaker Playback Switch or alt Output Select) + * is meaningful only when HP/Speaker auto-detect is disabled, since the + * select_out path always prefers jack presence when auto-detect is on. When + * the user explicitly chooses an output, turn auto-detect off so the manual + * choice actually takes effect, and notify userspace so the auto-detect + * control reflects the new state. + */ +static void ca0132_disable_hp_auto_detect(struct hda_codec *codec) +{ + struct ca0132_spec *spec = codec->spec; + struct snd_kcontrol *kctl; + + if (!spec->vnode_lswitch[VNID_HP_ASEL - VNODE_START_NID]) + return; + + spec->vnode_lswitch[VNID_HP_ASEL - VNODE_START_NID] = 0; + kctl = snd_hda_find_mixer_ctl(codec, + "HP/Speaker Auto Detect Playback Switch"); + if (kctl) + snd_ctl_notify(codec->card, SNDRV_CTL_EVENT_MASK_VALUE, + &kctl->id); +} + static int ca0132_vnode_switch_set(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { @@ -5510,14 +5534,11 @@ static int ca0132_vnode_switch_set(struct snd_kcontrol *kcontrol, int auto_jack; if (nid == VNID_HP_SEL) { - auto_jack = - spec->vnode_lswitch[VNID_HP_ASEL - VNODE_START_NID]; - if (!auto_jack) { - if (ca0132_use_alt_functions(spec)) - ca0132_alt_select_out(codec); - else - ca0132_select_out(codec); - } + ca0132_disable_hp_auto_detect(codec); + if (ca0132_use_alt_functions(spec)) + ca0132_alt_select_out(codec); + else + ca0132_select_out(codec); return 1; } @@ -5978,7 +5999,6 @@ static int ca0132_alt_output_select_put(struct snd_kcontrol *kcontrol, struct ca0132_spec *spec = codec->spec; int sel = ucontrol->value.enumerated.item[0]; unsigned int items = NUM_OF_OUTPUTS; - unsigned int auto_jack; if (sel >= items) return 0; @@ -5988,10 +6008,8 @@ static int ca0132_alt_output_select_put(struct snd_kcontrol *kcontrol, spec->out_enum_val = sel; - auto_jack = spec->vnode_lswitch[VNID_HP_ASEL - VNODE_START_NID]; - - if (!auto_jack) - ca0132_alt_select_out(codec); + ca0132_disable_hp_auto_detect(codec); + ca0132_alt_select_out(codec); return 1; } -- cgit v1.2.3 From a9aba21a539c668a66b58eeb08ad3909e5a54c2a Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Wed, 1 Apr 2026 18:29:24 +0300 Subject: iio: adc: nxp-sar-adc: fix division by zero in write_raw Add a validation check for the sampling frequency value before using it as a divisor. A user writing zero or a negative value to the sampling_frequency sysfs attribute triggers a division by zero in the kernel. Also prevent unsigned integer underflow when the computed cycle count is smaller than NXP_SAR_ADC_CONV_TIME, which would wrap the u32 inpsamp to a huge value. Fixes: 4434072a893e ("iio: adc: Add the NXP SAR ADC support for the s32g2/3 platforms") Signed-off-by: Antoniu Miclaus Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/nxp-sar-adc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/nxp-sar-adc.c b/drivers/iio/adc/nxp-sar-adc.c index 705dd7da1bd2..1711cae7d872 100644 --- a/drivers/iio/adc/nxp-sar-adc.c +++ b/drivers/iio/adc/nxp-sar-adc.c @@ -569,6 +569,9 @@ static int nxp_sar_adc_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec switch (mask) { case IIO_CHAN_INFO_SAMP_FREQ: + if (val <= 0) + return -EINVAL; + /* * Configures the sample period duration in terms of the SAR * controller clock. The minimum acceptable value is 8. @@ -577,7 +580,11 @@ static int nxp_sar_adc_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec * sampling timing which gives us the number of cycles expected. * The value is 8-bit wide, consequently the max value is 0xFF. */ - inpsamp = clk_get_rate(info->clk) / val - NXP_SAR_ADC_CONV_TIME; + inpsamp = clk_get_rate(info->clk) / val; + if (inpsamp < NXP_SAR_ADC_CONV_TIME) + return -EINVAL; + + inpsamp -= NXP_SAR_ADC_CONV_TIME; nxp_sar_adc_conversion_timing_set(info, inpsamp); return 0; -- cgit v1.2.3 From a093999355084bdbfe6e97f1dd232e58a1525f0b Mon Sep 17 00:00:00 2001 From: Benoît Monin Date: Wed, 1 Apr 2026 17:24:58 +0200 Subject: iio: buffer: Fix DMA fence leak in iio_buffer_enqueue_dmabuf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit iio_buffer_enqueue_dmabuf() allocates a struct iio_dma_fence (104 bytes, kmalloc-128) via kmalloc_obj()+dma_fence_init(), which sets the initial kref to 1. It then calls dma_resv_add_fence() which takes a second reference (kref=2), and stores a raw pointer in block->fence. On the success path the function returns without calling dma_fence_put() to release the initial reference, so every buffer enqueue permanently leaks one kmalloc-128 allocation. The iio_buffer_cleanup() work item only releases the temporary reference taken during completion signalling by iio_buffer_signal_dmabuf_done(); the initial reference from dma_fence_init() is never released. With four iio_rwdev instances at 240kHz and 512 samples per buffer, this produces ~1875 kmalloc-128 allocations per second matching the observed slab growth exactly. A test with ftrace confirmed that the dma_fence_destroy event was never triggered. Fix by calling dma_fence_put() after dma_resv_add_fence(), transferring ownership of the fence to the DMA reservation object. The DMA fence then gets properly discarded after being signalled. Fixes: 3e26d9f08fbe0 ("iio: core: Add new DMABUF interface infrastructure") Originally-by: James Nuss Signed-off-by: Benoît Monin Reviewed-by: Paul Cercueil Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-buffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 46f36a6ed271..5c3df993bea2 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -1909,6 +1909,7 @@ static int iio_buffer_enqueue_dmabuf(struct iio_dev_buffer_pair *ib, dma_resv_add_fence(dmabuf->resv, &fence->base, dma_to_ram ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ); + dma_fence_put(&fence->base); dma_resv_unlock(dmabuf->resv); cookie = dma_fence_begin_signalling(); -- cgit v1.2.3 From 49f79cd28f1e3333cbe0d616ce59ead0b24bf34e Mon Sep 17 00:00:00 2001 From: Advait Dhamorikar Date: Tue, 7 Apr 2026 12:50:59 +0530 Subject: iio: magnetometer: st_magn: fix default DRDY pin selection for LIS2MDL The device tree binding for st,lis2mdl does not support st,drdy-int-pin property. However, when no platform data is provided and the property is absent, the driver falls back to default_magn_pdata which hardcodes drdy_int_pin = 2. This causes `st_sensors_set_drdy_int_pin` to fail with -EINVAL because the LIS2MDL sensor settings have no INT2 DRDY mask defined. Fix this by checking the sensor's INT2 DRDY mask availability at probe time and selecting the appropriate default pin. Sensors that do not support INT2 DRDY will default to INT1, while all others retain the existing default of INT2. Fixes: 38934daf7b5c ("iio: magnetometer: st_magn: Provide default platform data") Signed-off-by: Advait Dhamorikar Reviewed-by: Andy Shevchenko Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/st_magn_core.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index ef348d316c00..7644bd04654b 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -506,6 +506,11 @@ static const struct st_sensors_platform_data default_magn_pdata = { .drdy_int_pin = 2, }; +/* LIS2MDL only supports DRDY on INT1 */ +static const struct st_sensors_platform_data alt_magn_pdata = { + .drdy_int_pin = 1, +}; + static int st_magn_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *ch, int *val, int *val2, long mask) @@ -628,8 +633,12 @@ int st_magn_common_probe(struct iio_dev *indio_dev) mdata->current_fullscale = &mdata->sensor_settings->fs.fs_avl[0]; mdata->odr = mdata->sensor_settings->odr.odr_avl[0].hz; - if (!pdata) - pdata = (struct st_sensors_platform_data *)&default_magn_pdata; + if (!pdata) { + if (mdata->sensor_settings->drdy_irq.int2.mask) + pdata = (struct st_sensors_platform_data *)&default_magn_pdata; + else + pdata = (struct st_sensors_platform_data *)&alt_magn_pdata; + } err = st_sensors_init_sensor(indio_dev, pdata); if (err < 0) -- cgit v1.2.3 From 1f4f0bcc5255dec5c4c3a1551bf49d8c33b69b20 Mon Sep 17 00:00:00 2001 From: Aldo Conte Date: Tue, 7 Apr 2026 17:17:01 +0200 Subject: iio: light: cm3323: fix reg_conf not being initialized correctly The code stores the return value of i2c_smbus_write_word_data() in data->reg_conf; however, this value represents the result of the write operation and not the value actually written to the configuration register. This meant that the contents of data->reg_conf did not truly reflect the contents of the hardware register. Instead, save the value of the register before the write and use this value in the I2C write. The bug was found by code inspection: i2c_smbus_write_word_data() returns 0 on success, not the value written to the register. Tested using i2c-stub on a Raspberry Pi 3B running a custom 6.19.10 kernel. Before loading the driver, the configuration register 0x00 CM3323_CMD_CONF was populated with 0x0030 using `i2cset -y 11 0x10 0x00 0x0030 w`, encoding an integration time of 320ms in bits[6:4]. Due to incorrect initialization of data->reg_conf in cm3323_init(), the print of integration_time returns 0.040000 instead of the expected 0.320000. This happens because the read of the integration_time depends on cm3323_get_it_bits() that is based on the value of data->reg_conf, which is erroneously set to 0. With this fix applied, data->reg_conf correctly saves 0x0030 after init and the successive integration_time reports 0.320000 as expected. Fixes: 8b0544263761 ("iio: light: Add support for Capella CM3323 color sensor") Cc: stable@vger.kernel.org Signed-off-by: Aldo Conte Signed-off-by: Jonathan Cameron --- drivers/iio/light/cm3323.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/cm3323.c b/drivers/iio/light/cm3323.c index 79ad6e2209ca..0fe61b8a7029 100644 --- a/drivers/iio/light/cm3323.c +++ b/drivers/iio/light/cm3323.c @@ -89,15 +89,14 @@ static int cm3323_init(struct iio_dev *indio_dev) /* enable sensor and set auto force mode */ ret &= ~(CM3323_CONF_SD_BIT | CM3323_CONF_AF_BIT); + data->reg_conf = ret; - ret = i2c_smbus_write_word_data(data->client, CM3323_CMD_CONF, ret); + ret = i2c_smbus_write_word_data(data->client, CM3323_CMD_CONF, data->reg_conf); if (ret < 0) { dev_err(&data->client->dev, "Error writing reg_conf\n"); return ret; } - data->reg_conf = ret; - return 0; } -- cgit v1.2.3 From 8ce176501f836634f9c0419c0820140f968e9dc5 Mon Sep 17 00:00:00 2001 From: Shuvam Pandey Date: Mon, 6 Apr 2026 15:38:24 +0545 Subject: iio: adc: nxp-sar-adc: zero-initialize dma_slave_config nxp_sar_adc_start_cyclic_dma() only fills the RX-side members of dma_slave_config before passing it to dmaengine_slave_config(). Zero-initialize the structure so unused members do not contain stack garbage. Some DMA engines consult optional dma_slave_config fields, so leaving them uninitialized can cause DMA setup failures. Fixes: 4434072a893e ("iio: adc: Add the NXP SAR ADC support for the s32g2/3 platforms") Signed-off-by: Shuvam Pandey Reviewed-by: David Lechner Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/nxp-sar-adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/nxp-sar-adc.c b/drivers/iio/adc/nxp-sar-adc.c index 1711cae7d872..8f4ed3db94f0 100644 --- a/drivers/iio/adc/nxp-sar-adc.c +++ b/drivers/iio/adc/nxp-sar-adc.c @@ -676,7 +676,7 @@ static void nxp_sar_adc_dma_cb(void *data) static int nxp_sar_adc_start_cyclic_dma(struct iio_dev *indio_dev) { struct nxp_sar_adc *info = iio_priv(indio_dev); - struct dma_slave_config config; + struct dma_slave_config config = { }; struct dma_async_tx_descriptor *desc; int ret; -- cgit v1.2.3 From 387c86b582e0782ab332e7bfcd4e6e3f93922961 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Apr 2026 15:40:47 +0200 Subject: iio: pressure: bmp280: fix stack leak in bmp580 trigger handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bmp580_trigger_handler() declares its scan buffer on the stack without an initializer and then memcpy()s 3 bytes of 24-bit sensor data into each 4-byte __le32 field. The high byte of comp_temp and comp_press is left uninitialized, and the channel storagebits is 32, so two bytes of stack are pushed to userspace per scan. This is a regression from when the buffer lived in the private data, the move to a stack-local struct dropped the implicit zeroing. bme280_trigger_handler() was fixed up to handle this bug, but this driver was not fixed because there was no padding hole, but rather a short-fill issue. Fix this all by just zero-initializing the structure on the stack. Cc: Jonathan Cameron Cc: David Lechner Cc: "Nuno Sá" Cc: Andy Shevchenko Fixes: 872c8014e05e ("iio: pressure: bmp280: drop sensor_data array") Cc: stable Assisted-by: gregkh_clanker_t1000 Signed-off-by: Greg Kroah-Hartman Reviewed-by: David Lechner Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index d983ce9c0b99..9b489766e457 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -2616,7 +2616,7 @@ static irqreturn_t bmp580_trigger_handler(int irq, void *p) __le32 comp_temp; __le32 comp_press; aligned_s64 timestamp; - } buffer; + } buffer = { }; int ret; guard(mutex)(&data->lock); -- cgit v1.2.3 From c9d8e9adaa63150ef7e833480b799d0bab83a276 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Apr 2026 15:40:48 +0200 Subject: iio: imu: st_lsm6dsx: fix stack leak in tagged FIFO buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tagged FIFO path declares iio_buff on the stack with __aligned(8) but no initializer, but there is a hole in the structure, which will then leak to userspace as ST_LSM6DSX_SAMPLE_SIZE bytes (6) will be copied, but the space between that and the timestamp are not initialized. Commit c14edb4d0bdc ("iio:imu:st_lsm6dsx Fix alignment and data leak issues") moved the untagged FIFO path to a kzalloc'd buffer in hw->scan, but for the tagged path it only added the alignment qualifier and not the initializer :( Fix this by just zero-initializing the structure on the stack. Cc: Lorenzo Bianconi Cc: Jonathan Cameron Cc: David Lechner Cc: "Nuno Sá" Cc: Andy Shevchenko Fixes: c14edb4d0bdc ("iio:imu:st_lsm6dsx Fix alignment and data leak issues") Cc: stable Assisted-by: gregkh_clanker_t1000 Signed-off-by: Greg Kroah-Hartman Reviewed-by: David Lechner Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c index 5b28a3ffcc3d..48291203d1cd 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c @@ -609,7 +609,7 @@ int st_lsm6dsx_read_tagged_fifo(struct st_lsm6dsx_hw *hw) * must be passed a buffer that is aligned to 8 bytes so * as to allow insertion of a naturally aligned timestamp. */ - u8 iio_buff[ST_LSM6DSX_IIO_BUFF_SIZE] __aligned(8); + u8 iio_buff[ST_LSM6DSX_IIO_BUFF_SIZE] __aligned(8) = { }; u8 tag; bool reset_ts = false; int i, err, read_len; -- cgit v1.2.3 From 474f8928d50b09f7dcf507049f08732640b88b49 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Apr 2026 15:40:49 +0200 Subject: iio: imu: adis16550: fix stack leak in trigger handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit adis16550_trigger_handler() declares the scan data array on the stack without initializing it. The memcpy() at the bottom fills only the first 28 bytes (TEMP + 6 channels of GYRO/ACCEL data), and iio_push_to_buffers_with_timestamp() writes the s64 timestamp at the 8-byte-aligned offset 32. Bytes 28-31 remain uninitialized stack data which leaks to userspace on ever trigger. Fix this all by just zero-initializing the structure on the stack. Cc: Lars-Peter Clausen Cc: Michael Hennerich Cc: Jonathan Cameron Cc: David Lechner Cc: "Nuno Sá" Cc: Andy Shevchenko Fixes: e4570f4bb231 ("iio: imu: adis16550: align buffers for timestamp") Cc: stable Assisted-by: gregkh_clanker_t1000 Signed-off-by: Greg Kroah-Hartman Reviewed-by: David Lechner Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis16550.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis16550.c b/drivers/iio/imu/adis16550.c index 1f2af506f4bd..75679612052f 100644 --- a/drivers/iio/imu/adis16550.c +++ b/drivers/iio/imu/adis16550.c @@ -836,7 +836,7 @@ static irqreturn_t adis16550_trigger_handler(int irq, void *p) u16 dummy; bool valid; struct iio_poll_func *pf = p; - __be32 data[ADIS16550_MAX_SCAN_DATA] __aligned(8); + __be32 data[ADIS16550_MAX_SCAN_DATA] __aligned(8) = { }; struct iio_dev *indio_dev = pf->indio_dev; struct adis16550 *st = iio_priv(indio_dev); struct adis *adis = iio_device_get_drvdata(indio_dev); -- cgit v1.2.3 From 5ace794c3ded38038a1f97f9ea26b9a8c835c111 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 10 Apr 2026 13:12:13 +0300 Subject: iio: adc: qcom-spmi-adc5-gen3: Fix off by one in adc5_gen3_get_fw_channel_data() The > in "if (chan > ADC5_MAX_CHANNEL)" should be >= to prevent an out of bound read of the adc->data->adc_chans[] array. Fixes: baff45179e90 ("iio: adc: Add support for QCOM PMIC5 Gen3 ADC") Signed-off-by: Dan Carpenter Reviewed-by: Konrad Dybcio Signed-off-by: Jonathan Cameron --- drivers/iio/adc/qcom-spmi-adc5-gen3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/qcom-spmi-adc5-gen3.c b/drivers/iio/adc/qcom-spmi-adc5-gen3.c index f8168a14b907..48c793b18d11 100644 --- a/drivers/iio/adc/qcom-spmi-adc5-gen3.c +++ b/drivers/iio/adc/qcom-spmi-adc5-gen3.c @@ -482,7 +482,7 @@ static int adc5_gen3_get_fw_channel_data(struct adc5_chip *adc, sid = FIELD_GET(ADC5_GEN3_VIRTUAL_SID_MASK, chan); chan = FIELD_GET(ADC5_GEN3_CHANNEL_MASK, chan); - if (chan > ADC5_MAX_CHANNEL) + if (chan >= ADC5_MAX_CHANNEL) return dev_err_probe(dev, -EINVAL, "%s invalid channel number %d\n", name, chan); -- cgit v1.2.3 From f9bbd943c34a9ad60e593a4b99ce2394e4e2381b Mon Sep 17 00:00:00 2001 From: Salah Triki Date: Mon, 27 Apr 2026 21:12:38 +0100 Subject: iio: adc: mt6359: fix unchecked return value in mt6358_read_imp In mt6358_read_imp(), the variable val_v is passed to regmap_read() but the return value is not checked. If the read fails, val_v remains uninitialized and its random stack content is subsequently reported as a measurement result. Initialize val_v to zero to ensure a predictable value is reported in case of bus failure and to prevent potential stack data leakage. This also satisfies static analyzers that might otherwise flag the variable as used uninitialized. Fixes: 3587914bf61d ("iio: adc: Add support for MediaTek MT6357/8/9 Auxiliary ADC") Signed-off-by: Salah Triki Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/mt6359-auxadc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/mt6359-auxadc.c b/drivers/iio/adc/mt6359-auxadc.c index 6b9ed9b1fde2..1d9724ef0983 100644 --- a/drivers/iio/adc/mt6359-auxadc.c +++ b/drivers/iio/adc/mt6359-auxadc.c @@ -497,6 +497,7 @@ static int mt6358_read_imp(struct mt6359_auxadc *adc_dev, return ret; /* Read the params before stopping */ + val_v = 0; regmap_read(regmap, reg_adc0 + (cinfo->imp_adc_num << 1), &val_v); mt6358_stop_imp_conv(adc_dev); -- cgit v1.2.3 From d0a228d903425e653f18a4341e60c0538afb6d41 Mon Sep 17 00:00:00 2001 From: Salah Triki Date: Mon, 27 Apr 2026 22:33:19 +0100 Subject: iio: dac: max5821: fix return value check in powerdown sync The function max5821_sync_powerdown_mode() returned the result of i2c_master_send() directly. If a partial transfer occurred, it would be incorrectly treated as a success by the caller. While the caller currently handles the positive return value of 2 as success, this patch refactors the function to return 0 on full success and -EIO on short writes. This ensures robust error handling for incomplete transfers and improves code maintainability by using sizeof(outbuf). Fixes: 472988972737 ("iio: add support of the max5821") Signed-off-by: Salah Triki Reviewed-by: Andy Shevchenko Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/max5821.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iio/dac/max5821.c b/drivers/iio/dac/max5821.c index e7e29359f8fe..dd4e35460195 100644 --- a/drivers/iio/dac/max5821.c +++ b/drivers/iio/dac/max5821.c @@ -90,6 +90,7 @@ static int max5821_sync_powerdown_mode(struct max5821_data *data, const struct iio_chan_spec *chan) { u8 outbuf[2]; + int ret; outbuf[0] = MAX5821_EXTENDED_COMMAND_MODE; @@ -103,7 +104,13 @@ static int max5821_sync_powerdown_mode(struct max5821_data *data, else outbuf[1] |= MAX5821_EXTENDED_POWER_UP; - return i2c_master_send(data->client, outbuf, 2); + ret = i2c_master_send(data->client, outbuf, sizeof(outbuf)); + if (ret < 0) + return ret; + if (ret != sizeof(outbuf)) + return -EIO; + + return 0; } static ssize_t max5821_write_dac_powerdown(struct iio_dev *indio_dev, -- cgit v1.2.3 From ba121d7582361fe74405f32724976aeff5c35177 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Mon, 27 Apr 2026 19:26:31 +0800 Subject: iio: adc: meson-saradc: fix calibration buffer leak on error meson_sar_adc_temp_sensor_init() allocates a buffer with nvmem_cell_read(), but the old code leaked it if syscon_regmap_lookup_by_phandle() failed. Fix this by adding missing kfree(buf). Fixes: d6f2eac64403 ("iio: adc: meson: no devm for nvmem_cell_get") Signed-off-by: Felix Gu Signed-off-by: Jonathan Cameron --- drivers/iio/adc/meson_saradc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index 23991a3612bd..000e39ca5c62 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -817,9 +817,11 @@ static int meson_sar_adc_temp_sensor_init(struct iio_dev *indio_dev) } priv->tsc_regmap = syscon_regmap_lookup_by_phandle(dev->of_node, "amlogic,hhi-sysctrl"); - if (IS_ERR(priv->tsc_regmap)) + if (IS_ERR(priv->tsc_regmap)) { + kfree(buf); return dev_err_probe(dev, PTR_ERR(priv->tsc_regmap), "failed to get amlogic,hhi-sysctrl regmap\n"); + } trimming_bits = priv->param->temperature_trimming_bits; trimming_mask = BIT(trimming_bits) - 1; -- cgit v1.2.3 From eedf7602fbd929e97e0c480da501dc7a34beb2a8 Mon Sep 17 00:00:00 2001 From: Sanjay Chitroda Date: Sun, 26 Apr 2026 14:47:04 +0530 Subject: iio: ssp_sensors: cancel delayed work_refresh on remove The work_refresh may still be pending or running when the device is removed, cancel the delayed work_refresh in remove path. Fixes: 50dd64d57eee ("iio: common: ssp_sensors: Add sensorhub driver") Signed-off-by: Sanjay Chitroda Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/common/ssp_sensors/ssp_dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/common/ssp_sensors/ssp_dev.c b/drivers/iio/common/ssp_sensors/ssp_dev.c index da09c9f3ceb6..e2538a84c812 100644 --- a/drivers/iio/common/ssp_sensors/ssp_dev.c +++ b/drivers/iio/common/ssp_sensors/ssp_dev.c @@ -590,6 +590,7 @@ static void ssp_remove(struct spi_device *spi) ssp_clean_pending_list(data); free_irq(data->spi->irq, data); + cancel_delayed_work_sync(&data->work_refresh); timer_delete_sync(&data->wdt_timer); cancel_work_sync(&data->work_wdt); -- cgit v1.2.3 From ecae2ae606d493cf11457946436335bd0e726663 Mon Sep 17 00:00:00 2001 From: Rodrigo Alencar Date: Fri, 1 May 2026 10:14:54 +0100 Subject: iio: dac: ad5686: fix ref bit initialization for single-channel parts The reference bit position was ignored when writing the register at the probe() function (!!val was used). When such bit is 1, internal voltage reference is disabled so that an external one can be used. For multi-channel devices, bit 0 of the Internal Reference Setup command behaves the same way, so AD5686_REF_BIT_MSK is created. The issue exists since support for single-channel devices were first introduced. Fixes: be1b24d24541 ("iio:dac:ad5686: Add AD5691R/AD5692R/AD5693/AD5693R support") Reviewed-by: Andy Shevchenko Signed-off-by: Rodrigo Alencar Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5686.c | 6 +++--- drivers/iio/dac/ad5686.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/dac/ad5686.c b/drivers/iio/dac/ad5686.c index 4b18498aa074..b85d5c5a864b 100644 --- a/drivers/iio/dac/ad5686.c +++ b/drivers/iio/dac/ad5686.c @@ -509,7 +509,7 @@ int ad5686_probe(struct device *dev, break; case AD5686_REGMAP: cmd = AD5686_CMD_INTERNAL_REFER_SETUP; - ref_bit_msk = 0; + ref_bit_msk = AD5686_REF_BIT_MSK; break; case AD5693_REGMAP: cmd = AD5686_CMD_CONTROL_REG; @@ -520,9 +520,9 @@ int ad5686_probe(struct device *dev, return -EINVAL; } - val = (has_external_vref | ref_bit_msk); + val = has_external_vref ? ref_bit_msk : 0; - ret = st->write(st, cmd, 0, !!val); + ret = st->write(st, cmd, 0, val); if (ret) return ret; diff --git a/drivers/iio/dac/ad5686.h b/drivers/iio/dac/ad5686.h index e7d36bae3e59..36e16c5c4581 100644 --- a/drivers/iio/dac/ad5686.h +++ b/drivers/iio/dac/ad5686.h @@ -46,6 +46,7 @@ #define AD5310_REF_BIT_MSK BIT(8) #define AD5683_REF_BIT_MSK BIT(12) +#define AD5686_REF_BIT_MSK BIT(0) #define AD5693_REF_BIT_MSK BIT(12) /** -- cgit v1.2.3 From d01220ee5e43c65a206df827b39bf5cf5f7b9dce Mon Sep 17 00:00:00 2001 From: Rodrigo Alencar Date: Fri, 1 May 2026 10:14:55 +0100 Subject: iio: dac: ad5686: fix input raw value check Fix range check for input raw value, which is off by one, i.e., for a 10-bit DAC the max valid value is 1023, but 1 << 10 equals 1024, which passes the previous check, allowing an out-of-range write. The issue exists since the ad5686 driver was first introduced. Fixes: c2f37c8dcadc ("iio: dac: New driver for AD5686R, AD5685R, AD5684R Digital to analog converters") Reviewed-by: Andy Shevchenko Signed-off-by: Rodrigo Alencar Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5686.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/dac/ad5686.c b/drivers/iio/dac/ad5686.c index b85d5c5a864b..27878a6318ff 100644 --- a/drivers/iio/dac/ad5686.c +++ b/drivers/iio/dac/ad5686.c @@ -154,7 +154,7 @@ static int ad5686_write_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - if (val > (1 << chan->scan_type.realbits) || val < 0) + if (val >= (1 << chan->scan_type.realbits) || val < 0) return -EINVAL; mutex_lock(&st->lock); -- cgit v1.2.3 From 6f5ed4f2c7c83f33344e0ba179f72a12e5dad4a4 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Thu, 30 Apr 2026 21:29:06 +0800 Subject: iio: buffer: hw-consumer: fix use-after-free in error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the err_put_buffers cleanup path of iio_hw_consumer_alloc(), the code was using list_for_each_entry() to iterate through buffers while calling iio_buffer_put() which can free the current buffer if refcount drops to 0. The list_for_each_entry() loop macro then evaluates buf->head.next to continue iteration, accessing the freed buffer. Fix this by using list_for_each_entry_safe(). Fixes: 48b66f8f936f ("iio: Add hardware consumer buffer support") Reported-by: sashiko Closes: https://sashiko.dev/#/patchset/20260427-iio_buf-v1-1-2bbdac844647%40gmail.com Signed-off-by: Felix Gu Reviewed-by: Andy Shevchenko Reviewed-by: Nuno Sá Reviewed-by: Maxwell Doose Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/buffer/industrialio-hw-consumer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/buffer/industrialio-hw-consumer.c b/drivers/iio/buffer/industrialio-hw-consumer.c index 24d7df603760..700528c9a0a4 100644 --- a/drivers/iio/buffer/industrialio-hw-consumer.c +++ b/drivers/iio/buffer/industrialio-hw-consumer.c @@ -85,7 +85,7 @@ static struct hw_consumer_buffer *iio_hw_consumer_get_buffer( */ struct iio_hw_consumer *iio_hw_consumer_alloc(struct device *dev) { - struct hw_consumer_buffer *buf; + struct hw_consumer_buffer *buf, *tmp; struct iio_hw_consumer *hwc; struct iio_channel *chan; int ret; @@ -116,7 +116,7 @@ struct iio_hw_consumer *iio_hw_consumer_alloc(struct device *dev) return hwc; err_put_buffers: - list_for_each_entry(buf, &hwc->buffers, head) + list_for_each_entry_safe(buf, tmp, &hwc->buffers, head) iio_buffer_put(&buf->buffer); iio_channel_release_all(hwc->channels); err_free_hwc: -- cgit v1.2.3 From ebd250c2581ec46c64c73fdfa918c9a7f757505e Mon Sep 17 00:00:00 2001 From: Kim Seer Paller Date: Tue, 5 May 2026 12:34:32 +0800 Subject: iio: dac: ad3530r: Fix AD3531/AD3531R powerdown mode strings The AD3531/AD3531R has different output operating modes from the AD3530/AD3530R. According to the AD3531/AD3531R datasheet, the powerdown modes are: 01: 500 Ohm output impedance 10: 3.85 kOhm output impedance 11: 16 kOhm output impedance The driver currently uses the AD3530R modes (1k, 7.7k, 32k) for all variants, which is incorrect for AD3531/AD3531R. Add AD3531R-specific powerdown mode strings and assign them to the AD3531/AD3531R chip variants. Fixes: 93583174a3df ("iio: dac: ad3530r: Add driver for AD3530R and AD3531R") Signed-off-by: Kim Seer Paller Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad3530r.c | 54 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/drivers/iio/dac/ad3530r.c b/drivers/iio/dac/ad3530r.c index b97b46090d80..d9db3226ecd6 100644 --- a/drivers/iio/dac/ad3530r.c +++ b/drivers/iio/dac/ad3530r.c @@ -105,6 +105,12 @@ static const char * const ad3530r_powerdown_modes[] = { "32kohm_to_gnd", }; +static const char * const ad3531r_powerdown_modes[] = { + "500ohm_to_gnd", + "3.85kohm_to_gnd", + "16kohm_to_gnd", +}; + static int ad3530r_get_powerdown_mode(struct iio_dev *indio_dev, const struct iio_chan_spec *chan) { @@ -133,6 +139,13 @@ static const struct iio_enum ad3530r_powerdown_mode_enum = { .set = ad3530r_set_powerdown_mode, }; +static const struct iio_enum ad3531r_powerdown_mode_enum = { + .items = ad3531r_powerdown_modes, + .num_items = ARRAY_SIZE(ad3531r_powerdown_modes), + .get = ad3530r_get_powerdown_mode, + .set = ad3530r_set_powerdown_mode, +}; + static ssize_t ad3530r_get_dac_powerdown(struct iio_dev *indio_dev, uintptr_t private, const struct iio_chan_spec *chan, @@ -276,7 +289,20 @@ static const struct iio_chan_spec_ext_info ad3530r_ext_info[] = { { } }; -#define AD3530R_CHAN(_chan) \ +static const struct iio_chan_spec_ext_info ad3531r_ext_info[] = { + { + .name = "powerdown", + .shared = IIO_SEPARATE, + .read = ad3530r_get_dac_powerdown, + .write = ad3530r_set_dac_powerdown, + }, + IIO_ENUM("powerdown_mode", IIO_SEPARATE, &ad3531r_powerdown_mode_enum), + IIO_ENUM_AVAILABLE("powerdown_mode", IIO_SHARED_BY_TYPE, + &ad3531r_powerdown_mode_enum), + { } +}; + +#define AD3530R_CHAN(_chan, _ext_info) \ { \ .type = IIO_VOLTAGE, \ .indexed = 1, \ @@ -284,25 +310,25 @@ static const struct iio_chan_spec_ext_info ad3530r_ext_info[] = { .output = 1, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ BIT(IIO_CHAN_INFO_SCALE), \ - .ext_info = ad3530r_ext_info, \ + .ext_info = _ext_info, \ } static const struct iio_chan_spec ad3530r_channels[] = { - AD3530R_CHAN(0), - AD3530R_CHAN(1), - AD3530R_CHAN(2), - AD3530R_CHAN(3), - AD3530R_CHAN(4), - AD3530R_CHAN(5), - AD3530R_CHAN(6), - AD3530R_CHAN(7), + AD3530R_CHAN(0, ad3530r_ext_info), + AD3530R_CHAN(1, ad3530r_ext_info), + AD3530R_CHAN(2, ad3530r_ext_info), + AD3530R_CHAN(3, ad3530r_ext_info), + AD3530R_CHAN(4, ad3530r_ext_info), + AD3530R_CHAN(5, ad3530r_ext_info), + AD3530R_CHAN(6, ad3530r_ext_info), + AD3530R_CHAN(7, ad3530r_ext_info), }; static const struct iio_chan_spec ad3531r_channels[] = { - AD3530R_CHAN(0), - AD3530R_CHAN(1), - AD3530R_CHAN(2), - AD3530R_CHAN(3), + AD3530R_CHAN(0, ad3531r_ext_info), + AD3530R_CHAN(1, ad3531r_ext_info), + AD3530R_CHAN(2, ad3531r_ext_info), + AD3530R_CHAN(3, ad3531r_ext_info), }; static const struct ad3530r_chip_info ad3530_chip = { -- cgit v1.2.3 From 4701e471c16866e7aa8f5e6a3a6b0d31e097e2c9 Mon Sep 17 00:00:00 2001 From: Salah Triki Date: Tue, 5 May 2026 08:10:24 +0100 Subject: iio: temperature: tsys01: fix broken PROM checksum validation The current implementation of tsys01_crc_valid() incorrectly sums the first word (n_prom[0]) repeatedly instead of iterating over the 8 words retrieved from the PROM. This leads to a checksum mismatch and probe failure on hardware. According to the TSYS01 datasheet, the PROM consists of 8 words. A valid check must iterate through all 8 words to verify the integrity of the calibration data. The current driver only checks the first word 8 times. Note: This fix was identified during a code audit and is based on datasheet specifications. It has not been tested on real hardware. Fixes: 43e53407f680 ("Add tsys01 meas-spec driver support") Signed-off-by: Salah Triki Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/tsys01.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/temperature/tsys01.c b/drivers/iio/temperature/tsys01.c index 334bba6fdae6..104dd45598b0 100644 --- a/drivers/iio/temperature/tsys01.c +++ b/drivers/iio/temperature/tsys01.c @@ -119,7 +119,7 @@ static bool tsys01_crc_valid(u16 *n_prom) u8 sum = 0; for (cnt = 0; cnt < TSYS01_PROM_WORDS_NB; cnt++) - sum += ((n_prom[0] >> 8) + (n_prom[0] & 0xFF)); + sum += ((n_prom[cnt] >> 8) + (n_prom[cnt] & 0xFF)); return (sum == 0); } -- cgit v1.2.3 From 5237c3175cae5ab05f18878cec3301a04403859e Mon Sep 17 00:00:00 2001 From: Rodrigo Alencar Date: Tue, 5 May 2026 13:35:04 +0100 Subject: iio: dac: ad5686: acquire lock when doing powerdown control Protect access of pwr_down_mode and pwr_down_mask fields with existing mutex lock. Each channel exposes their own attributes for controlling powerdown modes and powerdown state. This fixes potential race conditions as those the write functions perform non-atomic read-modify-write operations to those pwr_down_* fields. This issue exists since the ad5686 driver was first introduced. Fixes: c2f37c8dcadc ("iio: dac: New driver for AD5686R, AD5685R, AD5684R Digital to analog converters") Signed-off-by: Rodrigo Alencar Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5686.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iio/dac/ad5686.c b/drivers/iio/dac/ad5686.c index 27878a6318ff..2e443fcfeb39 100644 --- a/drivers/iio/dac/ad5686.c +++ b/drivers/iio/dac/ad5686.c @@ -30,6 +30,8 @@ static int ad5686_get_powerdown_mode(struct iio_dev *indio_dev, { struct ad5686_state *st = iio_priv(indio_dev); + guard(mutex)(&st->lock); + return ((st->pwr_down_mode >> (chan->channel * 2)) & 0x3) - 1; } @@ -39,6 +41,8 @@ static int ad5686_set_powerdown_mode(struct iio_dev *indio_dev, { struct ad5686_state *st = iio_priv(indio_dev); + guard(mutex)(&st->lock); + st->pwr_down_mode &= ~(0x3 << (chan->channel * 2)); st->pwr_down_mode |= ((mode + 1) << (chan->channel * 2)); @@ -57,6 +61,8 @@ static ssize_t ad5686_read_dac_powerdown(struct iio_dev *indio_dev, { struct ad5686_state *st = iio_priv(indio_dev); + guard(mutex)(&st->lock); + return sysfs_emit(buf, "%d\n", !!(st->pwr_down_mask & (0x3 << (chan->channel * 2)))); } @@ -77,6 +83,8 @@ static ssize_t ad5686_write_dac_powerdown(struct iio_dev *indio_dev, if (ret) return ret; + guard(mutex)(&st->lock); + if (readin) st->pwr_down_mask |= (0x3 << (chan->channel * 2)); else -- cgit v1.2.3 From 8aeaf25a85263a7a43357e16ad78ab969f6f8aeb Mon Sep 17 00:00:00 2001 From: Rodrigo Alencar Date: Tue, 5 May 2026 13:35:05 +0100 Subject: iio: dac: ad5686: fix powerdown control on dual-channel devices Fix powerdown control by using a proper bit shift for the powerdown mask values. During initialization, powerdown bits are initialized so that unused bits are set to 1 and the correct bit shift is used. Dual-channel devices use one-hot encoding in the address and that reflects on the position of the powerdown bits, which are not channel-index based for that case. Quad-channel devices also use one-hot encoding for the channel address but the result of log2(address) coincides with the channel index value. Mask as 0x3U is used rather than 0x3, because shift can reach value of 30 (last channel of a 16-channel device), which would mess with the sign bit. The issue was introduced when first adding support for dual-channel devices, which overlooked powerdown control differences. Fixes: 7dc8faeab3e3 ("iio: dac: ad5686: add support for AD5338R") Signed-off-by: Rodrigo Alencar Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5686.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/iio/dac/ad5686.c b/drivers/iio/dac/ad5686.c index 2e443fcfeb39..a7213bc6b156 100644 --- a/drivers/iio/dac/ad5686.c +++ b/drivers/iio/dac/ad5686.c @@ -25,26 +25,37 @@ static const char * const ad5686_powerdown_modes[] = { "three_state" }; +static inline unsigned int ad5686_pd_mask_shift(const struct iio_chan_spec *chan) +{ + if (chan->channel == chan->address) + return chan->channel * 2; + + /* one-hot encoding is used in dual/quad channel devices */ + return __ffs(chan->address) * 2; +} + static int ad5686_get_powerdown_mode(struct iio_dev *indio_dev, const struct iio_chan_spec *chan) { + unsigned int shift = ad5686_pd_mask_shift(chan); struct ad5686_state *st = iio_priv(indio_dev); guard(mutex)(&st->lock); - return ((st->pwr_down_mode >> (chan->channel * 2)) & 0x3) - 1; + return ((st->pwr_down_mode >> shift) & 0x3U) - 1; } static int ad5686_set_powerdown_mode(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, unsigned int mode) { + unsigned int shift = ad5686_pd_mask_shift(chan); struct ad5686_state *st = iio_priv(indio_dev); guard(mutex)(&st->lock); - st->pwr_down_mode &= ~(0x3 << (chan->channel * 2)); - st->pwr_down_mode |= ((mode + 1) << (chan->channel * 2)); + st->pwr_down_mode &= ~(0x3U << shift); + st->pwr_down_mode |= (mode + 1) << shift; return 0; } @@ -59,12 +70,12 @@ static const struct iio_enum ad5686_powerdown_mode_enum = { static ssize_t ad5686_read_dac_powerdown(struct iio_dev *indio_dev, uintptr_t private, const struct iio_chan_spec *chan, char *buf) { + unsigned int shift = ad5686_pd_mask_shift(chan); struct ad5686_state *st = iio_priv(indio_dev); guard(mutex)(&st->lock); - return sysfs_emit(buf, "%d\n", !!(st->pwr_down_mask & - (0x3 << (chan->channel * 2)))); + return sysfs_emit(buf, "%d\n", !!(st->pwr_down_mask & (0x3U << shift))); } static ssize_t ad5686_write_dac_powerdown(struct iio_dev *indio_dev, @@ -86,9 +97,9 @@ static ssize_t ad5686_write_dac_powerdown(struct iio_dev *indio_dev, guard(mutex)(&st->lock); if (readin) - st->pwr_down_mask |= (0x3 << (chan->channel * 2)); + st->pwr_down_mask |= 0x3U << ad5686_pd_mask_shift(chan); else - st->pwr_down_mask &= ~(0x3 << (chan->channel * 2)); + st->pwr_down_mask &= ~(0x3U << ad5686_pd_mask_shift(chan)); switch (st->chip_info->regmap_type) { case AD5310_REGMAP: @@ -468,7 +479,7 @@ int ad5686_probe(struct device *dev, { struct ad5686_state *st; struct iio_dev *indio_dev; - unsigned int val, ref_bit_msk; + unsigned int val, ref_bit_msk, shift; bool has_external_vref; u8 cmd; int ret, i; @@ -492,9 +503,18 @@ int ad5686_probe(struct device *dev, has_external_vref = ret != -ENODEV; st->vref_mv = has_external_vref ? ret / 1000 : st->chip_info->int_vref_mv; + /* Initialize masks to all ones provided the max shift (last channel) */ + shift = ad5686_pd_mask_shift(&st->chip_info->channels[st->chip_info->num_channels - 1]); + st->pwr_down_mask = GENMASK(shift + 1, 0); + st->pwr_down_mode = GENMASK(shift + 1, 0); + /* Set all the power down mode for all channels to 1K pulldown */ - for (i = 0; i < st->chip_info->num_channels; i++) - st->pwr_down_mode |= (0x01 << (i * 2)); + for (i = 0; i < st->chip_info->num_channels; i++) { + shift = ad5686_pd_mask_shift(&st->chip_info->channels[i]); + st->pwr_down_mask &= ~(0x3U << shift); /* powered up state */ + st->pwr_down_mode &= ~(0x3U << shift); + st->pwr_down_mode |= 0x01U << shift; + } indio_dev->name = name; indio_dev->info = &ad5686_info; -- cgit v1.2.3 From 6bdc3023d62ed5c7d591f0eb27a5adb37fb892ae Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 5 May 2026 14:37:48 +0100 Subject: iio: gyro: itg3200: fix i2c read into the wrong stack location itg3200_read_all_channels() takes `__be16 *buf' as a parameter and fills the i2c_msg destination as `(char *)&buf'. Since `buf' is the parameter (a pointer), `&buf' is the address of the local pointer slot on the stack of itg3200_read_all_channels(), not the address of the caller's scan buffer. The (char *) cast hides the type mismatch. i2c_transfer() therefore writes ITG3200_SCAN_ELEMENTS * sizeof(s16) = 8 bytes into the parameter's stack slot, which is discarded when the function returns. The caller's scan buffer in itg3200_trigger_handler() is never written to, so iio_push_to_buffers_with_timestamp() pushes uninitialised stack contents to userspace via /dev/iio:deviceX every scan -- both a functional bug (no actual gyroscope or temperature data is delivered through the triggered buffer) and an information leak. The non-buffered read_raw() path is unaffected: it goes through itg3200_read_reg_s16() which uses `&out' on a local s16 value, where that is correct. Drop the spurious `&' so the i2c read writes into the caller's buffer. Fixes: 9dbf091da080 ("iio: gyro: Add itg3200") Cc: stable@vger.kernel.org Signed-off-by: David Carlier Reviewed-by: Andy Shevchenko Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/itg3200_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/gyro/itg3200_buffer.c b/drivers/iio/gyro/itg3200_buffer.c index cf97adfa9727..87efa2c74ca4 100644 --- a/drivers/iio/gyro/itg3200_buffer.c +++ b/drivers/iio/gyro/itg3200_buffer.c @@ -34,7 +34,7 @@ static int itg3200_read_all_channels(struct i2c_client *i2c, __be16 *buf) .addr = i2c->addr, .flags = i2c->flags | I2C_M_RD, .len = ITG3200_SCAN_ELEMENTS * sizeof(s16), - .buf = (char *)&buf, + .buf = (char *)buf, }, }; -- cgit v1.2.3 From 422b5bbf333f75fb486855ad0eedc23cf21f3277 Mon Sep 17 00:00:00 2001 From: Salah Triki Date: Thu, 7 May 2026 20:07:51 +0100 Subject: iio: adc: viperboard: Fix error handling in vprbrd_iio_read_raw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver proceeds to the reception phase even if the preceding transmission fails. This uses a goto error label for an early bail out and ensures the mutex is properly unlocked in case of failure. Fixes: ffd8a6e7a778 ("iio: adc: Add viperboard adc driver") Signed-off-by: Salah Triki Reviewed-by: Joshua Crofts Reviewed-by: Maxwell Doose Reviewed-by: Nuno Sá Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/viperboard_adc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/viperboard_adc.c b/drivers/iio/adc/viperboard_adc.c index 9bb0b83c8f67..6efe1c618ef7 100644 --- a/drivers/iio/adc/viperboard_adc.c +++ b/drivers/iio/adc/viperboard_adc.c @@ -70,8 +70,10 @@ static int vprbrd_iio_read_raw(struct iio_dev *iio_dev, VPRBRD_USB_TYPE_OUT, 0x0000, 0x0000, admsg, sizeof(struct vprbrd_adc_msg), VPRBRD_USB_TIMEOUT_MS); if (ret != sizeof(struct vprbrd_adc_msg)) { - dev_err(&iio_dev->dev, "usb send error on adc read\n"); + mutex_unlock(&vb->lock); error = -EREMOTEIO; + dev_err(&iio_dev->dev, "usb send error on adc read\n"); + goto error; } ret = usb_control_msg(vb->usb_dev, -- cgit v1.2.3 From 8a220d1c312c66194f4a33dd52d1fba42bc2b341 Mon Sep 17 00:00:00 2001 From: Hongling Zeng Date: Wed, 13 May 2026 18:34:06 +0800 Subject: cachefiles: Fix error return when vfs_mkdir() fails When vfs_mkdir() fails, the error code is not extracted from the returned error pointer. This causes mkdir_error to be reached with ret=0, which leads to returning ERR_PTR(0) (NULL) instead of a proper error pointer. Fix this by extracting the error code from the error pointer when vfs_mkdir() fails. Fixes: 406fad7698f5 ("cachefiles: Fix oops in vfs_mkdir from cachefiles_get_directory") Signed-off-by: Hongling Zeng Link: https://patch.msgid.link/20260513103406.202320-1-zenghongling@kylinos.cn Signed-off-by: Christian Brauner --- fs/cachefiles/namei.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 1b83ed0e0a63..2937db690b40 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -130,6 +130,8 @@ retry: ret = cachefiles_inject_write_error(); if (ret == 0) { subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700, NULL); + if (IS_ERR(subdir)) + ret = PTR_ERR(subdir); } else { end_creating(subdir); subdir = ERR_PTR(ret); -- cgit v1.2.3 From 3f9ed5f5aa9ecffd284218fffd6abc29617f51d5 Mon Sep 17 00:00:00 2001 From: "Uwe Kleine-König (The Capable Hub)" Date: Tue, 28 Apr 2026 16:45:53 +0200 Subject: drm/msm: Don't use UTS_RELEASE directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit UTS_RELEASE evaluates to a static string and changes quite easily (e.g. uncommitted changes in the source tree or new commits). So when checking if a patch introduces changes to the resulting binary each usage of UTS_RELEASE is source of annoyance. Instead of using UTS_RELEASE directly use init_utsname()->release which evaluates to the same string but with that a change of UTS_RELEASE doesn't affect msm_disp_snapshot_util.o or msm_gpu.o. Signed-off-by: Uwe Kleine-König (The Capable Hub) Patchwork: https://patchwork.freedesktop.org/patch/721948/ Message-ID: <20260428144553.1103785-2-u.kleine-koenig@baylibre.com> Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c | 4 ++-- drivers/gpu/drm/msm/msm_gpu.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c index 19b470968f4d..636bcb65a2a4 100644 --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c @@ -5,7 +5,7 @@ #define pr_fmt(fmt) "[drm:%s:%d] " fmt, __func__, __LINE__ -#include +#include #include "msm_disp_snapshot.h" @@ -79,7 +79,7 @@ void msm_disp_state_print(struct msm_disp_state *state, struct drm_printer *p) } drm_printf(p, "---\n"); - drm_printf(p, "kernel: " UTS_RELEASE "\n"); + drm_printf(p, "kernel: %s\n", init_utsname()->release); drm_printf(p, "module: " KBUILD_MODNAME "\n"); drm_printf(p, "dpu devcoredump\n"); drm_printf(p, "time: %ptSp\n", &state->time); diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index d901304fff6d..b0228ec42030 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -13,11 +13,11 @@ #include "msm_gpu_trace.h" //#include "adreno/adreno_gpu.h" -#include #include #include #include #include +#include /* * Power Management: @@ -196,7 +196,7 @@ static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset, p = drm_coredump_printer(&iter); drm_printf(&p, "---\n"); - drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "kernel: %s\n", init_utsname()->release); drm_printf(&p, "module: " KBUILD_MODNAME "\n"); drm_printf(&p, "time: %ptSp\n", &state->time); if (state->comm) -- cgit v1.2.3 From 439e16c91aeeff2c7b503b317ccce2458a021191 Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Fri, 15 May 2026 23:36:35 +0800 Subject: MAINTAINERS: Remove Jianjun Wang as PCIe mediatek maintainer Email to Jianjun Wang bounces with error: "550 Relaying mail to jianjun.wang@mediatek.com is not allowed". Remove the address to avoid sending future kernel maintenance queries to an unreachable destination. The MediaTek PCIe driver remains supported by Ryder Lee. Signed-off-by: Hans Zhang <18255117159@163.com> Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20260515153635.136054-1-18255117159@163.com --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index ab2f91f62c54..025fcdb10a61 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20700,7 +20700,6 @@ F: drivers/pci/controller/dwc/pcie-intel-gw.c PCIE DRIVER FOR MEDIATEK M: Ryder Lee -M: Jianjun Wang L: linux-pci@vger.kernel.org L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) S: Supported -- cgit v1.2.3 From 3392291fc509d8ad6e4ad90f15b0a193f721cbc9 Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Fri, 8 May 2026 14:57:21 +0800 Subject: drm/msm: Fix shrinker deadlock With PROVE_LOCKING on an Snapdragon X1 and VM reclaim pressure, we see: ====================================================== WARNING: possible circular locking dependency detected 7.0.0-debug+ #43 Tainted: G W ------------------------------------------------------ kswapd0/82 is trying to acquire lock: ffff800080ec3870 (reservation_ww_class_acquire){+.+.}-{0:0}, at: msm_gem_shrinker_scan+0x17c/0x400 [msm] but task is already holding lock: ffffc31709b263b8 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x88/0x988 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (fs_reclaim){+.+.}-{0:0}: __lock_acquire+0x4d0/0xad0 lock_acquire.part.0+0xc4/0x248 lock_acquire+0x8c/0x248 fs_reclaim_acquire+0xd0/0xf0 dma_resv_lockdep+0x224/0x348 do_one_initcall+0x84/0x5d0 do_initcalls+0x194/0x1d8 kernel_init_freeable+0x128/0x180 kernel_init+0x2c/0x160 ret_from_fork+0x10/0x20 -> #1 (reservation_ww_class_mutex){+.+.}-{4:4}: __lock_acquire+0x4d0/0xad0 lock_acquire.part.0+0xc4/0x248 lock_acquire+0x8c/0x248 dma_resv_lockdep+0x1a8/0x348 do_one_initcall+0x84/0x5d0 do_initcalls+0x194/0x1d8 kernel_init_freeable+0x128/0x180 kernel_init+0x2c/0x160 ret_from_fork+0x10/0x20 -> #0 (reservation_ww_class_acquire){+.+.}-{0:0}: check_prev_add+0x114/0x790 validate_chain+0x594/0x6f0 __lock_acquire+0x4d0/0xad0 lock_acquire.part.0+0xc4/0x248 lock_acquire+0x8c/0x248 drm_gem_lru_scan+0x1ac/0x440 msm_gem_shrinker_scan+0x17c/0x400 [msm] do_shrink_slab+0x150/0x4a0 shrink_slab+0x144/0x460 shrink_one+0x9c/0x1b0 shrink_many+0x27c/0x5c0 shrink_node+0x344/0x550 balance_pgdat+0x2c0/0x988 kswapd+0x11c/0x318 kthread+0x10c/0x128 ret_from_fork+0x10/0x20 other info that might help us debug this: Chain exists of: reservation_ww_class_acquire --> reservation_ww_class_mutex --> fs_reclaim Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); lock(reservation_ww_class_mutex); lock(fs_reclaim); lock(reservation_ww_class_acquire); *** DEADLOCK *** 1 lock held by kswapd0/82: #0: ffffc31709b263b8 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x88/0x988 stack backtrace: CPU: 4 UID: 0 PID: 82 Comm: kswapd0 Tainted: G W 7.0.0-debug+ #43 PREEMPT(full) Tainted: [W]=WARN Hardware name: LENOVO 21BX0016US/21BX0016US, BIOS N3HET94W (1.66 ) 09/15/2025 Call trace: show_stack+0x20/0x40 (C) dump_stack_lvl+0x9c/0xd0 dump_stack+0x18/0x30 print_circular_bug+0x114/0x120 check_noncircular+0x178/0x198 check_prev_add+0x114/0x790 validate_chain+0x594/0x6f0 __lock_acquire+0x4d0/0xad0 lock_acquire.part.0+0xc4/0x248 lock_acquire+0x8c/0x248 drm_gem_lru_scan+0x1ac/0x440 msm_gem_shrinker_scan+0x17c/0x400 [msm] do_shrink_slab+0x150/0x4a0 shrink_slab+0x144/0x460 shrink_one+0x9c/0x1b0 shrink_many+0x27c/0x5c0 shrink_node+0x344/0x550 balance_pgdat+0x2c0/0x988 kswapd+0x11c/0x318 kthread+0x10c/0x128 ret_from_fork+0x10/0x20 kswapd0 holding fs_reclaim calls the MSM shrinker, which calls dma_resv_lock. This in turn acquires fs_reclaim. Fix this deadlock by using dma_resv_trylock() instead, dropping the subsequently unused passed wait-wound lock 'ticket'. Cc: stable@vger.kernel.org Signed-off-by: Daniel J Blueman Fixes: fe4952b5f27c ("drm/msm: Convert vm locking") Patchwork: https://patchwork.freedesktop.org/patch/723564/ Message-ID: <20260508065722.18785-1-daniel@quora.org> [rob: fixup compile errors, replace lockdep splat with something legible] Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_shrinker.c | 40 ++++++++++++++-------------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 31fa51a44f86..6e39e4e578bb 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -43,8 +43,7 @@ msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) } static bool -with_vm_locks(struct ww_acquire_ctx *ticket, - void (*fn)(struct drm_gem_object *obj), +with_vm_locks(void (*fn)(struct drm_gem_object *obj), struct drm_gem_object *obj) { /* @@ -52,7 +51,7 @@ with_vm_locks(struct ww_acquire_ctx *ticket, * success paths */ struct drm_gpuvm_bo *vm_bo, *last_locked = NULL; - int ret = 0; + bool locked = true; drm_gem_for_each_gpuvm_bo (vm_bo, obj) { struct dma_resv *resv = drm_gpuvm_resv(vm_bo->vm); @@ -60,23 +59,14 @@ with_vm_locks(struct ww_acquire_ctx *ticket, if (resv == obj->resv) continue; - ret = dma_resv_lock(resv, ticket); - - /* - * Since we already skip the case when the VM and obj - * share a resv (ie. _NO_SHARE objs), we don't expect - * to hit a double-locking scenario... which the lock - * unwinding cannot really cope with. - */ - WARN_ON(ret == -EALREADY); - /* - * Don't bother with slow-lock / backoff / retry sequence, - * if we can't get the lock just give up and move on to - * the next object. + * dma_resv_lock can't be used due to acquiring 'ticket' before the + * fs_reclaim lock, which is held in shrinker context */ - if (ret) + if (!dma_resv_trylock(resv)) { + locked = false; goto out_unlock; + } /* * Hold a ref to prevent the vm_bo from being freed @@ -108,11 +98,11 @@ out_unlock: } } - return ret == 0; + return locked; } static bool -purge(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket) +purge(struct drm_gem_object *obj, struct ww_acquire_ctx *) { if (!is_purgeable(to_msm_bo(obj))) return false; @@ -120,11 +110,11 @@ purge(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket) if (msm_gem_active(obj)) return false; - return with_vm_locks(ticket, msm_gem_purge, obj); + return with_vm_locks(msm_gem_purge, obj); } static bool -evict(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket) +evict(struct drm_gem_object *obj, struct ww_acquire_ctx *) { if (is_unevictable(to_msm_bo(obj))) return false; @@ -132,7 +122,7 @@ evict(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket) if (msm_gem_active(obj)) return false; - return with_vm_locks(ticket, msm_gem_evict, obj); + return with_vm_locks(msm_gem_evict, obj); } static bool @@ -164,7 +154,6 @@ static unsigned long msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { struct msm_drm_private *priv = shrinker->private_data; - struct ww_acquire_ctx ticket; struct { struct drm_gem_lru *lru; bool (*shrink)(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket); @@ -185,11 +174,14 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) for (unsigned i = 0; (nr > 0) && (i < ARRAY_SIZE(stages)); i++) { if (!stages[i].cond) continue; + /* + * 'ticket' not needed on trylock paths + */ stages[i].freed = drm_gem_lru_scan(stages[i].lru, nr, &stages[i].remaining, stages[i].shrink, - &ticket); + NULL); nr -= stages[i].freed; freed += stages[i].freed; remaining += stages[i].remaining; -- cgit v1.2.3 From a828abbb897657451d96ad7bf20f1893ac983bb9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 15 May 2026 13:32:32 +0200 Subject: bpf: make bpf_session_is_return() reference optional Building without CONFIG_BPF_EVENTS produces a build-time warning: WARN: resolve_btfids: unresolved symbol bpf_session_is_return The function is actually defined in kernel/trace/bpf_trace.o, which is built conditionally based on configuration. Make the reference to this function conditional as well, as is already done in the bpf verifier for other functions. Fixes: 8fe4dc4f6456 ("bpf: change prototype of bpf_session_{cookie,is_return}") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20260515113242.2706303-1-arnd@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 69d75515ed3f..88b40c979b56 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11263,7 +11263,11 @@ BTF_ID(func, bpf_task_work_schedule_resume) BTF_ID(func, bpf_arena_alloc_pages) BTF_ID(func, bpf_arena_free_pages) BTF_ID(func, bpf_arena_reserve_pages) +#ifdef CONFIG_BPF_EVENTS BTF_ID(func, bpf_session_is_return) +#else +BTF_ID_UNUSED +#endif BTF_ID(func, bpf_stream_vprintk) BTF_ID(func, bpf_stream_print_stack) -- cgit v1.2.3 From ccd25890f73c082fe2657ed227b497d6ac5fdc40 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 May 2026 10:19:09 -0600 Subject: io_uring/net: punt IORING_OP_BIND async if it needs file create For two reasons: 1) An opcode cannot block inside io_uring_enter() doing submissions, as it'll stall the submission side pipeline. 2) Ending up in sb_start_write() -> __sb_start_write() -> percpu_down_read_freezable() introduces a new lockdep edge, which it correctly complains about. Check if the socket type is AF_UNIX and has a non-empty pathname. If it does, mark it REQ_F_FORCE_ASYNC to punt the submission to io-wq rather than attempt to do it inline. Fixes: 7481fd93fa0a ("io_uring: Introduce IORING_OP_BIND") Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe --- io_uring/net.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/io_uring/net.c b/io_uring/net.c index 30cd22c0b934..8df15b639358 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -1799,11 +1800,29 @@ out: return IOU_COMPLETE; } +/* + * Check if bind request would potentially end up with filename_create(), + * which in turn end up in mnt_want_write() which will grab the fs + * percpu start write sem. This can trigger a lockdep warning. + */ +static int io_bind_file_create(const struct io_async_msghdr *io, int addr_len) +{ + const struct sockaddr_un *sun; + + if (io->addr.ss_family != AF_UNIX) + return 0; + if (addr_len <= offsetof(struct sockaddr_un, sun_path)) + return 0; + sun = (const struct sockaddr_un *) &io->addr; + return sun->sun_path[0] != '\0'; +} + int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind); struct sockaddr __user *uaddr; struct io_async_msghdr *io; + int ret; if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) return -EINVAL; @@ -1814,7 +1833,12 @@ int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) io = io_msg_alloc_async(req); if (unlikely(!io)) return -ENOMEM; - return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr); + ret = move_addr_to_kernel(uaddr, bind->addr_len, &io->addr); + if (unlikely(ret)) + return ret; + if (io_bind_file_create(io, bind->addr_len)) + req->flags |= REQ_F_FORCE_ASYNC; + return 0; } int io_bind(struct io_kiocb *req, unsigned int issue_flags) -- cgit v1.2.3 From ed831e7ea1a860bdbab3eadeb95f7f73e9d212df Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 May 2026 09:45:37 -0700 Subject: PCI: brcmstb: Assign pcie->gen from of_pci_get_max_link_speed() After commit 03f920936977 ("PCI: controller: Validate max-link-speed"), pcie->gen stopped being assigned and as a result the established PCIe link would stop supporting Gen3 speeds on 2712 since pcie->gen is used to populate LnkCntl2 and LnkCap in brcm_pcie_set_gen(). If the 'max-link-speed' property is not specified, or it exceeds Gen3, resort to the HW defaults. Link: https://github.com/raspberrypi/linux/issues/7343 Reported-by: Dom Cobley Reported-by: Phil Elwell Fixes: 03f920936977 ("PCI: controller: Validate max-link-speed") Signed-off-by: Florian Fainelli Signed-off-by: Bjorn Helgaas Reviewed-by: Hans Zhang <18255117159@163.com> Reviewed-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260506164537.103196-1-florian.fainelli@broadcom.com --- drivers/pci/controller/pcie-brcmstb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index 714bcab97b60..08a0e7091ced 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -2072,8 +2072,10 @@ static int brcm_pcie_probe(struct platform_device *pdev) return PTR_ERR(pcie->clk); ret = of_pci_get_max_link_speed(np); - if (pcie_get_link_speed(ret) == PCI_SPEED_UNKNOWN) + if (ret < 0 || ret > 3) pcie->gen = 0; + else + pcie->gen = ret; pcie->ssc = of_property_read_bool(np, "brcm,enable-ssc"); -- cgit v1.2.3 From 96350db80e0acd733e9b9ef61c0d910790b27289 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 15 May 2026 12:57:09 +0200 Subject: ring-buffer remote: Avoid unexpected symbol warnings (arm, s390) The now more verbose check found more architecture specific symbol missing from the whitelist, during randconfig testing on s390 and 32-bit arm: Unexpected symbols in kernel/trace/simple_ring_buffer.o: U __aeabi_unwind_cpp_pr1 Unexpected symbols in kernel/trace/simple_ring_buffer.o: U __s390_indirect_jump_r1 U __s390_indirect_jump_r10 U __s390_indirect_jump_r14 U __s390_indirect_jump_r2 U __s390_indirect_jump_r5 U __s390_indirect_jump_r7 U __s390_indirect_jump_r8 U __s390_indirect_jump_r9 make[6]: *** [/home/arnd/arm-soc/kernel/trace/Makefile:160: kernel/trace/simple_ring_buffer.o.checked] Error 1 Add these to the list and keep it roughly sorted into sanitizer and architecture symbols. Cc: Masami Hiramatsu Cc: Marc Zyngier Cc: Nathan Chancellor Cc: Vincent Donnefort Cc: Mathieu Desnoyers Cc: Paolo Bonzini Link: https://patch.msgid.link/20260515105717.1023007-1-arnd@kernel.org Fixes: 1211907ac0b5 ("tracing: Generate undef symbols allowlist for simple_ring_buffer") Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt --- kernel/trace/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 1decdce8cbef..9b0834134cae 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -143,8 +143,8 @@ obj-$(CONFIG_TRACE_REMOTE_TEST) += remote_test.o targets += undefsyms_base.o KASAN_SANITIZE_undefsyms_base.o := y -UNDEFINED_ALLOWLIST = __asan __gcov __kasan __kcsan __hwasan __sancov __sanitizer __tsan __ubsan __x86_indirect_thunk \ - __msan simple_ring_buffer \ +UNDEFINED_ALLOWLIST = __asan __gcov __kasan __kcsan __hwasan __sancov __sanitizer __tsan __ubsan __msan \ + __aeabi_unwind_cpp __s390_indirect_jump __x86_indirect_thunk simple_ring_buffer \ $(shell $(NM) -u $(obj)/undefsyms_base.o 2>/dev/null | awk '{print $$2}') quiet_cmd_check_undefined = NM $< -- cgit v1.2.3 From 915fab69823a14c170dbaa3b41978768e0fe62fc Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Tue, 12 May 2026 16:51:14 -0400 Subject: ipv4: raw: reject IP_HDRINCL packets with ihl < 5 raw_send_hdrinc() validates that the caller-supplied IPv4 header fits within the message length: iphlen = iph->ihl * 4; err = -EINVAL; if (iphlen > length) goto error_free; if (iphlen >= sizeof(*iph)) { /* fix up saddr, tot_len, id, csum, transport_header */ } It does not, however, reject ihl < 5. For such a packet the "if (iphlen >= sizeof(*iph))" branch is skipped, leaving the crafted iphdr untouched, but the packet is still handed to __ip_local_out() and onward. Downstream consumers that read iph->ihl assume a sane value: net/ipv4/ah4.c:ah_output() in particular subtracts sizeof(struct iphdr) from top_iph->ihl * 4 and passes the (signed-int-negative, then cast to size_t) result to memcpy(), producing an OOB access of length close to SIZE_MAX and a host kernel panic. An IPv4 header with ihl < 5 is malformed by definition (RFC 791: "Internet Header Length is the length of the internet header in 32 bit words ... Note that the minimum value for a correct header is 5."). The kernel should not be willing to inject such a packet into its own output path. Reject "iphlen < sizeof(*iph)" alongside the existing "iphlen > length" check. This matches the principle that locally constructed packets that re-enter the IP stack must pass the same basic sanity tests that a foreign packet would be subjected to. Once this lands, the "if (iphlen >= sizeof(*iph))" wrapper around the fixup branch becomes redundant; left in place to keep the patch minimal and backport-friendly. A follow-up can unwrap it. Note that commit 86f4c90a1c5c ("ipv4, ipv6: ensure raw socket message is big enough to hold an IP header") ensures the message buffer is large enough to hold an iphdr, but does not constrain the self-reported iph->ihl. Reachability: the malformed packet source is any caller with CAP_NET_RAW, including an unprivileged process in a user+net namespace on a kernel with CONFIG_USER_NS=y. The reproduced AH crash also requires a matching xfrm AH policy on the outgoing route; a container granted CAP_NET_ADMIN can install that state and policy in its netns. Loopback bypasses xfrm_output, so the trigger uses a real netdev. Reproduced on UML + KASAN: kernel-mode fault at addr 0x0 with memcpy_orig at the crash site. Same shape reproduces inside a rootless Docker container with --cap-add NET_ADMIN on a stock distro kernel. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Suggested-by: Herbert Xu Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/77ec2b5e8111961c2c39883c92e8aa2709039c17.1778614451.git.michael.bommarito@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/raw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 5aaf9c62c8e1..68e88cb3e55c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -391,7 +391,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, * in, reject the frame as invalid */ err = -EINVAL; - if (iphlen > length) + if (iphlen > length || iphlen < sizeof(*iph)) goto error_free; if (iphlen >= sizeof(*iph)) { -- cgit v1.2.3 From dc366607c41c45fd0ae6f3db090f31dd611b644a Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Wed, 13 May 2026 12:30:50 +0800 Subject: drm: Replace old pointer to new idr Commit 5e28b7b94408 introduced a logical error by failing to replace the newly generated IDR pointer to old id's pointer at the correct location within the "change handle" logic; this resulted in the issue reported by syzbot [1]. Specifically, the new IDR object pointer is intended to replace the original id's pointer during the normal execution flow. Additionally, an unnecessary conditional check for the ret exit path has been removed. [1] !RB_EMPTY_ROOT(&prime_fpriv->dmabufs) WARNING: drivers/gpu/drm/drm_prime.c:224 at drm_prime_destroy_file_private+0x48/0x60 drivers/gpu/drm/drm_prime.c:224, CPU#0: syz.0.17/5833 Call Trace: drm_file_free.part.0+0x7e6/0xcc0 drivers/gpu/drm/drm_file.c:269 drm_file_free drivers/gpu/drm/drm_file.c:237 [inline] drm_close_helper.isra.0+0x186/0x200 drivers/gpu/drm/drm_file.c:290 drm_release+0x1ab/0x360 drivers/gpu/drm/drm_file.c:438 Fixes: 5e28b7b94408 ("drm: Set old handle to NULL before prime swap in change_handle") Reported-by: syzbot+d7c9eed171647e421013@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d7c9eed171647e421013 Cc: stable@vger.kernel.org Tested-by: syzbot+d7c9eed171647e421013@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Signed-off-by: Dave Airlie Link: https://patch.msgid.link/tencent_C267296443AAA4567771176886DFF364A305@qq.com --- drivers/gpu/drm/drm_gem.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 51a887cc7fd7..8afab57fc055 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1067,17 +1067,12 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, spin_unlock(&file_priv->table_lock); - if (ret < 0) - goto out_unlock; - if (obj->dma_buf) { ret = drm_prime_add_buf_handle(&file_priv->prime, obj->dma_buf, handle); if (ret < 0) { spin_lock(&file_priv->table_lock); idr_remove(&file_priv->object_idr, handle); - idrobj = idr_replace(&file_priv->object_idr, obj, handle); - WARN_ON(idrobj != NULL); spin_unlock(&file_priv->table_lock); goto out_unlock; } @@ -1089,7 +1084,9 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, spin_lock(&file_priv->table_lock); idr_remove(&file_priv->object_idr, args->handle); + idrobj = idr_replace(&file_priv->object_idr, obj, handle); spin_unlock(&file_priv->table_lock); + WARN_ON(idrobj != NULL); out_unlock: mutex_unlock(&file_priv->prime.lock); -- cgit v1.2.3 From cfd08f09723c5408eb3025b945fff08a99343911 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 13 May 2026 15:45:18 +0300 Subject: IB/IPoIB: ndo_set_rx_mode_async conversion The commit in the fixes tag added a warning for devices that are netdev ops locked that they should be converted to .ndo_set_rx_mode_async. IPoIB for mlx5 is such a driver which was missed during the conversion because the flow is more complex: - mlx5 part of IPoIB device was converted to ops-lock in commit [1]. - ipoib_intf_init() then overrides netdev_ops with ipoib_netdev_ops_{pf,vf}, which still wired ndo_set_rx_mode to the legacy sync path -- tripping the new warning on every probe. So now we have the following splat: netdevice: ib0 (uninitialized): ops-locked drivers should use ndo_set_rx_mode_async WARNING: net/core/dev.c:11366 at register_netdevice+0x83c/0x21d0 ... register_netdev+0x1f/0x40 ipoib_add_one+0x35c/0x880 [ib_ipoib] This patch implements .ndo_set_rx_mode_async but it simply schedules the multicast restart task like before. This is done to maintain the assumption that this task and others [2] must run on the same order workqueue to avoid racing with themselves. The race between ipoib_mcast_join_task() and ipoib_mcast_restart_task() would be the most obvious example. [1] 8f7b00307bf1, "net/mlx5e: Convert mlx5 netdevs to instance locking") [2] ipoib_mcast_join_task, ipoib_mcast_restart_task, ipoib_mcast_carrier_on_task, ipoib_reap_ah, ipoib_reap_neigh Fixes: 3cbd22938877 ("net: warn ops-locked drivers still using ndo_set_rx_mode") Signed-off-by: Dragos Tatulea Reviewed-by: Cosmin Ratiu Acked-by: Jason Gunthorpe Link: https://patch.msgid.link/20260513124519.3357165-1-dtatulea@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 402671567736..3e1e1e861739 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1297,7 +1297,9 @@ static int ipoib_hard_header(struct sk_buff *skb, return IPOIB_HARD_LEN; } -static void ipoib_set_mcast_list(struct net_device *dev) +static void ipoib_set_rx_mode_async(struct net_device *dev, + struct netdev_hw_addr_list *uc, + struct netdev_hw_addr_list *mc) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -2160,7 +2162,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { .ndo_fix_features = ipoib_fix_features, .ndo_start_xmit = ipoib_start_xmit, .ndo_tx_timeout = ipoib_timeout, - .ndo_set_rx_mode = ipoib_set_mcast_list, + .ndo_set_rx_mode_async = ipoib_set_rx_mode_async, .ndo_get_iflink = ipoib_get_iflink, .ndo_set_vf_link_state = ipoib_set_vf_link_state, .ndo_get_vf_config = ipoib_get_vf_config, @@ -2183,7 +2185,7 @@ static const struct net_device_ops ipoib_netdev_ops_vf = { .ndo_fix_features = ipoib_fix_features, .ndo_start_xmit = ipoib_start_xmit, .ndo_tx_timeout = ipoib_timeout, - .ndo_set_rx_mode = ipoib_set_mcast_list, + .ndo_set_rx_mode_async = ipoib_set_rx_mode_async, .ndo_get_iflink = ipoib_get_iflink, .ndo_get_stats64 = ipoib_get_stats, .ndo_eth_ioctl = ipoib_ioctl, -- cgit v1.2.3 From c0bf0a4f3f1f5f57aa83e1400ba4f56f0abfd542 Mon Sep 17 00:00:00 2001 From: Sam Daly Date: Wed, 13 May 2026 18:42:53 +0200 Subject: octeontx2-af: CGX: add bounds check to cgx_speed_mbps index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cgx_speed_mbps has 13 elements but RESP_LINKSTAT_SPEED can yield values 0-15. If it returns a value >= 13, this causes an out-of-bounds array access. Add a bounds check and default to speed 0 if the index is out of range. Fixes: 61071a871ea6 ("octeontx2-af: Forward CGX link notifications to PFs") Cc: Sunil Goutham Cc: Linu Cherian Cc: Geetha sowjanya Cc: hariprasad Cc: Subbaraya Sundeep Cc: Andrew Lunn Cc: stable Signed-off-by: Sam Daly Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026051352-refined-demise-e88d@gregkh Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 4f33a816bc7a..2e94d5105016 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1294,13 +1294,18 @@ static inline void link_status_user_format(u64 lstat, struct cgx_link_user_info *linfo, struct cgx *cgx, u8 lmac_id) { + unsigned int speed; + linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat); linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat); - linfo->speed = cgx_speed_mbps[FIELD_GET(RESP_LINKSTAT_SPEED, lstat)]; linfo->an = FIELD_GET(RESP_LINKSTAT_AN, lstat); linfo->fec = FIELD_GET(RESP_LINKSTAT_FEC, lstat); linfo->lmac_type_id = FIELD_GET(RESP_LINKSTAT_LMAC_TYPE, lstat); + speed = FIELD_GET(RESP_LINKSTAT_SPEED, lstat); + linfo->speed = speed < ARRAY_SIZE(cgx_speed_mbps) ? + cgx_speed_mbps[speed] : 0; + if (linfo->lmac_type_id >= LMAC_MODE_MAX) { dev_err(&cgx->pdev->dev, "Unknown lmac_type_id %d reported by firmware on cgx port%d:%d", linfo->lmac_type_id, cgx->cgx_id, lmac_id); -- cgit v1.2.3 From ae38d9179190a956e2a87a69ef1dd6f451b51c4d Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 14 May 2026 11:29:48 +0200 Subject: vsock/virtio: fix zerocopy completion for multi-skb sends When a large message is fragmented into multiple skbs, the zerocopy uarg is only allocated and attached to the last skb in the loop. Non-final skbs carry pinned user pages with no completion tracking, so the kernel has no way to notify userspace when those pages are safe to reuse. If the loop breaks early the uarg is never allocated at all, leaking pinned pages with no completion notification. Fix this by following the approach used by TCP: allocate the zerocopy uarg (if not provided by the caller) before the send loop and attach it to every skb via skb_zcopy_set(), which takes a reference per skb. Each skb's completion properly decrements the refcount, and the notification only fires after the last skb is freed. On failure, if no data was sent, the uarg is cleanly aborted via net_zcopy_put_abort(). This issue was initially discovered by sashiko while reviewing commit 1cb36e252211 ("vsock/virtio: fix MSG_ZEROCOPY pinned-pages accounting") but was pre-existing. Fixes: 581512a6dc93 ("vsock/virtio: MSG_ZEROCOPY flag support") Closes: https://sashiko.dev/#/patchset/20260420132051.217589-1-sgarzare%40redhat.com Reported-by: Maher Azzouzi Signed-off-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Acked-by: Arseniy Krasnov Link: https://patch.msgid.link/20260514092948.268720-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport_common.c | 83 ++++++++++++++------------------- 1 file changed, 34 insertions(+), 49 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 989cc252d3d3..1e3409d28164 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -70,34 +70,6 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, return true; } -static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk, - struct sk_buff *skb, - struct msghdr *msg, - size_t pkt_len, - bool zerocopy) -{ - struct ubuf_info *uarg; - - if (msg->msg_ubuf) { - uarg = msg->msg_ubuf; - net_zcopy_get(uarg); - } else { - struct ubuf_info_msgzc *uarg_zc; - - uarg = msg_zerocopy_realloc(sk_vsock(vsk), - pkt_len, NULL, false); - if (!uarg) - return -1; - - uarg_zc = uarg_to_msgzc(uarg); - uarg_zc->zerocopy = zerocopy ? 1 : 0; - } - - skb_zcopy_init(skb, uarg); - - return 0; -} - static int virtio_transport_fill_skb(struct sk_buff *skb, struct virtio_vsock_pkt_info *info, size_t len, @@ -317,8 +289,10 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, u32 src_cid, src_port, dst_cid, dst_port; const struct virtio_transport *t_ops; struct virtio_vsock_sock *vvs; + struct ubuf_info *uarg = NULL; u32 pkt_len = info->pkt_len; bool can_zcopy = false; + bool have_uref = false; u32 rest_len; int ret; @@ -360,6 +334,25 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, if (can_zcopy) max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, (MAX_SKB_FRAGS * PAGE_SIZE)); + + if (info->msg->msg_flags & MSG_ZEROCOPY && + info->op == VIRTIO_VSOCK_OP_RW) { + uarg = info->msg->msg_ubuf; + + if (!uarg) { + uarg = msg_zerocopy_realloc(sk_vsock(vsk), + pkt_len, NULL, false); + if (!uarg) { + virtio_transport_put_credit(vvs, pkt_len); + return -ENOMEM; + } + + if (!can_zcopy) + uarg_to_msgzc(uarg)->zerocopy = 0; + + have_uref = true; + } + } } rest_len = pkt_len; @@ -378,27 +371,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, break; } - /* We process buffer part by part, allocating skb on - * each iteration. If this is last skb for this buffer - * and MSG_ZEROCOPY mode is in use - we must allocate - * completion for the current syscall. - * - * Pass pkt_len because msg iter is already consumed - * by virtio_transport_fill_skb(), so iter->count - * can not be used for RLIMIT_MEMLOCK pinned-pages - * accounting done by msg_zerocopy_realloc(). - */ - if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY && - skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) { - if (virtio_transport_init_zcopy_skb(vsk, skb, - info->msg, - pkt_len, - can_zcopy)) { - kfree_skb(skb); - ret = -ENOMEM; - break; - } - } + skb_zcopy_set(skb, uarg, NULL); virtio_transport_inc_tx_pkt(vvs, skb); @@ -422,6 +395,18 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, virtio_transport_put_credit(vvs, rest_len); + /* msg_zerocopy_realloc() initializes the ubuf_info refcnt to 1. + * skb_zcopy_set() increases it for each skb, so we can drop that + * initial reference to keep it balanced. + */ + if (have_uref) { + if (rest_len == pkt_len) + /* No data sent, abort the notification. */ + net_zcopy_put_abort(uarg, true); + else + net_zcopy_put(uarg); + } + /* Return number of bytes, if any data has been sent. */ if (rest_len != pkt_len) ret = pkt_len - rest_len; -- cgit v1.2.3 From 080ecbd05432970a8df1f70586b925a18b5ea6f4 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Fri, 8 May 2026 21:42:11 +0800 Subject: btrfs: mark file extent range dirty after converting prealloc extents When writing into a preallocated extent, ordered extent completion calls btrfs_mark_extent_written() to convert the file extent item from the BTRFS_FILE_EXTENT_PREALLOC type to the BTRFS_FILE_EXTENT_REG type. If the preallocated extent was created beyond i_size with fallocate keep-size, and the inode is evicted and loaded again before the write, the inode's file_extent_tree is initialized only up to i_size. The beyond i_size prealloc extent is therefore not tracked there. After a write into that extent extends i_size, btrfs_mark_extent_written() updates the file extent item, but the corresponding range is not marked dirty in the inode's file_extent_tree. This can leave disk_i_size stale when the filesystem does not use the no-holes feature, so after remount the file size can go back to the old value. The following reproducer triggers the problem: $ cat test.sh #!/bin/bash DEV=/dev/sdi MNT=/mnt/sdi mkfs.btrfs -f -O ^no-holes $DEV mount $DEV $MNT touch $MNT/file fallocate -n -l 2M $MNT/file umount $MNT mount $DEV $MNT dd if=/dev/zero of=$MNT/file bs=1M count=1 conv=notrunc ls -lh $MNT/file umount $MNT mount $DEV $MNT ls -lh $MNT/file umount $MNT Running the reproducer gives the following result: $ ./test.sh (...) 1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.000596024 s, 1.8 GB/s -rw-rw-r-- 1 root root 1.0M May 8 16:34 /mnt/sdi/file -rw-rw-r-- 1 root root 0 May 8 16:34 /mnt/sdi/file Fix this by marking the written range dirty in the inode's file_extent_tree after successfully converting the prealloc extent to a regular extent. Fixes: 9ddc959e802b ("btrfs: use the file extent tree infrastructure") Reviewed-by: Filipe Manana Signed-off-by: Robbie Ko [ Minor change log updates ] Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index cf1cb5c4db75..8c171ed07008 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -633,7 +633,7 @@ again: trans->transid); btrfs_set_file_extent_num_bytes(leaf, fi, end - other_start); - return 0; + goto mark_dirty; } } @@ -661,7 +661,7 @@ again: other_end - start); btrfs_set_file_extent_offset(leaf, fi, start - orig_offset); - return 0; + goto mark_dirty; } } @@ -788,7 +788,12 @@ again: } } - return 0; +mark_dirty: + ret = btrfs_inode_set_file_extent_range(inode, start, end - start); + if (ret) + btrfs_abort_transaction(trans, ret); + + return ret; } /* -- cgit v1.2.3 From 975e63c7a8074d83e195577b7f76dadc9a3d14b7 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 8 May 2026 13:11:26 -0700 Subject: btrfs: always drop root->inodes lock before cond_resched() find_first_inode() and find_first_inode_to_shrink() lock root->inodes, then loop over them, occasionally skipping some inodes. When they skip an inode, they attempt to share the cpu/lock with cond_resched_lock(). However, that has a subtle problem associated with it. cond_resched_lock() only drops the lock if it needs to actually call schedule(). With CONFIG_PREEMPT_NONE, this means the full timeslice as detected at ticks. With 8+ cpus and default tunables, this is 2.8ms. So regardless of HZ, we will run for at least 2.8ms in this loop without dropping the lock, assuming it finds no suitable inodes. If HZ is small enough, it might be even worse as the tick granularity becomes bigger than the timeslice. The knock-on effect of this is that callers to btrfs_del_inode_from_root() like kswapd trying to shrink the inode slab or userspace threads calling evict() will spin on xa_lock(&root->inodes) for 2.8ms, so the extent map shrinker dominates the lock even though ostensibly it is intending to share it. This produces memory pressure as there is only one kswapd and it runs sequentially so it can get stuck in the inode slab shrinking. To fix it, simply replace cond_resched_lock() with an open coded variant which unconditionally does unlock/lock around cond_resched. Sharing the lock is decoupled from sharing the CPU, and all the users of the lock now share it fairly. I was able to reproduce this on test systems by producing a lot of empty files (to make a big root->inodes xarray), then producing memory pressure by reading large files larger than ram, triggering kswapd and the extent_map shrinker. The lock contention is visible with perf or lockstat. This patch also relieved a user-apparent bottleneck on a production system from the original report. Tested-by: Rik van Riel Reviewed-by: Filipe Manana Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/extent_map.c | 4 +++- fs/btrfs/inode.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 095a561d733f..fa9d183f4f86 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1246,7 +1246,9 @@ static struct btrfs_inode *find_first_inode_to_shrink(struct btrfs_root *root, write_unlock(&tree->lock); next: from = btrfs_ino(inode) + 1; - cond_resched_lock(&root->inodes.xa_lock); + xa_unlock(&root->inodes); + cond_resched(); + xa_lock(&root->inodes); } xa_unlock(&root->inodes); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 75136a172710..1ca1cbdf25bc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10699,7 +10699,9 @@ struct btrfs_inode *btrfs_find_first_inode(struct btrfs_root *root, u64 min_ino) break; from = btrfs_ino(inode) + 1; - cond_resched_lock(&root->inodes.xa_lock); + xa_unlock(&root->inodes); + cond_resched(); + xa_lock(&root->inodes); } xa_unlock(&root->inodes); -- cgit v1.2.3 From 1e92637722ae4bd417f7a37e8d1485dc23b93935 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Mon, 11 May 2026 13:07:11 -0700 Subject: btrfs: check for subvolume before deleting squota qgroup The invariant that we want to maintain with subvolume qgroups is that the qgroup can only be deleted if there is no root. With squotas, we thought that it was sufficient to just check the usage, because we assumed that deleting a subvolume will drive it's qgroups usage to 0, and thus 0 usage implies no subvolume. However, this is false, for two reasons: - A subvol whose extents are all from before squotas was enabled. - A subvol that was created in this transaction and for which we have not yet run any delayed refs. In both cases, deleting the qgroup breaks the desired invariant and we are left with a subvolume with no qgroup but squotas are enabled. Fix this by unifying the deletion check logic between full qgroups and squotas. Squotas do all the same checks *and* the additional usage == 0 check, which is the one extra rule peculiar to squotas. Link: https://lore.kernel.org/linux-btrfs/adnBhWfJQ1n3hZC8@merlins.org/ Fixes: a8df35619948 ("btrfs: forbid deleting live subvol qgroup") Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index cdf736d3a4e5..86c036a089f6 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1715,32 +1715,24 @@ out: return ret; } -static bool can_delete_parent_qgroup(struct btrfs_qgroup *qgroup) - +static bool can_delete_parent_qgroup(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup) { ASSERT(btrfs_qgroup_level(qgroup->qgroupid)); + if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE) + squota_check_parent_usage(fs_info, qgroup); return list_empty(&qgroup->members); } /* - * Return true if we can delete the squota qgroup and false otherwise. - * - * Rules for whether we can delete: - * - * A subvolume qgroup can be removed iff the subvolume is fully deleted, which - * is iff there is 0 usage in the qgroup. - * - * A higher level qgroup can be removed iff it has no members. - * Note: We audit its usage to warn on inconsitencies without blocking deletion. + * Because a shared extent can outlive its owning subvolume, we cannot delete a + * subvol squota qgroup until all of the extents it owns are gone, even if the + * subvolume itself has been deleted. */ -static bool can_delete_squota_qgroup(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup) +static bool can_delete_squota_subvol_qgroup(struct btrfs_fs_info *fs_info, + struct btrfs_qgroup *qgroup) { ASSERT(btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE); - - if (btrfs_qgroup_level(qgroup->qgroupid) > 0) { - squota_check_parent_usage(fs_info, qgroup); - return can_delete_parent_qgroup(qgroup); - } + ASSERT(btrfs_qgroup_level(qgroup->qgroupid) == 0); return !(qgroup->rfer || qgroup->excl || qgroup->rfer_cmpr || qgroup->excl_cmpr); } @@ -1754,14 +1746,11 @@ static int can_delete_qgroup(struct btrfs_fs_info *fs_info, struct btrfs_qgroup { struct btrfs_key key; BTRFS_PATH_AUTO_FREE(path); - - /* Since squotas cannot be inconsistent, they have special rules for deletion. */ - if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE) - return can_delete_squota_qgroup(fs_info, qgroup); + int ret; /* For higher level qgroup, we can only delete it if it has no child. */ if (btrfs_qgroup_level(qgroup->qgroupid)) - return can_delete_parent_qgroup(qgroup); + return can_delete_parent_qgroup(fs_info, qgroup); /* * For level-0 qgroups, we can only delete it if it has no subvolume @@ -1777,10 +1766,21 @@ static int can_delete_qgroup(struct btrfs_fs_info *fs_info, struct btrfs_qgroup return -ENOMEM; /* - * The @ret from btrfs_find_root() exactly matches our definition for - * the return value, thus can be returned directly. + * Any subvol qgroup, regardless of mode, cannot be deleted if the + * subvol still exists. + */ + ret = btrfs_find_root(fs_info->tree_root, &key, path, NULL, NULL); + /* + * btrfs_find_root returns <0 on error, 0 if found, and >0 if not, + * so the "found" and "error" cases match our desired return values. */ - return btrfs_find_root(fs_info->tree_root, &key, path, NULL, NULL); + if (ret <= 0) + return ret; + + /* Squotas require additional checks, even if the subvol is deleted. */ + if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE) + return can_delete_squota_subvol_qgroup(fs_info, qgroup); + return 1; } int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) -- cgit v1.2.3 From d7c600554816b8ef70adffe078a0e360c055d82b Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Mon, 11 May 2026 19:53:46 -0700 Subject: btrfs: fix squota accounting during enable generation The first transaction that enables squotas is special and a bit tricky. We have to set BTRFS_FS_QUOTA_ENABLED after the transaction to avoid a deadlock, so any delayed refs that run before we set the bit are not squota accounted. For data this is fine, we don't get an owner_ref, so there is no real harm, it's as if the extent predated squotas. However for metadata, the tree block will have gen == enable_gen so when we free it later, we will decrement the squota accounting, which can result in an underflow. Before it is freed, btrfs check shows errors, as we have mismatched usage between the node generations/owners and the squota values. There are two angles to this fix: 1. For extents that come in delayed_refs that run during the enable_gen transaction, we must actually set enable_gen to the *next* transaction. That is the first transaction that we can really properly account in any way. 2. For extents that come in between the end of our transaction handle and the time we set the BTRFS_FS_QUOTA_ENABLED bit, we need an additional bit, BTRFS_FS_SQUOTA_ENABLING which only affects recording squota deltas, so we do pick up those extents. Otherwise, we would miss them, even for enable_gen + 1. Fixes: bd7c1ea3a302 ("btrfs: qgroup: check generation when recording simple quota delta") Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/fs.h | 1 + fs/btrfs/qgroup.c | 31 +++++++++++++++++++++++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index a4758d94b32e..a8aa086a4df8 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -155,6 +155,7 @@ enum { BTRFS_FS_LOG_RECOVERING, BTRFS_FS_OPEN, BTRFS_FS_QUOTA_ENABLED, + BTRFS_FS_SQUOTA_ENABLING, BTRFS_FS_UPDATE_UUID_TREE_GEN, BTRFS_FS_CREATING_FREE_SPACE_TREE, BTRFS_FS_BTREE_ERR, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 86c036a089f6..5f33727a7972 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1107,7 +1107,13 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info, if (simple) { fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE; btrfs_set_fs_incompat(fs_info, SIMPLE_QUOTA); - btrfs_set_qgroup_status_enable_gen(leaf, ptr, trans->transid); + /* + * Set the enable generation to the next transaction, as we cannot + * ensure that extents written during this transaction will see any + * state we have set here. So we should treat all extents of the + * transaction as coming in before squotas was enabled. + */ + btrfs_set_qgroup_status_enable_gen(leaf, ptr, trans->transid + 1); } else { fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; } @@ -1210,7 +1216,15 @@ out_add_root: goto out_free_path; } - fs_info->qgroup_enable_gen = trans->transid; + /* + * Set fs_info->qgroup_enable_gen and BTRFS_FS_SQUOTA_ENABLING + * under the transaction handle. We want to ensure that all extents in + * the next transaction definitely see them. + */ + if (simple) { + fs_info->qgroup_enable_gen = trans->transid + 1; + set_bit(BTRFS_FS_SQUOTA_ENABLING, &fs_info->flags); + } mutex_unlock(&fs_info->qgroup_ioctl_lock); /* @@ -1224,9 +1238,15 @@ out_add_root: */ ret = btrfs_commit_transaction(trans); trans = NULL; + mutex_lock(&fs_info->qgroup_ioctl_lock); - if (ret) + if (ret) { + if (simple) { + clear_bit(BTRFS_FS_SQUOTA_ENABLING, &fs_info->flags); + fs_info->qgroup_enable_gen = 0; + } goto out_free_path; + } /* * Set quota enabled flag after committing the transaction, to avoid @@ -1236,6 +1256,8 @@ out_add_root: spin_lock(&fs_info->qgroup_lock); fs_info->quota_root = quota_root; set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); + if (simple) + clear_bit(BTRFS_FS_SQUOTA_ENABLING, &fs_info->flags); spin_unlock(&fs_info->qgroup_lock); /* Skip rescan for simple qgroups. */ @@ -4922,7 +4944,8 @@ int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, u64 num_bytes = delta->num_bytes; const int sign = (delta->is_inc ? 1 : -1); - if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE) + if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE && + !test_bit(BTRFS_FS_SQUOTA_ENABLING, &fs_info->flags)) return 0; if (!btrfs_is_fstree(root)) -- cgit v1.2.3 From 99aacd195141ff77295c535388888f072ec89e82 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Mon, 11 May 2026 13:06:24 -0700 Subject: btrfs: clamp to avoid squota underflow Simple quota accounting can undercount metadata tree block allocations in certain scenarios. When an undercounted subvolume is deleted and its tree blocks freed, the free deltas decrement rfer/excl past zero, wrapping the u64 to a value near U64_MAX. Once wrapped, can_delete_squota_qgroup() sees non-zero rfer and refuses to delete the qgroup. The qgroup becomes permanently orphaned in the quota tree, since there is no subvolume left to generate frees that would bring the counter back to zero. While we ultimately want to fix any mis-accounting at the source, it is also helpful and worthwhile to mitigate the damage by clamping rfer and excl to zero on underflow rather than allowing the u64 to wrap. This at least allows us to clean up the messed up qgroups on subvol deletion. Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 5f33727a7972..e9e7091e1452 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4967,8 +4967,19 @@ int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, list_for_each_entry(qg, &qgroup_list, iterator) { struct btrfs_qgroup_list *glist; - qg->excl += num_bytes * sign; - qg->rfer += num_bytes * sign; + ASSERT(qg->excl == qg->rfer); + if (WARN_ON_ONCE(sign < 0 && qg->excl < num_bytes)) { + btrfs_warn(fs_info, + "squota underflow qg %hu/%llu excl %llu num_bytes %llu", + btrfs_qgroup_level(qg->qgroupid), + btrfs_qgroup_subvolid(qg->qgroupid), + qg->excl, num_bytes); + qg->excl = 0; + qg->rfer = 0; + } else { + qg->excl += num_bytes * sign; + qg->rfer += num_bytes * sign; + } qgroup_dirty(fs_info, qg); list_for_each_entry(glist, &qg->groups, next_group) -- cgit v1.2.3 From f13342e15deafb7538a7a8577ed5f4c33c56f64e Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Tue, 12 May 2026 09:55:28 -0700 Subject: btrfs: swallow btrfs_record_squota_delta() ENOENT I thought that it was likely I could harden squota deletion to the point that it was impossible to end up with an extent accounted to a qgroup outliving its qgroup. Several recent bugs have made me re-consider that position. Ultimately, this is a tradeoff between short term stability and long term strictness, but I think given that there could be another layer of bugs behind the 2-3 I just fixed, I would feel much more confident in people using squotas if the risk was "your values can get a bit out of whack which you can fix by deleting stuff or disabling/re-enabling/repairing" vs "it will abort your filesystem". As the final nail in the coffin, the Meta production kernel was lacking earlier fixes from me and Qu regarding subvol qgroup lifetime, so this is what we have been testing at scale, so I think at least for now upstream should have the same extra layer of protection. Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index e9e7091e1452..6838faceb6d5 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4957,8 +4957,9 @@ int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, spin_lock(&fs_info->qgroup_lock); qgroup = find_qgroup_rb(fs_info, root); - if (!qgroup) { - ret = -ENOENT; + if (WARN_ON_ONCE(!qgroup)) { + btrfs_warn(fs_info, "squota failed to find qgroup for root %llu", root); + ret = 0; goto out; } -- cgit v1.2.3 From aaec7096f9961eb223b5b149abe9495525c205d9 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Wed, 13 May 2026 19:38:38 -0400 Subject: net: hsr: defer node table free until after RCU readers HSR node-list and node-status generic-netlink operations run under rcu_read_lock(). They walk hsr->node_db through hsr_get_next_node() and hsr_get_node_data(), but RTM_DELLINK teardown removes the same node table with plain list_del() and frees each node immediately. That lets a generic-netlink reader hold a struct hsr_node pointer across hsr_dellink(). In a KASAN build, widening the reader window after hsr_get_next_node() obtains the node reproduces a slab-use-after-free when the reader copies node->macaddress_A; the freeing stack is hsr_del_nodes() from hsr_dellink(). Use list_del_rcu() and defer the free through the existing hsr_free_node_rcu() callback. This matches the lifetime rule used by the HSR prune paths, which already delete nodes with list_del_rcu() and call_rcu(). Fixes: b9a1e627405d ("hsr: implement dellink to clean up resources") Cc: stable@vger.kernel.org # v5.3+ Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260513233838.3064715-2-michael.bommarito@gmail.com Signed-off-by: Jakub Kicinski --- net/hsr/hsr_framereg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 124619920d38..b514e43766ef 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -163,8 +163,8 @@ void hsr_del_nodes(struct list_head *node_db) struct hsr_node *tmp; list_for_each_entry_safe(node, tmp, node_db, mac_list) { - list_del(&node->mac_list); - hsr_free_node(node); + list_del_rcu(&node->mac_list); + call_rcu(&node->rcu_head, hsr_free_node_rcu); } } -- cgit v1.2.3 From 7e68ba282165b8880d11eac8a816d54d449b7d80 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 14 May 2026 09:06:07 +0000 Subject: ASoC: qcom: q6apm-dai: Allocate an extra page for PCM buffers Some Old DSP firmware versions use 32-bit address arithmetic and size for validating the PCM buffer address range. If a buffer is allocated near the top of the 32-bit address space, arithmetic calculations involving the end address can overflow and fail checks. Work around this by increasing the preallocated PCM buffer size by one page. The DSP is still passed the usable buffer size, excluding the extra page, which prevents the firmware from seeing an end address that crosses the 32-bit boundary. This was not hit before because PCM buffer allocation and DSP-side mapping happened at different points, and the size mapped on the DSP was usually nperiods * period_size. Therefore the mapped size was unlikely to match the full preallocated buffer size exactly, although the issue was still possible. With early buffer mapping on the DSP, the full preallocated buffer is mapped during PCM creation, making the failure reproducible at boot. Fixes: 8ea6e25c8536 ("ASoC: qcom: q6apm: Add support for early buffer mapping on DSP") Cc: Stable@vger.kernel.org Reported-by: Jens Glathe Closes: https://lore.kernel.org/all/7f10abbd-fb78-4c3a-ab90-7ca78239891a@oldschoolsolutions.biz/ Signed-off-by: Srinivas Kandagatla Tested-by: Jens Glathe Link: https://patch.msgid.link/20260514090607.2435484-1-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6apm-dai.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c index ede19fdea6e9..3a1be41df096 100644 --- a/sound/soc/qcom/qdsp6/q6apm-dai.c +++ b/sound/soc/qcom/qdsp6/q6apm-dai.c @@ -497,7 +497,12 @@ static int q6apm_dai_pcm_new(struct snd_soc_component *component, struct snd_soc { struct snd_soc_dai *cpu_dai = snd_soc_rtd_to_cpu(rtd, 0); struct snd_pcm *pcm = rtd->pcm; - int size = BUFFER_BYTES_MAX; + /* + * Allocate one extra page as a workaround for a DSP bug where 32-bit + * address arithmetic can overflow when the buffer is placed near the + * end of the addressable range. + */ + int size = BUFFER_BYTES_MAX + PAGE_SIZE; int graph_id, ret; struct snd_pcm_substream *substream; -- cgit v1.2.3 From 1afd8f06dcb1d561af3b239c5b14a88b87c13454 Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Mon, 11 May 2026 13:42:02 -0300 Subject: ASoC: amd: acp-sdw-legacy: check CPU DAI name before logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit devm_kasprintf() can fail and return NULL. The legacy AMD SoundWire machine driver logs cpus->dai_name before checking the allocation result. Move the debug print after the NULL check, matching the ordering used by the SOF AMD SoundWire path after commit 5726b68473f7 ("ASoC: amd/sdw_utils: avoid NULL deref when devm_kasprintf() fails"). Fixes: 2981d9b0789c ("ASoC: amd: acp: add soundwire machine driver for legacy stack") Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260511-asoc-amd-acp-sdw-legacy-dai-name-null-v1-1-dc6151b6da8a@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-sdw-legacy-mach.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/amd/acp/acp-sdw-legacy-mach.c b/sound/soc/amd/acp/acp-sdw-legacy-mach.c index 0f21e5f64531..09b475c83c49 100644 --- a/sound/soc/amd/acp/acp-sdw-legacy-mach.c +++ b/sound/soc/amd/acp/acp-sdw-legacy-mach.c @@ -260,9 +260,9 @@ static int create_sdw_dailink(struct snd_soc_card *card, cpus->dai_name = devm_kasprintf(dev, GFP_KERNEL, "SDW%d Pin%d", link_num, cpu_pin_id); - dev_dbg(dev, "cpu->dai_name:%s\n", cpus->dai_name); if (!cpus->dai_name) return -ENOMEM; + dev_dbg(dev, "cpu->dai_name:%s\n", cpus->dai_name); codec_maps[j].cpu = 0; codec_maps[j].codec = j; -- cgit v1.2.3 From 496ba79b9496b8b3747cbc764ebd33ee7325e806 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Sun, 10 May 2026 01:55:37 +0800 Subject: spi: mtk-snfi: Fix resource leak in mtk_snand_read_page_cache() When DMA read times out in mtk_snand_read_page_cache(), the original code erroneously jumped to cleanup label which skips DMA unmapping and ECC disable, causing a resource leak. Fixes: 764f1b748164 ("spi: add driver for MTK SPI NAND Flash Interface") Signed-off-by: Felix Gu Link: https://patch.msgid.link/20260510-snfi-v1-1-bc375cf1af8e@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-mtk-snfi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-mtk-snfi.c b/drivers/spi/spi-mtk-snfi.c index e616e6800e92..6e96e50fedad 100644 --- a/drivers/spi/spi-mtk-snfi.c +++ b/drivers/spi/spi-mtk-snfi.c @@ -961,7 +961,7 @@ static int mtk_snand_read_page_cache(struct mtk_snand *snf, &snf->op_done, usecs_to_jiffies(SNFI_POLL_INTERVAL))) { dev_err(snf->dev, "DMA timed out for reading from cache.\n"); ret = -ETIMEDOUT; - goto cleanup; + goto cleanup2; } // Wait for BUS_SEC_CNTR returning expected value -- cgit v1.2.3 From 6e4bfd9da8851f562f04503d8e43221957f96eeb Mon Sep 17 00:00:00 2001 From: Jasper Smet Date: Wed, 13 May 2026 07:21:37 +0200 Subject: ASoC: amd: acp: Add DMI quirk for ASUS Zenbook S16 UM5606GA The ASUS Zenbook S16 (UM5606GA) with AMD Ryzen AI 9 465 (Strix Point, ACP 7.0) has a BIOS that incorrectly sets the ACPI property 'acp-audio-config-flag' to 0x10 (FLAG_AMD_LEGACY_ONLY_DMIC) for the ACP device. This prevents snd_pci_ps from probing the SoundWire bus, resulting in no internal audio (dummy output only). The hardware uses a Cirrus Logic CS42L43 (headphone/jack) and four CS35L56 smart amplifiers (speakers), all on SoundWire link 1. The corresponding machine table entry (acp70_cs42l43_l1u0_cs35l56x4_l1u0123) already exists in amd-acp70-acpi-match.c and correctly describes this topology. Add a DMI quirk to override the flag to 0, consistent with the existing entry for the HN7306EA. Signed-off-by: Jasper Smet Link: https://patch.msgid.link/20260513052137.56703-1-josbeir@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/acp-config.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/acp-config.c b/sound/soc/amd/acp-config.c index 1604ed679224..309dc9ed6e0d 100644 --- a/sound/soc/amd/acp-config.c +++ b/sound/soc/amd/acp-config.c @@ -30,6 +30,13 @@ static const struct dmi_system_id acp70_acpi_flag_override_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HN7306EA"), }, }, + { + /* ASUS Zenbook S16 UM5606GA (Strix Point, ACP 7.0) */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "Zenbook S16 UM5606GA"), + }, + }, {} }; -- cgit v1.2.3 From 79d8be262377f7112cfa3088dfc4142d5a2533f3 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Fri, 15 May 2026 11:45:31 -0400 Subject: xfrm: ah: use skb_to_full_sk in async output callbacks When AH output is offloaded to an asynchronous crypto provider (hardware accelerators such as AMD CCP, or a forced-async software shim used for testing), the digest completion fires ah_output_done() / ah6_output_done() on a workqueue. The egress skb at that point may have been originated by a TCP listener sending a SYN-ACK, which sets skb->sk to a request_sock via skb_set_owner_edemux(); it may also have been originated by an inet_timewait_sock retransmit. Neither is a full struct sock, and passing the raw skb->sk to xfrm_output_resume() then forwards a non-full socket through the rest of the xfrm output chain. xfrm_output_resume() and its downstream consumers expect a full sk where they dereference at all. The natural egress path through ah_output_done() does not crash today because the consumers that read past sock_common are either gated by sk_fullsock() or short-circuit on flags that are clear on a fresh request_sock; an exhaustive walk of the 50 most plausible consumers under sch_fq, dev_queue_xmit, netfilter, tc-egress and cgroup-egress BPF found no current unguarded deref. The bug is still a real type confusion that future consumer changes could turn into a memory-corruption primitive. This is the same bug class fixed for ESP in commit 1620c88887b1 ("xfrm: Fix the usage of skb->sk"). Apply the analogous fix to AH: convert skb->sk to a full socket pointer (or NULL) via skb_to_full_sk() before handing it to xfrm_output_resume(). The same async AH callbacks were touched recently for an independent ESN-related ICV layout bug in commit ec54093e6a8f ("xfrm: ah: account for ESN high bits in async callbacks"); the sk type-confusion addressed here is orthogonal. This patch is part of an ongoing audit of the AH callback paths; an ah_output ihl-validation hardening series is also currently under review on netdev. Reproduced under UML + KASAN + lockdep with a forced-async hmac(sha1) shim that registers at priority 9999 and wraps the sync in-tree hmac-sha1-lib. With the shim loaded, ah_output_done runs on every SYN-ACK egress through a transport-mode AH SA and skb->sk arrives as a request_sock (TCP_NEW_SYN_RECV); after this patch, xfrm_output_resume() receives the listener (the result of sk_to_full_sk()) and consumer derefs land on full-sock fields as intended. Fixes: 9ab1265d5231 ("xfrm: Use actual socket sk instead of skb socket for xfrm_output_resume") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Signed-off-by: Steffen Klassert --- net/ipv4/ah4.c | 2 +- net/ipv6/ah6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 4366cbac3f06..6fd642d2278d 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -143,7 +143,7 @@ static void ah_output_done(void *data, int err) } kfree(AH_SKB_CB(skb)->tmp); - xfrm_output_resume(skb->sk, skb, err); + xfrm_output_resume(skb_to_full_sk(skb), skb, err); } static int ah_output(struct xfrm_state *x, struct sk_buff *skb) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index de1e68199a01..76f7a2de9108 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -337,7 +337,7 @@ static void ah6_output_done(void *data, int err) ah6_restore_hdrs(top_iph, iph_ext, extlen); kfree(AH_SKB_CB(skb)->tmp); - xfrm_output_resume(skb->sk, skb, err); + xfrm_output_resume(skb_to_full_sk(skb), skb, err); } static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) -- cgit v1.2.3 From 1afc25ae75288b3ce59e9e5a4b448bd354c9e565 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 9 May 2026 10:27:06 +0200 Subject: netfilter: nf_conntrack_helper: fix possible null deref during error log Reported by sashiko: there is a small race window. If a helper module is unloaded or a userspace-defined helper is removed, nf_conntrack_helper_unregister() sets ->helper to NULL. Handle this safely. This needs a second patch to close related race during nf_conntrack_helper_unregister(). Fixes: b20ab9cc63ca ("netfilter: nf_ct_helper: better logging for dropped packets") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index b594cd244fe1..17e971bd4c74 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -321,8 +321,8 @@ __printf(3, 4) void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, const char *fmt, ...) { + const char *helper_name = "(null)"; const struct nf_conn_help *help; - const struct nf_conntrack_helper *helper; struct va_format vaf; va_list args; @@ -331,14 +331,17 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, vaf.fmt = fmt; vaf.va = &args; - /* Called from the helper function, this call never fails */ help = nfct_help(ct); + if (help) { + const struct nf_conntrack_helper *helper; - /* rcu_read_lock()ed by nf_hook_thresh */ - helper = rcu_dereference(help->helper); + helper = rcu_dereference(help->helper); + if (helper) + helper_name = helper->name; + } nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, - "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf); + "helper %s dropping packet: %pV ", helper_name, &vaf); va_end(args); } -- cgit v1.2.3 From 5522d65d81a711c60a9969d37a485d48d0ad1496 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 10 May 2026 13:46:05 +0300 Subject: ipvs: avoid possible loop in ip_vs_dst_event on resizing Sashiko points out that unprivileged user can frequently call ip_vs_flush() or ip_vs_del_service() to trigger svc_table_changes updates that can lead to infinite loop in ip_vs_dst_event(). This can also happen if the user triggers frequent table resizing without deleting all services. We should also consider the possible effects if the user triggers many NETDEV_DOWN events. One way to solve it is to hold svc_resize_sem in ip_vs_dst_event() but this can block the dev notifier during the whole resizing process. Instead, use new rw_semaphore svc_replace_sem to protect just the svc_table replacement which is a short code section. Then hold svc_replace_sem in ip_vs_dst_event() to serialize with replacing the svc_table. As result, loop is avoided as there is no need to repeat the table walking from the start. By this way changes in svc_table_changes can happen only when all services are removed and all dev references dropped which allows us to abort the table walking. As IP_VS_WORK_SVC_NORESIZE is the flag used to stop the svc_resize_work under service_mutex, we should check only this flag often but not while under service_mutex. To remove the mutex_trylock() for service_mutex in the second phase where the resizer installs the new table after rehashing, we will avoid holding the service_mutex there. As result, the code in configuration context which is under service_mutex should access ipvs->svc_table under RCU because it can be replaced at anytime and released after a RCU grace period. As for ip_vs_zero_all(), it needs different solution as a table walker which can escape single RCU read-side critical section: to hold the svc_replace_sem to prevent table to be replaced. In ip_vs_status_show() prefer to hold svc_replace_sem to avoid many loops, just detect if the svc_table is removed. Prefer the newly attached table for the u_thresh/l_thresh checks to know when to grow/shrink while adding or deleting services because the new table size is based on the latest parameters. Link: https://sashiko.dev/#/patchset/20260505001648.360569-1-pablo%40netfilter.org Fixes: 840aac3d900d ("ipvs: use resizable hash table for services") Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 3 +- net/netfilter/ipvs/ip_vs_ctl.c | 187 +++++++++++++++++++++++++++-------------- 2 files changed, 124 insertions(+), 66 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 02762ce73a0c..a02e569813d2 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1186,8 +1186,9 @@ struct netns_ipvs { struct timer_list dest_trash_timer; /* expiration timer */ struct mutex service_mutex; /* service reconfig */ struct rw_semaphore svc_resize_sem; /* svc_table resizing */ + struct rw_semaphore svc_replace_sem; /* svc_table replace */ struct delayed_work svc_resize_work; /* resize svc_table */ - atomic_t svc_table_changes;/* ++ on new table */ + atomic_t svc_table_changes;/* ++ on table changes */ /* Service counters */ atomic_t num_services[IP_VS_AF_MAX]; /* Services */ atomic_t fwm_services[IP_VS_AF_MAX]; /* Services */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c7c7f6a7a9f6..bd9cae44d214 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -327,18 +327,22 @@ ip_vs_use_count_dec(void) /* Service hashing: * Operation Locking order * --------------------------------------------------------------------------- - * add table service_mutex, svc_resize_sem(W) - * del table service_mutex - * move between tables svc_resize_sem(W), seqcount_t(W), bit lock - * add/del service service_mutex, bit lock + * add first table service_mutex + * attach new table service_mutex + * add/del service service_mutex, RCU, bit lock + * move between tables (rehash) svc_resize_sem(W), seqcount_t(W), bit lock + * replace old with attached svc_resize_sem(W), svc_replace_sem(W) * find service RCU, seqcount_t(R) * walk services(blocking) service_mutex, svc_resize_sem(R) * walk services(non-blocking) RCU, seqcount_t(R) + * walk services(non-blocking) svc_resize_sem(R), RCU, seqcount_t(R) + * walk services(non-blocking) svc_replace_sem(R), RCU, seqcount_t(R) + * del table service_mutex after stopped work * - * - new tables are linked/unlinked under service_mutex and svc_resize_sem - * - new table is linked on resizing and all operations can run in parallel - * in 2 tables until the new table is registered as current one - * - two contexts can modify buckets: config and table resize, both in + * - new table is attached on resizing under service_mutex and all operations + * can run in parallel in 2 tables until the new table is registered as current + * one + * - two contexts can modify buckets: config and table resize (work), both in * process context * - only table resizer can move entries, so we do not protect t->seqc[] * items with t->lock[] @@ -346,9 +350,13 @@ ip_vs_use_count_dec(void) * services are moved to new table * - move operations may disturb readers: find operation will not miss entries * but walkers may see same entry twice if they are forced to retry chains - * - walkers using cond_resched_rcu() on !PREEMPT_RCU may need to hold - * service_mutex to disallow new tables to be installed or to check + * or to walk the newly attached second table + * - walkers using cond_resched_rcu() on !PREEMPT_RCU may need to check * svc_table_changes and repeat the RCU read section if new table is installed + * - walkers may serialize with the whole resizing process (svc_resize_sem) + * to prevent seeing same service twice or just with the svc_table + * replace (svc_replace_sem) when we can see entries twice but we + * prefer to run concurrently with the rehashing. */ /* @@ -387,9 +395,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) /* increase its refcnt because it is referenced by the svc table */ atomic_inc(&svc->refcnt); + /* We know if new table is attached under service_mutex but rely on + * RCU to hold the old table to be freed in resizer + */ + rcu_read_lock(); + + /* This can be the old or the new table */ + t = rcu_dereference(ipvs->svc_table); + /* New entries go into recent table */ - t = rcu_dereference_protected(ipvs->svc_table, 1); - t = rcu_dereference_protected(t->new_tbl, 1); + t = rcu_dereference(t->new_tbl); if (svc->fwmark == 0) { /* @@ -410,6 +425,8 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) hlist_bl_add_head_rcu(&svc->s_list, head); hlist_bl_unlock(head); + rcu_read_unlock(); + return 1; } @@ -432,7 +449,13 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) return 0; } - t = rcu_dereference_protected(ipvs->svc_table, 1); + /* We know if new table is attached under service_mutex but rely on + * RCU to hold the old table to be freed in resizer + */ + rcu_read_lock(); + + /* This can be the old or the new table */ + t = rcu_dereference(ipvs->svc_table); hash_key = READ_ONCE(svc->hash_key); /* We need to lock the bucket in the right table */ if (ip_vs_rht_same_table(t, hash_key)) { @@ -443,13 +466,13 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) /* Moved to new table ? */ if (hash_key != hash_key2) { hlist_bl_unlock(head); - t = rcu_dereference_protected(t->new_tbl, 1); + t = rcu_dereference(t->new_tbl); head = t->buckets + (hash_key2 & t->mask); hlist_bl_lock(head); } } else { /* It is already moved to new table */ - t = rcu_dereference_protected(t->new_tbl, 1); + t = rcu_dereference(t->new_tbl); head = t->buckets + (hash_key & t->mask); hlist_bl_lock(head); } @@ -459,6 +482,8 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) svc->flags &= ~IP_VS_SVC_F_HASHED; atomic_dec(&svc->refcnt); hlist_bl_unlock(head); + + rcu_read_unlock(); return 1; } @@ -666,15 +691,14 @@ static void svc_resize_work_handler(struct work_struct *work) goto unlock_sem; more_work = false; clear_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags); - if (!READ_ONCE(ipvs->enable) || - test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) + if (!READ_ONCE(ipvs->enable)) goto unlock_m; t = rcu_dereference_protected(ipvs->svc_table, 1); /* Do nothing if table is removed */ if (!t) goto unlock_m; - /* New table needs to be registered? BUG! */ - if (t != rcu_dereference_protected(t->new_tbl, 1)) + /* New table already attached? BUG! */ + if (t != rcu_access_pointer(t->new_tbl)) goto unlock_m; lfactor = sysctl_svc_lfactor(ipvs); @@ -691,6 +715,7 @@ static void svc_resize_work_handler(struct work_struct *work) /* Flip the table_id */ t_new->table_id = t->table_id ^ IP_VS_RHT_TABLE_ID_MASK; + /* Attach new table */ rcu_assign_pointer(t->new_tbl, t_new); /* Allow add/del to new_tbl while moving from old table */ mutex_unlock(&ipvs->service_mutex); @@ -698,8 +723,8 @@ static void svc_resize_work_handler(struct work_struct *work) ip_vs_rht_for_each_bucket(t, bucket, head) { same_bucket: if (++limit >= 16) { - if (!READ_ONCE(ipvs->enable) || - test_bit(IP_VS_WORK_SVC_NORESIZE, + /* Check if work is stopped */ + if (test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) goto unlock_sem; if (resched_score >= 100) { @@ -764,16 +789,12 @@ same_bucket: goto same_bucket; } - /* Tables can be switched only under service_mutex */ - while (!mutex_trylock(&ipvs->service_mutex)) { - cond_resched(); - if (!READ_ONCE(ipvs->enable) || - test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) - goto unlock_sem; - } - if (!READ_ONCE(ipvs->enable) || - test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) - goto unlock_m; + /* Serialize with readers that don't like svc_table changes */ + down_write(&ipvs->svc_replace_sem); + + /* Check if work is stopped to avoid synchronize_rcu() */ + if (test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) + goto unlock_repl; rcu_assign_pointer(ipvs->svc_table, t_new); /* Inform readers that new table is installed */ @@ -781,8 +802,8 @@ same_bucket: atomic_inc(&ipvs->svc_table_changes); t_free = t; -unlock_m: - mutex_unlock(&ipvs->service_mutex); +unlock_repl: + up_write(&ipvs->svc_replace_sem); unlock_sem: up_write(&ipvs->svc_resize_sem); @@ -801,6 +822,11 @@ out: test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) return; queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, 1); + return; + +unlock_m: + mutex_unlock(&ipvs->service_mutex); + goto unlock_sem; } static inline void @@ -1691,6 +1717,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_pe *pe = NULL; int ret_hooks = -1; int ret = 0; + bool grow; /* increase the module use count */ if (!ip_vs_use_count_inc()) @@ -1732,16 +1759,25 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, } #endif - t = rcu_dereference_protected(ipvs->svc_table, 1); + /* The old table can be freed, protect it with RCU */ + rcu_read_lock(); + t = rcu_dereference(ipvs->svc_table); if (!t) { int lfactor = sysctl_svc_lfactor(ipvs); int new_size = ip_vs_svc_desired_size(ipvs, NULL, lfactor); + rcu_read_unlock(); t_new = ip_vs_svc_table_alloc(ipvs, new_size, lfactor); if (!t_new) { ret = -ENOMEM; goto out_err; } + grow = false; + } else { + /* Even the currently attached new table may need to grow */ + t = rcu_dereference(t->new_tbl); + grow = ip_vs_get_num_services(ipvs) + 1 > t->u_thresh; + rcu_read_unlock(); } if (!rcu_dereference_protected(ipvs->conn_tab, 1)) { @@ -1800,6 +1836,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, goto out_err; if (t_new) { + /* Add table for first time */ clear_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags); rcu_assign_pointer(ipvs->svc_table, t_new); t_new = NULL; @@ -1831,8 +1868,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, ip_vs_svc_hash(svc); /* Schedule resize work */ - if (t && ip_vs_get_num_services(ipvs) > t->u_thresh && - !test_and_set_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags)) + if (grow && !test_and_set_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags)) queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, 1); @@ -2054,7 +2090,6 @@ static int ip_vs_del_service(struct ip_vs_service *svc) return -EEXIST; ipvs = svc->ipvs; ip_vs_unlink_service(svc, false); - t = rcu_dereference_protected(ipvs->svc_table, 1); /* Drop the table if no more services */ ns = ip_vs_get_num_services(ipvs); @@ -2062,6 +2097,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc) /* Stop the resizer and drop the tables */ set_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags); cancel_delayed_work_sync(&ipvs->svc_resize_work); + t = rcu_dereference_protected(ipvs->svc_table, 1); if (t) { rcu_assign_pointer(ipvs->svc_table, NULL); /* Inform readers that table is removed */ @@ -2075,11 +2111,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc) t = p; } } - } else if (ns <= t->l_thresh && - !test_and_set_bit(IP_VS_WORK_SVC_RESIZE, - &ipvs->work_flags)) { - queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, - 1); + } else { + bool shrink; + + rcu_read_lock(); + t = rcu_dereference(ipvs->svc_table); + /* Even the currently attached new table may need to shrink */ + t = rcu_dereference(t->new_tbl); + shrink = ns <= t->l_thresh; + rcu_read_unlock(); + if (shrink && !test_and_set_bit(IP_VS_WORK_SVC_RESIZE, + &ipvs->work_flags)) + queue_delayed_work(system_unbound_wq, + &ipvs->svc_resize_work, 1); } return 0; } @@ -2184,17 +2228,21 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, struct ip_vs_service *svc; struct hlist_bl_node *e; struct ip_vs_dest *dest; - int old_gen, new_gen; + int old_gen; if (event != NETDEV_DOWN || !ipvs) return NOTIFY_DONE; IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); + /* Allow concurrent rehashing on resize but to avoid loop + * serialize with installing the new table. + */ + down_read(&ipvs->svc_replace_sem); + old_gen = atomic_read(&ipvs->svc_table_changes); rcu_read_lock(); -repeat: smp_rmb(); /* ipvs->svc_table and svc_table_changes */ ip_vs_rht_walk_buckets_rcu(ipvs->svc_table, head) { hlist_bl_for_each_entry_rcu(svc, e, head, s_list) { @@ -2207,17 +2255,17 @@ repeat: } resched_score++; if (resched_score >= 100) { - resched_score = 0; cond_resched_rcu(); - new_gen = atomic_read(&ipvs->svc_table_changes); - /* New table installed ? */ - if (old_gen != new_gen) { - old_gen = new_gen; - goto repeat; - } + /* Flushed? So no more dev refs */ + if (atomic_read(&ipvs->svc_table_changes) != old_gen) + goto done; + resched_score = 0; } } + +done: rcu_read_unlock(); + up_read(&ipvs->svc_replace_sem); return NOTIFY_DONE; } @@ -2244,6 +2292,10 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs) struct ip_vs_service *svc; struct hlist_bl_node *e; + /* svc_table can not be replaced (svc_replace_sem) or + * removed (service_mutex) + */ + down_read(&ipvs->svc_replace_sem); rcu_read_lock(); ip_vs_rht_walk_buckets_rcu(ipvs->svc_table, head) { @@ -2259,6 +2311,7 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs) } rcu_read_unlock(); + up_read(&ipvs->svc_replace_sem); ip_vs_zero_stats(&ipvs->tot_stats->s); return 0; @@ -3062,6 +3115,7 @@ static int ip_vs_status_show(struct seq_file *seq, void *v) u32 sum; int i; + /* Info for conns */ rcu_read_lock(); t = rcu_dereference(ipvs->conn_tab); @@ -3123,6 +3177,12 @@ repeat_conn: } after_conns: + rcu_read_unlock(); + + /* Info for services */ + down_read(&ipvs->svc_replace_sem); + rcu_read_lock(); + t = rcu_dereference(ipvs->svc_table); count = ip_vs_get_num_services(ipvs); @@ -3133,9 +3193,7 @@ after_conns: if (!count) goto after_svc; old_gen = atomic_read(&ipvs->svc_table_changes); - loops = 0; -repeat_svc: smp_rmb(); /* ipvs->svc_table and svc_table_changes */ memset(counts, 0, sizeof(counts)); ip_vs_rht_for_each_table_rcu(ipvs->svc_table, t, pt) { @@ -3157,15 +3215,10 @@ repeat_svc: if (resched_score >= 100) { resched_score = 0; cond_resched_rcu(); - new_gen = atomic_read(&ipvs->svc_table_changes); - /* New table installed ? */ - if (old_gen != new_gen) { - /* Too many changes? */ - if (++loops >= 5) - goto after_svc; - old_gen = new_gen; - goto repeat_svc; - } + /* Flushed? */ + if (atomic_read(&ipvs->svc_table_changes) != + old_gen) + goto after_svc; } counts[count]++; } @@ -3184,6 +3237,9 @@ repeat_svc: } after_svc: + rcu_read_unlock(); + up_read(&ipvs->svc_replace_sem); + seq_printf(seq, "Stats thread slots:\t%d (max %lu)\n", ipvs->est_kt_count, ipvs->est_max_threads); seq_printf(seq, "Stats chain max len:\t%d\n", ipvs->est_chain_max); @@ -3191,7 +3247,6 @@ after_svc: ipvs->est_chain_max * IPVS_EST_CHAIN_FACTOR * IPVS_EST_NTICKS); - rcu_read_unlock(); return 0; } @@ -3503,7 +3558,7 @@ __ip_vs_get_service_entries(struct netns_ipvs *ipvs, int ret = 0; lockdep_assert_held(&ipvs->svc_resize_sem); - /* All service modifications are disabled, go ahead */ + /* All svc_table modifications are disabled, go ahead */ ip_vs_rht_walk_buckets(ipvs->svc_table, head) { hlist_bl_for_each_entry(svc, e, head, s_list) { /* Only expose IPv4 entries to old interface */ @@ -3687,7 +3742,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) pr_err("length: %u != %zu\n", *len, size); return -EINVAL; } - /* Protect against table resizer moving the entries. + /* Prevent modifications to the list with services. * Try reverse locking, so that we do not hold the mutex * while waiting for semaphore. */ @@ -4029,6 +4084,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, int start = cb->args[0]; int idx = 0; + /* Make sure we do not see same service twice during resize */ down_read(&ipvs->svc_resize_sem); rcu_read_lock(); ip_vs_rht_walk_buckets_safe_rcu(ipvs->svc_table, head) { @@ -5072,6 +5128,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) /* Initialize service_mutex, svc_table per netns */ __mutex_init(&ipvs->service_mutex, "ipvs->service_mutex", &__ipvs_service_key); init_rwsem(&ipvs->svc_resize_sem); + init_rwsem(&ipvs->svc_replace_sem); INIT_DELAYED_WORK(&ipvs->svc_resize_work, svc_resize_work_handler); atomic_set(&ipvs->svc_table_changes, 0); RCU_INIT_POINTER(ipvs->svc_table, NULL); -- cgit v1.2.3 From 53d7fd878c28b28e03769071d1f28ef031a060ad Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 14 May 2026 10:55:10 +0200 Subject: netfilter: ipset: fix a potential dump-destroy race When dumping sets in order to create the proper order for restore, the list type of sets dumped last. Therefore internally we run the dumping loop twice: first with all non-list type of sets and skipping the list type ones and then secondly for the list type of sets. Sashiko noticed that there's a potential race between dump and destroy if in the first loop the last set was a list type of set: its pointer remains unreferenced and a concurrent destroy can free it. Fix the issue by resetting the variable holding the pointer. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index c5a26236a0bb..0874029cb0f2 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1613,6 +1613,7 @@ dump_last: ((dump_type == DUMP_ALL) == !!(set->type->features & IPSET_DUMP_LAST))) { write_unlock_bh(&ip_set_ref_lock); + set = NULL; continue; } pr_debug("List set: %s\n", set->name); -- cgit v1.2.3 From b6a91f68ebfed9c38e0e9150f58a9b85da07181c Mon Sep 17 00:00:00 2001 From: Yizhou Zhao Date: Tue, 12 May 2026 01:30:41 +0800 Subject: netfilter: nft_inner: Fix IPv6 inner_thoff desync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In nft_inner_parse_l2l3(), when processing inner IPv6 packets, ipv6_find_hdr() correctly computes the transport header offset traversing all extension headers, but the result is immediately overwritten with nhoff + sizeof(_ip6h) (40 bytes), which only accounts for the IPv6 base header. This creates a desync between inner_thoff (wrong — points to extension header start) and l4proto (correct — e.g., IPPROTO_TCP), enabling transport header forgery and potential firewall bypass. This issue affects stable versions from Linux 6.2. For comparison, the normal (non-inner) IPv6 path correctly preserves ipv6_find_hdr()'s result. Removing the incorrect overwrite ensures that ipv6_find_hdr()'s calculated transport header offset is preserved, thereby fixing the desynchronization. Fixes: 3a07327d10a0 ("netfilter: nft_inner: support for inner tunnel header matching") Cc: stable@vger.kernel.org Reported-by: Yizhou Zhao Reported-by: Yuxiang Yang Reported-by: Xuewei Feng Reported-by: Qi Li Reported-by: Ke Xu Assisted-by: GLM:5.1 Z.ai Signed-off-by: Yizhou Zhao Reviewed-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_inner.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c index 03ffb1159fc1..859aa38e333b 100644 --- a/net/netfilter/nft_inner.c +++ b/net/netfilter/nft_inner.c @@ -163,7 +163,6 @@ static int nft_inner_parse_l2l3(const struct nft_inner *priv, return -1; if (fragoff == 0) { - thoff = nhoff + sizeof(_ip6h); ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; ctx->inner_thoff = thoff; ctx->l4proto = l4proto; -- cgit v1.2.3 From 0d3a282ab5f165fc207ff49ea5b6ad8f54616bd6 Mon Sep 17 00:00:00 2001 From: Nan Li Date: Tue, 12 May 2026 16:50:01 +0800 Subject: netfilter: ipset: stop hash:* range iteration at end The following hash set variants: hash:ip,mark hash:ip,port hash:ip,port,ip hash:ip,port,net iterate IPv4 ranges with a 32-bit iterator. The iterator must stop once the last address in the requested range has been processed. Advancing it once more can move the traversal state past the end of the request, so a later retry may continue from an unintended position. Handle the iterator increment explicitly at the end of the loop and stop once the upper bound has been processed. This keeps the existing retry behaviour intact for valid ranges while preventing traversal from continuing past the original boundary. Fixes: 48596a8ddc46 ("netfilter: ipset: Fix adding an IPv4 range containing more than 2^31 addresses") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Nan Li Signed-off-by: Ren Wei Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_ipmark.c | 6 +++++- net/netfilter/ipset/ip_set_hash_ipport.c | 5 ++++- net/netfilter/ipset/ip_set_hash_ipportip.c | 5 ++++- net/netfilter/ipset/ip_set_hash_ipportnet.c | 5 ++++- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index a22ec1a6f6ec..e26ca2a370e3 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -150,7 +150,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to; ip++, i++) { + for (; ip <= ip_to; i++) { e.ip = htonl(ip); if (i > IPSET_MAX_RANGE) { hash_ipmark4_data_next(&h->next, &e); @@ -162,6 +162,10 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; ret = 0; + + if (ip == ip_to) + break; + ip++; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index e977b5a9c48d..41ca24a22a02 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -186,7 +186,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to; ip++) { + for (; ip <= ip_to;) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++, i++) { @@ -203,6 +203,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], ret = 0; } + if (ip == ip_to) + break; + ip++; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 39a01934b153..b9ac2efaa15c 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -182,7 +182,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to; ip++) { + for (; ip <= ip_to;) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++, i++) { @@ -199,6 +199,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], ret = 0; } + if (ip == ip_to) + break; + ip++; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 5c6de605a9fb..2d6652d43199 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -274,7 +274,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], p = port; ip2 = ip2_from; } - for (; ip <= ip_to; ip++) { + for (; ip <= ip_to;) { e.ip = htonl(ip); for (; p <= port_to; p++) { e.port = htons(p); @@ -298,6 +298,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip2 = ip2_from; } p = port; + if (ip == ip_to) + break; + ip++; } return ret; } -- cgit v1.2.3 From a6cb3ff979855f7f0ee9450a947fe8f96c2ba37a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 12 May 2026 11:30:49 +0200 Subject: netfilter: nft_inner: release local_lock before re-enabling softirqs Quoting sashiko: In the error path, local_bh_enable() is called before local_unlock_nested_bh(). Fixes: ba36fada9ab4 ("netfilter: nft_inner: Use nested-BH locking for nft_pcpu_tun_ctx") Signed-off-by: Florian Westphal Reviewed-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_inner.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c index 859aa38e333b..d14ca157910b 100644 --- a/net/netfilter/nft_inner.c +++ b/net/netfilter/nft_inner.c @@ -246,8 +246,8 @@ static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt, local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx); if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) { - local_bh_enable(); local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); + local_bh_enable(); return false; } *tun_ctx = *this_cpu_tun_ctx; -- cgit v1.2.3 From 4322dcde6b4173c2d8e8e6118ed290794263bcc8 Mon Sep 17 00:00:00 2001 From: Zhengchuan Liang Date: Wed, 13 May 2026 15:57:17 +0800 Subject: netfilter: ip6t_hbh: reject oversized option lists struct ip6t_opts stores at most IP6T_OPTS_OPTSNR option descriptors, but hbh_mt6_check() does not reject larger optsnr values supplied from userspace. Validate optsnr in the rule setup path so only match data that fits the fixed-size opts array can be installed. This follows the existing xtables pattern of rejecting invalid user-provided counts in checkentry() and keeps the packet matching path unchanged. `struct ip6t_opts` has a fixed `opts[IP6T_OPTS_OPTSNR]` array, where `IP6T_OPTS_OPTSNR` is 16, then off-by-one array access is possible: [ 137.924693][ T8692] UBSAN: array-index-out-of-bounds in ../net/ipv6/netfilter/ip6t_hbh.c:110:29 [ 137.926167][ T8692] index 16 is out of range for type '__u16 [16]' Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Zhengchuan Liang Signed-off-by: Ren Wei Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_hbh.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index e7a3fb9355ee..450dd53846a2 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -168,6 +168,10 @@ static int hbh_mt6_check(const struct xt_mtchk_param *par) pr_debug("unknown flags %X\n", optsinfo->invflags); return -EINVAL; } + if (optsinfo->optsnr > IP6T_OPTS_OPTSNR) { + pr_debug("too many supported opts specified\n"); + return -EINVAL; + } if (optsinfo->flags & IP6T_OPTS_NSTRICT) { pr_debug("Not strict - not implemented"); -- cgit v1.2.3 From c0c42a0fb27144c1cd7509f94bec0d3bcca98c72 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 14 May 2026 10:55:11 +0200 Subject: netfilter: ipset: Fix data race between add and list header in all hash types The "ipset list -terse" command is actually a dump operation which may run parallel with "ipset add" commands, which can trigger an internal resizing of the hash type of sets just being dumped. However, dumping just the header part of the set was not protected against underlying resizing. Fix it by protecting the header dumping part as well. Fixes: c4c997839cf9 ("netfilter: ipset: Fix parallel resizing and listing of the same set") Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 0874029cb0f2..3706b4a85a0f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1649,13 +1649,13 @@ dump_last: if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN && nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index))) goto nla_put_failure; + if (set->variant->uref) + set->variant->uref(set, cb, true); ret = set->variant->head(set, skb); if (ret < 0) goto release_refcount; if (dump_flags & IPSET_FLAG_LIST_HEADER) goto next_set; - if (set->variant->uref) - set->variant->uref(set, cb, true); fallthrough; default: ret = set->variant->list(set, skb, cb); -- cgit v1.2.3 From 2358f7427ccd6ec8867a48205d8fcec973683a3f Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 8 May 2026 22:58:58 +0200 Subject: netfilter: ipset: Fix data race between add and dump in all hash types When adding a new entry to the next position in the existing hash bucket, the position index was incremented too early and parallel dump could read it before the entry was populated with the value. Move the setting of the position index after populating the entry. v2: Position counting fixed, noticed by Florian Westphal. Fixes: 18f84d41d34f ("netfilter: ipset: Introduce RCU locking in hash:* types") Reported-by: syzbot+786c889f046e8b003ca6@syzkaller.appspotmail.com Reported-by: syzbot+1da17e4b41d795df059e@syzkaller.appspotmail.com Reported-by: syzbot+421c5f3ff8e9493084d9@syzkaller.appspotmail.com Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index b79e5dd2af03..133ce4611eed 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -844,7 +844,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n, *old = ERR_PTR(-ENOENT); - int i, j = -1, ret; + int i, j = -1, npos = 0, ret; bool flag_exist = flags & IPSET_FLAG_EXIST; bool deleted = false, forceadd = false, reuse = false; u32 r, key, multi = 0, elements, maxelem; @@ -889,6 +889,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, ext_size(AHASH_INIT_SIZE, set->dsize); goto copy_elem; } + npos = n->pos; for (i = 0; i < n->pos; i++) { if (!test_bit(i, n->used)) { /* Reuse first deleted entry */ @@ -962,7 +963,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, } copy_elem: - j = n->pos++; + j = npos; + npos = n->pos + 1; data = ahash_data(n, j, set->dsize); copy_data: t->hregion[r].elements++; @@ -985,6 +987,7 @@ overwrite_extensions: if (SET_WITH_TIMEOUT(set)) ip_set_timeout_set(ext_timeout(data, set), ext->timeout); smp_mb__before_atomic(); + n->pos = npos; set_bit(j, n->used); if (old != ERR_PTR(-ENOENT)) { rcu_assign_pointer(hbucket(t, key), n); -- cgit v1.2.3 From 7f7445840b7771338618930e45ee641104b38ed8 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 14 May 2026 10:55:13 +0200 Subject: netfilter: ipset: annotate "pos" for concurrent readers/writers The "pos" structure member of struct hbucket stores the first free slot in the hash bucket of a hash type of set and there are concurrent readers/writers. Annotate accesses properly. Fixes: 18f84d41d34f ("netfilter: ipset: Introduce RCU locking in hash:* types") Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 62 +++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 133ce4611eed..04e4627ddfc1 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -386,8 +386,9 @@ static void mtype_ext_cleanup(struct ip_set *set, struct hbucket *n) { int i; + u8 pos = smp_load_acquire(&n->pos); - for (i = 0; i < n->pos; i++) + for (i = 0; i < pos; i++) if (test_bit(i, n->used)) ip_set_ext_destroy(set, ahash_data(n, i, set->dsize)); } @@ -490,7 +491,7 @@ mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r) #ifdef IP_SET_HASH_WITH_NETS u8 k; #endif - u8 htable_bits = t->htable_bits; + u8 pos, htable_bits = t->htable_bits; spin_lock_bh(&t->hregion[r].lock); for (i = ahash_bucket_start(r, htable_bits); @@ -498,7 +499,8 @@ mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r) n = __ipset_dereference(hbucket(t, i)); if (!n) continue; - for (j = 0, d = 0; j < n->pos; j++) { + pos = smp_load_acquire(&n->pos); + for (j = 0, d = 0; j < pos; j++) { if (!test_bit(j, n->used)) { d++; continue; @@ -534,7 +536,7 @@ mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r) /* Still try to delete expired elements. */ continue; tmp->size = n->size - AHASH_INIT_SIZE; - for (j = 0, d = 0; j < n->pos; j++) { + for (j = 0, d = 0; j < pos; j++) { if (!test_bit(j, n->used)) continue; data = ahash_data(n, j, dsize); @@ -623,7 +625,7 @@ mtype_resize(struct ip_set *set, bool retried) { struct htype *h = set->data; struct htable *t, *orig; - u8 htable_bits; + u8 pos, htable_bits; size_t hsize, dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 flags; @@ -685,7 +687,8 @@ retry: n = __ipset_dereference(hbucket(orig, i)); if (!n) continue; - for (j = 0; j < n->pos; j++) { + pos = smp_load_acquire(&n->pos); + for (j = 0; j < pos; j++) { if (!test_bit(j, n->used)) continue; data = ahash_data(n, j, dsize); @@ -809,9 +812,10 @@ mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size) { struct htype *h = set->data; const struct htable *t; - u32 i, j, r; struct hbucket *n; struct mtype_elem *data; + u32 i, j, r; + u8 pos; t = rcu_dereference_bh(h->table); for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { @@ -820,7 +824,8 @@ mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size) n = rcu_dereference_bh(hbucket(t, i)); if (!n) continue; - for (j = 0; j < n->pos; j++) { + pos = smp_load_acquire(&n->pos); + for (j = 0; j < pos; j++) { if (!test_bit(j, n->used)) continue; data = ahash_data(n, j, set->dsize); @@ -844,10 +849,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n, *old = ERR_PTR(-ENOENT); - int i, j = -1, npos = 0, ret; + int i, j = -1, ret; bool flag_exist = flags & IPSET_FLAG_EXIST; bool deleted = false, forceadd = false, reuse = false; u32 r, key, multi = 0, elements, maxelem; + u8 npos = 0; rcu_read_lock_bh(); t = rcu_dereference_bh(h->table); @@ -889,8 +895,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, ext_size(AHASH_INIT_SIZE, set->dsize); goto copy_elem; } - npos = n->pos; - for (i = 0; i < n->pos; i++) { + npos = smp_load_acquire(&n->pos); + for (i = 0; i < npos; i++) { if (!test_bit(i, n->used)) { /* Reuse first deleted entry */ if (j == -1) { @@ -934,7 +940,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (elements >= maxelem) goto set_full; /* Create a new slot */ - if (n->pos >= n->size) { + if (npos >= n->size) { #ifdef IP_SET_HASH_WITH_MULTI if (h->bucketsize >= AHASH_MAX_TUNED) goto set_full; @@ -963,8 +969,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, } copy_elem: - j = npos; - npos = n->pos + 1; + j = npos++; data = ahash_data(n, j, set->dsize); copy_data: t->hregion[r].elements++; @@ -987,7 +992,8 @@ overwrite_extensions: if (SET_WITH_TIMEOUT(set)) ip_set_timeout_set(ext_timeout(data, set), ext->timeout); smp_mb__before_atomic(); - n->pos = npos; + /* Ensure all data writes are visible before updating position */ + smp_store_release(&n->pos, npos); set_bit(j, n->used); if (old != ERR_PTR(-ENOENT)) { rcu_assign_pointer(hbucket(t, key), n); @@ -1046,6 +1052,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, int i, j, k, r, ret = -IPSET_ERR_EXIST; u32 key, multi = 0; size_t dsize = set->dsize; + u8 pos; /* Userspace add and resize is excluded by the mutex. * Kernespace add does not trigger resize. @@ -1061,7 +1068,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, n = rcu_dereference_bh(hbucket(t, key)); if (!n) goto out; - for (i = 0, k = 0; i < n->pos; i++) { + pos = smp_load_acquire(&n->pos); + for (i = 0, k = 0; i < pos; i++) { if (!test_bit(i, n->used)) { k++; continue; @@ -1075,8 +1083,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, ret = 0; clear_bit(i, n->used); smp_mb__after_atomic(); - if (i + 1 == n->pos) - n->pos--; + if (i + 1 == pos) + smp_store_release(&n->pos, --pos); t->hregion[r].elements--; #ifdef IP_SET_HASH_WITH_NETS for (j = 0; j < IPSET_NET_COUNT; j++) @@ -1097,11 +1105,11 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, x->flags = flags; } } - for (; i < n->pos; i++) { + for (; i < pos; i++) { if (!test_bit(i, n->used)) k++; } - if (k == n->pos) { + if (k == pos) { t->hregion[r].ext_size -= ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, key), NULL); kfree_rcu(n, rcu); @@ -1112,7 +1120,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (!tmp) goto out; tmp->size = n->size - AHASH_INIT_SIZE; - for (j = 0, k = 0; j < n->pos; j++) { + for (j = 0, k = 0; j < pos; j++) { if (!test_bit(j, n->used)) continue; data = ahash_data(n, j, dsize); @@ -1173,6 +1181,7 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, int ret, i, j = 0; #endif u32 key, multi = 0; + u8 pos; pr_debug("test by nets\n"); for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) { @@ -1190,7 +1199,8 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, n = rcu_dereference_bh(hbucket(t, key)); if (!n) continue; - for (i = 0; i < n->pos; i++) { + pos = smp_load_acquire(&n->pos); + for (i = 0; i < pos; i++) { if (!test_bit(i, n->used)) continue; data = ahash_data(n, i, set->dsize); @@ -1224,6 +1234,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct mtype_elem *data; int i, ret = 0; u32 key, multi = 0; + u8 pos; rcu_read_lock_bh(); t = rcu_dereference_bh(h->table); @@ -1246,7 +1257,8 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, ret = 0; goto out; } - for (i = 0; i < n->pos; i++) { + pos = smp_load_acquire(&n->pos); + for (i = 0; i < pos; i++) { if (!test_bit(i, n->used)) continue; data = ahash_data(n, i, set->dsize); @@ -1363,6 +1375,7 @@ mtype_list(const struct ip_set *set, /* We assume that one hash bucket fills into one page */ void *incomplete; int i, ret = 0; + u8 pos; atd = nla_nest_start(skb, IPSET_ATTR_ADT); if (!atd) @@ -1381,7 +1394,8 @@ mtype_list(const struct ip_set *set, cb->args[IPSET_CB_ARG0], t, n); if (!n) continue; - for (i = 0; i < n->pos; i++) { + pos = smp_load_acquire(&n->pos); + for (i = 0; i < pos; i++) { if (!test_bit(i, n->used)) continue; e = ahash_data(n, i, set->dsize); -- cgit v1.2.3 From b2870fc21601db9133bc70c48c603b487614fa3b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 14 May 2026 16:46:38 +0200 Subject: netfilter: br_netfilter: Reallocate headroom if necessary in neigh_hh_bridge() neigh_hh_bridge() assumes the skb always has sufficient headroom to copy the aligned L2 header. This assumption can trigger the crash reported below using the following netfilter setup: $modprobe br_netfilter $sysctl -w net.bridge.bridge-nf-call-iptables=1 $root@OpenWrt:~# nft list ruleset table ip nat { chain prerouting { type nat hook prerouting priority dstnat; policy accept; ip daddr 192.168.83.123 dnat to 192.168.83.120 } } - iperf3 client (192.168.83.119) --> bridge (192.168.83.118) --> iperf3 server (192.168.83.120) the iperf3 client is sending packet for 192.168.83.123 to the bridge device. [ 1579.036575] Unable to handle kernel write to read-only memory at virtual address ffffff8004d76ffe [ 1579.045482] Mem abort info: [ 1579.048273] ESR = 0x000000009600004f [ 1579.052024] EC = 0x25: DABT (current EL), IL = 32 bits [ 1579.057363] SET = 0, FnV = 0 [ 1579.060417] EA = 0, S1PTW = 0 [ 1579.063550] FSC = 0x0f: level 3 permission fault [ 1579.068345] Data abort info: [ 1579.071224] ISV = 0, ISS = 0x0000004f, ISS2 = 0x00000000 [ 1579.076720] CM = 0, WnR = 1, TnD = 0, TagAccess = 0 [ 1579.081770] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 1579.087092] swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000080dc4000 [ 1579.093794] [ffffff8004d76ffe] pgd=180000009ffff003, p4d=180000009ffff003, pud=180000009ffff003, pmd=180000009ffe3003, pte=0060000084d76787 [ 1579.106343] Internal error: Oops: 000000009600004f [#1] SMP [ 1579.193824] CPU: 0 UID: 0 PID: 235 Comm: napi/qdma_eth-3 Tainted: G O 6.12.57 #0 [ 1579.202614] Tainted: [O]=OOT_MODULE [ 1579.206102] Hardware name: Airoha AN7581 Evaluation Board (DT) [ 1579.211929] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 1579.218889] pc : br_nf_pre_routing_finish_bridge+0x1ac/0xcc8 [br_netfilter] [ 1579.225859] lr : br_nf_pre_routing_finish_bridge+0x18c/0xcc8 [br_netfilter] [ 1579.232822] sp : ffffffc0817cba20 [ 1579.236128] x29: ffffffc0817cba20 x28: 0000000000000000 x27: ffffff8002b89000 [ 1579.243273] x26: ffffff8004d7700e x25: 0000000000000008 x24: 0000000000000000 [ 1579.250416] x23: ffffffc08179d4c0 x22: 0000000000000000 x21: ffffffc08179d4c0 [ 1579.257561] x20: ffffff8004d9b800 x19: ffffff8015010000 x18: 0000000000000014 [ 1579.264704] x17: ffffffbf9e930000 x16: ffffffc0817c8000 x15: 0000000000000070 [ 1579.271848] x14: 0000000000000080 x13: 0000000000000001 x12: 0000000000000000 [ 1579.278993] x11: ffffffc0798caae0 x10: ffffff8014db6fd8 x9 : 0000000000000000 [ 1579.286136] x8 : 0000000000000003 x7 : ffffffc08171f628 x6 : 000000001a3b83d3 [ 1579.293281] x5 : 0000000000000000 x4 : 1beb76f22fee0000 x3 : ffffff8004d7700e [ 1579.300425] x2 : 0000000000000000 x1 : ffffff8004d9b8bc x0 : ffffff80026ed000 [ 1579.307570] Call trace: [ 1579.310018] br_nf_pre_routing_finish_bridge+0x1ac/0xcc8 [br_netfilter] [ 1579.316632] br_nf_hook_thresh+0xd4/0x14bc [br_netfilter] [ 1579.322032] br_nf_hook_thresh+0x250/0x14bc [br_netfilter] [ 1579.327517] br_nf_hook_thresh+0x76c/0x14bc [br_netfilter] [ 1579.333003] br_handle_frame+0x180/0x480 [ 1579.336935] __netif_receive_skb_core.constprop.0+0x540/0xf40 [ 1579.342682] __netif_receive_skb_one_core+0x28/0x50 [ 1579.347561] process_backlog+0x98/0x1e0 [ 1579.351398] __napi_poll+0x34/0x1c4 [ 1579.354887] net_rx_action+0x178/0x330 [ 1579.358638] handle_softirqs+0x108/0x2d4 [ 1579.362560] __do_softirq+0x10/0x18 [ 1579.366051] ____do_softirq+0xc/0x20 [ 1579.369627] call_on_irq_stack+0x30/0x4c [ 1579.373550] do_softirq_own_stack+0x18/0x20 [ 1579.377734] do_softirq+0x4c/0x60 [ 1579.381050] __local_bh_enable_ip+0x88/0x98 [ 1579.385234] napi_threaded_poll_loop+0x188/0x21c [ 1579.389853] napi_threaded_poll+0x70/0x80 [ 1579.393863] kthread+0xd8/0xdc [ 1579.396918] ret_from_fork+0x10/0x20 [ 1579.400499] Code: 88dffc22 3707ffc2 f9406663 f9406684 (f81f0064) [ 1579.406589] ---[ end trace 0000000000000000 ]--- [ 1579.411209] Kernel panic - not syncing: Oops: Fatal exception in interrupt [ 1579.418083] SMP: stopping secondary CPUs [ 1579.422012] Kernel Offset: disabled Fix the issue reallocating the skb headroom if necessary in neigh_hh_bridge routine. Fixes: e179e6322ac33 ("netfilter: bridge-netfilter: Fix MAC header handling with IP DNAT") Reviewed-by: Ido Schimmel Signed-off-by: Lorenzo Bianconi Signed-off-by: Pablo Neira Ayuso --- include/net/neighbour.h | 8 ++++++-- net/bridge/br_netfilter_hooks.c | 6 +++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 2dfee6d4258a..8860cc2175fc 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -489,11 +489,15 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) { - unsigned int seq, hh_alen; + unsigned int seq, hh_alen = HH_DATA_ALIGN(ETH_HLEN); + int err; + + err = skb_cow_head(skb, hh_alen); + if (err) + return err; do { seq = read_seqbegin(&hh->hh_lock); - hh_alen = HH_DATA_ALIGN(ETH_HLEN); memcpy(skb->data - hh_alen, hh->hh_data, ETH_ALEN + hh_alen - ETH_HLEN); } while (read_seqretry(&hh->hh_lock, seq)); return 0; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 0ab1c94db4b9..0a394e5f4391 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -297,7 +297,11 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_ goto free_skb; } - neigh_hh_bridge(&neigh->hh, skb); + if (neigh_hh_bridge(&neigh->hh, skb)) { + neigh_release(neigh); + goto free_skb; + } + skb->dev = br_indev; ret = br_handle_frame_finish(net, sk, skb); -- cgit v1.2.3 From e196115ec330a18de415bdb9f5071aa9f08e53ce Mon Sep 17 00:00:00 2001 From: Haoze Xie Date: Fri, 15 May 2026 11:19:02 +0800 Subject: netfilter: nf_queue: hold bridge skb->dev while queued br_pass_frame_up() rewrites skb->dev from the ingress port to the bridge master before queueing bridge LOCAL_IN packets. NFQUEUE only holds references on state.in/out and bridge physdevs, so a queued bridge packet can retain a freed bridge master in skb->dev until reinjection. When the verdict is reinjected later, br_netif_receive_skb() re-enters the receive path with skb->dev still pointing at the freed bridge master, triggering a use-after-free. Store skb->dev in the queue entry, hold a reference on it for the queue lifetime, and use the saved device when dropping queued packets during NETDEV_DOWN handling. Fixes: ac2863445686 ("netfilter: bridge: add nf_afinfo to enable queuing to userspace") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Haoze Xie Signed-off-by: Ren Wei Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 1 + net/netfilter/nf_queue.c | 4 +++- net/netfilter/nfnetlink_queue.c | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index d17035d14d96..3978c3174cdb 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -14,6 +14,7 @@ struct nf_queue_entry { struct list_head list; struct rhash_head hash_node; struct sk_buff *skb; + struct net_device *skb_dev; unsigned int id; unsigned int hook_index; /* index in hook_entries->hook[] */ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index a6c81c04b3a5..57b450024a99 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -61,6 +61,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) struct nf_hook_state *state = &entry->state; /* Release those devices we held, or Alexey will kill me. */ + dev_put(entry->skb_dev); dev_put(state->in); dev_put(state->out); if (state->sk) @@ -102,6 +103,7 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt)) return false; + dev_hold(entry->skb_dev); dev_hold(state->in); dev_hold(state->out); @@ -202,11 +204,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, *entry = (struct nf_queue_entry) { .skb = skb, + .skb_dev = skb->dev, .state = *state, .hook_index = index, .size = sizeof(*entry) + route_key_size, }; - __nf_queue_entry_init_physdevs(entry); if (!nf_queue_entry_get_refs(entry)) { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 58304fd1f70f..984a0eb9e149 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1212,6 +1212,8 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) if (physinif == ifindex || physoutif == ifindex) return 1; #endif + if (entry->skb_dev && entry->skb_dev->ifindex == ifindex) + return 1; if (entry->state.in) if (entry->state.in->ifindex == ifindex) return 1; -- cgit v1.2.3 From 7b7d6572145c1dab2dd9bfb550b188e5f0ff3c3f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 May 2026 10:55:58 +0200 Subject: ALSA: asihpi: Fix potential OOB array access at reading cache find_control() to retrieve a cached info accesses the array with the given index blindly, which may lead to an OOB array access. Add a sanity check for avoiding it. Link: https://sashiko.dev/#/patchset/20260511230121.28606-1-rosenp%40gmail.com Cc: Link: https://patch.msgid.link/20260515085606.242284-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/asihpi/hpicmn.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/asihpi/hpicmn.c b/sound/pci/asihpi/hpicmn.c index d846777e7462..19f0da2e6501 100644 --- a/sound/pci/asihpi/hpicmn.c +++ b/sound/pci/asihpi/hpicmn.c @@ -276,6 +276,12 @@ static short find_control(u16 control_index, return 0; } + if (control_index >= p_cache->control_count) { + HPI_DEBUG_LOG(VERBOSE, "control_index out of bounce %d\n", + control_index); + return 0; + } + *pI = p_cache->p_info[control_index]; if (!*pI) { HPI_DEBUG_LOG(VERBOSE, "Uncached Control %d\n", -- cgit v1.2.3 From 4372286ac774536e8e68bc6dfa0f0b0152b31fce Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Sat, 16 May 2026 19:15:31 +0800 Subject: ALSA: hda/realtek: Use ALC287_FIXUP_TXNW2781_I2C for ASUS Strix Gxx5 These devices were incorrectly using the ALC287_FIXUP_TAS2781_I2C quirk leading to errors: [ 18.765990] Serial bus multi instantiate pseudo device driver TXNW2781:00: error -ENXIO: IRQ index 0 not found [ 18.768153] Serial bus multi instantiate pseudo device driver TXNW2781:00: error -ENXIO: IRQ index 0 not found [ 18.768476] Serial bus multi instantiate pseudo device driver TXNW2781:00: error -ENXIO: IRQ index 0 not found [ 18.768899] Serial bus multi instantiate pseudo device driver TXNW2781:00: Instantiated 3 I2C devices. Use the ALC287_FIXUP_TXNW2781_I2C quirk instead to fix this and restore speaker audio on affected devices. Fixes: 1e9c708dc3ae ("ALSA: hda/tas2781: Add new quirk for Lenovo, ASUS, Dell projects") Link: https://lore.kernel.org/59fd4aa4-76b9-4984-8db9-a60e55ec6e80@losource.net/ Closes: https://lore.kernel.org/CACB9z7kjs8rhLstEc8fV29BCTb5dd881JwGozoKdO5cwCb=YwQ@mail.gmail.com Signed-off-by: Eric Naim Link: https://patch.msgid.link/20260516111532.111463-1-dnaim@cachyos.org Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 2f5d13032fd7..e348e8e2736b 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7453,12 +7453,12 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x3e00, "ASUS G814FH/FM/FP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3e20, "ASUS G814PH/PM/PP", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3e30, "ASUS TP3607SA", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x1043, 0x3ee0, "ASUS Strix G815_JHR_JMR_JPR", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x1043, 0x3ef0, "ASUS Strix G635LR_LW_LX", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x1043, 0x3f00, "ASUS Strix G815LH_LM_LP", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x1043, 0x3f10, "ASUS Strix G835LR_LW_LX", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x1043, 0x3f20, "ASUS Strix G615LR_LW", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x1043, 0x3f30, "ASUS Strix G815LR_LW", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3ee0, "ASUS Strix G815_JHR_JMR_JPR", ALC287_FIXUP_TXNW2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3ef0, "ASUS Strix G635LR_LW_LX", ALC287_FIXUP_TXNW2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3f00, "ASUS Strix G815LH_LM_LP", ALC287_FIXUP_TXNW2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3f10, "ASUS Strix G835LR_LW_LX", ALC287_FIXUP_TXNW2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3f20, "ASUS Strix G615LR_LW", ALC287_FIXUP_TXNW2781_I2C), + SND_PCI_QUIRK(0x1043, 0x3f30, "ASUS Strix G815LR_LW", ALC287_FIXUP_TXNW2781_I2C), SND_PCI_QUIRK(0x1043, 0x3fd0, "ASUS B3605CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3ff0, "ASUS B5405CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), -- cgit v1.2.3 From d0afd2cd356a2c337589ef8dfa2a224636600575 Mon Sep 17 00:00:00 2001 From: Sergio Boglione Date: Sat, 16 May 2026 10:16:50 -0300 Subject: ALSA: hda/realtek: Add quirk for HP 250 G10 (103c:8b34) HP 250 15.6 inch G10 Notebook PC uses the same ALC236 codec as the HP 255 15.6 inch G10 (103c:8b2f) and requires the same fixup to enable the internal speaker EAPD and microphone routing. Signed-off-by: Sergio Boglione Link: https://patch.msgid.link/20260516131651.143109-1-sboglione@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index e348e8e2736b..b6f5339cf1ea 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7103,6 +7103,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ad8, "HP 800 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), + SND_PCI_QUIRK(0x103c, 0x8b34, "HP 250 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8b3a, "HP Envy 15", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8b3f, "HP mt440 Mobile Thin Client U91", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), -- cgit v1.2.3 From 0aacce7c32e4631c3634df5d19d30c72a3614ec9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 May 2026 12:56:59 +0200 Subject: ALSA: hda: Avoid quirk matching with zero PCI SSID Heiko reported that BIOS on some recent machines doesn't set up PCI SSID properly but leave with zero (e.g. on HP Dragonfly Folio 13.5 inch G3 with SSID 103c:8a05/8a06), which confuses the quirk table matching and results in the non-functional state. Fix it by skipping the PCI SSID matching when either vendor or device ID is zero and falling back to the codec SSID that is supposed to be more stable for those cases. Reported-by: Heiko Schmid Tested-by: Heiko Schmid Closes: https://lore.kernel.org/20260514133110.12302-1-heiko@future-machines.org Link: https://patch.msgid.link/20260515105700.276420-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/hda/common/auto_parser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/hda/common/auto_parser.c b/sound/hda/common/auto_parser.c index 8923813ce424..5bc95d3116ff 100644 --- a/sound/hda/common/auto_parser.c +++ b/sound/hda/common/auto_parser.c @@ -1013,7 +1013,7 @@ void snd_hda_pick_fixup(struct hda_codec *codec, const char *name = NULL; const char *type = NULL; unsigned int vendor, device; - u16 pci_vendor, pci_device; + u16 pci_vendor = 0, pci_device = 0; u16 codec_vendor, codec_device; if (codec->fixup_id != HDA_FIXUP_ID_NOT_SET) @@ -1066,7 +1066,7 @@ void snd_hda_pick_fixup(struct hda_codec *codec, /* match primarily with the PCI SSID */ for (q = quirk; q->subvendor || q->subdevice; q++) { /* if the entry is specific to codec SSID, check with it */ - if (!codec->bus->pci || q->match_codec_ssid) { + if (!pci_vendor || !pci_device || q->match_codec_ssid) { if (hda_quirk_match(codec_vendor, codec_device, q)) { type = "codec SSID"; goto found_device; -- cgit v1.2.3 From 76824d2467feb1828b745d6add2541918d7be3da Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 16 May 2026 14:53:45 +0300 Subject: drm/msm/snapshot: fix dumping of the unaligned regions The snapshotting code internally aligns data segment to 16 bytes. This works fine for DPU code (where most of the regions are aligned), but fails for snapshotting of the DSI data (because DSI data region is shifted by 4 bytes). Fix the code by removing length alignment and by accurately printing last registers in the region. While reworking the code also fix the 16x memory overallocation in msm_disp_state_dump_regs(). Fixes: 98659487b845 ("drm/msm: add support to take dpu snapshot") Reported-by: Salendarsingh Gaud Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/725449/ Message-ID: <20260516-msm-fix-dsi-dump-2-v2-1-9e49fb2d240e@oss.qualcomm.com> Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c | 24 +++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c index 636bcb65a2a4..01a7019f5d67 100644 --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c @@ -9,7 +9,7 @@ #include "msm_disp_snapshot.h" -static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem *base_addr) +static void msm_disp_state_dump_regs(u32 **reg, u32 len, void __iomem *base_addr) { u32 len_padded; u32 num_rows; @@ -19,11 +19,11 @@ static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem *b void __iomem *end_addr; int i; - len_padded = aligned_len * REG_DUMP_ALIGN; - num_rows = aligned_len / REG_DUMP_ALIGN; + len_padded = round_up(len, REG_DUMP_ALIGN); + num_rows = DIV_ROUND_UP(len, REG_DUMP_ALIGN); addr = base_addr; - end_addr = base_addr + aligned_len; + end_addr = base_addr + len; *reg = kvzalloc(len_padded, GFP_KERNEL); if (!*reg) @@ -48,8 +48,8 @@ static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem *b static void msm_disp_state_print_regs(const u32 *dump_addr, u32 len, void __iomem *base_addr, struct drm_printer *p) { + void __iomem *addr, *end_addr; int i; - void __iomem *addr; u32 num_rows; if (!dump_addr) { @@ -58,6 +58,7 @@ static void msm_disp_state_print_regs(const u32 *dump_addr, u32 len, } addr = base_addr; + end_addr = base_addr + len; num_rows = len / REG_DUMP_ALIGN; for (i = 0; i < num_rows; i++) { @@ -67,6 +68,17 @@ static void msm_disp_state_print_regs(const u32 *dump_addr, u32 len, dump_addr[i * 4 + 2], dump_addr[i * 4 + 3]); addr += REG_DUMP_ALIGN; } + + if (addr != end_addr) { + drm_printf(p, "0x%lx : %08x", + (unsigned long)(addr - base_addr), + dump_addr[i * 4]); + if (addr + 0x4 < end_addr) + drm_printf(p, " %08x", dump_addr[i * 4 + 1]); + if (addr + 0x8 < end_addr) + drm_printf(p, " %08x", dump_addr[i * 4 + 2]); + drm_printf(p, "\n"); + } } void msm_disp_state_print(struct msm_disp_state *state, struct drm_printer *p) @@ -185,7 +197,7 @@ void msm_disp_snapshot_add_block(struct msm_disp_state *disp_state, u32 len, va_end(va); INIT_LIST_HEAD(&new_blk->node); - new_blk->size = ALIGN(len, REG_DUMP_ALIGN); + new_blk->size = len; new_blk->base_addr = base_addr; msm_disp_state_dump_regs(&new_blk->state, new_blk->size, base_addr); -- cgit v1.2.3 From d04a0047d619ddbc50e023aa76e4dddf86e5da3f Mon Sep 17 00:00:00 2001 From: Francesco Saverio Pavone Date: Sat, 16 May 2026 16:12:44 +0200 Subject: ALSA: pcm_drm_eld: rate-limit ELD parsing errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror of Mark Brown's ASoC: hdac_hdmi rate-limit patch (commit [lkml.kernel.org/lkml/2025/6/13/1380]) for the generic snd_parse_eld() helper used by ASoC hdmi-codec. When a HDMI sink is disconnected (e.g. a board with two HDMI outputs and only one cable), userspace audio servers like PipeWire keep probing the disconnected card and trigger: HDMI: Unknown ELD version 0 at every probe — easily 30+ messages per burst on rk3588. The same applies to malformed ELD (MNL out of range). Both conditions are expected when no sink is attached; rate-limit the dev_info() so the kernel ring buffer does not fill up. Signed-off-by: Francesco Saverio Pavone Link: https://patch.msgid.link/20260516141244.21801-1-pavone.lawyer@gmail.com Signed-off-by: Takashi Iwai --- sound/core/pcm_drm_eld.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/pcm_drm_eld.c b/sound/core/pcm_drm_eld.c index cb2eebaac85f..1941ee520063 100644 --- a/sound/core/pcm_drm_eld.c +++ b/sound/core/pcm_drm_eld.c @@ -334,7 +334,7 @@ int snd_parse_eld(struct device *dev, struct snd_parsed_hdmi_eld *e, e->eld_ver = GRAB_BITS(buf, 0, 3, 5); if (e->eld_ver != ELD_VER_CEA_861D && e->eld_ver != ELD_VER_PARTIAL) { - dev_info(dev, "HDMI: Unknown ELD version %d\n", e->eld_ver); + dev_info_ratelimited(dev, "HDMI: Unknown ELD version %d\n", e->eld_ver); goto out_fail; } @@ -357,7 +357,7 @@ int snd_parse_eld(struct device *dev, struct snd_parsed_hdmi_eld *e, e->product_id = get_unaligned_le16(buf + 18); if (mnl > ELD_MAX_MNL) { - dev_info(dev, "HDMI: MNL is reserved value %d\n", mnl); + dev_info_ratelimited(dev, "HDMI: MNL is reserved value %d\n", mnl); goto out_fail; } else if (ELD_FIXED_BYTES + mnl > size) { dev_info(dev, "HDMI: out of range MNL %d\n", mnl); -- cgit v1.2.3 From b09a45601094c7f4ec4db8090b825fa61e169d93 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 14 May 2026 14:31:49 -0700 Subject: hwmon: (lm90) Stop work before releasing hwmon device Sashiko reports: In lm90_probe(), the devm action to cancel the alert_work and report_work (lm90_restore_conf) is registered in lm90_init_client() before devm_hwmon_device_register_with_info() is called. Because devm executes cleanup actions in reverse order during module unbind or probe failure, the hwmon device is unregistered and freed first. If lm90_alert_work() or lm90_report_alarms() runs in the window between the hwmon device being freed and the delayed works being cancelled, lm90_update_alarms() will dereference the freed data->hwmon_dev here. Fix the problem by canceling the workers separately after registering the hwmon device and before registering the interrupt handler. This ensures that the workers are canceled after interrupts are disabled and before the hwmon device is released. Add "shutdown" flag to indicate that device shutdown is in progress to prevent workers from being re-armed. Fixes: f6d0775119fb9 ("hwmon: (lm90) Rework alarm/status handling") Reported-by: Sashiko Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 3c10a5066b53..c4a9dafff81d 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -736,6 +736,7 @@ struct lm90_data { struct hwmon_chip_info chip; struct delayed_work alert_work; struct work_struct report_work; + bool shutdown; /* true if shutting down */ bool valid; /* true if register values are valid */ bool alarms_valid; /* true if status register values are valid */ unsigned long last_updated; /* in jiffies */ @@ -1154,6 +1155,9 @@ static void lm90_report_alarms(struct work_struct *work) static int lm90_update_alarms_locked(struct lm90_data *data, bool force) { + if (data->shutdown) + return 0; + if (force || !data->alarms_valid || time_after(jiffies, data->alarms_updated + msecs_to_jiffies(data->update_interval))) { struct i2c_client *client = data->client; @@ -2584,15 +2588,23 @@ static void lm90_restore_conf(void *_data) struct lm90_data *data = _data; struct i2c_client *client = data->client; - cancel_delayed_work_sync(&data->alert_work); - cancel_work_sync(&data->report_work); - /* Restore initial configuration */ if (data->flags & LM90_HAVE_CONVRATE) lm90_write_convrate(data, data->convrate_orig); lm90_write_reg(client, LM90_REG_CONFIG1, data->config_orig); } +static void lm90_stop_work(void *_data) +{ + struct lm90_data *data = _data; + + hwmon_lock(data->hwmon_dev); + data->shutdown = true; + hwmon_unlock(data->hwmon_dev); + cancel_delayed_work_sync(&data->alert_work); + cancel_work_sync(&data->report_work); +} + static int lm90_init_client(struct i2c_client *client, struct lm90_data *data) { struct device_node *np = client->dev.of_node; @@ -2902,6 +2914,10 @@ static int lm90_probe(struct i2c_client *client) data->hwmon_dev = hwmon_dev; + err = devm_add_action_or_reset(&client->dev, lm90_stop_work, data); + if (err) + return err; + if (client->irq) { dev_dbg(dev, "IRQ: %d\n", client->irq); err = devm_request_threaded_irq(dev, client->irq, @@ -2930,7 +2946,7 @@ static void lm90_alert(struct i2c_client *client, enum i2c_alert_protocol type, */ struct lm90_data *data = i2c_get_clientdata(client); - if ((data->flags & LM90_HAVE_BROKEN_ALERT) && + if (!data->shutdown && (data->flags & LM90_HAVE_BROKEN_ALERT) && (data->current_alarms & data->alert_alarms)) { if (!(data->config & 0x80)) { dev_dbg(&client->dev, "Disabling ALERT#\n"); -- cgit v1.2.3 From 873e919e3101063a7a75989510ccfc125a4391cf Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 14 May 2026 14:41:00 -0700 Subject: hwmon: (lm90) Add lock protection to lm90_alert Sashiko reports: lm90_alert() executes in the smbus alert context and calls lm90_update_confreg() to disable the hardware alert line, without acquiring hwmon_lock. Concurrently, sysfs write operations (such as lm90_write_convrate) hold the hwmon_lock, temporarily modify data->config, and then restore it. If an alert interrupt occurs concurrently with a sysfs write, the sysfs path will overwrite the alert handler's modifications to data->config and the hardware register. This unintentionally re-enables the hardware alert line while the alarm is still active, causing an interrupt storm. Add the missing lock to lm90_alert() to solve the problem. Fixes: 7a1d220ccb0cc ("hwmon: (lm90) Introduce function to update configuration register") Reported-by: Sashiko Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index c4a9dafff81d..1eeb608e5903 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -2946,6 +2946,7 @@ static void lm90_alert(struct i2c_client *client, enum i2c_alert_protocol type, */ struct lm90_data *data = i2c_get_clientdata(client); + hwmon_lock(data->hwmon_dev); if (!data->shutdown && (data->flags & LM90_HAVE_BROKEN_ALERT) && (data->current_alarms & data->alert_alarms)) { if (!(data->config & 0x80)) { @@ -2955,6 +2956,7 @@ static void lm90_alert(struct i2c_client *client, enum i2c_alert_protocol type, schedule_delayed_work(&data->alert_work, max_t(int, HZ, msecs_to_jiffies(data->update_interval))); } + hwmon_unlock(data->hwmon_dev); } else { dev_dbg(&client->dev, "Everything OK\n"); } -- cgit v1.2.3 From 93d93f5f8da791e98159795c6ef683f45bd95d13 Mon Sep 17 00:00:00 2001 From: Heechan Kang Date: Sun, 17 May 2026 03:47:09 +0900 Subject: io_uring/waitid: clear waitid info before copying it to userspace IORING_OP_WAITID stores its result fields in struct io_waitid::info and later copies them to userspace siginfo. The prep path initializes the request arguments, but it does not initialize info itself. If the wait operation completes without reporting a child event, the common wait code can return without writing wo_info. In that case io_waitid_finish() still copies iw->info to userspace, exposing stale bytes from the reused io_kiocb command storage. Clear the result storage during prep so the io_uring path matches the regular waitid syscall, which uses a zero-initialized struct waitid_info. Fixes: f31ecf671ddc ("io_uring: add IORING_OP_WAITID support") Cc: stable@vger.kernel.org # 6.7+ Signed-off-by: Heechan Kang Link: https://patch.msgid.link/20260516184709.852814-1-gganji11@naver.com Signed-off-by: Jens Axboe --- io_uring/waitid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/waitid.c b/io_uring/waitid.c index d25d60aed6af..32f68fd7fcdd 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -275,6 +275,7 @@ int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) iw->options = READ_ONCE(sqe->file_index); iw->head = NULL; iw->infop = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + memset(&iw->info, 0, sizeof(iw->info)); return 0; } -- cgit v1.2.3 From 55a0005518195fdea1fd2991b07644f8dc97ea8e Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Fri, 15 May 2026 21:16:16 +0100 Subject: tracing: Fix desc in error path for the trace remote test module During initialisation in remote_test_load(), if one of the simple_ring_buffer fails to initialise, the error path attempts to rollback initialised buffers. However, the rollback incorrectly uses the global pointer to the trace descriptor, which is only set upon successful load completion. Fix the error path by using the local pointer to the descriptor. Link: https://patch.msgid.link/20260515201616.337469-1-vdonnefort@google.com Fixes: ea908a2b79c8 ("tracing: Add a trace remote module for testing") Reported-by: Sashiko Signed-off-by: Vincent Donnefort base-commit: 5d6919055dec134de3c40167a490f33c74c12581 Signed-off-by: Steven Rostedt --- kernel/trace/remote_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/remote_test.c b/kernel/trace/remote_test.c index 6c1b7701ddae..a3e2c9b606eb 100644 --- a/kernel/trace/remote_test.c +++ b/kernel/trace/remote_test.c @@ -110,9 +110,9 @@ static struct trace_buffer_desc *remote_test_load(unsigned long size, void *unus return remote_test_buffer_desc; err_unload: - for_each_ring_buffer_desc(rb_desc, cpu, remote_test_buffer_desc) + for_each_ring_buffer_desc(rb_desc, cpu, desc) remote_test_unload_simple_rb(rb_desc->cpu); - trace_remote_free_buffer(remote_test_buffer_desc); + trace_remote_free_buffer(desc); err_free_desc: kfree(desc); -- cgit v1.2.3 From 0039ac8305064e455f04d412ec3896c4fe41d04f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 16 May 2026 22:10:08 +0200 Subject: batman-adv: fix batadv_skb_is_frag() kernel-doc The kernel-doc comment for batadv_skb_is_frag() contained two errors: * the function description referred to "gain a unicast packet" instead of "contains unicast fragment". * the Return section omitted "merged" from "newly skb", leaving the description grammatically incorrect and inconsistent with the function description. Fixes: bc62216dc8e2 ("batman-adv: frag: disallow unicast fragment in fragment") Signed-off-by: Sven Eckelmann --- net/batman-adv/fragmentation.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 4a594aa2ebf6..e9553db42349 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -305,10 +305,10 @@ free: } /** - * batadv_skb_is_frag() - check if newly merged skb is gain a unicast packet + * batadv_skb_is_frag() - check if newly merged skb contains unicast fragment * @skb: newly merged skb * - * Return: if newly skb is of type BATADV_UNICAST_FRAG + * Return: if newly merged skb is of type BATADV_UNICAST_FRAG */ static bool batadv_skb_is_frag(struct sk_buff *skb) { -- cgit v1.2.3 From 92cee08dc4f00e77fd1317e4343c5d458b0abab7 Mon Sep 17 00:00:00 2001 From: Cole Leavitt Date: Sat, 4 Apr 2026 22:41:44 -0700 Subject: wifi: iwlwifi: mld: fix TSO segmentation explosion when AMSDU is disabled When the TLC notification disables AMSDU for a TID, the MLD driver sets max_tid_amsdu_len to the sentinel value 1. The TSO segmentation path in iwl_mld_tx_tso_segment() checks for zero but not for this sentinel, allowing it to reach the num_subframes calculation: num_subframes = (max_tid_amsdu_len + pad) / (subf_len + pad) = (1 + 2) / (1534 + 2) = 0 This zero propagates to iwl_tx_tso_segment() which sets: gso_size = num_subframes * mss = 0 Calling skb_gso_segment() with gso_size=0 creates over 32000 tiny segments from a single GSO skb. This floods the TX ring with ~1024 micro-frames (the rest are purged), creating a massive burst of TX completion events that can lead to memory corruption and a subsequent use-after-free in TCP's retransmit queue (refcount underflow in tcp_shifted_skb, NULL deref in tcp_rack_detect_loss). The MVM driver is immune because it checks mvmsta->amsdu_enabled before reaching the num_subframes calculation. The MLD driver has no equivalent bitmap check and relies solely on max_tid_amsdu_len, which does not catch the sentinel value. Fix this by detecting the sentinel value (max_tid_amsdu_len == 1) at the existing check and falling back to non-AMSDU TSO segmentation. Also add a WARN_ON_ONCE guard after the num_subframes division as defense-in-depth to catch any future code paths that produce zero through a different mechanism. Suggested-by: Miriam Rachel Korenblit Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Cole Leavitt Link: https://patch.msgid.link/20260405054145.1064152-3-cole@unwrap.rs Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mld/tx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/tx.c b/drivers/net/wireless/intel/iwlwifi/mld/tx.c index 546d09a38dab..094a28f75559 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/tx.c @@ -834,7 +834,7 @@ static int iwl_mld_tx_tso_segment(struct iwl_mld *mld, struct sk_buff *skb, return -EINVAL; max_tid_amsdu_len = sta->cur->max_tid_amsdu_len[tid]; - if (!max_tid_amsdu_len) + if (!max_tid_amsdu_len || max_tid_amsdu_len == 1) return iwl_tx_tso_segment(skb, 1, netdev_flags, mpdus_skbs); /* Sub frame header + SNAP + IP header + TCP header + MSS */ @@ -846,6 +846,9 @@ static int iwl_mld_tx_tso_segment(struct iwl_mld *mld, struct sk_buff *skb, */ num_subframes = (max_tid_amsdu_len + pad) / (subf_len + pad); + if (WARN_ON_ONCE(!num_subframes)) + return iwl_tx_tso_segment(skb, 1, netdev_flags, mpdus_skbs); + if (sta->max_amsdu_subframes && num_subframes > sta->max_amsdu_subframes) num_subframes = sta->max_amsdu_subframes; -- cgit v1.2.3 From 2becb38a3e217ef2b2f42fddd7db7a25905ec291 Mon Sep 17 00:00:00 2001 From: Sheroz Juraev Date: Sun, 15 Mar 2026 13:12:21 +0500 Subject: wifi: iwlwifi: mld: stop TX during firmware restart When iwlwifi firmware crashes (e.g., NMI_INTERRUPT_UNKNOWN on Intel BE201/Wi-Fi 7), iwl_mld_nic_error() sets mld->fw_status.in_hw_restart to true. However, iwl_mld_tx_from_txq() does not check this flag before dequeuing frames from mac80211 and pushing them to the transport layer. Since the firmware is dead, iwl_trans_tx() returns -EIO for each frame, which then gets freed immediately. Under high-throughput conditions (e.g., Tailscale UDP traffic or active SSH sessions), this creates a tight dequeue-send-fail-free loop that wastes CPU cycles and generates rapid skb allocation churn, leading to memory pressure from slab fragmentation. The RX path already has this guard (iwl_mld_rx_mpdu checks in_hw_restart at rx.c:1906), and so does the TXQ allocation worker (iwl_mld_add_txqs_wk at tx.c:156). Add the same guard to iwl_mld_tx_from_txq() to stop all TX during firmware restart. Frames left in mac80211's TXQs are naturally drained after restart completes, when queue reallocation triggers iwl_mld_tx_from_txq() via iwl_mld_add_txq_list(), or when new upper-layer traffic invokes wake_tx_queue. Tested on ASUS Zenbook 14 UX3405CA with Intel BE201 (Wi-Fi 7) on kernel 6.19.5 where the firmware crashes approximately every 10-15 minutes under Tailscale traffic. Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Cc: stable@vger.kernel.org Signed-off-by: Sheroz Juraev Link: https://patch.msgid.link/20260315081221.2678478-1-goodmartiandev@gmail.com Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mld/tx.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/tx.c b/drivers/net/wireless/intel/iwlwifi/mld/tx.c index 094a28f75559..0bcb1ae69468 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/tx.c @@ -973,6 +973,16 @@ void iwl_mld_tx_from_txq(struct iwl_mld *mld, struct ieee80211_txq *txq) struct sk_buff *skb = NULL; u8 zero_addr[ETH_ALEN] = {}; + /* + * Don't transmit during firmware restart. The firmware is dead, + * so iwl_trans_tx() would return -EIO for each frame. Avoid the + * overhead of dequeuing from mac80211 only to immediately free + * the skbs, and the potential memory pressure from rapid skb + * allocation churn during high-throughput restart scenarios. + */ + if (unlikely(mld->fw_status.in_hw_restart)) + return; + /* * No need for threads to be pending here, they can leave the first * taker all the work. -- cgit v1.2.3 From d733ed481fd20a8e7bfe5119c4e77761ba3f87ee Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Fri, 15 May 2026 15:14:56 +0300 Subject: wifi: iwlwifi: mld: don't dereference a pointer before NULL checking it In iwl_mld_remove_link, the link->fw_id is saved at the beginning of the function so we have it after we freed the link. But the link pointer can be NULL, and is not checked when the fw_id is stored. Fix it by simply freeing the link at the end of the function. fFixes: 0e66a39f4f0e ("wifi: iwlwifi: fix potential use after free in iwl_mld_remove_link()") Reviewed-by: Johannes Berg Link: https://patch.msgid.link/20260515151351.371f40fc6711.I6a82cfe9655564e9c5731af91c36493b26b1208e@changeid Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mld/link.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/link.c b/drivers/net/wireless/intel/iwlwifi/mld/link.c index b66e84d2365f..be2cdf43c72e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/link.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2024-2025 Intel Corporation + * Copyright (C) 2024-2026 Intel Corporation */ #include "constants.h" @@ -504,7 +504,6 @@ void iwl_mld_remove_link(struct iwl_mld *mld, struct iwl_mld_vif *mld_vif = iwl_mld_vif_from_mac80211(bss_conf->vif); struct iwl_mld_link *link = iwl_mld_link_from_mac80211(bss_conf); bool is_deflink = link == &mld_vif->deflink; - u8 fw_id = link->fw_id; if (WARN_ON(!link || link->active)) return; @@ -512,15 +511,15 @@ void iwl_mld_remove_link(struct iwl_mld *mld, iwl_mld_rm_link_from_fw(mld, bss_conf); /* Continue cleanup on failure */ - if (!is_deflink) - kfree_rcu(link, rcu_head); - RCU_INIT_POINTER(mld_vif->link[bss_conf->link_id], NULL); - if (WARN_ON(fw_id >= mld->fw->ucode_capa.num_links)) + if (WARN_ON(link->fw_id >= mld->fw->ucode_capa.num_links)) return; - RCU_INIT_POINTER(mld->fw_id_to_bss_conf[fw_id], NULL); + RCU_INIT_POINTER(mld->fw_id_to_bss_conf[link->fw_id], NULL); + + if (!is_deflink) + kfree_rcu(link, rcu_head); } void iwl_mld_handle_missed_beacon_notif(struct iwl_mld *mld, -- cgit v1.2.3 From fb84b5cbcaab3ca0f4e961d92a40ed7f3aac483b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 May 2026 15:14:57 +0300 Subject: wifi: iwlwifi: mvm: fix driver-set TX rates on old devices On old devices such as 7265D, rates are still encoded in version 1 format, which doesn't use the CCK/OFDM rate index (0-3/0-7) but rather their PLCP value (e.g. 10 for 1 Mbps CCK rate.) While introducing v3 rates, I changed the driver from internally handling v1 rates and converting to v2, to internally handling v3 and converting to v1 or v2 according to the firmware. I accordingly changed the code in iwl_mvm_mac80211_idx_to_hwrate() to no longer have different values for different APIs. This was correct. However, I later reverted this part of the change, because it was reported that I had broken beacon rates, causing a FW assert/crash. This caused TX_CMD rates to be set incorrectly, potentially causing a warning when reported back from the device as having been used. Fix this (hopefully correctly now) by handling beacon rates in the TX_CMD that's embedded in the beacon template command separately. Restore iwl_mvm_mac80211_idx_to_hwrate() to return only the rate index, not PLCP value, fixing the real TX_CMD. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20260515151351.7407e293dff7.I4ea1a17f8fe99c933d3f3e30d077cf4246125c3e@changeid Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 27 +++++++++++++++-------- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 14 ++++-------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index c523c5e82d4a..8ffa72aca3cf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2025 Intel Corporation + * Copyright (C) 2012-2014, 2018-2026 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH */ @@ -927,13 +927,18 @@ u8 iwl_mvm_mac_ctxt_get_lowest_rate(struct iwl_mvm *mvm, u16 iwl_mvm_mac_ctxt_get_beacon_flags(const struct iwl_fw *fw, u8 rate_idx) { - u16 flags = iwl_mvm_mac80211_idx_to_hwrate(fw, rate_idx); bool is_new_rate = iwl_fw_lookup_cmd_ver(fw, BEACON_TEMPLATE_CMD, 0) > 10; + u16 flags = 0; if (rate_idx <= IWL_LAST_CCK_RATE) flags |= is_new_rate ? IWL_MAC_BEACON_CCK : IWL_MAC_BEACON_CCK_V1; + if (iwl_fw_lookup_cmd_ver(fw, TX_CMD, 0) > 8) + flags |= iwl_mvm_mac80211_idx_to_hwrate(fw, rate_idx); + else + flags |= iwl_fw_rate_idx_to_plcp(rate_idx); + return flags; } @@ -962,6 +967,7 @@ static void iwl_mvm_mac_ctxt_set_tx(struct iwl_mvm *mvm, { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); struct ieee80211_tx_info *info; + u32 rate_n_flags = 0; u8 rate; u32 tx_flags; @@ -981,18 +987,21 @@ static void iwl_mvm_mac_ctxt_set_tx(struct iwl_mvm *mvm, IWL_UCODE_TLV_CAPA_BEACON_ANT_SELECTION)) { iwl_mvm_toggle_tx_ant(mvm, &mvm->mgmt_last_antenna_idx); - tx_params->rate_n_flags = - cpu_to_le32(BIT(mvm->mgmt_last_antenna_idx) << - RATE_MCS_ANT_POS); + rate_n_flags |= BIT(mvm->mgmt_last_antenna_idx) << + RATE_MCS_ANT_POS; } rate = iwl_mvm_mac_ctxt_get_beacon_rate(mvm, info, vif); - tx_params->rate_n_flags |= - cpu_to_le32(iwl_mvm_mac80211_idx_to_hwrate(mvm->fw, rate)); - if (rate == IWL_FIRST_CCK_RATE) - tx_params->rate_n_flags |= cpu_to_le32(RATE_MCS_CCK_MSK_V1); + if (rate < IWL_FIRST_OFDM_RATE) + rate_n_flags |= RATE_MCS_MOD_TYPE_CCK; + else + rate_n_flags |= RATE_MCS_MOD_TYPE_LEGACY_OFDM; + + rate_n_flags |= iwl_mvm_mac80211_idx_to_hwrate(mvm->fw, rate); + tx_params->rate_n_flags = iwl_mvm_v3_rate_to_fw(rate_n_flags, + mvm->fw_rates_ver); } int iwl_mvm_mac_ctxt_send_beacon_cmd(struct iwl_mvm *mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 4a33a032c2a7..f052537e9567 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2025 Intel Corporation + * Copyright (C) 2012-2014, 2018-2026 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH */ @@ -159,15 +159,9 @@ int iwl_mvm_legacy_rate_to_mac80211_idx(u32 rate_n_flags, u8 iwl_mvm_mac80211_idx_to_hwrate(const struct iwl_fw *fw, int rate_idx) { - if (iwl_fw_lookup_cmd_ver(fw, TX_CMD, 0) > 8) - /* In the new rate legacy rates are indexed: - * 0 - 3 for CCK and 0 - 7 for OFDM. - */ - return (rate_idx >= IWL_FIRST_OFDM_RATE ? - rate_idx - IWL_FIRST_OFDM_RATE : - rate_idx); - - return iwl_fw_rate_idx_to_plcp(rate_idx); + return rate_idx >= IWL_FIRST_OFDM_RATE ? + rate_idx - IWL_FIRST_OFDM_RATE : + rate_idx; } u8 iwl_mvm_mac80211_ac_to_ucode_ac(enum ieee80211_ac_numbers ac) -- cgit v1.2.3 From 25e416f148f3f948638ca7c6ff63fd842d9c07ad Mon Sep 17 00:00:00 2001 From: Moriya Itzchaki Date: Fri, 15 May 2026 15:14:58 +0300 Subject: wifi: iwlwifi: use correct function to read STEP_URM register CNVI_PMU_STEP_FLOW is a PRPH register, not a UMAC PRPH register. Use iwl_read_prph() instead of iwl_read_umac_prph() to read it correctly. Signed-off-by: Moriya Itzchaki Reviewed-by: Johannes Berg Link: https://patch.msgid.link/20260515151352.3a69fa2dbda7.I8d96635a9c06a835b05a10b6d66c8a9299676246@changeid Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c index a50e845cea42..64262bcca55d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c @@ -398,9 +398,9 @@ void iwl_trans_pcie_gen2_fw_alive(struct iwl_trans *trans) mutex_unlock(&trans_pcie->mutex); if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) - trans->step_urm = !!(iwl_read_umac_prph(trans, - CNVI_PMU_STEP_FLOW) & - CNVI_PMU_STEP_FLOW_FORCE_URM); + trans->step_urm = !!(iwl_read_prph(trans, + CNVI_PMU_STEP_FLOW) & + CNVI_PMU_STEP_FLOW_FORCE_URM); } static bool iwl_pcie_set_ltr(struct iwl_trans *trans) -- cgit v1.2.3 From b753b3334bad7c4735b6e5face0c331d4be11dda Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 May 2026 15:14:59 +0300 Subject: wifi: iwlwifi: mld: don't WARN on WoWLAN suspend w/o BSS vif Clearly, from a user perspective, it must be valid to configure WoWLAN (which can include network detection) and then suspend while not connected to a network, or even without an interface at all (WoWLAN config is handled on a per-wiphy basis). Since mac80211 doesn't distinguish these cases and simply calls the driver to suspend whenever WoWLAN is configured, the driver has to cleanly handle the case where it's called for WoWLAN but no (BSS) interface exists. Remove the WARN_ON(), move the print so it doesn't get done in this case, and keep returning 1 to disconnect everything. Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20260515151352.0c55d1135409.I54f8be0e2aa28cfb1cb1dcf3b2d2d8fe75b4397b@changeid Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mld/d3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/d3.c b/drivers/net/wireless/intel/iwlwifi/mld/d3.c index ef98efc8fb1b..3a595a1c2e00 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/d3.c @@ -1930,12 +1930,12 @@ int iwl_mld_wowlan_suspend(struct iwl_mld *mld, struct cfg80211_wowlan *wowlan) if (WARN_ON(!wowlan)) return 1; - IWL_DEBUG_WOWLAN(mld, "Starting the wowlan suspend flow\n"); - bss_vif = iwl_mld_get_bss_vif(mld); - if (WARN_ON(!bss_vif)) + if (!bss_vif) return 1; + IWL_DEBUG_WOWLAN(mld, "Starting the wowlan suspend flow\n"); + if (!bss_vif->cfg.assoc) { int ret; /* If we're not associated, this must be netdetect */ -- cgit v1.2.3 From 734a4e051b9767f439137940095d63afbfed0745 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 15 May 2026 15:15:00 +0300 Subject: wifi: iwlwifi: mld: disconnect only after 6 beacons without Rx After 4 missed beacons since last Rx, the firmware will send an NDP to the AP. If the NDP is ACK'ed, it'll reset the missed_beacons_since_last_rx counter. Disconnecting after 4 beacons doesn't give enough time to the firmware to send the NDP. Wait until we get 6 missed beacons since last Rx before disconnecting. Signed-off-by: Emmanuel Grumbach Link: https://patch.msgid.link/20260515151352.c4ed0d849f98.Iefa2e8be9edfc74683997eea60bb53c2002f31f0@changeid Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mld/constants.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/constants.h b/drivers/net/wireless/intel/iwlwifi/mld/constants.h index e2a5eecc18c3..890abcab3837 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/constants.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/constants.h @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2024-2025 Intel Corporation + * Copyright (C) 2024-2026 Intel Corporation */ #ifndef __iwl_mld_constants_h__ #define __iwl_mld_constants_h__ -#define IWL_MLD_MISSED_BEACONS_SINCE_RX_THOLD 4 +#define IWL_MLD_MISSED_BEACONS_SINCE_RX_THOLD 6 #define IWL_MLD_MISSED_BEACONS_THRESHOLD 8 #define IWL_MLD_MISSED_BEACONS_THRESHOLD_LONG 19 #define IWL_MLD_BCN_LOSS_EXIT_ESR_THRESH_2_LINKS 5 -- cgit v1.2.3 From db339b6bc9f234b4883eb02946ea01d8d9faa11c Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 28 Apr 2026 11:03:38 +0100 Subject: dt-bindings: display/msm: Fix typo in clock-names property Fix the typo "clocks-names" to "clock-names" in the allOf/if conditional blocks. Fixes: 9be5c47908e66 ("dt-bindings: display/msm: expand to support MST") Fixes: 7403e87c13847 ("dt-bindings: display: msm: Fix reg ranges and clocks on Glymur") Signed-off-by: Lad Prabhakar Reviewed-by: Krzysztof Kozlowski Patchwork: https://patchwork.freedesktop.org/patch/721682/ Message-ID: <20260428100338.3179722-1-prabhakar.mahadev-lad.rj@bp.renesas.com> Signed-off-by: Rob Clark --- .../devicetree/bindings/display/msm/dp-controller.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml index b96e1ea40d3f..094a6383bb77 100644 --- a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml +++ b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml @@ -244,7 +244,7 @@ allOf: clocks: minItems: 5 maxItems: 5 - clocks-names: + clock-names: minItems: 5 maxItems: 5 @@ -265,7 +265,7 @@ allOf: clocks: minItems: 5 maxItems: 6 - clocks-names: + clock-names: minItems: 5 maxItems: 6 @@ -286,7 +286,7 @@ allOf: clocks: minItems: 6 maxItems: 6 - clocks-names: + clock-names: minItems: 6 maxItems: 6 @@ -324,7 +324,7 @@ allOf: clocks: minItems: 6 maxItems: 8 - clocks-names: + clock-names: minItems: 6 maxItems: 8 @@ -344,7 +344,7 @@ allOf: clocks: minItems: 5 maxItems: 6 - clocks-names: + clock-names: minItems: 5 maxItems: 6 -- cgit v1.2.3 From 3d562d35a044ae798cab421c65a116f8cedfa5d4 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sun, 17 May 2026 09:55:28 +0200 Subject: bpf: Check global subprog exception paths Global subprogs are verified independently and are not descended into when their callers are symbolically executed. This means a caller can hold references or locks across a global subprog call that may throw, while the verifier only checks the non-exceptional return path at the call site. Record whether a subprog might throw in the CFG summary pass, alongside the existing might_sleep and packet-data-changing summaries, and propagate that effect through reachable callees. When a global subprog is marked as possibly throwing, push the normal continuation and validate the exceptional path immediately at the call site, avoiding a synthetic exception state and associated special case in the pruning checks. Fixes: f18b03fabaa9 ("bpf: Implement BPF exceptions") Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20260517075530.3461166-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 ++ kernel/bpf/cfg.c | 13 ++++++++++++- kernel/bpf/verifier.c | 23 +++++++++++++++++------ 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b148f816f25b..185b2aa43a42 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -729,6 +729,7 @@ struct bpf_subprog_info { */ s16 fastcall_stack_off; bool has_tail_call: 1; + bool might_throw: 1; bool tail_call_reachable: 1; bool has_ld_abs: 1; bool is_cb: 1; @@ -1308,6 +1309,7 @@ void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask); bool bpf_subprog_is_global(const struct bpf_verifier_env *env, int subprog); int bpf_find_subprog(struct bpf_verifier_env *env, int off); +bool bpf_is_throw_kfunc(struct bpf_insn *insn); int bpf_compute_const_regs(struct bpf_verifier_env *env); int bpf_prune_dead_branches(struct bpf_verifier_env *env); int bpf_check_cfg(struct bpf_verifier_env *env); diff --git a/kernel/bpf/cfg.c b/kernel/bpf/cfg.c index 998f42a8189a..26d37066465f 100644 --- a/kernel/bpf/cfg.c +++ b/kernel/bpf/cfg.c @@ -64,11 +64,19 @@ static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off) subprog->might_sleep = true; } +static void mark_subprog_might_throw(struct bpf_verifier_env *env, int off) +{ + struct bpf_subprog_info *subprog; + + subprog = bpf_find_containing_subprog(env, off); + subprog->might_throw = true; +} + /* 't' is an index of a call-site. * 'w' is a callee entry point. * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED. * Rely on DFS traversal order and absence of recursive calls to guarantee that - * callee's change_pkt_data marks would be correct at that moment. + * callee's effect marks would be correct at that moment. */ static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w) { @@ -78,6 +86,7 @@ static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w) callee = bpf_find_containing_subprog(env, w); caller->changes_pkt_data |= callee->changes_pkt_data; caller->might_sleep |= callee->might_sleep; + caller->might_throw |= callee->might_throw; } enum { @@ -509,6 +518,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env) mark_subprog_might_sleep(env, t); if (ret == 0 && bpf_is_kfunc_pkt_changing(&meta)) mark_subprog_changes_pkt_data(env, t); + if (ret == 0 && bpf_is_throw_kfunc(insn)) + mark_subprog_might_throw(env, t); } return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 88b40c979b56..7fb88e1cd7c4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -442,7 +442,6 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id) static bool is_sync_callback_calling_kfunc(u32 btf_id); static bool is_async_callback_calling_kfunc(u32 btf_id); static bool is_callback_calling_kfunc(u32 btf_id); -static bool is_bpf_throw_kfunc(struct bpf_insn *insn); static bool is_bpf_wq_set_callback_kfunc(u32 btf_id); static bool is_task_work_add_kfunc(u32 func_id); @@ -5405,7 +5404,7 @@ continue_func: if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) { bool err = false; - if (!is_bpf_throw_kfunc(insn + i)) + if (!bpf_is_throw_kfunc(insn + i)) continue; for (tmp = idx; tmp >= 0 && !err; tmp = dinfo[tmp].caller) { if (subprog[tmp].is_cb) { @@ -9499,6 +9498,9 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins return 0; } +static int process_bpf_exit_full(struct bpf_verifier_env *env, + bool *do_print_state, bool exception_exit); + static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { @@ -9552,6 +9554,17 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; } + if (env->subprog_info[subprog].might_throw) { + struct bpf_verifier_state *branch; + + branch = push_stack(env, *insn_idx + 1, *insn_idx, false); + if (IS_ERR(branch)) { + verbose(env, "failed to push state for global subprog exception path\n"); + return PTR_ERR(branch); + } + return process_bpf_exit_full(env, NULL, true); + } + /* continue with next insn after call */ return 0; } @@ -11782,7 +11795,7 @@ static bool is_async_callback_calling_kfunc(u32 btf_id) is_task_work_add_kfunc(btf_id); } -static bool is_bpf_throw_kfunc(struct bpf_insn *insn) +bool bpf_is_throw_kfunc(struct bpf_insn *insn) { return bpf_pseudo_kfunc_call(insn) && insn->off == 0 && insn->imm == special_kfunc_list[KF_bpf_throw]; @@ -12972,8 +12985,6 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca } static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name); -static int process_bpf_exit_full(struct bpf_verifier_env *env, - bool *do_print_state, bool exception_exit); static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) @@ -13354,7 +13365,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) env->prog->call_session_cookie = true; - if (is_bpf_throw_kfunc(insn)) + if (bpf_is_throw_kfunc(insn)) return process_bpf_exit_full(env, NULL, true); return 0; -- cgit v1.2.3 From 511a5db3c9ec55dd5393b8639000df080c296ef4 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sun, 17 May 2026 09:55:29 +0200 Subject: selftests/bpf: Cover global subprog exception leaks Add a verifier failure case where the caller holds a reference across a global subprog call that may throw. The program must be rejected because the exceptional path would skip the caller's reference release. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20260517075530.3461166-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/exceptions_fail.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 051e2b6f2694..ac44d60e5066 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -208,6 +208,28 @@ int reject_with_reference(void *ctx) return 0; } +__noinline int global_subprog_may_throw(struct __sk_buff *ctx) +{ + if (ctx->len) + bpf_throw(0); + return 0; +} + +SEC("?tc") +__failure __msg("Unreleased reference") +int reject_global_subprog_throw_with_reference(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + if (ctx->protocol) + global_subprog_may_throw(ctx); + bpf_obj_drop(f); + return 0; +} + __noinline static int subprog_ref(struct __sk_buff *ctx) { struct foo *f; -- cgit v1.2.3 From 23e6a1ca04ae44806439a5a446e62e4d42e80bb4 Mon Sep 17 00:00:00 2001 From: Carlos López Date: Tue, 12 May 2026 12:00:41 +0200 Subject: virt: sev-guest: Do not use host-controlled page order in cleanup path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When issuing an extended guest request (SVM_VMGEXIT_EXT_GUEST_REQUEST), get_ext_report() allocates a buffer to retrieve a certificate blob from the host, keeping track of its size in report_req->certs_len. However, the host may return SNP_GUEST_VMM_ERR_INVALID_LEN, indicating an invalid buffer size, as well as the expected length of such buffer. get_ext_report() subsequently updates report_req->certs_len with the host-controlled value, and cleans up the buffer by computing a page order from such value. This is incorrect, as the host-provided length may not match the page order of the original allocation, potentially resulting in corruption in the page allocator. Fix this by using alloc_pages_exact() instead, and reusing @npages to compute the size passed to free_pages_exact(). For consistency, also use @npages to compute the size when allocating the pages, even though this last change has no functional effect. Fixes: 3e385c0d6ce8 ("virt: sev-guest: Move SNP Guest Request data pages handling under snp_cmd_mutex") Signed-off-by: Carlos López Signed-off-by: Borislav Petkov (AMD) Tested-by: Michael Roth Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- drivers/virt/coco/sev-guest/sev-guest.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index e001e6769a43..910a1de0d5a7 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -176,7 +176,6 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques struct snp_guest_req req = {}; int ret, npages = 0, resp_len; sockptr_t certs_address; - struct page *page; if (sockptr_is_null(io->req_data) || sockptr_is_null(io->resp_data)) return -EINVAL; @@ -211,16 +210,15 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques * zeros to indicate that certificate data was not provided. */ npages = report_req->certs_len >> PAGE_SHIFT; - page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, - get_order(report_req->certs_len)); - if (!page) + req.certs_data = alloc_pages_exact(npages << PAGE_SHIFT, + GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!req.certs_data) return -ENOMEM; - req.certs_data = page_address(page); ret = set_memory_decrypted((unsigned long)req.certs_data, npages); if (ret) { pr_err("failed to mark page shared, ret=%d\n", ret); - __free_pages(page, get_order(report_req->certs_len)); + free_pages_exact(req.certs_data, npages << PAGE_SHIFT); return -EFAULT; } @@ -277,7 +275,7 @@ e_free_data: if (set_memory_encrypted((unsigned long)req.certs_data, npages)) WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); else - __free_pages(page, get_order(report_req->certs_len)); + free_pages_exact(req.certs_data, npages << PAGE_SHIFT); } return ret; } -- cgit v1.2.3 From 515e3996a4c26e7f955c13b3b19522a2c8642af9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 17 May 2026 07:43:16 -1000 Subject: sched_ext: Fix deadlock between scx_root_disable() and concurrent forks scx_root_disable() enters SCX_DISABLING before it grabs scx_enable_mutex to clear __scx_switched_all and scx_switching_all. task_should_scx() short-circuits on DISABLING, so forks in that window land on fair while next_active_class() still skips fair - the new tasks stall. This can deadlock the disable path itself: scx_alloc_and_add_sched() runs under scx_enable_mutex and creates a helper kthread; if that new kthread is one of the stalled fair tasks, the mutex holder waits forever and scx_root_disable() can never make progress. Only sub-sched support exposes this, since sub-sched enables are the only path where scx_alloc_and_add_sched() can race the root's disable. Move the DISABLING check after @scx_switching_all. @scx_switching_all serves as a proxy for __scx_switched_all, so while it's set, forks keep going to scx. Once cleared, DISABLING applies normally. v2: Reword in-source comment and description. (Andrea) Fixes: 337ec00b1d9c ("sched_ext: Implement cgroup sub-sched enabling and disabling") Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- kernel/sched/ext.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index a6d0a93d8174..547ca398f646 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4946,10 +4946,30 @@ static const struct kset_uevent_ops scx_uevent_ops = { */ bool task_should_scx(int policy) { - if (!scx_enabled() || unlikely(scx_enable_state() == SCX_DISABLING)) + /* if disabled, nothing should be on it */ + if (!scx_enabled()) return false; + + /* scx is taking over all SCHED_OTHER and SCHED_EXT tasks */ if (READ_ONCE(scx_switching_all)) return true; + + /* + * scx is tearing down - keep new SCHED_EXT tasks out. + * + * Must come after scx_switching_all test, which serves as a proxy + * for __scx_switched_all. While __scx_switched_all is set, we must + * return true via the branch above: a fork routed to fair would + * stall because next_active_class() skips fair. + * + * This can develop into a deadlock - scx holds scx_enable_mutex across + * kthread_create() in scx_alloc_and_add_sched(); if the new kthread is + * the stalled task, the disable path can never grab the mutex to clear + * scx_switching_all. + */ + if (unlikely(scx_enable_state() == SCX_DISABLING)) + return false; + return policy == SCHED_EXT; } -- cgit v1.2.3 From 608d76ec371406045c7686677870a54ccbf83eb6 Mon Sep 17 00:00:00 2001 From: Aryan Kushwaha Date: Sat, 16 May 2026 20:14:36 +0530 Subject: ALSA: hda/realtek: Add mute LED quirk for HP Pavilion Plus 14 The HP Pavilion Plus 14-eh0xxx with subsystem ID 103c:8a36 needs the ALC245 COEF bit mute LED quirk for the mute LED to follow the audio mute state. Add the missing quirk entry. Signed-off-by: Aryan Kushwaha Link: https://patch.msgid.link/20260516144436.35022-1-aryankushwaha3101@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index b6f5339cf1ea..6c872a24b8fc 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7084,6 +7084,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8a30, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8a31, "HP Envy 15", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8a34, "HP Pavilion x360 2-in-1 Laptop 14-ek0xxx", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), + SND_PCI_QUIRK(0x103c, 0x8a36, "HP Pavilion Plus 14-eh0xxx", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8a3d, "HP Victus 15-fb0xxx (MB 8A3D)", ALC245_FIXUP_HP_MUTE_LED_V2_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8a4f, "HP Victus 15-fa0xxx (MB 8A4F)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8a6e, "HP EDNA 360", ALC287_FIXUP_CS35L41_I2C_4), -- cgit v1.2.3 From e4d3386b74fba8e01280484b67ee481ece00201e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 17 May 2026 18:51:20 +0200 Subject: ALSA: pcm: Don't setup bogus iov_iter for silencing At transition to the iov_iter for PCM data transfer, we blindly applied the iov_iter setup also for silencing (i.e. data = NULL), and it leads to a calculation of bogus iov_iter. Fortunately this didn't cause troubles on most of architectures but it goes wrong on RISC-V now, causing a NULL dereference. Handle the NULL data case to treat the silencing in interleaved_copy() for addressing the bug above. noninterleaved_copy() has already the NULL data handling, so it doesn't need changes. Reported-by: Jiakai Xu Closes: https://lore.kernel.org/20260515051516.3103036-1-xujiakai24@mails.ucas.ac.cn Fixes: cf393babb37a ("ALSA: pcm: Add copy ops with iov_iter") Cc: Link: https://patch.msgid.link/20260517165121.31399-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/pcm_lib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 09c421cd9319..fe597f7d522d 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -2138,6 +2138,9 @@ static int interleaved_copy(struct snd_pcm_substream *substream, off = frames_to_bytes(runtime, off); frames = frames_to_bytes(runtime, frames); + if (!data) + return fill_silence(substream, 0, hwoff, NULL, frames); + return do_transfer(substream, 0, hwoff, data + off, frames, transfer, in_kernel); } -- cgit v1.2.3 From 5200f5f493f79f14bbdc349e402a40dfb32f23c8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 17 May 2026 13:59:58 -0700 Subject: Linux 7.1-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b7b80e84e1eb..9f59598d3a08 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 7 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* -- cgit v1.2.3 From 653f17c742601004774e3f8fb79d387d5ae6103e Mon Sep 17 00:00:00 2001 From: Jiakai Xu Date: Wed, 15 Apr 2026 07:52:16 +0000 Subject: RISC-V: KVM: Fix invalid HVA warning in steal-time recording kvm_riscv_vcpu_record_steal_time() assumes that the steal-time shared memory GPA (vcpu->arch.sta.shmem) is always backed by a valid guest memory slot. However, this assumption is not guaranteed by the KVM userspace ABI. A malicious or buggy userspace can set the STA shared memory GPA via KVM_SET_ONE_REG without establishing a corresponding memory region via KVM_SET_USER_MEMORY_REGION. In such cases, the GPA cannot be translated to a valid HVA and kvm_vcpu_gfn_to_hva() returns an error address. The current implementation incorrectly treats this as a kernel warning using WARN_ON(), which may escalate to a kernel panic when panic_on_warn is enabled. This is not a kernel bug condition but a normal invalid configuration from userspace, and should be handled gracefully. Fix it by removing WARN_ON() and treating invalid HVA as a normal failure case, resetting the STA shared memory state. Fixes: e9f12b5fff8ad0 ("RISC-V: KVM: Implement SBI STA extension") Signed-off-by: Jiakai Xu Signed-off-by: Jiakai Xu Assisted-by: OpenClaw:DeepSeek-V3.2 Reviewed-by: Nutty Liu Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20260415075216.2757427-1-xujiakai2025@iscas.ac.cn Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_sbi_sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c index 3b834709b429..60e50296a008 100644 --- a/arch/riscv/kvm/vcpu_sbi_sta.c +++ b/arch/riscv/kvm/vcpu_sbi_sta.c @@ -46,7 +46,7 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu) gfn = shmem >> PAGE_SHIFT; hva = kvm_vcpu_gfn_to_hva(vcpu, gfn); - if (WARN_ON(kvm_is_error_hva(hva))) { + if (kvm_is_error_hva(hva)) { vcpu->arch.sta.shmem = INVALID_GPA; return; } -- cgit v1.2.3 From 0835ee26938e15eccd70f7d33da386b6490f9449 Mon Sep 17 00:00:00 2001 From: Osama Abdelkader Date: Thu, 14 May 2026 19:36:40 +0200 Subject: riscv: kvm: return SBI_ERR_FAILURE for pmu_snapshot_set_shmem() when OOM kvm_riscv_vcpu_pmu_snapshot_set_shmem() returned -ENOMEM from the SBI extension handler, which caused kvm_riscv_vcpu_sbi_ecall() to abort KVM_RUN and surface the error to userspace instead of ompleting the ECALL with a negative SBI error in a0. Use SBI_ERR_FAILURE and the normal retdata path, matching other PMU handlers and kvm_sbi_ext_pmu_handler comment. Fixes: c2f41ddbcdd7 ("RISC-V: KVM: Implement SBI PMU Snapshot feature") Cc: stable@vger.kernel.org Signed-off-by: Osama Abdelkader Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20260514173642.41448-1-osama.abdelkader@gmail.com Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_pmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index a935ed96bc17..91aa0155a420 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -453,8 +453,10 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s } kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); - if (!kvpmu->sdata) - return -ENOMEM; + if (!kvpmu->sdata) { + sbiret = SBI_ERR_FAILURE; + goto out; + } /* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */ if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { -- cgit v1.2.3 From 0e9d0e7a7c78db7aa1c13796c65cfe0aefa54a5b Mon Sep 17 00:00:00 2001 From: Osama Abdelkader Date: Thu, 14 May 2026 19:36:41 +0200 Subject: riscv: kvm: return SBI_ERR_FAILURE for pmu_event_info() when OOM kvm_riscv_vcpu_pmu_event_info() returned -ENOMEM from the SBI extension handler, which caused kvm_riscv_vcpu_sbi_ecall() to abort KVM_RUN and surface the error to userspace instead of completing the ECALL with a negative SBI error in a0. Use SBI_ERR_FAILURE and the normal retdata path, matching other PMU handlers and kvm_sbi_ext_pmu_handler comment. Fixes: e309fd113b9f ("RISC-V: KVM: Implement get event info function") Cc: stable@vger.kernel.org Signed-off-by: Osama Abdelkader Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20260514173642.41448-2-osama.abdelkader@gmail.com Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_pmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 91aa0155a420..bb46dcbfb24d 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -501,8 +501,10 @@ int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low } einfo = kzalloc(shmem_size, GFP_KERNEL); - if (!einfo) - return -ENOMEM; + if (!einfo) { + ret = SBI_ERR_FAILURE; + goto out; + } ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size); if (ret) { -- cgit v1.2.3 From fdb69d401967fd88d27982a7e4984b2a3a4f0314 Mon Sep 17 00:00:00 2001 From: Jiakai Xu Date: Sun, 17 May 2026 12:44:14 +0000 Subject: RISC-V: KVM: Fix NULL pointer dereference in SBI v0.1 SEND_IPI handler The SBI v0.1 SEND_IPI handler iterates over the hart mask and calls kvm_get_vcpu_by_id() to find the target vcpu for each set bit. When a guest provides a hart mask containing bits for non-existent vcpu_ids, kvm_get_vcpu_by_id() returns NULL, which is then unconditionally dereferenced by kvm_riscv_vcpu_set_interrupt(), causing a kernel crash. Fix this by adding a NULL check before dereferencing the return value. If the target vcpu is not found, skip it and continue processing the remaining valid harts. Fixes: a046c2d8578c ("RISC-V: KVM: Reorganize SBI code by moving SBI v0.1 to its own file") Signed-off-by: Jiakai Xu Signed-off-by: Jiakai Xu Assisted-by: OpenClaw:DeepSeek-V3.2 Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20260517124414.420919-1-xujiakai2025@iscas.ac.cn Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_sbi_v01.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c index 188d5ea5b3b8..c9c323d4577a 100644 --- a/arch/riscv/kvm/vcpu_sbi_v01.c +++ b/arch/riscv/kvm/vcpu_sbi_v01.c @@ -55,6 +55,8 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, for_each_set_bit(i, &hmask, BITS_PER_LONG) { rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i); + if (!rvcpu) + continue; ret = kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT); if (ret < 0) break; -- cgit v1.2.3 From c7832534a8160276cccb9a8cc8cafb5614c579d0 Mon Sep 17 00:00:00 2001 From: Jiakai Xu Date: Thu, 14 May 2026 08:17:51 +0000 Subject: RISC-V: KVM: Fix sign extension for MMIO loads The kvm_riscv_vcpu_mmio_return() function handles MMIO read results by writing the data back to the guest register. For signed load instructions (LB, LH, LW on RV64), the value needs sign-extension from a smaller integer to unsigned long. The current code uses: (ulong)data << shift >> shift but (ulong) makes the right shift a logical shift (zero-extend) rather than an arithmetic shift (sign-extend), causing incorrect results when the MMIO device returns a negative value. For example, LB reading 0x80 would return 128 instead of -128. Fix this by casting to (long) after the left shift so that the subsequent right shift is arithmetic and correctly propagates the sign bit: (long)((ulong)data << shift) >> shift Additionally, remove the unnecessary shift assignment for LBU (unsigned byte load) since it does not need sign extension. This makes LBU consistent with LHU and LWU which already keep shift = 0. Fixes: b91f0e4cb8a3 ("RISC-V: KVM: Factor-out instruction emulation into separate sources") Signed-off-by: Jiakai Xu Signed-off-by: Jiakai Xu Assisted-by: OpenClaw:DeepSeek-V3.2 Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20260514081752.472987-1-xujiakai2025@iscas.ac.cn Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_insn.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c index 4d89b94128ae..f09f9251d1f0 100644 --- a/arch/riscv/kvm/vcpu_insn.c +++ b/arch/riscv/kvm/vcpu_insn.c @@ -415,7 +415,6 @@ int kvm_riscv_vcpu_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run, shift = 8 * (sizeof(ulong) - len); } else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) { len = 1; - shift = 8 * (sizeof(ulong) - len); #ifdef CONFIG_64BIT } else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) { len = 8; @@ -649,22 +648,22 @@ int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) case 1: data8 = *((u8 *)run->mmio.data); SET_RD(insn, &vcpu->arch.guest_context, - (ulong)data8 << shift >> shift); + (long)((ulong)data8 << shift) >> shift); break; case 2: data16 = *((u16 *)run->mmio.data); SET_RD(insn, &vcpu->arch.guest_context, - (ulong)data16 << shift >> shift); + (long)((ulong)data16 << shift) >> shift); break; case 4: data32 = *((u32 *)run->mmio.data); SET_RD(insn, &vcpu->arch.guest_context, - (ulong)data32 << shift >> shift); + (long)((ulong)data32 << shift) >> shift); break; case 8: data64 = *((u64 *)run->mmio.data); SET_RD(insn, &vcpu->arch.guest_context, - (ulong)data64 << shift >> shift); + (long)((ulong)data64 << shift) >> shift); break; default: return -EOPNOTSUPP; -- cgit v1.2.3 From 532d06c646a6ed5c8701eb483dd64c7002c87f71 Mon Sep 17 00:00:00 2001 From: Minxi Hou Date: Mon, 18 May 2026 11:15:42 +0800 Subject: ALSA: hda/realtek: Add quirk for HP Z66 G6 14 laptop The HP Z66 G6 14 inch laptop uses the ALC236 codec with subsystem ID 0x103c:8df7. Without a quirk entry, the PCI SSID falls back to the generic 0x103c:0000 fixup, which does not configure the mute/micmute LED GPIOs correctly. Add the SND_PCI_QUIRK entry for this model using ALC236_FIXUP_HP_GPIO_LED, matching the surrounding HP EliteBook G12 entries (0x8dec-0x8dfe) which share the same ALC236 codec and GPIO LED layout. Signed-off-by: Minxi Hou Link: https://patch.msgid.link/20260518031542.2899188-1-houminxi@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 6c872a24b8fc..d86d4f5f9ca6 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7243,6 +7243,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8def, "HP EliteBook 660 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8df0, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8df1, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8df7, "HP Z66 G6", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8dfb, "HP EliteBook 6 G1a 14", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8dfc, "HP EliteBook 645 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8dfd, "HP EliteBook 6 G1a 16", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), -- cgit v1.2.3 From af0c3f05866237f7592219bfe05387bc3bfc99b5 Mon Sep 17 00:00:00 2001 From: Jianpeng Chang Date: Wed, 13 May 2026 15:22:09 +0800 Subject: dma-mapping: move dma_map_resource() sanity check into debug code dma_map_resource() uses pfn_valid() to ensure the range is not RAM. However, pfn_valid() only checks for availability of the memory map for a PFN but it does not ensure that the PFN is actually backed by RAM. On ARM64 with SPARSEMEM (128MB section granularity), MMIO addresses that share a section with RAM will falsely trigger the WARN_ON_ONCE and cause dma_map_resource() to return DMA_MAPPING_ERROR. This causes a WARNING on Raspberry Pi 4 during spi_bcm2835 probe because the SPI FIFO register (0xfe204004) falls in the same sparsemem section as the end of RAM (0xf8000000-0xfbffffff), both in section 31 (0xf8000000-0xffffffff). Move the sanity check from dma_map_resource() into debug_dma_map_phys() and replace the unreliable pfn_valid() with pfn_valid() && !PageReserved(), which correctly identifies actual usable RAM without false positives for MMIO regions that happen to have struct pages. Since dma_map_resource() is dma_map_phys(DMA_ATTR_MMIO), the check applies equally to both APIs. Any non-reserved page represents kernel memory to a sufficient degree that using DMA_ATTR_MMIO on it is almost certainly wrong and risks breaking coherency on non-coherent platforms. ZONE_DEVICE pages used for PCI P2P DMA (MEMORY_DEVICE_PCI_P2PDMA) have PageReserved set, so they will not trigger a false positive. The check no longer blocks the mapping and uses err_printk() to integrate with dma-debug filtering. Fixes: f7326196a781 ("dma-mapping: export new dma_*map_phys() interface") Reviewed-by: Robin Murphy Signed-off-by: Jianpeng Chang Reviewed-by: Leon Romanovsky Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20260513072209.1486986-1-jianpeng.chang.cn@windriver.com --- kernel/dma/debug.c | 9 ++++++++- kernel/dma/mapping.c | 4 ---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 1a725edbbbf6..3248f8b4d096 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -1251,7 +1251,14 @@ void debug_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, entry->direction = direction; entry->map_err_type = MAP_ERR_NOT_CHECKED; - if (!(attrs & DMA_ATTR_MMIO)) { + if (attrs & DMA_ATTR_MMIO) { + unsigned long pfn = PHYS_PFN(phys); + + if (pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) + err_printk(dev, entry, + "dma_map_resource called for RAM address %pa\n", + &phys); + } else { check_for_stack(dev, phys); if (!PhysHighMem(phys)) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 23ed8eb9233e..e6b07f160d20 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -365,10 +365,6 @@ EXPORT_SYMBOL(dma_unmap_sg_attrs); dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (IS_ENABLED(CONFIG_DMA_API_DEBUG) && - WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr)))) - return DMA_MAPPING_ERROR; - return dma_map_phys(dev, phys_addr, size, dir, attrs | DMA_ATTR_MMIO); } EXPORT_SYMBOL(dma_map_resource); -- cgit v1.2.3 From 5f41161059fd0f1bbf18c90f3180e38cc45a14eb Mon Sep 17 00:00:00 2001 From: Helen Koike Date: Mon, 11 May 2026 18:53:05 -0300 Subject: debugobjects: Do not fill_pool() if pi_blocked_on On RT enabled kernels, fill_pool() ends up calling rtlock_lock(), which asserts if current::pi_blocked_on is set, because a task can obviously only block on one lock as otherwise the priority inheritenace chain gets corrupted. Prevent this by expanding the conditional to take current::pi_blocked_on into account. Fixes: 4bedcc28469a ("debugobjects: Make them PREEMPT_RT aware") Reported-by: syzbot+b8ca586b9fc235f0c0df@syzkaller.appspotmail.com Signed-off-by: Helen Koike Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260511215359.3351259-1-koike@igalia.com Closes: https://syzkaller.appspot.com/bug?extid=b8ca586b9fc235f0c0df --- lib/debugobjects.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 12e2e42e6a31..772ddabcbe7d 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -711,6 +711,15 @@ static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket return NULL; } +static inline bool debug_objects_is_pi_blocked_on(void) +{ +#ifdef CONFIG_RT_MUTEXES + return current->pi_blocked_on != NULL; +#else + return false; +#endif +} + static void debug_objects_fill_pool(void) { if (!static_branch_likely(&obj_cache_enabled)) @@ -727,11 +736,12 @@ static void debug_objects_fill_pool(void) /* * On RT enabled kernels the pool refill must happen in preemptible - * context -- for !RT kernels we rely on the fact that spinlock_t and - * raw_spinlock_t are basically the same type and this lock-type - * inversion works just fine. + * context and not enqueued on an rt_mutex -- for !RT kernels we rely + * on the fact that spinlock_t and raw_spinlock_t are basically the + * same type and this lock-type inversion works just fine. */ - if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible() || system_state < SYSTEM_SCHEDULING) { + if (!IS_ENABLED(CONFIG_PREEMPT_RT) || system_state < SYSTEM_SCHEDULING || + (preemptible() && !debug_objects_is_pi_blocked_on())) { /* * Annotate away the spinlock_t inside raw_spinlock_t warning * by temporarily raising the wait-type to LD_WAIT_CONFIG, matching -- cgit v1.2.3 From e02b5262fd288cc235f14e12233ea54e78c04611 Mon Sep 17 00:00:00 2001 From: Julien Chauveau Date: Tue, 24 Mar 2026 20:30:11 +0100 Subject: drm/bridge: it66121: acquire reset GPIO in probe The it66121_ctx structure has a gpio_reset field, and it66121_hw_reset() calls gpiod_set_value() on it. However, the GPIO descriptor is never acquired via devm_gpiod_get(), leaving gpio_reset as NULL throughout the driver lifetime. gpiod_set_value() silently returns when passed a NULL descriptor, so the hardware reset sequence in it66121_hw_reset() is a no-op. This leaves the chip in an undefined state at probe time, which can prevent it from responding on the I2C bus. The DT binding marks reset-gpios as a required property, so all compliant device trees provide this GPIO. Add the missing devm_gpiod_get() call after enabling power supplies and before the hardware reset, so the chip is properly reset with power applied. Fixes: 988156dc2fc9 ("drm: bridge: add it66121 driver") Cc: stable@vger.kernel.org Signed-off-by: Julien Chauveau Reviewed-by: Javier Martinez Canillas Tested-by: Javier Martinez Canillas Link: https://patch.msgid.link/20260324193011.16583-1-chauveau.julien@gmail.com Signed-off-by: Javier Martinez Canillas --- drivers/gpu/drm/bridge/ite-it66121.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c index 9246e9c15a6e..ed21f09cd19a 100644 --- a/drivers/gpu/drm/bridge/ite-it66121.c +++ b/drivers/gpu/drm/bridge/ite-it66121.c @@ -1559,6 +1559,11 @@ static int it66121_probe(struct i2c_client *client) return ret; } + ctx->gpio_reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); + if (IS_ERR(ctx->gpio_reset)) + return dev_err_probe(dev, PTR_ERR(ctx->gpio_reset), + "Failed to get reset GPIO\n"); + it66121_hw_reset(ctx); ctx->regmap = devm_regmap_init_i2c(client, &it66121_regmap_config); -- cgit v1.2.3 From f9b2d3b703d13df50c630997dfdc25648e96db0d Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 18 May 2026 10:31:11 +0200 Subject: regulator: tps65219: fix irq_data.rdev not being assigned Commit 64a6b577490c ("regulator: tps65219: Remove debugging helper function") removed the tps65219_get_rdev_by_name() helper along with the irq_data.rdev assignment that depended on it. This left irq_data.rdev uninitialized for all IRQs, causing undefined behavior when regulator_notifier_call_chain() is called from the IRQ handler: Internal error: Oops: 0000000096000004 pc : regulator_notifier_call_chain lr : tps65219_regulator_irq_handler Call trace: regulator_notifier_call_chain tps65219_regulator_irq_handler handle_nested_irq regmap_irq_thread irq_thread_fn irq_thread kthread ret_from_fork Instead of restoring a dedicated lookup array, restructure the probe function to combine regulator registration with IRQ registration in the same loop. This way the rdev returned by devm_regulator_register() is naturally available for assigning to irq_data.rdev without any auxiliary data structure. Non-regulator IRQs (SENSOR, TIMEOUT) that don't correspond to any registered regulator are registered with rdev=NULL, and the IRQ handler is protected with a NULL check to avoid crashing. Cc: stable@vger.kernel.org Closes: https://lore.kernel.org/all/aBDSTxALaOc-PD7X@gaggiata.pivistrello.it/ Reported-by: Francesco Dolcini Fixes: 64a6b577490c ("regulator: tps65219: Remove debugging helper function") Signed-off-by: Alexander Sverdlin Link: https://patch.msgid.link/20260518083113.2063368-1-alexander.sverdlin@siemens.com Signed-off-by: Mark Brown --- drivers/regulator/tps65219-regulator.c | 135 +++++++++++++++++++++++---------- 1 file changed, 95 insertions(+), 40 deletions(-) diff --git a/drivers/regulator/tps65219-regulator.c b/drivers/regulator/tps65219-regulator.c index d77ca486879f..324c3a33af8a 100644 --- a/drivers/regulator/tps65219-regulator.c +++ b/drivers/regulator/tps65219-regulator.c @@ -346,8 +346,9 @@ static irqreturn_t tps65219_regulator_irq_handler(int irq, void *data) return IRQ_HANDLED; } - regulator_notifier_call_chain(irq_data->rdev, - irq_data->type->event, NULL); + if (irq_data->rdev) + regulator_notifier_call_chain(irq_data->rdev, + irq_data->type->event, NULL); dev_err(irq_data->dev, "Error IRQ trap %s for %s\n", irq_data->type->event_name, irq_data->type->regulator_name); @@ -398,14 +399,65 @@ static struct tps65219_chip_data chip_info_table[] = { }, }; -static int tps65219_regulator_probe(struct platform_device *pdev) +static bool tps65219_is_regulator_name(const struct tps65219_chip_data *pmic, + const char *name) +{ + int i; + + for (i = 0; i < pmic->common_rdesc_size; i++) + if (!strcmp(pmic->common_rdesc[i].name, name)) + return true; + for (i = 0; i < pmic->rdesc_size; i++) + if (!strcmp(pmic->rdesc[i].name, name)) + return true; + return false; +} + +static int tps65219_register_irqs(struct platform_device *pdev, + struct tps65219 *tps, + struct regulator_dev *rdev, + struct tps65219_regulator_irq_type *irq_types, + int nirqs, + const char *regulator_name) { struct tps65219_regulator_irq_data *irq_data; + int i, irq, error; + + for (i = 0; i < nirqs; i++) { + if (strcmp(irq_types[i].regulator_name, regulator_name)) + continue; + + irq = platform_get_irq_byname(pdev, irq_types[i].irq_name); + if (irq < 0) + return -EINVAL; + + irq_data = devm_kmalloc(tps->dev, sizeof(*irq_data), GFP_KERNEL); + if (!irq_data) + return -ENOMEM; + + irq_data->dev = tps->dev; + irq_data->type = &irq_types[i]; + irq_data->rdev = rdev; + + error = devm_request_threaded_irq(tps->dev, irq, NULL, + tps65219_regulator_irq_handler, + IRQF_ONESHOT, + irq_types[i].irq_name, + irq_data); + if (error) + return dev_err_probe(tps->dev, error, + "Failed to request %s IRQ %d\n", + irq_types[i].irq_name, irq); + } + return 0; +} + +static int tps65219_regulator_probe(struct platform_device *pdev) +{ struct tps65219_regulator_irq_type *irq_type; struct tps65219_chip_data *pmic; struct regulator_dev *rdev; int error; - int irq; int i; struct tps65219 *tps = dev_get_drvdata(pdev->dev.parent); @@ -425,6 +477,19 @@ static int tps65219_regulator_probe(struct platform_device *pdev) return dev_err_probe(tps->dev, PTR_ERR(rdev), "Failed to register %s regulator\n", pmic->common_rdesc[i].name); + + error = tps65219_register_irqs(pdev, tps, rdev, + pmic->common_irq_types, + pmic->common_irq_size, + pmic->common_rdesc[i].name); + if (error) + return error; + error = tps65219_register_irqs(pdev, tps, rdev, + pmic->irq_types, + pmic->dev_irq_size, + pmic->common_rdesc[i].name); + if (error) + return error; } for (i = 0; i < pmic->rdesc_size; i++) { @@ -434,52 +499,42 @@ static int tps65219_regulator_probe(struct platform_device *pdev) return dev_err_probe(tps->dev, PTR_ERR(rdev), "Failed to register %s regulator\n", pmic->rdesc[i].name); + + error = tps65219_register_irqs(pdev, tps, rdev, + pmic->common_irq_types, + pmic->common_irq_size, + pmic->rdesc[i].name); + if (error) + return error; + error = tps65219_register_irqs(pdev, tps, rdev, + pmic->irq_types, + pmic->dev_irq_size, + pmic->rdesc[i].name); + if (error) + return error; } + /* Register non-regulator IRQs (TIMEOUT, SENSOR) with rdev=NULL */ for (i = 0; i < pmic->common_irq_size; ++i) { irq_type = &pmic->common_irq_types[i]; - irq = platform_get_irq_byname(pdev, irq_type->irq_name); - if (irq < 0) - return -EINVAL; - - irq_data = devm_kmalloc(tps->dev, sizeof(*irq_data), GFP_KERNEL); - if (!irq_data) - return -ENOMEM; - - irq_data->dev = tps->dev; - irq_data->type = irq_type; - error = devm_request_threaded_irq(tps->dev, irq, NULL, - tps65219_regulator_irq_handler, - IRQF_ONESHOT, - irq_type->irq_name, - irq_data); + if (tps65219_is_regulator_name(pmic, irq_type->regulator_name)) + continue; + error = tps65219_register_irqs(pdev, tps, NULL, + irq_type, 1, + irq_type->regulator_name); if (error) - return dev_err_probe(tps->dev, error, - "Failed to request %s IRQ %d\n", - irq_type->irq_name, irq); + return error; } for (i = 0; i < pmic->dev_irq_size; ++i) { irq_type = &pmic->irq_types[i]; - irq = platform_get_irq_byname(pdev, irq_type->irq_name); - if (irq < 0) - return -EINVAL; - - irq_data = devm_kmalloc(tps->dev, sizeof(*irq_data), GFP_KERNEL); - if (!irq_data) - return -ENOMEM; - - irq_data->dev = tps->dev; - irq_data->type = irq_type; - error = devm_request_threaded_irq(tps->dev, irq, NULL, - tps65219_regulator_irq_handler, - IRQF_ONESHOT, - irq_type->irq_name, - irq_data); + if (tps65219_is_regulator_name(pmic, irq_type->regulator_name)) + continue; + error = tps65219_register_irqs(pdev, tps, NULL, + irq_type, 1, + irq_type->regulator_name); if (error) - return dev_err_probe(tps->dev, error, - "Failed to request %s IRQ %d\n", - irq_type->irq_name, irq); + return error; } return 0; -- cgit v1.2.3 From 360190bd965f93794d5f5685a6de22ce6da2b672 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 14 May 2026 09:38:59 +0200 Subject: ata: libata-scsi: improve readability of ata_scsi_qc_issue() Improve readability of ata_scsi_qc_issue(). No functional changes. Tested-by: Tommy Kelly Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-scsi.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index f44612e269a4..f9ca5410e223 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1767,7 +1767,7 @@ static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc) int ret; if (!ap->ops->qc_defer) - goto issue; + goto issue_qc; /* * If we already have a deferred qc, then rely on the SCSI layer to @@ -1786,38 +1786,37 @@ static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc) break; case ATA_DEFER_LINK: ret = SCSI_MLQUEUE_DEVICE_BUSY; - break; + goto defer_qc; case ATA_DEFER_PORT: ret = SCSI_MLQUEUE_HOST_BUSY; - break; + goto defer_qc; default: WARN_ON_ONCE(1); ret = SCSI_MLQUEUE_HOST_BUSY; - break; + goto defer_qc; } - if (ret) { - /* - * We must defer this qc: if this is not an NCQ command, keep - * this qc as a deferred one and report to the SCSI layer that - * we issued it so that it is not requeued. The deferred qc will - * be issued with the port deferred_qc_work once all on-going - * commands complete. - */ - if (!ata_is_ncq(qc->tf.protocol)) { - ap->deferred_qc = qc; - return 0; - } +issue_qc: + ata_qc_issue(qc); + return 0; - /* Force a requeue of the command to defer its execution. */ - ata_qc_free(qc); - return ret; +defer_qc: + /* + * We must defer this qc: if this is not an NCQ command, keep + * this qc as a deferred one and report to the SCSI layer that + * we issued it so that it is not requeued. The deferred qc will + * be issued with the port deferred_qc_work once all on-going + * commands complete. + */ + if (!ata_is_ncq(qc->tf.protocol)) { + ap->deferred_qc = qc; + return 0; } -issue: - ata_qc_issue(qc); + /* Force a requeue of the command to defer its execution. */ + ata_qc_free(qc); - return 0; + return ret; } /** -- cgit v1.2.3 From ce4548807d2e4ae48fd0dbe38865467369877913 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 14 May 2026 09:39:00 +0200 Subject: ata: libata-scsi: do not use the deferred QC feature for ATA_DEFER_PORT The deferred QC feature was meant to handle mixed NCQ and non-NCQ commands, i.e. for return value ATA_DEFER_LINK. ATA_DEFER_PORT is returned by PATA drivers, but also certain SATA drivers like sata_mv and sata_sil24 that uses ap->excl_link to workaround hardware bugs in these HBAs. Regardless of the reason, using the deferred QC feature for ATA_DEFER_PORT is always wrong, and will break the ap->excl_link usage of the SATA drivers that rely on that feature. Modify ata_scsi_qc_issue() to only use the deferred QC feature when mixing NCQ and non-NCQ commands, i.e. ATA_DEFER_LINK. Fixes: 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation") Tested-by: Tommy Kelly Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-scsi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index f9ca5410e223..f03b6326ad2d 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1789,11 +1789,11 @@ static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc) goto defer_qc; case ATA_DEFER_PORT: ret = SCSI_MLQUEUE_HOST_BUSY; - goto defer_qc; + goto free_qc; default: WARN_ON_ONCE(1); ret = SCSI_MLQUEUE_HOST_BUSY; - goto defer_qc; + goto free_qc; } issue_qc: @@ -1813,6 +1813,7 @@ defer_qc: return 0; } +free_qc: /* Force a requeue of the command to defer its execution. */ ata_qc_free(qc); -- cgit v1.2.3 From f233124fb36cd57ef09f96d517a38ab4b902e15e Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 14 May 2026 09:39:01 +0200 Subject: ata: libata-scsi: do not use the deferred QC feature on PMPs with CBS When using Port Multipliers (PMPs) with Command-Based Switching (CBS), you can only issue commands to one link at a time. For PMPs with CBS, there is already code to handle commands being sent to different links in sata_pmp_qc_defer_cmd_switch() using ap->excl_link. sata_sil24 also makes use of ap->excl_link. A user on the list reported that commit 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation") broke PMPs with CBS. The commit introduced code that stores a deferred qc in ap->deferred_qc, to later be issued via a workqueue. It turns out that this change is incompatible with the existing ap->excl_link handling used by PMPs with CBS. Thus, modify sata_pmp_qc_defer_cmd_switch() and sil24_qc_defer() to return ATA_DEFER_LINK_EXCL, and make sure that the deferred QC handling via workqueue is not used for this return value. This way, PMPs with CBS will work once again. Note that the starvation referenced in commit 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation") can only happen on libsas ports, and libsas does not support Port Multipliers, thus there is no harm of reverting back to the previous way of deferring commands for PMPs with CBS. Non-libsas ports connected to anything but a PMP with CBS (e.g. a normal drive or a PMP with FBS) will continue using the deferred workqueue, since it does result in lower completion latencies for non-NCQ commands, even though the workqueue is not strictly needed to avoid starvation for non-libsas ports. If we want to modify the scope of the workqueue issuing to also handle PMPs with CBS, then we should ensure that we can save both NCQ and non-NCQ commands in ap->deferred_qc, while also removing the existing PMP CBS handling using ap->excl_link, such that we don't duplicate features. While at it, also add a comment explaining how the ap->excl_link mechanism works. Fixes: 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation") Tested-by: Tommy Kelly Reported-by: Tommy Kelly Closes: https://lore.kernel.org/linux-ide/ce09cc21-a8e9-4845-b205-35411e22fba9@tkel.ly/ Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-pmp.c | 13 ++++++++++++- drivers/ata/libata-scsi.c | 8 ++++++++ drivers/ata/sata_sil24.c | 6 +++++- include/linux/libata.h | 1 + 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index e3adc008fed1..7e889534d73b 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -110,13 +110,24 @@ int sata_pmp_qc_defer_cmd_switch(struct ata_queued_cmd *qc) { struct ata_link *link = qc->dev->link; struct ata_port *ap = link->ap; + int ret; if (ap->excl_link == NULL || ap->excl_link == link) { if (ap->nr_active_links == 0 || ata_link_active(link)) { qc->flags |= ATA_QCFLAG_CLEAR_EXCL; - return ata_std_qc_defer(qc); + ret = ata_std_qc_defer(qc); + if (ret == ATA_DEFER_LINK) + return ATA_DEFER_LINK_EXCL; + return ret; } + /* + * Note: ap->excl_link contains the link that is next in line, + * i.e. implicit round robin. If there is only one link + * dispatching, ap->excl_link will be left unclaimed, allowing + * other links to set ap->excl_link, ensuring that the currently + * active link cannot queue any more. + */ ap->excl_link = link; } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index f03b6326ad2d..ca29744c57f9 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1787,6 +1787,14 @@ static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc) case ATA_DEFER_LINK: ret = SCSI_MLQUEUE_DEVICE_BUSY; goto defer_qc; + case ATA_DEFER_LINK_EXCL: + /* + * Drivers making use of ap->excl_link cannot store the QC in + * ap->deferred_qc, because the ap->excl_link handling is + * incompatible with the ap->deferred_qc workqueue handling. + */ + ret = SCSI_MLQUEUE_DEVICE_BUSY; + goto free_qc; case ATA_DEFER_PORT: ret = SCSI_MLQUEUE_HOST_BUSY; goto free_qc; diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c index d642ece9f07a..57f1081b86db 100644 --- a/drivers/ata/sata_sil24.c +++ b/drivers/ata/sata_sil24.c @@ -789,6 +789,7 @@ static int sil24_qc_defer(struct ata_queued_cmd *qc) struct ata_link *link = qc->dev->link; struct ata_port *ap = link->ap; u8 prot = qc->tf.protocol; + int ret; /* * There is a bug in the chip: @@ -826,7 +827,10 @@ static int sil24_qc_defer(struct ata_queued_cmd *qc) qc->flags |= ATA_QCFLAG_CLEAR_EXCL; } - return ata_std_qc_defer(qc); + ret = ata_std_qc_defer(qc); + if (ret == ATA_DEFER_LINK) + return ATA_DEFER_LINK_EXCL; + return ret; } static enum ata_completion_errors sil24_qc_prep(struct ata_queued_cmd *qc) diff --git a/include/linux/libata.h b/include/linux/libata.h index 5c085ef4eda7..360776016b50 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -371,6 +371,7 @@ enum { /* return values for ->qc_defer */ ATA_DEFER_LINK = 1, ATA_DEFER_PORT = 2, + ATA_DEFER_LINK_EXCL = 3, /* desc_len for ata_eh_info and context */ ATA_EH_DESC_LEN = 80, -- cgit v1.2.3 From 759e8756da00aa115d504a18155b1d1ee1cc12e8 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 14 May 2026 09:39:02 +0200 Subject: ata: libata-scsi: do not needlessly defer commands when using PMP with FBS The ACS specification does not allow a non-NCQ command to be issued while an NCQ command is outstanding. Commit 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation") introduced a feature where a deferred non-NCQ command gets issued from a workqueue. The design stores a single non-NCQ command per port. However, when using Port Multipliers (PMPs), specifically PMPs that support FIS-Based Switching (FBS), non-NCQ and NCQ commands can be mixed on the same port, just not for the same link, see e.g. ata_std_qc_defer() which is, and always has operated on a per-link basis. Therefore, move the deferred_qc from struct ata_port to struct ata_link. This way, when using a PMP with FBS, we will not needlessly defer commands to all other links, just because one link issued a non-NCQ command while having an NCQ command outstanding. Only commands for that specific link will be deferred. This is in line with how PMPs with FBS worked before commit 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation"). Fixes: 0ea84089dbf6 ("ata: libata-scsi: avoid Non-NCQ command starvation") Tested-by: Tommy Kelly Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 9 ++++++--- drivers/ata/libata-eh.c | 8 ++++---- drivers/ata/libata-pmp.c | 5 ++++- drivers/ata/libata-scsi.c | 43 +++++++++++++++++++++++++------------------ include/linux/libata.h | 6 +++--- 5 files changed, 42 insertions(+), 29 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index e76d15411e2a..3d0027ec33c2 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5584,6 +5584,7 @@ void ata_link_init(struct ata_port *ap, struct ata_link *link, int pmp) link->pmp = pmp; link->active_tag = ATA_TAG_POISON; link->hw_sata_spd_limit = UINT_MAX; + INIT_WORK(&link->deferred_qc_work, ata_scsi_deferred_qc_work); /* can't use iterator, ap isn't initialized yet */ for (i = 0; i < ATA_MAX_DEVICES; i++) { @@ -5666,7 +5667,6 @@ struct ata_port *ata_port_alloc(struct ata_host *host) mutex_init(&ap->scsi_scan_mutex); INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug); INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan); - INIT_WORK(&ap->deferred_qc_work, ata_scsi_deferred_qc_work); INIT_LIST_HEAD(&ap->eh_done_q); init_waitqueue_head(&ap->eh_wait_q); init_completion(&ap->park_req_pending); @@ -6291,12 +6291,15 @@ static void ata_port_detach(struct ata_port *ap) /* It better be dead now and not have any remaining deferred qc. */ WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED)); - WARN_ON(ap->deferred_qc); - cancel_work_sync(&ap->deferred_qc_work); cancel_delayed_work_sync(&ap->hotplug_task); cancel_delayed_work_sync(&ap->scsi_rescan_task); + ata_for_each_link(link, ap, PMP_FIRST) { + WARN_ON(link->deferred_qc); + cancel_work_sync(&link->deferred_qc_work); + } + /* Delete port multiplier link transport devices */ if (ap->pmp_link) { int i; diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 9a4b67b90b17..d623eb32ed8b 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -651,11 +651,11 @@ int ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, if (qc->scsicmd != scmd) continue; if ((qc->flags & ATA_QCFLAG_ACTIVE) || - qc == ap->deferred_qc) + qc == qc->dev->link->deferred_qc) break; } - if (i < ATA_MAX_QUEUE && qc == ap->deferred_qc) { + if (i < ATA_MAX_QUEUE && qc == qc->dev->link->deferred_qc) { /* * This is a deferred command that timed out while * waiting for the command queue to drain. Since the qc @@ -666,8 +666,8 @@ int ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, * deferred qc work from issuing this qc. */ WARN_ON_ONCE(qc->flags & ATA_QCFLAG_ACTIVE); - ap->deferred_qc = NULL; - cancel_work(&ap->deferred_qc_work); + qc->dev->link->deferred_qc = NULL; + cancel_work(&qc->dev->link->deferred_qc_work); set_host_byte(scmd, DID_TIME_OUT); scsi_eh_finish_cmd(scmd, &ap->eh_done_q); } else if (i < ATA_MAX_QUEUE) { diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index 7e889534d73b..e8540931b4a1 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -582,8 +582,11 @@ static void sata_pmp_detach(struct ata_device *dev) if (ap->ops->pmp_detach) ap->ops->pmp_detach(ap); - ata_for_each_link(tlink, ap, EDGE) + ata_for_each_link(tlink, ap, EDGE) { + WARN_ON(tlink->deferred_qc); + cancel_work_sync(&tlink->deferred_qc_work); ata_eh_detach_dev(tlink->device); + } spin_lock_irqsave(ap->lock, flags); ap->nr_pmp_links = 0; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index ca29744c57f9..d43207c6e467 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1664,8 +1664,9 @@ static void ata_scsi_qc_done(struct ata_queued_cmd *qc, bool set_result, void ata_scsi_deferred_qc_work(struct work_struct *work) { - struct ata_port *ap = - container_of(work, struct ata_port, deferred_qc_work); + struct ata_link *link = + container_of(work, struct ata_link, deferred_qc_work); + struct ata_port *ap = link->ap; struct ata_queued_cmd *qc; unsigned long flags; @@ -1676,10 +1677,10 @@ void ata_scsi_deferred_qc_work(struct work_struct *work) * such case, we should not need any more deferring the qc, so warn if * qc_defer() says otherwise. */ - qc = ap->deferred_qc; + qc = link->deferred_qc; if (qc && !ata_port_eh_scheduled(ap)) { WARN_ON_ONCE(ap->ops->qc_defer(qc)); - ap->deferred_qc = NULL; + link->deferred_qc = NULL; ata_qc_issue(qc); } @@ -1688,7 +1689,7 @@ void ata_scsi_deferred_qc_work(struct work_struct *work) void ata_scsi_requeue_deferred_qc(struct ata_port *ap) { - struct ata_queued_cmd *qc = ap->deferred_qc; + struct ata_link *link; lockdep_assert_held(ap->lock); @@ -1697,16 +1698,21 @@ void ata_scsi_requeue_deferred_qc(struct ata_port *ap) * do not try to be smart about what to do with this deferred command * and simply requeue it by completing it with DID_REQUEUE. */ - if (qc) { - ap->deferred_qc = NULL; - cancel_work(&ap->deferred_qc_work); - ata_scsi_qc_done(qc, true, DID_REQUEUE << 16); + ata_for_each_link(link, ap, PMP_FIRST) { + struct ata_queued_cmd *qc = link->deferred_qc; + + if (qc) { + link->deferred_qc = NULL; + cancel_work(&link->deferred_qc_work); + ata_scsi_qc_done(qc, true, DID_REQUEUE << 16); + } } } -static void ata_scsi_schedule_deferred_qc(struct ata_port *ap) +static void ata_scsi_schedule_deferred_qc(struct ata_link *link) { - struct ata_queued_cmd *qc = ap->deferred_qc; + struct ata_queued_cmd *qc = link->deferred_qc; + struct ata_port *ap = link->ap; lockdep_assert_held(ap->lock); @@ -1723,12 +1729,12 @@ static void ata_scsi_schedule_deferred_qc(struct ata_port *ap) return; } if (!ap->ops->qc_defer(qc)) - queue_work(system_highpri_wq, &ap->deferred_qc_work); + queue_work(system_highpri_wq, &link->deferred_qc_work); } static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) { - struct ata_port *ap = qc->ap; + struct ata_link *link = qc->dev->link; struct scsi_cmnd *cmd = qc->scsicmd; u8 *cdb = cmd->cmnd; bool have_sense = qc->flags & ATA_QCFLAG_SENSE_VALID; @@ -1759,11 +1765,12 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) ata_scsi_qc_done(qc, false, 0); - ata_scsi_schedule_deferred_qc(ap); + ata_scsi_schedule_deferred_qc(link); } static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc) { + struct ata_link *link = qc->dev->link; int ret; if (!ap->ops->qc_defer) @@ -1774,7 +1781,7 @@ static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc) * requeue and defer all incoming commands until the deferred qc is * processed, once all on-going commands complete. */ - if (ap->deferred_qc) { + if (link->deferred_qc) { ata_qc_free(qc); return SCSI_MLQUEUE_DEVICE_BUSY; } @@ -1790,8 +1797,8 @@ static int ata_scsi_qc_issue(struct ata_port *ap, struct ata_queued_cmd *qc) case ATA_DEFER_LINK_EXCL: /* * Drivers making use of ap->excl_link cannot store the QC in - * ap->deferred_qc, because the ap->excl_link handling is - * incompatible with the ap->deferred_qc workqueue handling. + * link->deferred_qc, because the ap->excl_link handling is + * incompatible with the link->deferred_qc workqueue handling. */ ret = SCSI_MLQUEUE_DEVICE_BUSY; goto free_qc; @@ -1817,7 +1824,7 @@ defer_qc: * commands complete. */ if (!ata_is_ncq(qc->tf.protocol)) { - ap->deferred_qc = qc; + link->deferred_qc = qc; return 0; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 360776016b50..127229fbd1a6 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -855,6 +855,9 @@ struct ata_link { unsigned int sata_spd; /* current SATA PHY speed */ enum ata_lpm_policy lpm_policy; + struct work_struct deferred_qc_work; + struct ata_queued_cmd *deferred_qc; + /* record runtime error info, protected by host_set lock */ struct ata_eh_info eh_info; /* EH context */ @@ -900,9 +903,6 @@ struct ata_port { u64 qc_active; int nr_active_links; /* #links with active qcs */ - struct work_struct deferred_qc_work; - struct ata_queued_cmd *deferred_qc; - struct ata_link link; /* host default link */ struct ata_link *slave_link; /* see ata_slave_link_init() */ -- cgit v1.2.3 From b02900c85a6423cf9b3dcc6b47bf060c85075e69 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 12 May 2026 13:14:59 +0300 Subject: usb: typec: tipd: Fix error code in tps6598x_probe() Set the error code on these two error paths. The existing code returns success. Fixes: 77ed2f4538da ("usb: typec: tipd: Use read_power_status function in probe") Fixes: 04041fd7d6ec ("usb: typec: tipd: Read data status in probe and cache its value") Cc: stable Signed-off-by: Dan Carpenter Reviewed-by: Heikki Krogerus Link: https://patch.msgid.link/agL9o7wUK1dOVBTy@stanley.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 43faec794b95..d0b769333bd9 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -1835,6 +1835,7 @@ static int tps6598x_probe(struct i2c_client *client) goto err_role_put; if (status & TPS_STATUS_PLUG_PRESENT) { + ret = -EINVAL; if (!tps6598x_read_power_status(tps)) goto err_unregister_port; if (!tps->data->read_data_status(tps)) -- cgit v1.2.3 From 6c5dbc104dadd79fc2923497c20bae759a18758c Mon Sep 17 00:00:00 2001 From: Jeremy Erazo Date: Tue, 12 May 2026 16:05:30 +0000 Subject: usb: gadget: composite: fix integer underflow in WebUSB GET_URL handling The WebUSB GET_URL handler in composite_setup() narrows landing_page_length to fit the host-supplied wLength using landing_page_length = w_length - WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + landing_page_offset; If wLength is smaller than WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH the unsigned subtraction wraps, and the subsequent memcpy(url_descriptor->URL, cdev->landing_page + landing_page_offset, landing_page_length - landing_page_offset); ends up copying close to UINT_MAX bytes from cdev->landing_page into cdev->req->buf. KASAN reports a slab-out-of-bounds in composite_setup on the kmalloc-2k gadget_info allocation, and FORTIFY_SOURCE traps the memcpy as a 4294967293-byte field-spanning write into url_descriptor->URL (size 252). A USB host can reach this from a single SETUP packet against any gadget that has webusb/use=1 and a landingPage configured. Handle the small-wLength case before the math: when the host requested fewer bytes than the URL descriptor header, only the header is meaningful and no URL bytes need to be copied. Setting landing_page_length to landing_page_offset makes the existing memcpy a no-op and leaves the descriptor returned to the host unchanged for all larger wLength values. Fixes: 93c473948c58 ("usb: gadget: add WebUSB landing page support") Cc: stable Signed-off-by: Jeremy Erazo Link: https://patch.msgid.link/20260512160530.352318-1-mendozayt13@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index a902184bdf82..dc3664374596 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2172,7 +2172,10 @@ unknown: sizeof(url_descriptor->URL) - WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + landing_page_offset); - if (w_length < WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + landing_page_length) + if (w_length < WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH) + landing_page_length = landing_page_offset; + else if (w_length < + WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + landing_page_length) landing_page_length = w_length - WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + landing_page_offset; -- cgit v1.2.3 From d7486952bf74e546ee3748fb14b2d07881fa6273 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 14 May 2026 19:10:06 +0200 Subject: usb: typec: ucsi: ccg: reject firmware images without a ':' record header do_flash() locates the first .cyacd record with p = strnchr(fw->data, fw->size, ':'); while (p < eof) { s = strnchr(p + 1, eof - p - 1, ':'); ... } If the firmware image contains no ':' byte, strnchr() returns NULL. NULL compares less than the valid kernel pointer eof, so the loop body runs and strnchr() is called with p + 1 == (void *)1 and a length of roughly (unsigned long)eof, causing a wonderful crash. The not_signed_fw fallthrough earlier in do_flash() and the chip-state branches in ccg_fw_update_needed() allow an unsigned blob to reach this loop, so a root user who can place a crafted file under /lib/firmware and write the do_flash sysfs attribute can trigger the oops. Bail out with -EINVAL when the initial strnchr() returns NULL. Assisted-by: gkh_clanker_t1000 Cc: stable Cc: Heikki Krogerus Reviewed-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026051405-posture-shrill-7884@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_ccg.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index 199799b319c2..4463c1ae96bd 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -1243,6 +1243,11 @@ not_signed_fw: *****************************************************************/ p = strnchr(fw->data, fw->size, ':'); + if (!p) { + dev_err(dev, "Bad FW format: no ':' record header found\n"); + err = -EINVAL; + goto release_mem; + } while (p < eof) { s = strnchr(p + 1, eof - p - 1, ':'); -- cgit v1.2.3 From 379e8f1ca5e919b130b40d8115d92a536e5f8d7a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 18 May 2026 13:41:45 +0200 Subject: drm/gem: Make the GEM LRU lock part of drm_device Recently, a few races have been discovered in the GEM LRU logic, all of them caused by the fact the LRU lock is accessed through gem->lru->lock, and that very same lock also protects changes to gem->lru, leading to situations where gem->lru needs to first be accessed without the lock held, to then get the lru to access the lock through and finally take the lock and do the expected operation. Currently, the only driver making use of this API (MSM) declares a device-wide lock, and the user we're about to add (panthor) will do the same. There's no evidence that we will ever have a driver that wants different pools of LRUs protected by different locks under the same drm_device. So we're better off moving this lock to drm_device and always locking it through obj->dev->gem_lru_mutex, or directly through dev->gem_lru_mutex. If anyone ever needs more fine-grained locking, this can be revisited to pass some drm_gem_lru_pool object representing the pool of LRUs under a specific lock, but for now, the per-device lock seems to be enough. Fixes: e7c2af13f811 ("drm/gem: Add LRU/shrinker helper") Reported-by: Chia-I Wu Closes: https://gitlab.freedesktop.org/panfrost/linux/-/work_items/86 Reviewed-by: Rob Clark Reviewed-by: Liviu Dudau Reviewed-by: Steven Price Reviewed-by: Chia-I Wu Link: https://patch.msgid.link/20260518-panthor-shrinker-fixes-v4-1-1920234470d5@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/drm_drv.c | 2 ++ drivers/gpu/drm/drm_gem.c | 36 +++++++++++++++------------------- drivers/gpu/drm/msm/msm_drv.c | 11 +++++------ drivers/gpu/drm/msm/msm_drv.h | 7 ------- drivers/gpu/drm/msm/msm_gem.c | 33 +++++++++++++++---------------- drivers/gpu/drm/msm/msm_gem_shrinker.c | 4 ++-- drivers/gpu/drm/msm/msm_gem_submit.c | 6 +++--- drivers/gpu/drm/msm/msm_gem_vma.c | 12 ++++++------ drivers/gpu/drm/msm/msm_ringbuffer.c | 6 +++--- include/drm/drm_device.h | 7 +++++++ include/drm/drm_gem.h | 20 +++++++++---------- 11 files changed, 69 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 985c283cf59f..675675480da4 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -697,6 +697,7 @@ static void drm_dev_init_release(struct drm_device *dev, void *res) mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->clientlist_mutex); mutex_destroy(&dev->filelist_mutex); + mutex_destroy(&dev->gem_lru_mutex); } static int drm_dev_init(struct drm_device *dev, @@ -738,6 +739,7 @@ static int drm_dev_init(struct drm_device *dev, INIT_LIST_HEAD(&dev->vblank_event_list); spin_lock_init(&dev->event_lock); + mutex_init(&dev->gem_lru_mutex); mutex_init(&dev->filelist_mutex); mutex_init(&dev->clientlist_mutex); mutex_init(&dev->master_mutex); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index d6424267260b..b95b015d2983 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1541,12 +1541,10 @@ EXPORT_SYMBOL(drm_gem_unlock_reservations); * drm_gem_lru_init - initialize a LRU * * @lru: The LRU to initialize - * @lock: The lock protecting the LRU */ void -drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock) +drm_gem_lru_init(struct drm_gem_lru *lru) { - lru->lock = lock; lru->count = 0; INIT_LIST_HEAD(&lru->list); } @@ -1571,14 +1569,10 @@ drm_gem_lru_remove_locked(struct drm_gem_object *obj) void drm_gem_lru_remove(struct drm_gem_object *obj) { - struct drm_gem_lru *lru = obj->lru; - - if (!lru) - return; - - mutex_lock(lru->lock); - drm_gem_lru_remove_locked(obj); - mutex_unlock(lru->lock); + mutex_lock(&obj->dev->gem_lru_mutex); + if (obj->lru) + drm_gem_lru_remove_locked(obj); + mutex_unlock(&obj->dev->gem_lru_mutex); } EXPORT_SYMBOL(drm_gem_lru_remove); @@ -1593,7 +1587,7 @@ EXPORT_SYMBOL(drm_gem_lru_remove); void drm_gem_lru_move_tail_locked(struct drm_gem_lru *lru, struct drm_gem_object *obj) { - lockdep_assert_held_once(lru->lock); + lockdep_assert_held_once(&obj->dev->gem_lru_mutex); if (obj->lru) drm_gem_lru_remove_locked(obj); @@ -1617,9 +1611,9 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail_locked); void drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj) { - mutex_lock(lru->lock); + mutex_lock(&obj->dev->gem_lru_mutex); drm_gem_lru_move_tail_locked(lru, obj); - mutex_unlock(lru->lock); + mutex_unlock(&obj->dev->gem_lru_mutex); } EXPORT_SYMBOL(drm_gem_lru_move_tail); @@ -1633,6 +1627,7 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail); * of the shrink callback to check for this (ie. dma_resv_test_signaled()) * or if necessary block until the buffer becomes idle. * + * @dev: DRM device the LRU belongs to * @lru: The LRU to scan * @nr_to_scan: The number of pages to try to reclaim * @remaining: The number of pages left to reclaim, should be initialized by caller @@ -1640,7 +1635,8 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail); * @ticket: Optional ww_acquire_ctx context to use for locking */ unsigned long -drm_gem_lru_scan(struct drm_gem_lru *lru, +drm_gem_lru_scan(struct drm_device *dev, + struct drm_gem_lru *lru, unsigned int nr_to_scan, unsigned long *remaining, bool (*shrink)(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket), @@ -1650,9 +1646,9 @@ drm_gem_lru_scan(struct drm_gem_lru *lru, struct drm_gem_object *obj; unsigned freed = 0; - drm_gem_lru_init(&still_in_lru, lru->lock); + drm_gem_lru_init(&still_in_lru); - mutex_lock(lru->lock); + mutex_lock(&dev->gem_lru_mutex); while (freed < nr_to_scan) { obj = list_first_entry_or_null(&lru->list, typeof(*obj), lru_node); @@ -1675,7 +1671,7 @@ drm_gem_lru_scan(struct drm_gem_lru *lru, * rest of the loop body, to reduce contention with other * code paths that need the LRU lock */ - mutex_unlock(lru->lock); + mutex_unlock(&dev->gem_lru_mutex); if (ticket) ww_acquire_init(ticket, &reservation_ww_class); @@ -1709,7 +1705,7 @@ drm_gem_lru_scan(struct drm_gem_lru *lru, tail: drm_gem_object_put(obj); - mutex_lock(lru->lock); + mutex_lock(&dev->gem_lru_mutex); } /* @@ -1721,7 +1717,7 @@ tail: list_splice_tail(&still_in_lru.list, &lru->list); lru->count += still_in_lru.count; - mutex_unlock(lru->lock); + mutex_unlock(&dev->gem_lru_mutex); return freed; } diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 195f40e331e5..cc2bcd14b1c2 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -128,11 +128,10 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv, /* * Initialize the LRUs: */ - mutex_init(&priv->lru.lock); - drm_gem_lru_init(&priv->lru.unbacked, &priv->lru.lock); - drm_gem_lru_init(&priv->lru.pinned, &priv->lru.lock); - drm_gem_lru_init(&priv->lru.willneed, &priv->lru.lock); - drm_gem_lru_init(&priv->lru.dontneed, &priv->lru.lock); + drm_gem_lru_init(&priv->lru.unbacked); + drm_gem_lru_init(&priv->lru.pinned); + drm_gem_lru_init(&priv->lru.willneed); + drm_gem_lru_init(&priv->lru.dontneed); /* Initialize stall-on-fault */ spin_lock_init(&priv->fault_stall_lock); @@ -140,7 +139,7 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv, /* Teach lockdep about lock ordering wrt. shrinker: */ fs_reclaim_acquire(GFP_KERNEL); - might_lock(&priv->lru.lock); + might_lock(&ddev->gem_lru_mutex); fs_reclaim_release(GFP_KERNEL); if (priv->kms_init) { diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 6d847d593f1a..617b3c4b42c0 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -150,13 +150,6 @@ struct msm_drm_private { * DONTNEED state (ie. can be purged) */ struct drm_gem_lru dontneed; - - /** - * lock: - * - * Protects manipulation of all of the LRUs. - */ - struct mutex lock; } lru; struct notifier_block vmap_notifier; diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 2cb3ab04f125..efd3d3c9a449 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -177,11 +177,11 @@ static void update_lru_locked(struct drm_gem_object *obj) static void update_lru(struct drm_gem_object *obj) { - struct msm_drm_private *priv = obj->dev->dev_private; + struct drm_device *dev = obj->dev; - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); update_lru_locked(obj); - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); } static struct page **get_pages(struct drm_gem_object *obj) @@ -292,11 +292,11 @@ void msm_gem_pin_obj_locked(struct drm_gem_object *obj) static void pin_obj_locked(struct drm_gem_object *obj) { - struct msm_drm_private *priv = obj->dev->dev_private; + struct drm_device *dev = obj->dev; - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); msm_gem_pin_obj_locked(obj); - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); } struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj) @@ -487,16 +487,16 @@ int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct drm_gpuva *vma) void msm_gem_unpin_locked(struct drm_gem_object *obj) { - struct msm_drm_private *priv = obj->dev->dev_private; + struct drm_device *dev = obj->dev; struct msm_gem_object *msm_obj = to_msm_bo(obj); msm_gem_assert_locked(obj); - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); msm_obj->pin_count--; GEM_WARN_ON(msm_obj->pin_count < 0); update_lru_locked(obj); - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); } /* Special unpin path for use in fence-signaling path, avoiding the need @@ -507,10 +507,10 @@ void msm_gem_unpin_locked(struct drm_gem_object *obj) */ void msm_gem_unpin_active(struct drm_gem_object *obj) { - struct msm_drm_private *priv = obj->dev->dev_private; + struct drm_device *dev = obj->dev; struct msm_gem_object *msm_obj = to_msm_bo(obj); - GEM_WARN_ON(!mutex_is_locked(&priv->lru.lock)); + GEM_WARN_ON(!mutex_is_locked(&dev->gem_lru_mutex)); msm_obj->pin_count--; GEM_WARN_ON(msm_obj->pin_count < 0); @@ -797,12 +797,12 @@ void msm_gem_put_vaddr(struct drm_gem_object *obj) */ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv) { - struct msm_drm_private *priv = obj->dev->dev_private; + struct drm_device *dev = obj->dev; struct msm_gem_object *msm_obj = to_msm_bo(obj); msm_gem_lock(obj); - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); if (msm_obj->madv != __MSM_MADV_PURGED) msm_obj->madv = madv; @@ -814,7 +814,7 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv) */ update_lru_locked(obj); - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); msm_gem_unlock(obj); @@ -824,7 +824,6 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv) void msm_gem_purge(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; - struct msm_drm_private *priv = obj->dev->dev_private; struct msm_gem_object *msm_obj = to_msm_bo(obj); msm_gem_assert_locked(obj); @@ -839,10 +838,10 @@ void msm_gem_purge(struct drm_gem_object *obj) put_pages(obj); - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); /* A one-way transition: */ msm_obj->madv = __MSM_MADV_PURGED; - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); drm_gem_free_mmap_offset(obj); diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 31fa51a44f86..c07af9602fee 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -186,7 +186,7 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) if (!stages[i].cond) continue; stages[i].freed = - drm_gem_lru_scan(stages[i].lru, nr, + drm_gem_lru_scan(priv->dev, stages[i].lru, nr, &stages[i].remaining, stages[i].shrink, &ticket); @@ -255,7 +255,7 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) unsigned long remaining = 0; for (idx = 0; lrus[idx] && unmapped < vmap_shrink_limit; idx++) { - unmapped += drm_gem_lru_scan(lrus[idx], + unmapped += drm_gem_lru_scan(priv->dev, lrus[idx], vmap_shrink_limit - unmapped, &remaining, vmap_shrink, diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 26ea8a28be47..3c6bc90c3d48 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -352,7 +352,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit) static int submit_pin_objects(struct msm_gem_submit *submit) { - struct msm_drm_private *priv = submit->dev->dev_private; + struct drm_device *dev = submit->dev; int i, ret = 0; for (i = 0; i < submit->nr_bos; i++) { @@ -381,11 +381,11 @@ static int submit_pin_objects(struct msm_gem_submit *submit) * get_pages() which could trigger reclaim.. and if we held the LRU lock * could trigger deadlock with the shrinker). */ - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); for (i = 0; i < submit->nr_bos; i++) { msm_gem_pin_obj_locked(submit->bos[i].obj); } - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); submit->bos_pinned = true; diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 1a952b171ed7..c4cfe036066b 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -702,7 +702,7 @@ static struct dma_fence * msm_vma_job_run(struct drm_sched_job *_job) { struct msm_vm_bind_job *job = to_msm_vm_bind_job(_job); - struct msm_drm_private *priv = job->vm->drm->dev_private; + struct drm_device *dev = job->vm->drm; struct msm_gem_vm *vm = to_msm_vm(job->vm); struct drm_gem_object *obj; int ret = vm->unusable ? -EINVAL : 0; @@ -745,13 +745,13 @@ msm_vma_job_run(struct drm_sched_job *_job) if (ret) msm_gem_vm_unusable(job->vm); - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); job_foreach_bo (obj, job) { msm_gem_unpin_active(obj); } - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); /* VM_BIND ops are synchronous, so no fence to wait on: */ return NULL; @@ -1305,7 +1305,7 @@ vm_bind_job_pin_objects(struct msm_vm_bind_job *job) return PTR_ERR(pages); } - struct msm_drm_private *priv = job->vm->drm->dev_private; + struct drm_device *dev = job->vm->drm; /* * A second loop while holding the LRU lock (a) avoids acquiring/dropping @@ -1314,10 +1314,10 @@ vm_bind_job_pin_objects(struct msm_vm_bind_job *job) * get_pages() which could trigger reclaim.. and if we held the LRU lock * could trigger deadlock with the shrinker). */ - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); job_foreach_bo (obj, job) msm_gem_pin_obj_locked(obj); - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); job->bos_pinned = true; diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 30ddb5351e98..2d6b930b766e 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -16,13 +16,13 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) struct msm_gem_submit *submit = to_msm_submit(job); struct msm_fence_context *fctx = submit->ring->fctx; struct msm_gpu *gpu = submit->gpu; - struct msm_drm_private *priv = gpu->dev->dev_private; + struct drm_device *dev = gpu->dev; unsigned nr_cmds = submit->nr_cmds; int i; msm_fence_init(submit->hw_fence, fctx); - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); for (i = 0; i < submit->nr_bos; i++) { struct drm_gem_object *obj = submit->bos[i].obj; @@ -32,7 +32,7 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) submit->bos_pinned = false; - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); /* TODO move submit path over to using a per-ring lock.. */ mutex_lock(&gpu->lock); diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index bc78fb77cc27..768a8dae83c5 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -375,6 +375,13 @@ struct drm_device { * Root directory for debugfs files. */ struct dentry *debugfs_root; + + /** + * @gem_lru_mutex: + * + * Lock protecting movement of GEM objects between LRUs. + */ + struct mutex gem_lru_mutex; }; void drm_dev_set_dma_dev(struct drm_device *dev, struct device *dma_dev); diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 86f5846154f7..8a704f6a65c1 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -245,17 +245,11 @@ struct drm_gem_object_funcs { * for lockless &shrinker.count_objects, and provides * &drm_gem_lru_scan for driver's &shrinker.scan_objects * implementation. + * + * Any access to this kind of object must be done with + * drm_device::gem_lru_mutex held. */ struct drm_gem_lru { - /** - * @lock: - * - * Lock protecting movement of GEM objects between LRUs. All - * LRUs that the object can move between should be protected - * by the same lock. - */ - struct mutex *lock; - /** * @count: * @@ -453,6 +447,9 @@ struct drm_gem_object { * @lru: * * The current LRU list that the GEM object is on. + * + * Access to this field must be done with drm_device::gem_lru_mutex + * held. */ struct drm_gem_lru *lru; }; @@ -610,12 +607,13 @@ void drm_gem_unlock_reservations(struct drm_gem_object **objs, int count, int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, u32 handle, u64 *offset); -void drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock); +void drm_gem_lru_init(struct drm_gem_lru *lru); void drm_gem_lru_remove(struct drm_gem_object *obj); void drm_gem_lru_move_tail_locked(struct drm_gem_lru *lru, struct drm_gem_object *obj); void drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj); unsigned long -drm_gem_lru_scan(struct drm_gem_lru *lru, +drm_gem_lru_scan(struct drm_device *dev, + struct drm_gem_lru *lru, unsigned int nr_to_scan, unsigned long *remaining, bool (*shrink)(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket), -- cgit v1.2.3 From d1e280334b7f0a1df441e08bd1f6a1bcc36b3bbb Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Mon, 18 May 2026 07:31:21 +0200 Subject: usb: core: Fix SuperSpeed root hub wMaxPacketSize There is no good reason to have wBytesPerInterval < wMaxPacketSize - either one is too low or the other too high, and we may want to warn about such descriptors. Start with cleaning up our own root hubs. USB 3.2 section 10.15.1 sets wMaxPacketSize and wBytesPerInterval of SuperSpeed hub status endpoints at 2 bytes, so reduce wMaxPacketSize from its former value of 4, which was derived from USB 2.0 spec and the kernel's USB_MAXCHILDREN limit. They don't apply because USB 3.2 10.15.2.1 specifies SuperSpeed hubs to have up to 15 ports. Suggested-by: Mathias Nyman Signed-off-by: Michal Pecio Link: https://patch.msgid.link/20260518073121.7bc1da0f.michal.pecio@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 89221f1ce769..b181b43a35dc 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -328,9 +328,7 @@ static const u8 ss_rh_config_descriptor[] = { USB_DT_ENDPOINT, /* __u8 ep_bDescriptorType; Endpoint */ 0x81, /* __u8 ep_bEndpointAddress; IN Endpoint 1 */ 0x03, /* __u8 ep_bmAttributes; Interrupt */ - /* __le16 ep_wMaxPacketSize; 1 + (MAX_ROOT_PORTS / 8) - * see hub.c:hub_configure() for details. */ - (USB_MAXCHILDREN + 1 + 7) / 8, 0x00, + 0x02, 0x00, /* __le16 ep_wMaxPacketSize; 2 bytes per USB3 10.15.1 */ 0x0c, /* __u8 ep_bInterval; (256ms -- usb 2.0 spec) */ /* one SuperSpeed endpoint companion descriptor */ -- cgit v1.2.3 From 727d045d064b7c9a24db3bce9c0485a382cb768b Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Mon, 18 May 2026 07:32:07 +0200 Subject: usb: core: Fix up Interrupt IN endpoints with bogus wBytesPerInterval Tao Xue found that some common devices violate USB 3.x section 9.6.7 by reporting wBytesPerInterval lower than the size of packets they actually send. I confirmed that AX88179 may set it to 0 and RTL8153 CDC configuration sets it to 8 but sends both 8 and 16 byte packets: S Ii:11:007:3 -115:128 16 < C Ii:11:007:3 0:128 8 = a1000000 01000000 S Ii:11:007:3 -115:128 16 < C Ii:11:007:3 0:128 16 = a12a0000 01000800 00000000 00000000 Most xHCI host controllers neglect interrupt bandwidth reservations and let such devices exceed theirs, some fail the URB with EOVERFLOW. Assume that wBytesPerInterval lower than wMaxPacketSize is bogus and increase it to the worst case maximum on interrupt IN endpoints. This solves xHCI problems and appears to have no other effect. Interrupt transfers are not limited to one interval and drivers submit URBs of class defined size without looking at wBytesPerInterval. Any multi- interval transfer is considered terminated by a packet shorter than wMaxPacketSize regardless of wBytesPerInterval - see USB3 8.10.3. Stay in spec on OUT endpoints and isochronous. No buggy devices are known and we don't want to risk sending more data than the device is prepared to handle or confusing isoc drivers regarding altsetting capacities guaranteed by the device itself. And don't complain when wMaxPacketSize <= wBytesPerInterval < wMaxPacketSize * (bMaxBurst+1) because enabling this seems to be the exact goal of the spec. Reported-and-tested-by: Tao Xue Closes: https://lore.kernel.org/linux-usb/20260402021400.28853-1-xuetao09@huawei.com/ Cc: stable@vger.kernel.org Signed-off-by: Michal Pecio Link: https://patch.msgid.link/20260518073207.5b7d26e7.michal.pecio@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 417140b012bb..d9171bf7bc88 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -191,7 +191,14 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, (desc->bMaxBurst + 1); else max_tx = 999999; - if (le16_to_cpu(desc->wBytesPerInterval) > max_tx) { + /* + * wBytesPerInterval > max_tx is bogus, but USB3 spec doesn't forbid the opposite. + * Experience shows that wBytesPerInterval < wMaxPacketSize on common interrupt IN + * endpoints is usually bogus too, and recent HCs enforce interrupt BW limits. + */ + if (le16_to_cpu(desc->wBytesPerInterval) > max_tx || + (le16_to_cpu(desc->wBytesPerInterval) < usb_endpoint_maxp(&ep->desc) && + usb_endpoint_is_int_in(&ep->desc))) { dev_notice(ddev, "%s endpoint with wBytesPerInterval of %d in " "config %d interface %d altsetting %d ep %d: " "setting to %d\n", -- cgit v1.2.3 From af8c5aa7a9c6f503d81f103d7ab4f8d759521de3 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Mon, 18 May 2026 07:32:58 +0200 Subject: usb: core: Clean up SuperSpeed/eUSB2 descriptor validation logging Core usually prints endpoint addresses with 0x%X format. Change this code to use it too, instead of just %d. Particularly for IN, 0x83 seems more readable than 131. While at that, fix checkpatch warnings about multi-line quoted strings, as well as missing or doubled whitespace in those strings. Signed-off-by: Michal Pecio Link: https://patch.msgid.link/20260518073258.6532bdd5.michal.pecio@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 37 +++++++++++-------------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index d9171bf7bc88..45e20c6d76c0 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -56,8 +56,7 @@ static void usb_parse_ssp_isoc_endpoint_companion(struct device *ddev, desc = (struct usb_ssp_isoc_ep_comp_descriptor *) buffer; if (size < USB_DT_SSP_ISOC_EP_COMP_SIZE || desc->bDescriptorType != USB_DT_SSP_ISOC_ENDPOINT_COMP) { - dev_notice(ddev, "Invalid SuperSpeedPlus isoc endpoint companion" - "for config %d interface %d altsetting %d ep %d.\n", + dev_notice(ddev, "Invalid SuperSpeedPlus isoc endpoint companion for config %d interface %d altsetting %d ep 0x%X.\n", cfgno, inum, asnum, ep->desc.bEndpointAddress); return; } @@ -91,7 +90,7 @@ static void usb_parse_eusb2_isoc_endpoint_companion(struct device *ddev, size -= h->bLength; } - dev_notice(ddev, "No eUSB2 isoc ep %d companion for config %d interface %d altsetting %d\n", + dev_notice(ddev, "No eUSB2 isoc ep 0x%X companion for config %d interface %d altsetting %d\n", ep->desc.bEndpointAddress, cfgno, inum, asnum); } @@ -115,9 +114,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, } if (desc->bDescriptorType != USB_DT_SS_ENDPOINT_COMP) { - dev_notice(ddev, "No SuperSpeed endpoint companion for config %d " - " interface %d altsetting %d ep %d: " - "using minimum values\n", + dev_notice(ddev, "No SuperSpeed endpoint companion for config %d interface %d altsetting %d ep 0x%X: using minimum values\n", cfgno, inum, asnum, ep->desc.bEndpointAddress); /* Fill in some default values. @@ -141,42 +138,32 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, /* Check the various values */ if (usb_endpoint_xfer_control(&ep->desc) && desc->bMaxBurst != 0) { - dev_notice(ddev, "Control endpoint with bMaxBurst = %d in " - "config %d interface %d altsetting %d ep %d: " - "setting to zero\n", desc->bMaxBurst, - cfgno, inum, asnum, ep->desc.bEndpointAddress); + dev_notice(ddev, "Control endpoint with bMaxBurst = %d in config %d interface %d altsetting %d ep 0x%X: setting to zero\n", + desc->bMaxBurst, cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bMaxBurst = 0; } else if (desc->bMaxBurst > 15) { - dev_notice(ddev, "Endpoint with bMaxBurst = %d in " - "config %d interface %d altsetting %d ep %d: " - "setting to 15\n", desc->bMaxBurst, - cfgno, inum, asnum, ep->desc.bEndpointAddress); + dev_notice(ddev, "Endpoint with bMaxBurst = %d in config %d interface %d altsetting %d ep 0x%X: setting to 15\n", + desc->bMaxBurst, cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bMaxBurst = 15; } if ((usb_endpoint_xfer_control(&ep->desc) || usb_endpoint_xfer_int(&ep->desc)) && desc->bmAttributes != 0) { - dev_notice(ddev, "%s endpoint with bmAttributes = %d in " - "config %d interface %d altsetting %d ep %d: " - "setting to zero\n", + dev_notice(ddev, "%s endpoint with bmAttributes = %d in config %d interface %d altsetting %d ep 0x%X: setting to zero\n", usb_endpoint_xfer_control(&ep->desc) ? "Control" : "Bulk", desc->bmAttributes, cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bmAttributes = 0; } else if (usb_endpoint_xfer_bulk(&ep->desc) && desc->bmAttributes > 16) { - dev_notice(ddev, "Bulk endpoint with more than 65536 streams in " - "config %d interface %d altsetting %d ep %d: " - "setting to max\n", + dev_notice(ddev, "Bulk endpoint with more than 65536 streams in config %d interface %d altsetting %d ep 0x%X: setting to max\n", cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bmAttributes = 16; } else if (usb_endpoint_xfer_isoc(&ep->desc) && !USB_SS_SSP_ISOC_COMP(desc->bmAttributes) && USB_SS_MULT(desc->bmAttributes) > 3) { - dev_notice(ddev, "Isoc endpoint has Mult of %d in " - "config %d interface %d altsetting %d ep %d: " - "setting to 3\n", + dev_notice(ddev, "Isoc endpoint has Mult of %d in config %d interface %d altsetting %d ep 0x%X: setting to 3\n", USB_SS_MULT(desc->bmAttributes), cfgno, inum, asnum, ep->desc.bEndpointAddress); ep->ss_ep_comp.bmAttributes = 2; @@ -199,9 +186,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, if (le16_to_cpu(desc->wBytesPerInterval) > max_tx || (le16_to_cpu(desc->wBytesPerInterval) < usb_endpoint_maxp(&ep->desc) && usb_endpoint_is_int_in(&ep->desc))) { - dev_notice(ddev, "%s endpoint with wBytesPerInterval of %d in " - "config %d interface %d altsetting %d ep %d: " - "setting to %d\n", + dev_notice(ddev, "%s endpoint with wBytesPerInterval of %d in config %d interface %d altsetting %d ep 0x%X: setting to %d\n", usb_endpoint_xfer_isoc(&ep->desc) ? "Isoc" : "Int", le16_to_cpu(desc->wBytesPerInterval), cfgno, inum, asnum, ep->desc.bEndpointAddress, -- cgit v1.2.3 From 6eb0168d091404bee11d1f0712515d77b8d01579 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 11 May 2026 19:28:37 +0200 Subject: drm/xe/memirq: Update interrupt handler logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To workaround some corner case hardware limitations, new programming note for the memory based interrupt handler suggests to assume that some status bytes, like GT_MI_USER_INTERRUPT and GUC_INTR_GUC2HOST, are always set. Update our interrupt handler to follow the new rules. Bspec: 53672 Fixes: a6581ebe7685 ("drm/xe/vf: Introduce Memory Based Interrupts Handler") Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Cc: Matthew Brost Reviewed-by: Michał Winiarski Link: https://patch.msgid.link/20260511172838.2299-2-michal.wajdeczko@intel.com (cherry picked from commit 284f4cae4579eed9dd4406f18a6c1becc69f8931) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_memirq.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index 811e07136efb..579af47edc61 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -427,13 +427,25 @@ static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector, return __memirq_received(memirq, vector, offset, name, true); } +static void memirq_assume_received(struct xe_memirq *memirq, const char *source, + u16 offset, const char *status) +{ + memirq_debug(memirq, "ASSUME %s %s(%u)\n", source, status, offset); +} + static void memirq_dispatch_engine(struct xe_memirq *memirq, struct iosys_map *status, struct xe_hw_engine *hwe) { memirq_debug(memirq, "STATUS %s %*ph\n", hwe->name, 16, status->vaddr); - if (memirq_received(memirq, status, ilog2(GT_MI_USER_INTERRUPT), hwe->name)) - xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT); + /* + * The programming note says to assume that GT_MI_USER_INTERRUPT is always + * set. Check and clear related status byte just for a debug. + */ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEMIRQ) && + !memirq_received(memirq, status, ilog2(GT_MI_USER_INTERRUPT), hwe->name)) + memirq_assume_received(memirq, hwe->name, ilog2(GT_MI_USER_INTERRUPT), "USER"); + xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT); } static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *status, @@ -443,8 +455,14 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat memirq_debug(memirq, "STATUS %s %*ph\n", name, 16, status->vaddr); - if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name)) - xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST); + /* + * The programming note says to assume that GUC_INTR_GUC2HOST is always + * set. Check and clear related status byte just for a debug. + */ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEMIRQ) && + !memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name)) + memirq_assume_received(memirq, name, ilog2(GUC_INTR_GUC2HOST), "GUC2HOST"); + xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST); /* * This is a software interrupt that must be cleared after it's consumed -- cgit v1.2.3 From d3ded53fab90996e7d94a39049e11962dd066725 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Mon, 11 May 2026 15:41:34 +0000 Subject: drm/xe/gsc: Fix double-free of managed BO in error path The error path in xe_gsc_init_post_hwconfig() explicitly frees a BO allocated with xe_managed_bo_create_pin_map() via xe_bo_unpin_map_no_vm(). Since the managed BO already has a devm cleanup action registered, this causes a double-free when devm unwinds during probe failure. Remove the explicit free and let devm handle it, consistent with all other xe_managed_bo_create_pin_map() callers. Fixes: 2e5d47fe7839 ("drm/xe/uc: Use managed bo for HuC and GSC objects") Reviewed-by: Daniele Ceraolo Spurio Assisted-by: Claude:claude-opus-4.6 Link: https://patch.msgid.link/20260511154134.223696-1-shuicheng.lin@intel.com Signed-off-by: Shuicheng Lin (cherry picked from commit 71d61e3e299a17139e47f980a4d6f425b2c59bf7) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gsc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 0d13e357fb43..aab59dc647fb 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -482,8 +482,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) EXEC_QUEUE_FLAG_PERMANENT, 0); if (IS_ERR(q)) { xe_gt_err(gt, "Failed to create queue for GSC submission\n"); - err = PTR_ERR(q); - goto out_bo; + return PTR_ERR(q); } wq = alloc_ordered_workqueue("gsc-ordered-wq", 0); @@ -506,8 +505,6 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) out_q: xe_exec_queue_put(q); -out_bo: - xe_bo_unpin_map_no_vm(bo); return err; } -- cgit v1.2.3 From 9bb2f1d7e6e58b8e434ddc2048c661bf87ccdf2a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 14 May 2026 17:57:26 +0200 Subject: drm/xe/vf: Fix signature of print functions We have plugged-in existing VF print functions into our GT debugfs show helper as-is, but we missed that the helper expects functions to return int, while they were defined as void. This can lead to errors being reported when CFI is enabled. Fixes: 63d8cb8fe3dd ("drm/xe/vf: Expose SR-IOV VF attributes to GT debugfs") Signed-off-by: Michal Wajdeczko Cc: Mohanram Meenakshisundaram Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260514155726.7165-1-michal.wajdeczko@intel.com (cherry picked from commit 314e31c9a8a1c421ee4f7f755b9348aefbbca090) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 24 ++++++++++++++++++------ drivers/gpu/drm/xe/xe_gt_sriov_vf.h | 6 +++--- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 8989c8e1be95..0cd9d77f3351 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -1137,13 +1137,15 @@ void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) } /** - * xe_gt_sriov_vf_print_config - Print VF self config. + * xe_gt_sriov_vf_print_config() - Print VF self config. * @gt: the &xe_gt * @p: the &drm_printer * * This function is for VF use only. + * + * Return: always 0. */ -void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) +int xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) { struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; struct xe_device *xe = gt_to_xe(gt); @@ -1170,16 +1172,20 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "GuC contexts:\t%u\n", config->num_ctxs); drm_printf(p, "GuC doorbells:\t%u\n", config->num_dbs); + + return 0; } /** - * xe_gt_sriov_vf_print_runtime - Print VF's runtime regs received from PF. + * xe_gt_sriov_vf_print_runtime() - Print VF's runtime regs received from PF. * @gt: the &xe_gt * @p: the &drm_printer * * This function is for VF use only. + * + * Return: always 0. */ -void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) +int xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) { struct vf_runtime_reg *vf_regs = gt->sriov.vf.runtime.regs; unsigned int size = gt->sriov.vf.runtime.num_regs; @@ -1188,16 +1194,20 @@ void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) for (; size--; vf_regs++) drm_printf(p, "%#x = %#x\n", vf_regs->offset, vf_regs->value); + + return 0; } /** - * xe_gt_sriov_vf_print_version - Print VF ABI versions. + * xe_gt_sriov_vf_print_version() - Print VF ABI versions. * @gt: the &xe_gt * @p: the &drm_printer * * This function is for VF use only. + * + * Return: always 0. */ -void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) +int xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); struct xe_uc_fw_version *guc_version = >->sriov.vf.guc_version; @@ -1227,6 +1237,8 @@ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) GUC_RELAY_VERSION_LATEST_MAJOR, GUC_RELAY_VERSION_LATEST_MINOR); drm_printf(p, "\thandshake:\t%u.%u\n", pf_version->major, pf_version->minor); + + return 0; } static bool vf_post_migration_shutdown(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index a6f7127521a5..79878f21b1da 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -35,9 +35,9 @@ bool xe_gt_sriov_vf_sched_groups_enabled(struct xe_gt *gt); u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg); void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val); -void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p); -void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p); -void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt); int xe_vf_migration_fixups_complete_count(struct xe_gt *gt); -- cgit v1.2.3 From 96bf49b526e2d03a2b7f6e861925a08f46ed0d28 Mon Sep 17 00:00:00 2001 From: Mohanram Meenakshisundaram Date: Thu, 14 May 2026 23:19:18 +0530 Subject: drm/xe/pf: Fix CFI failure in debugfs access Reading debugfs file (/sys/kernel/debug/dri/0/gt*/pf/adverse_events) with CFI (Control Flow Integrity) enabled, the kernel panics at xe_gt_debugfs_simple_show+0x82/0xc0. xe_gt_debugfs_simple_show() declare a function pointer expecting int return type, but xe_gt_sriov_pf_monitor_print_events() is void return type, leading to CFI failure and kernel panic. [507620.973657] CFI failure at xe_gt_debugfs_simple_show+0x82/0xc0 [xe] (target: xe_gt_sriov_pf_monitor_print_events+0x0/0x130 [xe]; expected type: 0xd72c7139) Fix xe_gt_sriov_pf_monitor_print_events() function by updating to return an int type. Fixes: 1c99d3d3edab ("drm/xe/pf: Expose PF monitor details via debugfs") Signed-off-by: Mohanram Meenakshisundaram Reviewed-by: Michal Wajdeczko Signed-off-by: Michal Wajdeczko Link: https://patch.msgid.link/20260514174918.1556357-2-mohanram.meenakshisundaram@intel.com (cherry picked from commit ff1d386a8359746d9699ac30336e3b0684c68958) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c | 6 +++++- drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c index 7d532bded02a..a85ba4435378 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c @@ -114,8 +114,10 @@ int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 * VFs with no events are not printed. * * This function can only be called on PF. + * + * Return: always 0 */ -void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p) +int xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p) { unsigned int n, total_vfs = xe_gt_sriov_pf_get_totalvfs(gt); const struct xe_gt_sriov_monitor *data; @@ -144,4 +146,6 @@ void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p #undef __format #undef __value } + + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h index 7ca9351a271b..0b8f088d3a16 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h @@ -13,7 +13,7 @@ struct drm_printer; struct xe_gt; void xe_gt_sriov_pf_monitor_flr(struct xe_gt *gt, u32 vfid); -void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p); #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len); -- cgit v1.2.3 From 16be14eec5fdaea9477368856a85173246c41454 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 14 May 2026 18:44:44 -0300 Subject: drm/xe: Define CACHE_MODE_1 as MCR register CACHE_MODE_1 is a MCR register for all platforms that currently use it in the Xe driver. Use XE_REG_MCR() when defining it. Fixes: 8cd7e9759766 ("drm/xe: Add missing DG2 lrc workarounds") Fixes: ff063430caa8 ("drm/xe/mtl: Add some initial MTL workarounds") Bspec: 66534, 67788 Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260514-rtp-mcr-check-v3-1-30dd47855fee@intel.com Signed-off-by: Gustavo Sousa (cherry picked from commit 8f765f0c054e0fb39980a76b4c899b027395929d) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 9c88ca3ce768..4399518c270e 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -152,7 +152,7 @@ #define XEHPG_INSTDONE_GEOM_SVGUNIT XE_REG_MCR(0x666c) -#define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED) +#define CACHE_MODE_1 XE_REG_MCR(0x7004, XE_REG_OPTION_MASKED) #define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) #define COMMON_SLICE_CHICKEN1 XE_REG(0x7010, XE_REG_OPTION_MASKED) -- cgit v1.2.3 From a4660bd949733fd6ea621fdb50fabac2608155e9 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 14 May 2026 18:44:45 -0300 Subject: drm/xe: Define and use MCR version of COMMON_SLICE_CHICKEN1 The register COMMON_SLICE_CHICKEN1 is a MCR register on Xe2. Let's make sure to define a MCR version of it and use it for the relevant IP versions. Use XEHP_ as prefix for the register name, since it is MCR as of Xe_HP. Fixes: a5d221924e13 ("drm/xe/xe2_hpg: Add set of workarounds") Fixes: 9f18b55b6d3f ("drm/xe/xe2: Add workaround 18033852989") Bspec: 66534, 71185 Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260514-rtp-mcr-check-v3-2-30dd47855fee@intel.com Signed-off-by: Gustavo Sousa (cherry picked from commit a672725fdbfc3ea430130039d677c7dc98d59df8) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 4399518c270e..ed9aba5bf75e 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -156,6 +156,7 @@ #define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) #define COMMON_SLICE_CHICKEN1 XE_REG(0x7010, XE_REG_OPTION_MASKED) +#define XEHP_COMMON_SLICE_CHICKEN1 XE_REG_MCR(0x7010, XE_REG_OPTION_MASKED) #define DISABLE_BOTTOM_CLIP_RECTANGLE_TEST REG_BIT(14) #define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 4b1cbced06be..100569a62283 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -651,7 +651,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { }, { XE_RTP_NAME("18033852989"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) + XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) }, { XE_RTP_NAME("15016589081"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), -- cgit v1.2.3 From 6df5678b6a94ac80e31e847074c4b30c21025b1f Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Thu, 14 May 2026 18:44:46 -0300 Subject: drm/xe: Define and use MCR version of COMMON_SLICE_CHICKEN4 The register COMMON_SLICE_CHICKEN4 is a MCR register on both Xe2 and Xe3. Let's make sure to define a MCR version of it and use it for the relevant IP versions. Use XEHP_ as prefix for the register name, since it is MCR as of Xe_HP. v2: - Also change for one entry in lrc_tunnings, which was caught by manual testing and add corresponging Fixes tag in commit message. (Gustavo) Fixes: 8d6f16f1f082 ("drm/xe: Extend Wa_22021007897 to Xe3 platforms") Fixes: e5c13e2c505b ("drm/xe/xe2hpg: Add Wa_22021007897") Fixes: 8ccf5f6b2295 ("drm/xe/tuning: Apply windower hardware filtering setting on Xe3 and Xe3p") Bspec: 66534, 71185, 74417 Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260514-rtp-mcr-check-v3-3-30dd47855fee@intel.com Signed-off-by: Gustavo Sousa (cherry picked from commit 75f65f1a4c06da1d87f28570a9d4cdad28f13360) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_tuning.c | 2 +- drivers/gpu/drm/xe/xe_wa.c | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index ed9aba5bf75e..353fe0bd49bf 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -179,6 +179,7 @@ #define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108) #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) +#define XEHP_COMMON_SLICE_CHICKEN4 XE_REG_MCR(0x7300, XE_REG_OPTION_MASKED) #define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) #define HW_FILTERING REG_BIT(5) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 0b78ec2bc6a4..fcb6698abc6e 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -129,7 +129,7 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { static const struct xe_rtp_entry_sr lrc_tunings[] = { { XE_RTP_NAME("Tuning: Windower HW Filtering"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3599), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, HW_FILTERING)) + XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN4, HW_FILTERING)) }, /* DG2 */ diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 100569a62283..33df43d0bede 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -754,7 +754,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { }, { XE_RTP_NAME("22021007897"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) + XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) }, /* Xe3_LPG */ @@ -770,7 +770,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { }, { XE_RTP_NAME("22021007897"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) + XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) }, { XE_RTP_NAME("14024681466"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), -- cgit v1.2.3 From 2a2451a34afdf563b3102d36a4b6cf335cf813e2 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 24 Apr 2026 10:50:35 +0100 Subject: wifi: ath11k: fix peer resolution on rx path when peer_id=0 It has been observed that on certain chipsets a peer can be assigned peer_id=0. For reception of non-aggregated MPDUs this is fine as ath11k_dp_rx_h_find_peer() has a fallback case where it locates the peer based upon the source MAC address. On an aggregated link, the mpdu_start header is only populated by hardware on the first sub-MSDU. This causes the peer resolution to be skipped for the subsequent MSDUs and the encryption type of these frames to be set to an incorrect value, resulting in these MSDUs being dropped by ieee80211. ath11k_pci 0000:03:00.0: data rx skb 000000002f4b704d len 1534 peer xx:xx:xx:xx:xx:xx 0 ucast sn 3063 he160 rate_idx 9 vht_nss 2 freq 5240 band 1 flag 0x40d1a fcs-err 0 mic-err 0 amsdu-more 0 peer_id 0 first_msdu 1 last_msdu 0 ath11k_pci 0000:03:00.0: data rx skb 0000000038acd580 len 1534 peer (null) 0 ucast sn 3063 he160 rate_idx 9 vht_nss 2 freq 5240 band 1 flag 0x40d00 fcs-err 0 mic-err 0 amsdu-more 0 peer_id 0 first_msdu 0 last_msdu 1 Remove the null peer_id checks in ath11k_dp_rx_h_find_peer() and ath11k_hal_rx_parse_mon_status_tlv(), allowing peers with an assigned ID of 0 to be resolved. Tested-on: QCA2066 hw2.1 PCI WLAN.HSP.1.1-03926.13-QCAHSPSWPL_V2_SILICONZ_CE-2.52297.9 Fixes: 2167fa606c0f ("ath11k: Add support for RX decapsulation offload") Reviewed-by: Baochen Qiang Signed-off-by: Matthew Leach Reviewed-by: P Praneesh Link: https://patch.msgid.link/20260424-ath11k-null-peerid-workaround-v4-1-252b224d3cf6@collabora.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/dp_rx.c | 3 +-- drivers/net/wireless/ath/ath11k/hal_rx.c | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index fe79109adc70..72d5d933656d 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -2214,8 +2214,7 @@ ath11k_dp_rx_h_find_peer(struct ath11k_base *ab, struct sk_buff *msdu) lockdep_assert_held(&ab->base_lock); - if (rxcb->peer_id) - peer = ath11k_peer_find_by_id(ab, rxcb->peer_id); + peer = ath11k_peer_find_by_id(ab, rxcb->peer_id); if (peer) return peer; diff --git a/drivers/net/wireless/ath/ath11k/hal_rx.c b/drivers/net/wireless/ath/ath11k/hal_rx.c index 753bd93f0212..51e0840bc0d1 100644 --- a/drivers/net/wireless/ath/ath11k/hal_rx.c +++ b/drivers/net/wireless/ath/ath11k/hal_rx.c @@ -1467,11 +1467,8 @@ ath11k_hal_rx_parse_mon_status_tlv(struct ath11k_base *ab, case HAL_RX_MPDU_START: { struct hal_rx_mpdu_info *mpdu_info = (struct hal_rx_mpdu_info *)tlv_data; - u16 peer_id; - peer_id = ath11k_hal_rx_mpduinfo_get_peerid(ab, mpdu_info); - if (peer_id) - ppdu_info->peer_id = peer_id; + ppdu_info->peer_id = ath11k_hal_rx_mpduinfo_get_peerid(ab, mpdu_info); break; } case HAL_RXPCU_PPDU_END_INFO: { -- cgit v1.2.3 From 72b8654e3b83548f64524add2e9145e9b6c8a852 Mon Sep 17 00:00:00 2001 From: Willmar Knikker Date: Tue, 5 May 2026 17:17:43 +0000 Subject: wifi: ath11k: fix use after free in ath11k_dp_rx_msdu_coalesce() In ath11k_dp_rx_msdu_coalesce() the loop uses ->is_continuation after the dev_kfree_skb_any(). This can cause a use after free kfence. Use flag for caching is_continuation for use after the dev_kfree_skb_any(). Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Signed-off-by: Willmar Knikker Reviewed-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20260505171709.547274-1-willmar@met-dubbel-l.nl Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/dp_rx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 72d5d933656d..2a413e3a07a7 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -1761,6 +1761,7 @@ static int ath11k_dp_rx_msdu_coalesce(struct ath11k *ar, int buf_first_hdr_len, buf_first_len; struct hal_rx_desc *ldesc; int space_extra, rem_len, buf_len; + bool is_continuation; u32 hal_rx_desc_sz = ar->ab->hw_params.hal_desc_sz; /* As the msdu is spread across multiple rx buffers, @@ -1810,7 +1811,8 @@ static int ath11k_dp_rx_msdu_coalesce(struct ath11k *ar, rem_len = msdu_len - buf_first_len; while ((skb = __skb_dequeue(msdu_list)) != NULL && rem_len > 0) { rxcb = ATH11K_SKB_RXCB(skb); - if (rxcb->is_continuation) + is_continuation = rxcb->is_continuation; + if (is_continuation) buf_len = DP_RX_BUFFER_SIZE - hal_rx_desc_sz; else buf_len = rem_len; @@ -1828,7 +1830,7 @@ static int ath11k_dp_rx_msdu_coalesce(struct ath11k *ar, dev_kfree_skb_any(skb); rem_len -= buf_len; - if (!rxcb->is_continuation) + if (!is_continuation) break; } -- cgit v1.2.3 From f51e4b3b5574ad8cb5b16b11f8a1452147ece87a Mon Sep 17 00:00:00 2001 From: Kyle Farnung Date: Wed, 13 May 2026 21:52:12 -0700 Subject: wifi: ath11k: clear shared SRNG pointer state on restart LMAC rings reuse the shared rdp/wrp pointer buffers without going through the normal SRNG hw-init path that zeros non-LMAC ring pointers. After restart, ath11k_hal_srng_clear() can therefore hand stale hp/tp state from the previous firmware instance back to the new one. Clear the shared pointer buffers while keeping the allocations in place so restart still avoids reallocating SRNG DMA memory, but starts with fresh ring-pointer state. Fixes: 32be3ca4cf78b ("wifi: ath11k: HAL SRNG: don't deinitialize and re-initialize again") Cc: stable@vger.kernel.org Closes: https://lore.kernel.org/all/CAOPSVF04q6uvVdq8GTRLHBrVMdpt9=o9wVcFMc6f-yhmSBcZqQ@mail.gmail.com/ Signed-off-by: Kyle Farnung Reviewed-by: Rameshkumar Sundaram Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20260513-kfarnung-ath11k-srng-clear-pointer-state-v1-1-bc700dd8b333@gmail.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/hal.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/hal.c b/drivers/net/wireless/ath/ath11k/hal.c index e821e5a62c1c..98bd9e3f0aae 100644 --- a/drivers/net/wireless/ath/ath11k/hal.c +++ b/drivers/net/wireless/ath/ath11k/hal.c @@ -1387,14 +1387,22 @@ EXPORT_SYMBOL(ath11k_hal_srng_deinit); void ath11k_hal_srng_clear(struct ath11k_base *ab) { - /* No need to memset rdp and wrp memory since each individual - * segment would get cleared in ath11k_hal_srng_src_hw_init() - * and ath11k_hal_srng_dst_hw_init(). + /* + * Preserve the shared pointer buffers, but clear the previous + * firmware instance's hp/tp state before handing them back to FW. + * LMAC rings reuse this shared memory without going through the + * normal SRNG hw-init path that zeros non-LMAC ring pointers. */ memset(ab->hal.srng_list, 0, sizeof(ab->hal.srng_list)); memset(ab->hal.shadow_reg_addr, 0, sizeof(ab->hal.shadow_reg_addr)); + if (ab->hal.rdp.vaddr) + memset(ab->hal.rdp.vaddr, 0, + sizeof(*ab->hal.rdp.vaddr) * HAL_SRNG_RING_ID_MAX); + if (ab->hal.wrp.vaddr) + memset(ab->hal.wrp.vaddr, 0, + sizeof(*ab->hal.wrp.vaddr) * HAL_SRNG_NUM_LMAC_RINGS); ab->hal.avail_blk_resource = 0; ab->hal.current_blk_index = 0; ab->hal.num_shadow_reg_configured = 0; -- cgit v1.2.3 From 60fb2cf51e77bb1c0261160b4be44209d68956b1 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 14 May 2026 11:32:51 +0800 Subject: wifi: ath12k: fix EHT TX MCS limitation due to wrong 20 MHz-only parsing When connecting to an AP configured for EHT 20 MHz with a full EHT MCS/NSS map (supporting MCS 0-13) Supported EHT-MCS and NSS Set EHT-MCS Map (BW <= 80MHz): 0x444444 .... .... .... .... .... 0100 = Rx Max Nss That Supports EHT-MCS 0-9: 4 .... .... .... .... 0100 .... = Tx Max Nss That Supports EHT-MCS 0-9: 4 .... .... .... 0100 .... .... = Rx Max Nss That Supports EHT-MCS 10-11: 4 .... .... 0100 .... .... .... = Tx Max Nss That Supports EHT-MCS 10-11: 4 .... 0100 .... .... .... .... = Rx Max Nss That Supports EHT-MCS 12-13: 4 0100 .... .... .... .... .... = Tx Max Nss That Supports EHT-MCS 12-13: 4 TX throughput is observed to be significantly lower than expected. Investigation shows that TX rates are limited to EHT MCS 11, even though the AP advertises support for EHT MCS 12/13. The root cause is an incorrect parsing of the Supported EHT-MCS and NSS Set element in ath12k_peer_assoc_h_eht(). IEEE Std 802.11be-2024 Figure 9-1074as describes the format for 20 MHz-Only Non-AP STAs. IEEE Std 802.11be-2024 Figure 9-1074at describes the format for all other AP and non-AP STAs. Currently the first format is parsed when the peer advertises no wider HE channel width support, without considering whether it is an AP or a non-AP STA. This is incorrect: the peer AP's capabilities must be parsed using Figure 9-1074at even when it operates on 20 MHz only. Parsing it as Figure 9-1074as causes rx_tx_mcs13_max_nss to be interpreted as zero, which is then passed to firmware, leading firmware to assume the peer does not support MCS 13 and to limit TX rates at MCS 11. Fix this by parsing the Figure 9-1074as format only when the peer is a 20 MHz-Only non-AP STA, i.e. when the local interface operates as AP or mesh point. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00302-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.115823.3 Fixes: 6c95151e2e77 ("wifi: ath12k: Add EHT MCS/NSS rates to Peer Assoc") Signed-off-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20260514-ath12k-fix-20mhz-only-mcs-map-v1-1-a38d4a9b21a2@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index df2334f3bad6..2cff9485c95a 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -3446,7 +3446,9 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, arg->peer_eht_mcs_count++; fallthrough; default: - if (!(link_sta->he_cap.he_cap_elem.phy_cap_info[0] & + if ((vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_MESH_POINT) && + !(link_sta->he_cap.he_cap_elem.phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL)) { bw_20 = &eht_cap->eht_mcs_nss_supp.only_20mhz; @@ -3475,7 +3477,9 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, arg->punct_bitmap = ~arvif->punct_bitmap; arg->eht_disable_mcs15 = link_conf->eht_disable_mcs15; - if (!(link_sta->he_cap.he_cap_elem.phy_cap_info[0] & + if ((vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_MESH_POINT) && + !(link_sta->he_cap.he_cap_elem.phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL)) { if (bw_20->rx_tx_mcs13_max_nss) max_nss = max(max_nss, u8_get_bits(bw_20->rx_tx_mcs13_max_nss, -- cgit v1.2.3 From e9f5e8da29762df1111a58ae0b4a83091595d834 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Wed, 29 Apr 2026 11:58:59 +0200 Subject: drm/mediatek: mtk_hdmi_ddc_v2: Fix non-static global variable The struct 'mtk_hdmi_ddc_v2_driver' is not used outside of the mtk_hdmi_ddc_v2.c file, so make it static to silence sparse warning: ``` drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c:392:24: sparse: warning: symbol 'mtk_hdmi_ddc_v2_driver' was not declared. Should it be static? ``` Fixes: 8d0f79886273 ("drm/mediatek: Introduce HDMI/DDC v2 for MT8195/MT8188") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202604132044.fcYjEcU8-lkp@intel.com/ Signed-off-by: Louis-Alexis Eyraud Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20260429-mediatek-drm-fix-sparse-warnings-v1-1-d95c4d118b83@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c index d937219fdb7e..31e81a6de6d8 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c @@ -389,7 +389,7 @@ static const struct of_device_id mtk_hdmi_ddc_v2_match[] = { }; MODULE_DEVICE_TABLE(of, mtk_hdmi_ddc_v2_match); -struct platform_driver mtk_hdmi_ddc_v2_driver = { +static struct platform_driver mtk_hdmi_ddc_v2_driver = { .probe = mtk_hdmi_ddc_v2_probe, .driver = { .name = "mediatek-hdmi-ddc-v2", -- cgit v1.2.3 From dc245d9a7f1b06f86271d4e524d6e5634c5ce312 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Wed, 29 Apr 2026 11:59:00 +0200 Subject: drm/mediatek: mtk_hdmi_v2: Fix non-static global variable The struct 'mtk_hdmi_v2_clk_names' is not used outside of the mtk_hdmi_v2.c file, so make it static to silence sparse warning: ``` drivers/gpu/drm/mediatek/mtk_hdmi_v2.c:53:12: sparse: warning: symbol 'mtk_hdmi_v2_clk_names' was not declared. Should it be static? ``` Fixes: 8d0f79886273 ("drm/mediatek: Introduce HDMI/DDC v2 for MT8195/MT8188") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202604132044.fcYjEcU8-lkp@intel.com/ Signed-off-by: Louis-Alexis Eyraud Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20260429-mediatek-drm-fix-sparse-warnings-v1-2-d95c4d118b83@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_v2.c b/drivers/gpu/drm/mediatek/mtk_hdmi_v2.c index b5c738380dc2..a8eb6fd0908b 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_v2.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_v2.c @@ -50,7 +50,7 @@ enum mtk_hdmi_v2_clk_id { MTK_HDMI_V2_CLK_COUNT, }; -const char *const mtk_hdmi_v2_clk_names[MTK_HDMI_V2_CLK_COUNT] = { +static const char *const mtk_hdmi_v2_clk_names[MTK_HDMI_V2_CLK_COUNT] = { [MTK_HDMI_V2_CLK_HDMI_APB_SEL] = "bus", [MTK_HDMI_V2_CLK_HDCP_SEL] = "hdcp", [MTK_HDMI_V2_CLK_HDCP_24M_SEL] = "hdcp24m", -- cgit v1.2.3 From 571f00a5fb725984049bd532ee8193cc34ff2994 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Wed, 29 Apr 2026 11:59:01 +0200 Subject: drm/mediatek: mtk_cec: Fix non-static global variable The struct 'mtk_cec_driver' is not used outside of the mtk_cec.c file, so make it static to silence sparse warning: ``` drivers/gpu/drm/mediatek/mtk_cec.c:243:24: sparse: warning: symbol 'mtk_cec_driver' was not declared. Should it be static? ``` Fixes: 1e914a89ab7e ("drm/mediatek: mtk_cec: Switch to register as module_platform_driver") Signed-off-by: Louis-Alexis Eyraud Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20260429-mediatek-drm-fix-sparse-warnings-v1-3-d95c4d118b83@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_cec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c index c7be530ca041..b8ccd6e55bed 100644 --- a/drivers/gpu/drm/mediatek/mtk_cec.c +++ b/drivers/gpu/drm/mediatek/mtk_cec.c @@ -240,7 +240,7 @@ static const struct of_device_id mtk_cec_of_ids[] = { }; MODULE_DEVICE_TABLE(of, mtk_cec_of_ids); -struct platform_driver mtk_cec_driver = { +static struct platform_driver mtk_cec_driver = { .probe = mtk_cec_probe, .remove = mtk_cec_remove, .driver = { -- cgit v1.2.3 From 87ed4e845d5a90bba1a56c0a5c580a13982e8648 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Wed, 29 Apr 2026 11:59:02 +0200 Subject: drm/mediatek: mtk_hdmi_ddc: Fix non-static global variable The struct 'mtk_hdmi_ddc_driver' is not used outside of the mtk_hdmi_ddc.c file, so make it static to silence sparse warning: ``` drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c:331:24: sparse: warning: symbol 'mtk_hdmi_ddc_driver' was not declared. Should it be static? ``` Fixes: c241118b6216 ("drm/mediatek: mtk_hdmi_ddc: Switch to register as module_platform_driver") Signed-off-by: Louis-Alexis Eyraud Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20260429-mediatek-drm-fix-sparse-warnings-v1-4-d95c4d118b83@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c index 6358e1af69b4..2acbdb025d89 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c @@ -328,7 +328,7 @@ static const struct of_device_id mtk_hdmi_ddc_match[] = { }; MODULE_DEVICE_TABLE(of, mtk_hdmi_ddc_match); -struct platform_driver mtk_hdmi_ddc_driver = { +static struct platform_driver mtk_hdmi_ddc_driver = { .probe = mtk_hdmi_ddc_probe, .remove = mtk_hdmi_ddc_remove, .driver = { -- cgit v1.2.3 From cf18e36455603d65d4745de83e2d1743c54ada47 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 17 May 2026 17:30:10 -0400 Subject: io_uring: propagate array_index_nospec opcode into req->opcode Commit 1e988c3fe126 ("io_uring: prevent opcode speculation") added array_index_nospec() to io_init_req(), but applied it only to a local opcode variable. req->opcode is initialized from sqe->opcode before the bounds check and remains the raw value. Keep req->opcode as the canonical opcode in io_init_req(): reject out-of-range values architecturally, then write the array_index_nospec() result back to req->opcode before any table lookup. This keeps downstream users of req->opcode from observing the raw user byte on a mispredicted path. No functional change: array_index_nospec() is a no-op for opcodes in [0, IORING_OP_LAST), and out-of-range opcodes are still rejected at the bounds check above the assignment. Fixes: 1e988c3fe126 ("io_uring: prevent opcode speculation") Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260517213010.696135-1-michael.bommarito@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 036145ee466c..103b6c88f252 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1738,10 +1738,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, const struct io_issue_def *def; unsigned int sqe_flags; int personality; - u8 opcode; req->ctx = ctx; - req->opcode = opcode = READ_ONCE(sqe->opcode); + req->opcode = READ_ONCE(sqe->opcode); /* same numerical values with corresponding REQ_F_*, safe to copy */ sqe_flags = READ_ONCE(sqe->flags); req->flags = (__force io_req_flags_t) sqe_flags; @@ -1751,13 +1750,13 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->cancel_seq_set = false; req->async_data = NULL; - if (unlikely(opcode >= IORING_OP_LAST)) { + if (unlikely(req->opcode >= IORING_OP_LAST)) { req->opcode = 0; return io_init_fail_req(req, -EINVAL); } - opcode = array_index_nospec(opcode, IORING_OP_LAST); + req->opcode = array_index_nospec(req->opcode, IORING_OP_LAST); - def = &io_issue_defs[opcode]; + def = &io_issue_defs[req->opcode]; if (def->is_128 && !(ctx->flags & IORING_SETUP_SQE128)) { /* * A 128b op on a non-128b SQ requires mixed SQE support as -- cgit v1.2.3 From f23bf992d65a42007c517b060ca35cebdea3525a Mon Sep 17 00:00:00 2001 From: Carl Lee Date: Sat, 16 May 2026 19:55:18 +0800 Subject: nfc: nxp-nci: i2c: use rising-edge IRQ on ACPI systems Some ACPI-based platforms report incorrect IRQ trigger types (e.g. IRQF_TRIGGER_HIGH), which can lead to interrupt storms. Use the historically working rising-edge trigger on ACPI systems to avoid this regression. Device Tree-based systems continue to use the firmware-provided trigger type. Fixes: 57be33f85e36 ("nfc: nxp-nci: remove interrupt trigger type") Signed-off-by: Carl Lee Tested-by: Bartosz Golaszewski Reviewed-by: Bartosz Golaszewski Reviewed-by: Mark Pearson Tested-by: Mark Pearson Tested-by: Luca Stefani Link: https://patch.msgid.link/20260516-nfc-nxp-nci-i2c-restore-irq-trigger-fallback-v3-1-37ba4b6e9086@amd.com Signed-off-by: David Heidelberg --- drivers/nfc/nxp-nci/i2c.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index b3d34433bd14..a6c08175d9dd 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -267,6 +268,7 @@ static int nxp_nci_i2c_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct nxp_nci_i2c_phy *phy; + unsigned long irqflags; int r; if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { @@ -303,9 +305,26 @@ static int nxp_nci_i2c_probe(struct i2c_client *client) if (r < 0) return r; + /* + * ACPI platforms may report incorrect IRQ trigger types + * (e.g. level-high), which can lead to interrupt storms. + * + * Use the historically stable rising-edge trigger for ACPI devices. + * + * On non-ACPI systems (e.g. Device Tree), prefer the firmware- + * provided trigger type, falling back to rising-edge if not set. + */ + if (ACPI_COMPANION(dev)) { + irqflags = IRQF_TRIGGER_RISING; + } else { + irqflags = irq_get_trigger_type(client->irq); + if (!irqflags) + irqflags = IRQF_TRIGGER_RISING; + } + r = request_threaded_irq(client->irq, NULL, nxp_nci_i2c_irq_thread_fn, - IRQF_ONESHOT, + irqflags | IRQF_ONESHOT, NXP_NCI_I2C_DRIVER_NAME, phy); if (r < 0) nfc_err(&client->dev, "Unable to register IRQ handler\n"); -- cgit v1.2.3 From a7e8f3efd50a165ba0189f6dc57f7e51a7d149db Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 12 May 2026 09:43:34 +0200 Subject: spi: qup: fix error pointer deref after DMA setup failure The driver falls back to PIO mode if DMA setup fails during probe. Make sure to the clear the DMA channel pointers on setup failure to avoid dereferencing an error pointer (or attempting to release a channel a second time) on later probe errors or driver unbind. This issue was flagged by Sashiko when reviewing a devres allocation conversion patch. Fixes: 612762e82ae6 ("spi: qup: Add DMA capabilities") Link: https://sashiko.dev/#/patchset/20260505072909.618363-1-johan%40kernel.org?part=4 Cc: stable@vger.kernel.org # 4.1 Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260512074334.914735-1-johan@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-qup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c index 45d9b4cb75e4..50bb7701b9d5 100644 --- a/drivers/spi/spi-qup.c +++ b/drivers/spi/spi-qup.c @@ -996,8 +996,11 @@ static int spi_qup_init_dma(struct spi_controller *host, resource_size_t base) err: dma_release_channel(host->dma_tx); + host->dma_tx = NULL; err_tx: dma_release_channel(host->dma_rx); + host->dma_rx = NULL; + return ret; } -- cgit v1.2.3 From fd3b95866d86844ae747fce9b3438d73ed5f1e7a Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Tue, 12 May 2026 14:52:52 +0800 Subject: ASoC: fsl_sai: Eliminate possible interrupt storm during probe When the SAI peripheral is left in a running state by the bootloader, the driver can experience an interrupt storm during probe that prevents successful initialization. This occurs because the current code registers the IRQ handler before resetting the hardware to a known state. The issue manifests as: - Continuous interrupts firing immediately after devm_request_irq() - Driver probe failure or system hang - Error messages about unhandled interrupts This is particularly problematic on systems where U-Boot or other bootloaders enable SAI for boot-time audio feedback or diagnostics and don't properly disable it before handing control to Linux. Fix this by reordering the probe sequence: 1. Add fsl_sai_reset_hw() to clear TCSR/RCSR control registers, which disables the transmitter/receiver and all interrupt sources 2. Move devm_request_irq() to after hardware initialization This ensures the SAI is in a clean reset state before the interrupt handler can be invoked, preventing the storm while maintaining proper error handling and cleanup paths. Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/20260512065252.75859-1-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_sai.c | 43 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index bd336d2e4cb3..e364552c1f47 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -1370,6 +1370,31 @@ static int fsl_sai_check_version(struct device *dev) return 0; } +static int fsl_sai_reset_hw(struct device *dev) +{ + struct fsl_sai *sai = dev_get_drvdata(dev); + unsigned char ofs = sai->soc_data->reg_offset; + int ret; + + /* + * Clear TCSR/RCSR to reset SAI and disable all interrupts. + * Bootloader may leave SAI running causing interrupt storm. + */ + ret = regmap_write(sai->regmap, FSL_SAI_TCSR(ofs), 0); + if (ret) { + dev_err(dev, "Failed to clear TCSR: %d\n", ret); + return ret; + } + + ret = regmap_write(sai->regmap, FSL_SAI_RCSR(ofs), 0); + if (ret) { + dev_err(dev, "Failed to clear RCSR: %d\n", ret); + return ret; + } + + return 0; +} + /* * Calculate the offset between first two datalines, don't * different offset in one case. @@ -1575,13 +1600,6 @@ static int fsl_sai_probe(struct platform_device *pdev) if (irq < 0) return irq; - ret = devm_request_irq(dev, irq, fsl_sai_isr, IRQF_SHARED, - np->name, sai); - if (ret) { - dev_err(dev, "failed to claim irq %u\n", irq); - return ret; - } - memcpy(&sai->cpu_dai_drv, fsl_sai_dai_template, sizeof(*fsl_sai_dai_template) * ARRAY_SIZE(fsl_sai_dai_template)); @@ -1656,6 +1674,10 @@ static int fsl_sai_probe(struct platform_device *pdev) if (ret < 0) dev_warn(dev, "Error reading SAI version: %d\n", ret); + ret = fsl_sai_reset_hw(dev); + if (ret < 0) + dev_warn(dev, "Failed to reset hardware: %d\n", ret); + /* Select MCLK direction */ if (sai->mclk_direction_output && sai->soc_data->max_register >= FSL_SAI_MCTL) { @@ -1667,6 +1689,13 @@ static int fsl_sai_probe(struct platform_device *pdev) if (ret < 0 && ret != -ENOSYS) goto err_pm_get_sync; + ret = devm_request_irq(dev, irq, fsl_sai_isr, IRQF_SHARED, + np->name, sai); + if (ret) { + dev_err(dev, "failed to claim irq %u\n", irq); + goto err_pm_get_sync; + } + if (of_device_is_compatible(np, "fsl,imx952-sai") && !of_property_read_string(np, "fsl,sai-amix-mode", &str)) { if (!strcmp(str, "bypass")) -- cgit v1.2.3 From 3d67fffb74267772d461c02c67f1eff893ad547d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 12 May 2026 09:47:33 +0200 Subject: spi: sprd: fix error pointer deref after DMA setup failure The driver falls back to PIO mode if DMA setup fails during probe. Make sure to check the dma.enabled flag before trying to release the DMA channels also on late probe errors to avoid dereferencing an error pointer (or attempting to release a channel a second time). This issue was flagged by Sashiko when reviewing a devres allocation conversion patch. Fixes: 386119bc7be9 ("spi: sprd: spi: sprd: Add DMA mode support") Link: https://sashiko.dev/#/patchset/20260505072909.618363-1-johan%40kernel.org?part=10 Cc: stable@vger.kernel.org # 5.1 Cc: Lanqing Liu Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260512074733.915029-1-johan@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-sprd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-sprd.c b/drivers/spi/spi-sprd.c index fd3fd0ce122c..acebf9c2e795 100644 --- a/drivers/spi/spi-sprd.c +++ b/drivers/spi/spi-sprd.c @@ -991,7 +991,8 @@ err_rpm_put: disable_clk: clk_disable_unprepare(ss->clk); release_dma: - sprd_spi_dma_release(ss); + if (ss->dma.enable) + sprd_spi_dma_release(ss); free_controller: spi_controller_put(sctlr); -- cgit v1.2.3 From ea6ec3343e05f7937a53eb6d7617b3abdb4abc19 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 12 May 2026 09:48:09 +0200 Subject: spi: ti-qspi: fix use-after-free after DMA setup failure The driver falls back to PIO mode if DMA setup fails during probe. Make sure to clear the DMA channel pointer also if buffer allocation fails to avoid passing a pointer to the released channel to the DMA engine (or trying to free the channel a second time on late probe errors or driver unbind). This issue was flagged by Sashiko when reviewing a devres allocation conversion patch. Fixes: c687c46e9e45 ("spi: spi-ti-qspi: Use bounce buffer if read buffer is not DMA'ble") Link: https://sashiko.dev/#/patchset/20260505072909.618363-1-johan%40kernel.org?part=17 Cc: stable@vger.kernel.org # 4.12 Cc: Vignesh R Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260512074809.915084-1-johan@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-ti-qspi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c index 1fbd710d616f..e3b413b9828c 100644 --- a/drivers/spi/spi-ti-qspi.c +++ b/drivers/spi/spi-ti-qspi.c @@ -867,6 +867,7 @@ static int ti_qspi_probe(struct platform_device *pdev) dev_err(qspi->dev, "dma_alloc_coherent failed, using PIO mode\n"); dma_release_channel(qspi->rx_chan); + qspi->rx_chan = NULL; goto no_dma; } host->dma_rx = qspi->rx_chan; -- cgit v1.2.3 From 593889c401426004bd0ea0f6d4fcece728b03420 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 May 2026 19:54:41 +0200 Subject: srcu: Don't queue workqueue handlers to never-online CPUs While an srcu_struct structure is in the midst of switching from CPU-0 to all-CPUs state, it can attempt to invoke callbacks for CPUs that have never been online. Worse yet, it can attempt in invoke callbacks for CPUs that never will be online, even including imaginary CPUs not in cpu_possible_mask. This can cause hangs on s390, which is not set up to deal with workqueue handlers being scheduled on such CPUs. This commit therefore causes Tree SRCU to refrain from queueing workqueue handlers on CPUs that have not yet (and might never) come online. Because callbacks are not invoked on CPUs that have not been online, it is an error to invoke call_srcu(), synchronize_srcu(), or synchronize_srcu_expedited() on a CPU that is not yet fully online. However, it turns out to be less code to redirect the callbacks from too-early invocations of call_srcu() than to warn about such invocations. This commit therefore also redirects callbacks queued on not-yet-fully-online CPUs to the boot CPU. Reported-by: Vasily Gorbik Fixes: 61bbcfb50514 ("srcu: Push srcu_node allocation to GP when non-preemptible") Signed-off-by: Paul E. McKenney Tested-by: Vasily Gorbik Tested-by: Samir Reviewed-by: Shrikanth Hegde Cc: Tejun Heo Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Boqun Feng --- kernel/rcu/srcutree.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 0d01cd8c4b4a..7c2f7cc131f7 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -897,11 +897,9 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp { int cpu; - for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { - if (!(mask & (1UL << (cpu - snp->grplo)))) - continue; - srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay); - } + for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) + if ((mask & (1UL << (cpu - snp->grplo))) && rcu_cpu_beenfullyonline(cpu)) + srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay); } /* @@ -1322,7 +1320,9 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, */ idx = __srcu_read_lock_nmisafe(ssp); ss_state = smp_load_acquire(&ssp->srcu_sup->srcu_size_state); - if (ss_state < SRCU_SIZE_WAIT_CALL) + // If !rcu_cpu_beenfullyonline(), interrupts are still disabled, + // so no migration is possible in either direction from this CPU. + if (ss_state < SRCU_SIZE_WAIT_CALL || !rcu_cpu_beenfullyonline(raw_smp_processor_id())) sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id()); else sdp = raw_cpu_ptr(ssp->sda); -- cgit v1.2.3 From 8817005efbdfdf5d4e4814cb5dc52b53d12917d7 Mon Sep 17 00:00:00 2001 From: Qing Ming Date: Sat, 16 May 2026 15:08:49 +0800 Subject: cgroup/rstat: validate cpu before css_rstat_cpu() access css_rstat_updated() is exposed as a BPF kfunc and accepts a caller-provided cpu argument. The function uses cpu for per-cpu rstat lookups without checking whether it refers to a valid possible CPU. A BPF iter/cgroup program with CAP_BPF and CAP_PERFMON can pass an invalid cpu value. On an unfixed UBSCAN_BOUNDS test kernel, cpu == 0x7fffffff triggers: UBSAN: array-index-out-of-bounds in kernel/cgroup/rstat.c:31:9 index 2147483647 is out of range for type 'long unsigned int [64]' Call Trace: css_rstat_updated bpf_iter_run_prog cgroup_iter_seq_show bpf_seq_read Add cpu validation to the BPF-facing css_rstat_updated() kfunc and move the common implementation to __css_rstat_updated() for in-kernel callers. Fixes: a319185be9f5 ("cgroup: bpf: enable bpf programs to integrate with rstat") Signed-off-by: Qing Ming Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 2 +- include/linux/cgroup.h | 1 + kernel/cgroup/rstat.c | 30 ++++++++++++++++++++---------- mm/memcontrol.c | 6 +++--- 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 554c87bb4a86..bc63bd220865 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -2241,7 +2241,7 @@ void blk_cgroup_bio_start(struct bio *bio) } u64_stats_update_end_irqrestore(&bis->sync, flags); - css_rstat_updated(&blkcg->css, cpu); + __css_rstat_updated(&blkcg->css, cpu); put_cpu(); } diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e52160e85af4..e011dc43fcf1 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -776,6 +776,7 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) /* * cgroup scalable recursive statistics. */ +void __css_rstat_updated(struct cgroup_subsys_state *css, int cpu); void css_rstat_updated(struct cgroup_subsys_state *css, int cpu); void css_rstat_flush(struct cgroup_subsys_state *css); diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 150e5871e66f..ed60ba119c68 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only #include "cgroup-internal.h" +#include #include #include @@ -53,7 +54,7 @@ static inline struct llist_head *ss_lhead_cpu(struct cgroup_subsys *ss, int cpu) } /** - * css_rstat_updated - keep track of updated rstat_cpu + * __css_rstat_updated - keep track of updated rstat_cpu * @css: target cgroup subsystem state * @cpu: cpu on which rstat_cpu was updated * @@ -63,20 +64,17 @@ static inline struct llist_head *ss_lhead_cpu(struct cgroup_subsys *ss, int cpu) * * NOTE: if the user needs the guarantee that the updater either add itself in * the lockless list or the concurrent flusher flushes its updated stats, a - * memory barrier is needed before the call to css_rstat_updated() i.e. a + * memory barrier is needed before the call to __css_rstat_updated() i.e. a * barrier after updating the per-cpu stats and before calling - * css_rstat_updated(). + * __css_rstat_updated(). */ -__bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu) +void __css_rstat_updated(struct cgroup_subsys_state *css, int cpu) { struct llist_head *lhead; struct css_rstat_cpu *rstatc; struct llist_node *self; - /* - * Since bpf programs can call this function, prevent access to - * uninitialized rstat pointers. - */ + /* Prevent access to uninitialized rstat pointers. */ if (!css_uses_rstat(css)) return; @@ -125,6 +123,18 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu) llist_add(&rstatc->lnode, lhead); } +/* + * BPF-facing wrapper for __css_rstat_updated(). Validate the caller-provided + * CPU before passing it to the internal rstat updater. + */ +__bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu) +{ + if (unlikely(cpu < 0 || cpu >= nr_cpu_ids || !cpu_possible(cpu))) + return; + + __css_rstat_updated(css, cpu); +} + static void __css_process_update_tree(struct cgroup_subsys_state *css, int cpu) { /* put @css and all ancestors on the corresponding updated lists */ @@ -170,7 +180,7 @@ static void css_process_update_tree(struct cgroup_subsys *ss, int cpu) * flusher flush the stats updated by the updater who have * observed that they are already on the list. The * corresponding barrier pair for this one should be before - * css_rstat_updated() by the user. + * __css_rstat_updated() by the user. * * For now, there aren't any such user, so not adding the * barrier here but if such a use-case arise, please add @@ -614,7 +624,7 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, unsigned long flags) { u64_stats_update_end_irqrestore(&rstatbc->bsync, flags); - css_rstat_updated(&cgrp->self, smp_processor_id()); + __css_rstat_updated(&cgrp->self, smp_processor_id()); put_cpu_ptr(rstatbc); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 051b82ebf371..c7e60f26013c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -579,7 +579,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val, if (!val) return; - css_rstat_updated(&memcg->css, cpu); + __css_rstat_updated(&memcg->css, cpu); statc_pcpu = memcg->vmstats_percpu; for (; statc_pcpu; statc_pcpu = statc->parent_pcpu) { statc = this_cpu_ptr(statc_pcpu); @@ -2608,7 +2608,7 @@ static inline void account_slab_nmi_safe(struct mem_cgroup *memcg, struct mem_cgroup_per_node *pn = memcg->nodeinfo[pgdat->node_id]; /* preemption is disabled in_nmi(). */ - css_rstat_updated(&memcg->css, smp_processor_id()); + __css_rstat_updated(&memcg->css, smp_processor_id()); if (idx == NR_SLAB_RECLAIMABLE_B) atomic_add(nr, &pn->slab_reclaimable); else @@ -2832,7 +2832,7 @@ static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val) mod_memcg_state(memcg, MEMCG_KMEM, val); } else { /* preemption is disabled in_nmi(). */ - css_rstat_updated(&memcg->css, smp_processor_id()); + __css_rstat_updated(&memcg->css, smp_processor_id()); atomic_add(val, &memcg->kmem_stat); } } -- cgit v1.2.3 From 4d3a2a466b8d68d852a1f3bbf11204b718428dc4 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sun, 17 May 2026 13:51:01 +0900 Subject: HID: core: Fix size_t specifier in hid_report_raw_event() When building for 32-bit platforms, for which 'size_t' is 'unsigned int', there are warnings around using the incorrect format specifier to print bsize in hid_report_raw_event(): drivers/hid/hid-core.c:2054:29: error: format specifies type 'long' but the argument has type 'size_t' (aka 'unsigned int') [-Werror,-Wformat] 2053 | hid_warn_ratelimited(hid, "Event data for report %d is incorrect (%d vs %ld)\n", | ~~~ | %zu 2054 | report->id, csize, bsize); | ^~~~~ drivers/hid/hid-core.c:2076:29: error: format specifies type 'long' but the argument has type 'size_t' (aka 'unsigned int') [-Werror,-Wformat] 2075 | hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %ld)\n", | ~~~ | %zu 2076 | report->id, rsize, bsize); | ^~~~~ Use the proper 'size_t' format specifier, '%zu', to clear up the warnings. Cc: stable@vger.kernel.org Fixes: 2c85c61d1332 ("HID: pass the buffer size to hid_report_raw_event") Reported-by: Miguel Ojeda Closes: https://lore.kernel.org/20260516020430.110135-1-ojeda@kernel.org/ Signed-off-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- drivers/hid/hid-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index b3596851c719..41a79e43c82b 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2050,7 +2050,7 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 * return 0; if (unlikely(bsize < csize)) { - hid_warn_ratelimited(hid, "Event data for report %d is incorrect (%d vs %ld)\n", + hid_warn_ratelimited(hid, "Event data for report %d is incorrect (%d vs %zu)\n", report->id, csize, bsize); return -EINVAL; } @@ -2072,7 +2072,7 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 * rsize = max_buffer_size; if (bsize < rsize) { - hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %ld)\n", + hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %zu)\n", report->id, rsize, bsize); return -EINVAL; } -- cgit v1.2.3 From b0fe80c0b9250b35e2211bf3117e7aca814a21b0 Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Fri, 15 May 2026 12:07:14 -0300 Subject: drm/v3d: Fix use-after-free of CPU job query arrays on error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CPU job ioctl's fail label calls kvfree() on cpu_job's timestamp and performance query arrays after v3d_job_cleanup(), which drops the job's last reference and frees cpu_job. Reading cpu_job at that point is a use-after-free. Also, on the early v3d_job_init() failure path, it is a NULL dereference, since v3d_job_deallocate() zeroes the local pointer. In the success path, the arrays are released from the scheduler's .free_job callback, but on the error path, they are freed manually, as the job was never pushed to the scheduler. While the success path deals with this correctly, the fail path doesn't. On top of that, the manual kvfree() calls only free the array storage; they don't drm_syncobj_put() the per-query syncobjs that v3d_timestamp_query_info_free() and v3d_performance_query_info_free() release on the success path. So the same fail path that triggers the use-after-free also leaks one syncobj reference per query. Unify the CPU job teardown into the CPU job's kref destructor, mirroring v3d_render_job_free(). The scheduler's .free_job slot reverts to the generic v3d_sched_job_free() and the fail label drops the manual kvfree() calls, leaving a single teardown path that is reached from both the scheduler and the ioctl error path. That removes the use-after-free, the NULL dereference, and the syncobj leak by construction. Cc: stable@vger.kernel.org Fixes: 9ba0ff3e083f ("drm/v3d: Create a CPU job extension for the timestamp query job") Assisted-by: Claude:claude-opus-4.7 Reviewed-by: Iago Toral Quiroga Link: https://patch.msgid.link/20260515-v3d-cpu-job-leaks-v1-1-7f147cbbf935@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/v3d/v3d_sched.c | 16 +--------------- drivers/gpu/drm/v3d/v3d_submit.c | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 1855ef5b3b5f..94bf628dc91c 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -125,20 +125,6 @@ v3d_performance_query_info_free(struct v3d_performance_query_info *query_info, } } -static void -v3d_cpu_job_free(struct drm_sched_job *sched_job) -{ - struct v3d_cpu_job *job = to_cpu_job(sched_job); - - v3d_timestamp_query_info_free(&job->timestamp_query, - job->timestamp_query.count); - - v3d_performance_query_info_free(&job->performance_query, - job->performance_query.count); - - v3d_job_cleanup(&job->base); -} - static void v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job) { @@ -830,7 +816,7 @@ static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = { static const struct drm_sched_backend_ops v3d_cpu_sched_ops = { .run_job = v3d_cpu_job_run, - .free_job = v3d_cpu_job_free + .free_job = v3d_sched_job_free }; static int diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index ee4512db294b..e3a6e7cc7bd5 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -123,6 +123,21 @@ v3d_render_job_free(struct kref *ref) v3d_job_free(ref); } +static void +v3d_cpu_job_free(struct kref *ref) +{ + struct v3d_cpu_job *job = container_of(ref, struct v3d_cpu_job, + base.refcount); + + v3d_timestamp_query_info_free(&job->timestamp_query, + job->timestamp_query.count); + + v3d_performance_query_info_free(&job->performance_query, + job->performance_query.count); + + v3d_job_free(ref); +} + void v3d_job_cleanup(struct v3d_job *job) { if (!job) @@ -1302,7 +1317,7 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data, trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type); ret = v3d_job_init(v3d, file_priv, &cpu_job->base, - v3d_job_free, 0, &se, V3D_CPU); + v3d_cpu_job_free, 0, &se, V3D_CPU); if (ret) { v3d_job_deallocate((void *)&cpu_job); goto fail; @@ -1385,8 +1400,6 @@ fail: v3d_job_cleanup((void *)csd_job); v3d_job_cleanup(clean_job); v3d_put_multisync_post_deps(&se); - kvfree(cpu_job->timestamp_query.queries); - kvfree(cpu_job->performance_query.queries); return ret; } -- cgit v1.2.3 From 6eb6e5acafa46854d4363e6c34981289995f3ace Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Fri, 15 May 2026 12:07:15 -0300 Subject: drm/v3d: Release indirect CSD GEM reference on CPU job free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v3d_get_cpu_indirect_csd_params() takes a reference to the indirect BO via drm_gem_object_lookup() and stashes it in cpu_job->indirect_csd.indirect, but nothing on the CPU job teardown path ever drops that reference. Drop the extra reference in v3d_cpu_job_free(). The NULL check covers ioctl errors before the lookup ran and CPU job types other than V3D_CPU_JOB_TYPE_INDIRECT_CSD, which leave the field zero-initialised. Cc: stable@vger.kernel.org Fixes: 18b8413b25b7 ("drm/v3d: Create a CPU job extension for a indirect CSD job") Assisted-by: Claude:claude-opus-4.7 Reviewed-by: Iago Toral Quiroga Link: https://patch.msgid.link/20260515-v3d-cpu-job-leaks-v1-2-7f147cbbf935@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/v3d/v3d_submit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index e3a6e7cc7bd5..3ddd53b6f437 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -135,6 +135,9 @@ v3d_cpu_job_free(struct kref *ref) v3d_performance_query_info_free(&job->performance_query, job->performance_query.count); + if (job->indirect_csd.indirect) + drm_gem_object_put(job->indirect_csd.indirect); + v3d_job_free(ref); } -- cgit v1.2.3 From c326f9c68921e2f14dfcecb2f6b4216313d50248 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 13 May 2026 09:46:13 +0300 Subject: net/mlx5e: xsk: Fix unlocked writing to ICOSQ During napi poll, when the affinity changes and there's still XSK work to be done, we trigger an ICOSQ interrupt on the new CPU. However, this triggering on the ICOSQ is done unprotected. There are 2 such races: A) mlx5e_trigger_irq() is called while mlx5e_xsk_alloc_rx_mpwqe() is running from a different CPU due to affinity change. This can happen because IRQ triggering is done after napi_complete_done(). At this point the NAPI can be scheduled on a different CPU. Like this: CPU A (old affinity, NAPI tail) CPU B (new affinity, fresh NAPI) ------------------------------- -------------------------------- napi_complete_done() clears SCHED mlx5e_cq_arm(...) napi_schedule_prep() sets SCHED mlx5e_napi_poll() mlx5e_xsk_alloc_rx_mpwqe() mlx5e_icosq_sync_lock() // noop memcpy 640 B UMR body advance sq->pc by 10 mlx5e_trigger_irq(&c->icosq) wqe_info[pi] = {NOP, 1} mlx5e_post_nop() advances sq->pc B) mlx5e_trigger_irq() is called on the ICOSQ when mlx5e_trigger_napi_icosq() is running. The obvious fix would be to lock the ICOSQ. But ICOSQ has an optimized locking scheme that doesn't work for this scenario. Kick the async ICOSQ instead which is always locked. This issue was noticed in the wild with the following splat: netdevice: ge-0-0-1: Bad OP in ICOSQ CQE: 0xd WARNING: drivers/net/ethernet/mellanox/mlx5/core/en_rx.c:826 [...] [...] Call Trace: mlx5e_napi_poll+0x11d/0x7f0 [mlx5_core] __napi_poll+0x30/0x200 ? skb_defer_free_flush+0x9c/0xc0 net_rx_action+0x2fe/0x3f0 handle_softirqs+0xd8/0x340 __irq_exit_rcu+0xbc/0xe0 common_interrupt+0x85/0xa0 asm_common_interrupt+0x26/0x40 [...] ---[ end trace 0000000000000000 ]--- mlx5_core 0000:08:00.0 ge-0-0-1: Error cqe on cqn 0x548, ci 0x2022, qn 0x8f4, opcode 0xd, syndrome 0x2, vendor syndrome 0x68 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000030: 00 00 00 00 01 00 68 02 01 00 08 f4 de 14 59 d2 WQE DUMP: WQ size 16384 WQ cur size 0, WQE index 0x1e14, len: 64 00000000: 00 00 00 01 d9 ed 80 02 00 00 00 01 d9 ed 90 02 00000010: 00 00 00 01 d9 ed a0 02 00 00 00 01 d9 ed b0 02 00000020: 00 00 00 01 d9 ed c0 02 00 00 00 01 d9 ed d0 02 00000030: 00 00 00 01 d9 ed e0 02 00 00 00 01 d9 ed f0 02 mlx5_core 0000:08:00.0 ge-0-0-1: Error cqe on cqn 0x548, ci 0x2023, qn 0x8f4, opcode 0xd, syndrome 0x5, vendor syndrome 0xf9 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000030: 00 00 00 00 01 00 f9 05 01 00 08 f4 de 15 cf d2 Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support") Reported-by: Paul Saab Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260513064613.334602-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index b31f689fe271..e90c6c6df835 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -252,7 +252,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_cq_arm(&c->xdpsq->cq); if (unlikely(aff_change && busy_xsk)) { - mlx5e_trigger_irq(&c->icosq); + mlx5e_trigger_napi_async_icosq(c); ch_stats->force_irq++; } -- cgit v1.2.3 From 9e7f36ab5b7bf68463faa5f7b926fea8f35597bb Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Thu, 14 May 2026 05:38:08 -0700 Subject: net: appletalk: fix NULL pointer dereference in aarp_send_ddp() aarp_send_ddp() calls atalk_find_dev_addr(dev) in the LocalTalk fast path without checking for NULL. When the device has no AppleTalk interface configured (dev->atalk_ptr == NULL), this leads to a NULL pointer dereference at the at->s_net access. KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] RIP: 0010:aarp_send_ddp (net/appletalk/aarp.c:552 (discriminator 2)) Call Trace: atalk_sendmsg (net/appletalk/ddp.c:1715) __sys_sendto (net/socket.c:2265 (discriminator 1)) __x64_sys_sendto (net/socket.c:2272) do_syscall_64 (arch/x86/entry/syscall_64.c:94) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:121) Add a NULL check consistent with the other callers of atalk_find_dev_addr(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Link: https://patch.msgid.link/20260514123806.3085961-3-bestswngs@gmail.com Signed-off-by: Jakub Kicinski --- net/appletalk/aarp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index e7315c01a299..30493ea3c010 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -542,6 +542,11 @@ int aarp_send_ddp(struct net_device *dev, struct sk_buff *skb, struct ddpehdr *ddp = (struct ddpehdr *)skb->data; int ft = 2; + if (!at) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + /* * Compressible ? * -- cgit v1.2.3 From d00c953a8f69921f484b629801766da68f27f658 Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Thu, 14 May 2026 05:25:12 -0700 Subject: net: qualcomm: rmnet: fix endpoint use-after-free in rmnet_dellink() rmnet_dellink() removes the endpoint from the hash table with hlist_del_init_rcu() and then immediately frees it with kfree(). However, RCU readers on the receive path (rmnet_rx_handler -> __rmnet_map_ingress_handler) may still hold a reference to the endpoint and dereference ep->egress_dev after the memory has been freed. The endpoint is a kmalloc-32 object, and the stale read at offset 8 corresponds to the egress_dev pointer. BUG: unable to handle page fault for address: ffffffffde942eef Oops: 0002 [#1] SMP NOPTI CPU: 1 UID: 0 PID: 137 Comm: poc_write Not tainted 7.0.0+ #4 PREEMPTLAZY RIP: 0010:rmnet_vnd_rx_fixup (rmnet_vnd.c:27) Call Trace: __rmnet_map_ingress_handler (rmnet_handlers.c:48 rmnet_handlers.c:101) rmnet_rx_handler (rmnet_handlers.c:129 rmnet_handlers.c:235) __netif_receive_skb_core.constprop.0 (net/core/dev.c:6096) __netif_receive_skb_one_core (net/core/dev.c:6208) netif_receive_skb (net/core/dev.c:6467) tun_get_user (drivers/net/tun.c:1955) tun_chr_write_iter (drivers/net/tun.c:2003) vfs_write (fs/read_write.c:688) ksys_write (fs/read_write.c:740) Add an rcu_head field to struct rmnet_endpoint and replace kfree() with kfree_rcu() so the endpoint memory remains valid through the RCU grace period. Also remove the rmnet_vnd_dellink() call and inline only the nr_rmnet_devs decrement, since rmnet_vnd_dellink() would set ep->egress_dev to NULL during the grace period, creating a data race with lockless readers. Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Link: https://patch.msgid.link/20260514122511.3083479-2-bestswngs@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 8 ++++---- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 269c0449760c..78d4df55740a 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -213,8 +213,8 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head) ep = rmnet_get_endpoint(real_port, mux_id); if (ep) { hlist_del_init_rcu(&ep->hlnode); - rmnet_vnd_dellink(mux_id, real_port, ep); - kfree(ep); + real_port->nr_rmnet_devs--; + kfree_rcu(ep, rcu); } netdev_upper_dev_unlink(real_dev, dev); @@ -238,9 +238,9 @@ static void rmnet_force_unassociate_device(struct net_device *real_dev) hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { unregister_netdevice_queue(ep->egress_dev, &list); netdev_upper_dev_unlink(real_dev, ep->egress_dev); - rmnet_vnd_dellink(ep->mux_id, port, ep); hlist_del_init_rcu(&ep->hlnode); - kfree(ep); + port->nr_rmnet_devs--; + kfree_rcu(ep, rcu); } rmnet_unregister_real_device(real_dev); unregister_netdevice_many(&list); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index ed112d51ac5a..f50fae1c6bdd 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -18,6 +18,7 @@ struct rmnet_endpoint { u8 mux_id; struct net_device *egress_dev; struct hlist_node hlnode; + struct rcu_head rcu; }; struct rmnet_egress_agg_params { -- cgit v1.2.3 From bae3ee802c21e83ad1eb805519e6f32ea528b4d2 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 14 May 2026 20:46:31 +0200 Subject: openvswitch: vport: fix race between linking and the device notifier Sashiko reports that it is technically possible that we got the device reference, but by the time we're linking it to the OVS datapath, it may be already in the process of being deleted. In this case if the notifier wins the race for RTNL, it will see that the device is not yet in the OVS datapath (ovs_netdev_get_vport() will fail in the dp_device_event()) and will do nothing. Then the ovs_netdev_link() will take the RTNL and link the unregistering device to OVS datapath. Eventually, netdev_wait_allrefs_any() will re-broadcast the event and the device will be properly detached, but it will take at least a second before that happens, so it's not something we should rely on. Let's avoid linking the non-registered device in the first place. Note: As per documentation, RTNL doesn't protect the reg_state, but it actually does for all the state transitions we care about here, so it should not be necessary to use READ_ONCE or taking the instance lock. We can still do that, but we have a few more places even in this file where the reg_state is accessed without those while under RTNL, and many more places like this across the kernel code, so it might make more sense to change all of them in a more centralized fashion in the future, if necessary. Fixes: ccb1352e76cf ("net: Add Open vSwitch kernel components.") Signed-off-by: Ilya Maximets Reviewed-by: Aaron Conole Acked-by: Eelco Chaudron Link: https://patch.msgid.link/20260514184702.2461435-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- net/openvswitch/vport-netdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index c42642075685..e7e8490a53d8 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -83,6 +83,14 @@ struct vport *ovs_netdev_link(struct vport *vport, bool tunnel) } rtnl_lock(); + /* Do not link devices that are not registered to avoid a potential + * race with the NETDEV_UNREGISTER notification in dp_device_event(). + */ + if (vport->dev->reg_state != NETREG_REGISTERED) { + err = -ENODEV; + goto error_put_unlock; + } + err = netdev_master_upper_dev_link(vport->dev, get_dpdev(vport->dp), NULL, NULL, NULL); -- cgit v1.2.3 From 8cf8b5ae8e093132b0dce0a932af10c9ef077936 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 18 May 2026 22:13:09 +0100 Subject: cifs: Fix undefined variables Fix a couple of undefined variables introduced by the patch to fix tearing on ->remote_i_size and ->zero_point. For some reason, make W=1 with gcc doesn't give undefined variable warnings (but clang does). Fixes: 2c8f4742bb76 ("netfs: Fix potential for tearing in ->remote_i_size and ->zero_point") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202605031459.eX5UbO3K-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202605021450.ca5QGqLH-lkp@intel.com/ cc: Steve French cc: Paulo Alcantara cc: Matthew Wilcox cc: Christian Brauner cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/smb/client/cifsfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index feac491c5070..f557eb7875c7 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1304,7 +1304,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, struct cifsFileInfo *smb_file_src = src_file->private_data; struct cifsFileInfo *smb_file_target = dst_file->private_data; struct cifs_tcon *target_tcon, *src_tcon; - unsigned long long i_size, old_size, new_size, zero_point; + unsigned long long i_size, new_size; unsigned long long destend, fstart, fend; unsigned int xid; int rc; @@ -1372,7 +1372,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, goto unlock; spin_lock(&target_inode->i_lock); - if (fend > zero_point) + if (fend > target_cifsi->netfs._zero_point) netfs_write_zero_point(target_inode, fend + 1); i_size = target_inode->i_size; spin_unlock(&target_inode->i_lock); @@ -1387,7 +1387,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, if (target_tcon->ses->server->ops->duplicate_extents) { rc = target_tcon->ses->server->ops->duplicate_extents(xid, smb_file_src, smb_file_target, off, len, destoff); - if (rc == 0 && new_size > old_size) { + if (rc == 0 && new_size > i_size) { truncate_setsize(target_inode, new_size); fscache_resize_cookie(cifs_inode_cookie(target_inode), new_size); -- cgit v1.2.3 From 89bbff099bfc94888eb942d5b981592bbbe0c856 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 15 May 2026 11:24:08 -0700 Subject: ice: fix locking around wait_event_interruptible_locked_irq Commit 50327223a8bb ("ice: add lock to protect low latency interface") introduced a wait queue used to protect the low latency timer interface. The queue is used with the wait_event_interruptible_locked_irq macro, which unlocks the wait queue lock while sleeping. The irq variant uses spin_lock_irq and spin_unlock_irq to manage this. The wait queue lock was previously locked using spin_lock_irqsave. This difference in lock variants could lead to issues, since wait_event would unlock the wait queue and restore interrupts while sleeping. The ice_read_phy_tstamp_ll_e810() function is ultimately called through ice_read_phy_tstamp, which is called from ice_ptp_process_tx_tstamp or ice_ptp_clear_unexpected_tx_ready. The former is called through the miscellaneous IRQ thread function, while the latter is called from the service task work queue thread. Neither of these functions has interrupts disabled, so use spin_lock_irq instead of spin_lock_irqsave. Fixes: 50327223a8bb ("ice: add lock to protect low latency interface") Cc: stable@vger.kernel.org Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20250109181823.77f44c69@kernel.org/ Signed-off-by: Jacob Keller Signed-off-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20260515182419.1597859-2-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 24fb7a3e14d6..672218e5d1f9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -4503,18 +4503,17 @@ static int ice_read_phy_tstamp_ll_e810(struct ice_hw *hw, u8 idx, u8 *hi, u32 *lo) { struct ice_e810_params *params = &hw->ptp.phy.e810; - unsigned long flags; u32 val; int err; - spin_lock_irqsave(¶ms->atqbal_wq.lock, flags); + spin_lock_irq(¶ms->atqbal_wq.lock); /* Wait for any pending in-progress low latency interrupt */ err = wait_event_interruptible_locked_irq(params->atqbal_wq, !(params->atqbal_flags & ATQBAL_FLAGS_INTR_IN_PROGRESS)); if (err) { - spin_unlock_irqrestore(¶ms->atqbal_wq.lock, flags); + spin_unlock_irq(¶ms->atqbal_wq.lock); return err; } @@ -4529,7 +4528,7 @@ ice_read_phy_tstamp_ll_e810(struct ice_hw *hw, u8 idx, u8 *hi, u32 *lo) REG_LL_PROXY_H); if (err) { ice_debug(hw, ICE_DBG_PTP, "Failed to read PTP timestamp using low latency read\n"); - spin_unlock_irqrestore(¶ms->atqbal_wq.lock, flags); + spin_unlock_irq(¶ms->atqbal_wq.lock); return err; } @@ -4539,7 +4538,7 @@ ice_read_phy_tstamp_ll_e810(struct ice_hw *hw, u8 idx, u8 *hi, u32 *lo) /* Read the low 32 bit value and set the TS valid bit */ *lo = rd32(hw, REG_LL_PROXY_L) | TS_VALID; - spin_unlock_irqrestore(¶ms->atqbal_wq.lock, flags); + spin_unlock_irq(¶ms->atqbal_wq.lock); return 0; } -- cgit v1.2.3 From 3ba4dd024d26372733d1c02e13e076c6016e3320 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Fri, 15 May 2026 11:24:09 -0700 Subject: ice: fix VF queue configuration with low MTU values The ice driver's VF queue configuration validation rejects databuffer_size values below 1024 bytes, which prevents VFs from using MTU values below 871 bytes. The iavf driver calculates databuffer_size based on the MTU using: databuffer_size = ALIGN(MTU + LIBETH_RX_LL_LEN, 128) where LIBETH_RX_LL_LEN = 26 (ETH_HLEN + 2*VLAN_HLEN + ETH_FCS_LEN). For MTU values below 871: MTU 870: 870 + 26 = 896, aligned to 128 = 896 (< 1024, rejected) MTU 871: 871 + 26 = 897, aligned to 128 = 1024 (>= 1024, accepted) The 1024-byte minimum seems unnecessarily restrictive, because the hardware supports databuffer_size as low as 128 bytes (the alignment boundary), which should allow MTU values down to the standard minimum of 68 bytes. I haven't found the reason why the limit was configured in the commit 9c7dd7566d18 ("ice: add validation in OP_CONFIG_VSI_QUEUES VF message"), so with no more information and since it is working, change the minimum databuffer_size validation from 1024 to 128 bytes to allow standard low MTU values while still preventing invalid configurations. Fixes: 9c7dd7566d18 ("ice: add validation in OP_CONFIG_VSI_QUEUES VF message") cc: stable@vger.kernel.org Signed-off-by: Jose Ignacio Tornos Martinez Reviewed-by: Jacob Keller Reviewed-by: Michal Swiatkowski Reviewed-by: Paul Menzel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20260515182419.1597859-3-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/virt/queues.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/virt/queues.c b/drivers/net/ethernet/intel/ice/virt/queues.c index f73d5a3e83d4..31be2f76181c 100644 --- a/drivers/net/ethernet/intel/ice/virt/queues.c +++ b/drivers/net/ethernet/intel/ice/virt/queues.c @@ -840,7 +840,7 @@ int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) if (qpi->rxq.databuffer_size != 0 && (qpi->rxq.databuffer_size > ((16 * 1024) - 128) || - qpi->rxq.databuffer_size < 1024)) + qpi->rxq.databuffer_size < 128)) goto error_param; ring->rx_buf_len = qpi->rxq.databuffer_size; -- cgit v1.2.3 From ebc8de716c9ec2be384abdc2dd866da26c6580d1 Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Fri, 15 May 2026 11:24:10 -0700 Subject: ice: fix setting promisc mode while adding VID filter There are at least two paths through which VSI promiscuous mode can be independently configured via ice_fltr_set_vsi_promisc(): - ice_vlan_rx_add_vid() (netdev op) - ice_service_task() -> ... -> ice_set_promisc() Both paths may try to program promiscuous mode concurrently. One such scenario is: 1. Add ice netdev to bond 2. Add the bond netdev to bridge 3. ice netdev enters allmulticast mode (IFF_ALLMULTI) 4. Service task programs promisc mode filter 5. Bridge -> bond calls ice_vlan_rx_add_vid() Crucially, ice_vlan_rx_add_vid() fails if ice_fltr_set_vsi_promisc() returns any error, including -EEXIST. This causes VLAN filtering setup to fail on the bond interface. ice_set_promisc() already handles -EEXIST correctly. Fix by adding the same -EEXIST check to ice_vlan_rx_add_vid(): if the promisc filter is already programmed, continue without returning error. Fixes: 1273f89578f2 ("ice: Fix broken IFF_ALLMULTI handling") Cc: stable@vger.kernel.org Signed-off-by: Marcin Szycik Signed-off-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20260515182419.1597859-4-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c52c465280f7..66642232b282 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3682,7 +3682,7 @@ int ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) ret = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, ICE_MCAST_VLAN_PROMISC_BITS, vid); - if (ret) + if (ret && ret != -EEXIST) goto finish; } -- cgit v1.2.3 From 781ff8f2d575a794a2a4f11605288ae06757f5eb Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Fri, 15 May 2026 11:24:11 -0700 Subject: ice: ptp: serialize E825 PHY timer start with PTP lock ice_start_phy_timer_eth56g() programs TIMETUS registers and issues INIT_INCVAL without holding the global PTP semaphore. This allows concurrent PTP command paths to interleave with PHY timer start, which can make the sequence fail and leave timer initialization inconsistent. Take the PTP lock around TIMETUS registers programming and INIT_INCVAL command execution, and make sure the lock is released on all error paths. Keep the subsequent sync step outside of this critical section, since ice_sync_phy_timer_eth56g() takes the same semaphore internally. Fixes: 7cab44f1c35f ("ice: Introduce ETH56G PHY model for E825C products") Reviewed-by: Arkadiusz Kubalewski Signed-off-by: Grzegorz Nitka Reviewed-by: Aleksandr Loktionov Tested-by: Alexander Nowlin Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20260515182419.1597859-5-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 672218e5d1f9..8bb94e785f2a 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -2141,16 +2141,23 @@ int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port) } incval = (u64)hi << 32 | lo; + if (!ice_ptp_lock(hw)) { + dev_err(ice_hw_to_dev(hw), "Failed to acquire PTP semaphore\n"); + return -EBUSY; + } + err = ice_write_40b_ptp_reg_eth56g(hw, port, PHY_REG_TIMETUS_L, incval); if (err) - return err; + goto err_ptp_unlock; err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_INIT_INCVAL); if (err) - return err; + goto err_ptp_unlock; ice_ptp_exec_tmr_cmd(hw); + ice_ptp_unlock(hw); + err = ice_sync_phy_timer_eth56g(hw, port); if (err) return err; @@ -2166,6 +2173,10 @@ int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port) ice_debug(hw, ICE_DBG_PTP, "Enabled clock on PHY port %u\n", port); return 0; + +err_ptp_unlock: + ice_ptp_unlock(hw); + return err; } /** -- cgit v1.2.3 From 7b28523546c7e4adbb8436f2986efcfc8382985e Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Fri, 15 May 2026 11:24:12 -0700 Subject: ice: ptp: use primary NAC semaphore on E825 For E825 2xNAC configurations, PTP semaphore operations must hit the primary NAC register block so both sides coordinate on the same lock. Commit e2193f9f9ec9 ("ice: enable timesync operation on 2xNAC E825 devices") updated other primary-only PTP register accesses to use the primary NAC on non-primary functions, but left ice_ptp_lock() and ice_ptp_unlock() operating on the local NAC. As a result, secondary NAC PTP paths can take a different semaphore than the primary side. Select the primary hardware in ice_ptp_lock() and ice_ptp_unlock() when the current function is not primary, keeping semaphore operations symmetric and consistent with the rest of the 2xNAC PTP register access path. Fixes: e2193f9f9ec9 ("ice: enable timesync operation on 2xNAC E825 devices") Reviewed-by: Arkadiusz Kubalewski Signed-off-by: Grzegorz Nitka Reviewed-by: Aleksandr Loktionov Tested-by: Alexander Nowlin Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20260515182419.1597859-6-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 8bb94e785f2a..2c18e16fe053 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -5264,9 +5264,13 @@ static void ice_ptp_init_phy_e830(struct ice_ptp_hw *ptp) */ bool ice_ptp_lock(struct ice_hw *hw) { + struct ice_pf *pf = container_of(hw, struct ice_pf, hw); u32 hw_lock; int i; + if (!ice_is_primary(hw)) + hw = ice_get_primary_hw(pf); + #define MAX_TRIES 15 for (i = 0; i < MAX_TRIES; i++) { @@ -5293,6 +5297,11 @@ bool ice_ptp_lock(struct ice_hw *hw) */ void ice_ptp_unlock(struct ice_hw *hw) { + struct ice_pf *pf = container_of(hw, struct ice_pf, hw); + + if (!ice_is_primary(hw)) + hw = ice_get_primary_hw(pf); + wr32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), 0); } -- cgit v1.2.3 From 975b564d195b13ca6ee1ef5e6a9561734898eb17 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Fri, 15 May 2026 11:24:13 -0700 Subject: ice: restore PTP Rx timestamp config after ethtool set-channels When ethtool -L changes queue counts, ice_vsi_recfg_qs() closes and rebuilds the VSI, reallocating Rx rings. The newly allocated rings have ptp_rx cleared, so RX hardware timestamps are no longer attached to skb until hwtstamp configuration is applied again. Restore timestamp mode after ice_vsi_open() in the queue reconfiguration path, matching reset/rebuild behavior and ensuring newly rebuilt Rx rings have PTP RX timestamping re-enabled. Testing hints: - run ptp4l application in client synchronization mode: ptp4l -i ethX -m -s - run PTP traffic - change queue number on ethX netdev interface: ethtool -L ethX combined new_queue_size - observe ptp4l output - expected result: no "received DELAY_REQ without timestamp" messages Fixes: 77a781155a65 ("ice: enable receive hardware timestamping") Cc: stable@vger.kernel.org Reviewed-by: Aleksandr Loktionov Signed-off-by: Grzegorz Nitka Reviewed-by: Simon Horman Tested-by: Alexander Nowlin Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20260515182419.1597859-7-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 66642232b282..e2fbe111f849 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4104,6 +4104,12 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked) } ice_pf_dcb_recfg(pf, locked); ice_vsi_open(vsi); + /* Rx rings are reallocated during VSI rebuild and lose their ptp_rx + * flag. Restore timestamp mode so newly allocated rings are set up + * for hardware Rx timestamping. + */ + if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_restore_timestamp_mode(pf); goto done; rebuild_err: -- cgit v1.2.3 From 5d49b568c188dc77199d8d2b959c91da8cc27cf1 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Fri, 15 May 2026 11:24:14 -0700 Subject: ixgbevf: fix use-after-free in VEPA multicast source pruning ixgbevf_clean_rx_irq() prunes frames whose source MAC matches the VF's own address (VEPA multicast workaround) by freeing the skb and continuing to the next descriptor: dev_kfree_skb_irq(skb); continue; The skb pointer is declared outside the while loop and persists across iterations. Because the continue skips the "skb = NULL" reset at the bottom of the loop, the next iteration enters the "else if (skb)" path and calls ixgbevf_add_rx_frag() on the freed skb, dereferencing skb_shinfo(skb)->nr_frags - a use-after-free in NAPI softirq context. The sibling driver iavf already handles this correctly by nulling the pointer before continuing. Apply the same pattern here. I do not have ixgbevf hardware; the bug was found by static analysis (scan_drop_continue_loops.py + semgrep drop_continue_in_loop, multi-tool corroboration with the highest score in the scan). The UAF was confirmed under KASAN by loading a test module that reproduces the exact code pattern (alloc skb, kfree_skb, then read skb_shinfo(skb)->nr_frags): BUG: KASAN: slab-use-after-free in ixgbevf_uaf_test_init+0x100/0x1000 Read of size 8 at addr 000000006163ae78 by task insmod/30 freed 208-byte region [000000006163adc0, 000000006163ae90) QEMU emulates igb (82576) but not ixgbe (82599), and the igbvf VF driver does not include the VEPA source pruning path, so a full end-to-end reproduction with emulated hardware was not possible. Fixes: bad17234ba70 ("ixgbevf: Change receive model to use double buffered page based receives") Cc: stable@vger.kernel.org Signed-off-by: Michael Bommarito Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20260515182419.1597859-8-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 42f89a179a3f..4ba3be961ab6 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1221,6 +1221,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, ether_addr_equal(rx_ring->netdev->dev_addr, eth_hdr(skb)->h_source)) { dev_kfree_skb_irq(skb); + skb = NULL; continue; } -- cgit v1.2.3 From 5acc641e590e008caaed480ed9ffae47cf7ecbdf Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Fri, 15 May 2026 11:24:15 -0700 Subject: igc: set tx buffer type for SMD frames Sashiko pointed out that igc_fpe_init_smd_frame() initializes igc_tx_buffer fields for an SMD skb, but does not set the buffer type: https://sashiko.dev/#/patchset/20260415025226.114115-1-kohei%40enjuk.jp Since igc_tx_buffer entries are reused, a stale XDP or XSK type can remain and make TX completion use the wrong cleanup path. Set the buffer type to IGC_TX_BUFFER_TYPE_SKB. Fixes: 5422570c0010 ("igc: add support for frame preemption verification") Signed-off-by: Kohei Enju Reviewed-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Avigail Dahan Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20260515182419.1597859-9-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc_tsn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 8a110145bfee..725ba253165c 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -34,6 +34,7 @@ static int igc_fpe_init_smd_frame(struct igc_ring *ring, return -ENOMEM; } + buffer->type = IGC_TX_BUFFER_TYPE_SKB; buffer->skb = skb; buffer->protocol = 0; buffer->bytecount = skb->len; -- cgit v1.2.3 From e935c37b8a94bb256fada6395a5d05e1c0c6bdaf Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Fri, 15 May 2026 11:24:16 -0700 Subject: igc: fix potential skb leak in igc_fpe_xmit_smd_frame() When igc_fpe_init_tx_descriptor() fails, no one takes care of an allocated skb, leaking it. [1] Use dev_kfree_skb_any() on failure. Tested on an I226 adapter with the following command, while injecting faults in igc_fpe_init_tx_descriptor() to trigger the error path. # ethtool --set-mm $DEV verify-enabled on tx-enabled on pmac-enabled on [1] unreferenced object 0xffff888113c6cdc0 (size 224): ... backtrace (crc be3d3fda): kmem_cache_alloc_node_noprof+0x3b1/0x410 __alloc_skb+0xde/0x830 igc_fpe_xmit_smd_frame.isra.0+0xad/0x1b0 igc_fpe_send_mpacket+0x37/0x90 ethtool_mmsv_verify_timer+0x15e/0x300 Cc: stable@vger.kernel.org Fixes: 5422570c0010 ("igc: add support for frame preemption verification") Signed-off-by: Kohei Enju Reviewed-by: Simon Horman Reviewed-by: Faizal Rahim Tested-by: Avigail Dahan Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20260515182419.1597859-10-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc_tsn.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 725ba253165c..52de2bcbadbe 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -110,10 +110,16 @@ static int igc_fpe_xmit_smd_frame(struct igc_adapter *adapter, __netif_tx_lock(nq, cpu); err = igc_fpe_init_tx_descriptor(ring, skb, type); - igc_flush_tx_descriptors(ring); + if (err) + goto err_free_skb_any; + igc_flush_tx_descriptors(ring); __netif_tx_unlock(nq); + return 0; +err_free_skb_any: + __netif_tx_unlock(nq); + dev_kfree_skb_any(skb); return err; } -- cgit v1.2.3 From 34ed2395613b23f8645e320abdcab6d688dc7a80 Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Tue, 19 May 2026 03:40:22 +0800 Subject: ALSA: timer: avoid past-the-end iterator in snd_timer_dev_register() snd_timer_dev_register() walks snd_timer_list looking for the ordered insertion point and on loop fall-through passes &timer1->device_list to list_add_tail(): list_for_each_entry(timer1, &snd_timer_list, device_list) { ... break; /* on found-position */ ... } list_add_tail(&timer->device_list, &timer1->device_list); When the loop walks all entries without break, timer1 is past-the-end. &timer1->device_list aliases &snd_timer_list (the list head) via container_of offset cancellation, so the insert lands at the list tail. That is the intended behaviour, but the access is undefined per C11 even though it works in practice. Track an explicit insert_before pointer initialised to the list head and overwritten to &timer1->device_list only when the loop breaks early. The observable behaviour is unchanged. Fixes: 9244b2c3079f ("[ALSA] alsa core: convert to list_for_each_entry*") Signed-off-by: Maoyi Xie Link: https://patch.msgid.link/20260518194023.1667857-2-maoyixie.tju@gmail.com Signed-off-by: Takashi Iwai --- sound/core/timer.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 820901d503af..57583dec3974 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1007,6 +1007,7 @@ static int snd_timer_dev_register(struct snd_device *dev) { struct snd_timer *timer = dev->device_data; struct snd_timer *timer1; + struct list_head *insert_before = &snd_timer_list; if (snd_BUG_ON(!timer || !timer->hw.start || !timer->hw.stop)) return -ENXIO; @@ -1016,28 +1017,36 @@ static int snd_timer_dev_register(struct snd_device *dev) guard(mutex)(®ister_mutex); list_for_each_entry(timer1, &snd_timer_list, device_list) { - if (timer1->tmr_class > timer->tmr_class) + if (timer1->tmr_class > timer->tmr_class) { + insert_before = &timer1->device_list; break; + } if (timer1->tmr_class < timer->tmr_class) continue; if (timer1->card && timer->card) { - if (timer1->card->number > timer->card->number) + if (timer1->card->number > timer->card->number) { + insert_before = &timer1->device_list; break; + } if (timer1->card->number < timer->card->number) continue; } - if (timer1->tmr_device > timer->tmr_device) + if (timer1->tmr_device > timer->tmr_device) { + insert_before = &timer1->device_list; break; + } if (timer1->tmr_device < timer->tmr_device) continue; - if (timer1->tmr_subdevice > timer->tmr_subdevice) + if (timer1->tmr_subdevice > timer->tmr_subdevice) { + insert_before = &timer1->device_list; break; + } if (timer1->tmr_subdevice < timer->tmr_subdevice) continue; /* conflicts.. */ return -EBUSY; } - list_add_tail(&timer->device_list, &timer1->device_list); + list_add_tail(&timer->device_list, insert_before); return 0; } -- cgit v1.2.3 From 92b62b7416af11fcfaab7373b15a32a471500bab Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Tue, 19 May 2026 03:40:23 +0800 Subject: ALSA: seq: avoid past-the-end iterator in snd_seq_create_port() snd_seq_create_port() walks client->ports_list_head looking for the ordered insertion point and on loop fall-through passes &p->list to list_add_tail(): list_for_each_entry(p, &client->ports_list_head, list) { if (p->addr.port == port) { kfree(new_port); return -EBUSY; } if (p->addr.port > num) break; ... } list_add_tail(&new_port->list, &p->list); When the loop walks all entries without break (e.g., the new port sorts last), p is past-the-end. &p->list aliases &client->ports_list_head (the list head) via container_of offset cancellation, so the insert lands at the list tail. That is the intended behaviour, but the access is undefined per C11 even though it works in practice. Track an explicit insert_before pointer initialised to the list head and overwritten to &p->list only when the loop breaks early. The observable behaviour is unchanged. Fixes: 9244b2c3079f ("[ALSA] alsa core: convert to list_for_each_entry*") Signed-off-by: Maoyi Xie Link: https://patch.msgid.link/20260518194023.1667857-3-maoyixie.tju@gmail.com Signed-off-by: Takashi Iwai --- sound/core/seq/seq_ports.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c index da8d358958f1..31ab4681c601 100644 --- a/sound/core/seq/seq_ports.c +++ b/sound/core/seq/seq_ports.c @@ -144,18 +144,21 @@ int snd_seq_create_port(struct snd_seq_client *client, int port, num = max(port, 0); guard(mutex)(&client->ports_mutex); guard(write_lock_irq)(&client->ports_lock); + struct list_head *insert_before = &client->ports_list_head; list_for_each_entry(p, &client->ports_list_head, list) { if (p->addr.port == port) { kfree(new_port); return -EBUSY; } - if (p->addr.port > num) + if (p->addr.port > num) { + insert_before = &p->list; break; + } if (port < 0) /* auto-probe mode */ num = p->addr.port + 1; } /* insert the new port */ - list_add_tail(&new_port->list, &p->list); + list_add_tail(&new_port->list, insert_before); client->num_ports++; new_port->addr.port = num; /* store the port number in the port */ sprintf(new_port->name, "port-%d", num); -- cgit v1.2.3 From 9e5fb6098d21e1f9be9982b46c3e5b8329d4e7d2 Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Tue, 19 May 2026 09:55:35 +0800 Subject: ALSA: hda/realtek: Fix mute and mic-mute LEDs for HP 16 Piston OmniBook X The ALC245 sound card on this machine requires the quirk `ALC245_FIXUP_HP_ENVY_X360_15_FH0XXX` to fix the mic and mute LED. Link: https://bugzilla.kernel.org/show_bug.cgi?id=221509 Cc: Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260519015535.891156-1-zhangheng@kylinos.cn Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index d86d4f5f9ca6..be1bbde8be5a 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7231,7 +7231,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8da0, "HP 16 Clipper OmniBook 7(X360)", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da1, "HP 16 Clipper OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da7, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x103c, 0x8da8, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8da8, "HP 16 Piston OmniBook X", ALC245_FIXUP_HP_ENVY_X360_15_FH0XXX), SND_PCI_QUIRK(0x103c, 0x8dc9, "HP Laptop 15-fc0xxx", ALC236_FIXUP_HP_DMIC), SND_PCI_QUIRK(0x103c, 0x8dd4, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), SND_PCI_QUIRK(0x103c, 0x8dd7, "HP Laptop 15-fd0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), -- cgit v1.2.3 From b59d5c51bb328a60749b4dd5fe7e649bfb4089b4 Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Tue, 19 May 2026 00:32:15 -0300 Subject: ALSA: ua101: Reject too-short USB descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit find_format_descriptor() walks the class-specific interface extras by advancing with bLength. It rejects descriptors that extend past the remaining buffer, but it does not reject descriptor lengths smaller than a USB descriptor header. Reject too-short descriptors before using bLength to advance the local scan. This keeps the UA-101 parser robust against malformed descriptor data and matches the usual USB descriptor walking rules. Fixes: 63978ab3e3e9 ("sound: add Edirol UA-101 support") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260519-alsa-ua101-desc-len-v1-1-4307d1a5e054@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/misc/ua101.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/usb/misc/ua101.c b/sound/usb/misc/ua101.c index d129b42eb979..b9a62e94e06c 100644 --- a/sound/usb/misc/ua101.c +++ b/sound/usb/misc/ua101.c @@ -894,8 +894,9 @@ find_format_descriptor(struct usb_interface *interface) struct uac_format_type_i_discrete_descriptor *desc; desc = (struct uac_format_type_i_discrete_descriptor *)extra; - if (desc->bLength > extralen) { - dev_err(&interface->dev, "descriptor overflow\n"); + if (desc->bLength < sizeof(struct usb_descriptor_header) || + desc->bLength > extralen) { + dev_err(&interface->dev, "invalid descriptor length\n"); return NULL; } if (desc->bLength == UAC_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(1) && -- cgit v1.2.3 From f8ce8b8331a1bc44ad4905886a482214d428b253 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 9 May 2026 22:44:12 +0200 Subject: batman-adv: v: stop OGMv2 on disabled interface When a batadv_hard_iface is disabled, its mesh_iface pointer is set to NULL. However, batadv_v_ogm_send_meshif() may still dispatch OGMs via batadv_v_ogm_queue_on_if() for interfaces that have since lost their mesh_iface association. This results in a NULL pointer dereference when batadv_v_ogm_queue_on_if() unconditionally calls netdev_priv() on the now NULL hard_iface->mesh_iface to retrieve the batadv_priv. It is necessary to ensure that the batadv_v_ogm_queue_on_if() checks that it is using the same mesh_iface for which batadv_v_ogm_send_meshif() was called. Cc: stable@kernel.org Fixes: 0da0035942d4 ("batman-adv: OGMv2 - add basic infrastructure") Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Reviewed-by: Yuan Tan Signed-off-by: Sven Eckelmann --- net/batman-adv/bat_v_ogm.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index e3870492dab7..e955b4940c72 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -113,14 +113,14 @@ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv) /** * batadv_v_ogm_send_to_if() - send a batman ogm using a given interface + * @bat_priv: the bat priv with all the mesh interface information * @skb: the OGM to send * @hard_iface: the interface to use to send the OGM */ -static void batadv_v_ogm_send_to_if(struct sk_buff *skb, +static void batadv_v_ogm_send_to_if(struct batadv_priv *bat_priv, + struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); - if (hard_iface->if_status != BATADV_IF_ACTIVE) { kfree_skb(skb); return; @@ -187,6 +187,7 @@ static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface) /** * batadv_v_ogm_aggr_send() - flush & send aggregation queue + * @bat_priv: the bat priv with all the mesh interface information * @hard_iface: the interface with the aggregation queue to flush * * Aggregates all OGMv2 packets currently in the aggregation queue into a @@ -196,7 +197,8 @@ static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface) * * Caller needs to hold the hard_iface->bat_v.aggr_list.lock. */ -static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface) +static void batadv_v_ogm_aggr_send(struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface) { unsigned int aggr_len = hard_iface->bat_v.aggr_len; struct sk_buff *skb_aggr; @@ -226,27 +228,32 @@ static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface) consume_skb(skb); } - batadv_v_ogm_send_to_if(skb_aggr, hard_iface); + batadv_v_ogm_send_to_if(bat_priv, skb_aggr, hard_iface); } /** * batadv_v_ogm_queue_on_if() - queue a batman ogm on a given interface + * @bat_priv: the bat priv with all the mesh interface information * @skb: the OGM to queue * @hard_iface: the interface to queue the OGM on */ -static void batadv_v_ogm_queue_on_if(struct sk_buff *skb, +static void batadv_v_ogm_queue_on_if(struct batadv_priv *bat_priv, + struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); + if (hard_iface->mesh_iface != bat_priv->mesh_iface) { + kfree_skb(skb); + return; + } if (!atomic_read(&bat_priv->aggregated_ogms)) { - batadv_v_ogm_send_to_if(skb, hard_iface); + batadv_v_ogm_send_to_if(bat_priv, skb, hard_iface); return; } spin_lock_bh(&hard_iface->bat_v.aggr_list.lock); if (!batadv_v_ogm_queue_left(skb, hard_iface)) - batadv_v_ogm_aggr_send(hard_iface); + batadv_v_ogm_aggr_send(bat_priv, hard_iface); hard_iface->bat_v.aggr_len += batadv_v_ogm_len(skb); __skb_queue_tail(&hard_iface->bat_v.aggr_list, skb); @@ -343,7 +350,7 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv) break; } - batadv_v_ogm_queue_on_if(skb_tmp, hard_iface); + batadv_v_ogm_queue_on_if(bat_priv, skb_tmp, hard_iface); batadv_hardif_put(hard_iface); } rcu_read_unlock(); @@ -383,12 +390,14 @@ void batadv_v_ogm_aggr_work(struct work_struct *work) { struct batadv_hard_iface_bat_v *batv; struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv; batv = container_of(work, struct batadv_hard_iface_bat_v, aggr_wq.work); hard_iface = container_of(batv, struct batadv_hard_iface, bat_v); + bat_priv = netdev_priv(hard_iface->mesh_iface); spin_lock_bh(&hard_iface->bat_v.aggr_list.lock); - batadv_v_ogm_aggr_send(hard_iface); + batadv_v_ogm_aggr_send(bat_priv, hard_iface); spin_unlock_bh(&hard_iface->bat_v.aggr_list.lock); batadv_v_ogm_start_queue_timer(hard_iface); @@ -578,7 +587,7 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv, if_outgoing->net_dev->name, ntohl(ogm_forward->throughput), ogm_forward->ttl, if_incoming->net_dev->name); - batadv_v_ogm_queue_on_if(skb, if_outgoing); + batadv_v_ogm_queue_on_if(bat_priv, skb, if_outgoing); out: batadv_orig_ifinfo_put(orig_ifinfo); -- cgit v1.2.3 From 501368506563e151b322c8c3f228b796e615b90d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 14 May 2026 16:33:12 +0200 Subject: batman-adv: tvlv: abort OGM send on tvlv append failure batadv_tvlv_container_ogm_append() could fail in two ways: a memory allocation failure when resizing the packet buffer, or the tvlv data exceeding U16_MAX bytes. In both cases the function previously returned the old (now stale) tvlv_value_len rather than signalling an error, causing the OGM/OGM2 send path to transmit a packet whose TVLV length field no longer matched the actual buffer contents. And because it also didn't fill in the new TVLV data, sending either uninitialized or corrupted data on the wire. All errors in batadv_tvlv_container_ogm_append() must be forwarded to the caller. And the caller must abort the send of the OGM2. For B.A.T.M.A.N. IV, it is currently not allowed to abort the send. The non-TVLV part of the OGM must be queued up instead. Cc: stable@kernel.org Fixes: ef26157747d4 ("batman-adv: tvlv - basic infrastructure") Signed-off-by: Sven Eckelmann --- net/batman-adv/bat_iv_ogm.c | 16 +++++++++++++--- net/batman-adv/bat_v_ogm.c | 26 ++++++++++++++------------ net/batman-adv/tvlv.c | 17 ++++++++++++----- net/batman-adv/tvlv.h | 2 +- 4 files changed, 40 insertions(+), 21 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 74ef7dc2b2f9..7ad26128b5f7 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -790,6 +790,7 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) u32 seqno; u16 tvlv_len = 0; unsigned long send_time; + int ret; lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); @@ -813,9 +814,18 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) * appended as it may alter the tt tvlv container */ batadv_tt_local_commit_changes(bat_priv); - tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff, - ogm_buff_len, - BATADV_OGM_HLEN); + ret = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff, + ogm_buff_len, + BATADV_OGM_HLEN); + if (ret < 0) { + /* OGMs must be queued even when the buffer allocation for + * TVLVs failed. just fall back to the non-TVLV version + */ + ret = 0; + *ogm_buff_len = BATADV_OGM_HLEN; + } + + tvlv_len = ret; } batadv_ogm_packet = (struct batadv_ogm_packet *)(*ogm_buff); diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index e955b4940c72..d66ca77b1aaa 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -269,10 +269,10 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv) struct batadv_hard_iface *hard_iface; struct batadv_ogm2_packet *ogm_packet; struct sk_buff *skb, *skb_tmp; - unsigned char *ogm_buff; + unsigned char **ogm_buff; struct list_head *iter; - int ogm_buff_len; - u16 tvlv_len = 0; + int *ogm_buff_len; + u16 tvlv_len; int ret; lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex); @@ -280,25 +280,27 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv) if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) goto out; - ogm_buff = bat_priv->bat_v.ogm_buff; - ogm_buff_len = bat_priv->bat_v.ogm_buff_len; + ogm_buff = &bat_priv->bat_v.ogm_buff; + ogm_buff_len = &bat_priv->bat_v.ogm_buff_len; + /* tt changes have to be committed before the tvlv data is * appended as it may alter the tt tvlv container */ batadv_tt_local_commit_changes(bat_priv); - tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, &ogm_buff, - &ogm_buff_len, - BATADV_OGM2_HLEN); + ret = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff, + ogm_buff_len, + BATADV_OGM2_HLEN); + if (ret < 0) + goto reschedule; - bat_priv->bat_v.ogm_buff = ogm_buff; - bat_priv->bat_v.ogm_buff_len = ogm_buff_len; + tvlv_len = ret; - skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + ogm_buff_len); + skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + *ogm_buff_len); if (!skb) goto reschedule; skb_reserve(skb, ETH_HLEN); - skb_put_data(skb, ogm_buff, ogm_buff_len); + skb_put_data(skb, *ogm_buff, *ogm_buff_len); ogm_packet = (struct batadv_ogm2_packet *)skb->data; ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno)); diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 8129a3f9c44d..46ed61dbf087 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -306,9 +307,10 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, * The ogm packet might be enlarged or shrunk depending on the current size * and the size of the to-be-appended tvlv containers. * - * Return: size of all appended tvlv containers in bytes. + * Return: size of all appended tvlv containers in bytes (max U16_MAX), negative + * if operation failed */ -u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, +int batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, unsigned char **packet_buff, int *packet_buff_len, int packet_min_len) { @@ -316,6 +318,7 @@ u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, struct batadv_tvlv_hdr *tvlv_hdr; u16 tvlv_value_len; void *tvlv_value; + int tvlv_len_ret; bool ret; spin_lock_bh(&bat_priv->tvlv.container_list_lock); @@ -323,9 +326,12 @@ u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len, packet_min_len, tvlv_value_len); - - if (!ret) + if (!ret) { + tvlv_len_ret = -ENOMEM; goto end; + } + + tvlv_len_ret = tvlv_value_len; if (!tvlv_value_len) goto end; @@ -344,7 +350,8 @@ u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, end: spin_unlock_bh(&bat_priv->tvlv.container_list_lock); - return tvlv_value_len; + + return tvlv_len_ret; } /** diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h index e5697230d991..f96f6b3f44a0 100644 --- a/net/batman-adv/tvlv.h +++ b/net/batman-adv/tvlv.h @@ -16,7 +16,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, u8 type, u8 version, void *tvlv_value, u16 tvlv_value_len); -u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, +int batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, unsigned char **packet_buff, int *packet_buff_len, int packet_min_len); void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, -- cgit v1.2.3 From f50487e3566358b2b982b7801945e858c78ad9ab Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 9 May 2026 21:55:29 +0200 Subject: batman-adv: tvlv: reject oversized TVLV packets batadv_tvlv_container_ogm_append() builds a TVLV packet section from the tvlv.container_list. The total size of this section is computed by batadv_tvlv_container_list_size(), which sums the sizes of all registered containers. The return type and accumulator in batadv_tvlv_container_list_size() were u16. If the accumulated size exceeds U16_MAX, the value wraps around, causing the subsequent allocation in batadv_tvlv_container_ogm_append() to be undersized. The memcpy-style copy that follows would then write beyond the end of the allocated buffer, corrupting kernel memory. Fix this by widening the return type of batadv_tvlv_container_list_size() to size_t. In batadv_tvlv_container_ogm_append(), check the computed length against U16_MAX before proceeding, and bail out as if the allocation had failed when the limit is exceeded. Cc: stable@kernel.org Fixes: ef26157747d4 ("batman-adv: tvlv - basic infrastructure") Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Reviewed-by: Yuan Tan Signed-off-by: Sven Eckelmann --- net/batman-adv/tvlv.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 46ed61dbf087..cc6ac580c620 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -160,10 +161,10 @@ batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) * * Return: size of all currently registered tvlv containers in bytes. */ -static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) +static size_t batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) { struct batadv_tvlv_container *tvlv; - u16 tvlv_len = 0; + size_t tvlv_len = 0; lockdep_assert_held(&bat_priv->tvlv.container_list_lock); @@ -316,13 +317,17 @@ int batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, { struct batadv_tvlv_container *tvlv; struct batadv_tvlv_hdr *tvlv_hdr; - u16 tvlv_value_len; + size_t tvlv_value_len; void *tvlv_value; int tvlv_len_ret; bool ret; spin_lock_bh(&bat_priv->tvlv.container_list_lock); tvlv_value_len = batadv_tvlv_container_list_size(bat_priv); + if (tvlv_value_len > U16_MAX) { + tvlv_len_ret = -E2BIG; + goto end; + } ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len, packet_min_len, tvlv_value_len); -- cgit v1.2.3 From 71dce47f0758537fff78fddb5fb0d4632d29b29f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 13 May 2026 23:38:54 +0200 Subject: batman-adv: tp_meter: fix race condition in send error reporting batadv_tp_sender_shutdown() previously used two separate variables to track session state: sending (an atomic flag indicating whether the session was active) and reason (a plain enum storing the stop reason). This introduced a race window between the two writes: after sending was cleared to 0, batadv_tp_send() could observe the stopped state and call batadv_tp_sender_end() before reason was written, causing the wrong stop reason to be reported to the caller. Fix this by consolidating both variables into a single atomic send_result, which holds 0 while the session is running and the stop reason once it ends. Cc: stable@kernel.org Fixes: 33a3bb4a3345 ("batman-adv: throughput meter implementation") Signed-off-by: Sven Eckelmann --- net/batman-adv/tp_meter.c | 40 +++++++++++++++++++++++++--------------- net/batman-adv/types.h | 10 +++++----- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 1fd1526059d8..3ce6d9b2c9f3 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -413,11 +413,14 @@ static void batadv_tp_sender_cleanup(struct batadv_tp_vars *tp_vars) static void batadv_tp_sender_end(struct batadv_priv *bat_priv, struct batadv_tp_vars *tp_vars) { + enum batadv_tp_meter_reason reason; u32 session_cookie; + reason = atomic_read(&tp_vars->send_result); + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Test towards %pM finished..shutting down (reason=%d)\n", - tp_vars->other_end, tp_vars->reason); + tp_vars->other_end, reason); batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Last timing stats: SRTT=%ums RTTVAR=%ums RTO=%ums\n", @@ -430,7 +433,7 @@ static void batadv_tp_sender_end(struct batadv_priv *bat_priv, session_cookie = batadv_tp_session_cookie(tp_vars->session, tp_vars->icmp_uid); - batadv_tp_batctl_notify(tp_vars->reason, + batadv_tp_batctl_notify(reason, tp_vars->other_end, bat_priv, tp_vars->start_time, @@ -446,10 +449,18 @@ static void batadv_tp_sender_end(struct batadv_priv *bat_priv, static void batadv_tp_sender_shutdown(struct batadv_tp_vars *tp_vars, enum batadv_tp_meter_reason reason) { - if (atomic_xchg(&tp_vars->sending, 0) != 1) - return; + atomic_cmpxchg(&tp_vars->send_result, 0, reason); +} - tp_vars->reason = reason; +/** + * batadv_tp_sender_stopped() - check if tp session was stopped with reason + * @tp_vars: the private data of the current TP meter session + * + * Return: whether stop reason was found + */ +static bool batadv_tp_sender_stopped(struct batadv_tp_vars *tp_vars) +{ + return atomic_read(&tp_vars->send_result) != 0; } /** @@ -479,7 +490,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars) /* most of the time this function is invoked while normal packet * reception... */ - if (unlikely(atomic_read(&tp_vars->sending) == 0)) + if (unlikely(batadv_tp_sender_stopped(tp_vars))) /* timer ref will be dropped in batadv_tp_sender_cleanup */ return; @@ -499,7 +510,7 @@ static void batadv_tp_sender_timeout(struct timer_list *t) struct batadv_tp_vars *tp_vars = timer_container_of(tp_vars, t, timer); struct batadv_priv *bat_priv = tp_vars->bat_priv; - if (atomic_read(&tp_vars->sending) == 0) + if (batadv_tp_sender_stopped(tp_vars)) return; /* if the user waited long enough...shutdown the test */ @@ -661,7 +672,7 @@ static void batadv_tp_recv_ack(struct batadv_priv *bat_priv, if (unlikely(tp_vars->role != BATADV_TP_SENDER)) goto out; - if (unlikely(atomic_read(&tp_vars->sending) == 0)) + if (unlikely(batadv_tp_sender_stopped(tp_vars))) goto out; /* old ACK? silently drop it.. */ @@ -827,21 +838,21 @@ static int batadv_tp_send(void *arg) if (unlikely(tp_vars->role != BATADV_TP_SENDER)) { err = BATADV_TP_REASON_DST_UNREACHABLE; - tp_vars->reason = err; + batadv_tp_sender_shutdown(tp_vars, err); goto out; } orig_node = batadv_orig_hash_find(bat_priv, tp_vars->other_end); if (unlikely(!orig_node)) { err = BATADV_TP_REASON_DST_UNREACHABLE; - tp_vars->reason = err; + batadv_tp_sender_shutdown(tp_vars, err); goto out; } primary_if = batadv_primary_if_get_selected(bat_priv); if (unlikely(!primary_if)) { err = BATADV_TP_REASON_DST_UNREACHABLE; - tp_vars->reason = err; + batadv_tp_sender_shutdown(tp_vars, err); goto out; } @@ -860,7 +871,7 @@ static int batadv_tp_send(void *arg) queue_delayed_work(batadv_event_workqueue, &tp_vars->finish_work, msecs_to_jiffies(tp_vars->test_length)); - while (atomic_read(&tp_vars->sending) != 0) { + while (!batadv_tp_sender_stopped(tp_vars)) { if (unlikely(!batadv_tp_avail(tp_vars, payload_len))) { batadv_tp_wait_available(tp_vars, payload_len); continue; @@ -883,8 +894,7 @@ static int batadv_tp_send(void *arg) "Meter: %s() cannot send packets (%d)\n", __func__, err); /* ensure nobody else tries to stop the thread now */ - if (atomic_xchg(&tp_vars->sending, 0) == 1) - tp_vars->reason = err; + batadv_tp_sender_shutdown(tp_vars, err); break; } @@ -1006,7 +1016,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, ether_addr_copy(tp_vars->other_end, dst); kref_init(&tp_vars->refcount); tp_vars->role = BATADV_TP_SENDER; - atomic_set(&tp_vars->sending, 1); + atomic_set(&tp_vars->send_result, 0); memcpy(tp_vars->session, session_id, sizeof(session_id)); tp_vars->icmp_uid = icmp_uid; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index c8c3e8064f00..fb0e4cb89d79 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1320,15 +1320,15 @@ struct batadv_tp_vars { /** @role: receiver/sender modi */ enum batadv_tp_meter_role role; - /** @sending: sending binary semaphore: 1 if sending, 0 is not */ - atomic_t sending; + /** + * @send_result: 0 when sending is ongoing and otherwise + * enum batadv_tp_meter_reason + */ + atomic_t send_result; /** @receiving: receiving binary semaphore: 1 if receiving, 0 is not */ atomic_t receiving; - /** @reason: reason for a stopped session */ - enum batadv_tp_meter_reason reason; - /** @finish_work: work item for the finishing procedure */ struct delayed_work finish_work; -- cgit v1.2.3 From ff24f2ecfd94c07a2b89bac497433e3b23271cac Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 16 May 2026 12:33:41 +0200 Subject: batman-adv: tp_meter: avoid role confusion in tp_list Session lookups in tp_list matched only on destination address (and optionally session ID), leaving role validation to the caller. If two sessions with the same other_end coexisted (one as sender, one as receiver) a lookup could silently return the wrong one, causing the caller's role to bail out early, potentially skipping necessary cleanup. Move the role check into the lookup functions themselves so the correct entry is always returned, or none at all. Since batadv_tp_start() legitimately needs to detect any active session to a destination regardless of role, introduce a dedicated helper for that case rather than bending the existing lookup semantics. Cc: stable@kernel.org Fixes: 33a3bb4a3345 ("batman-adv: throughput meter implementation") Signed-off-by: Sven Eckelmann --- net/batman-adv/tp_meter.c | 59 +++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 3ce6d9b2c9f3..0fc4ca78e84e 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -255,6 +255,7 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, * batadv_tp_list_find() - find a tp_vars object in the global list * @bat_priv: the bat priv with all the mesh interface information * @dst: the other endpoint MAC address to look for + * @role: role of the session * * Look for a tp_vars object matching dst as end_point and return it after * having increment the refcounter. Return NULL is not found @@ -262,7 +263,8 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, * Return: matching tp_vars or NULL when no tp_vars with @dst was found */ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, - const u8 *dst) + const u8 *dst, + enum batadv_tp_meter_role role) { struct batadv_tp_vars *pos, *tp_vars = NULL; @@ -271,6 +273,9 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, if (!batadv_compare_eth(pos->other_end, dst)) continue; + if (pos->role != role) + continue; + /* most of the time this function is invoked during the normal * process..it makes sens to pay more when the session is * finished and to speed the process up during the measurement @@ -286,12 +291,33 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, return tp_vars; } +/** + * batadv_tp_list_active() - check if session from/to destination is ongoing + * @bat_priv: the bat priv with all the mesh interface information + * @dst: the other endpoint MAC address to look for + * + * Return: if matching session with @dst was found + */ +static bool batadv_tp_list_active(struct batadv_priv *bat_priv, const u8 *dst) + __must_hold(&bat_priv->tp_list_lock) +{ + struct batadv_tp_vars *tp_vars; + + hlist_for_each_entry_rcu(tp_vars, &bat_priv->tp_list, list) { + if (batadv_compare_eth(tp_vars->other_end, dst)) + return true; + } + + return false; +} + /** * batadv_tp_list_find_session() - find tp_vars session object in the global * list * @bat_priv: the bat priv with all the mesh interface information * @dst: the other endpoint MAC address to look for * @session: session identifier + * @role: role of the session * * Look for a tp_vars object matching dst as end_point, session as tp meter * session and return it after having increment the refcounter. Return NULL @@ -301,7 +327,7 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, */ static struct batadv_tp_vars * batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst, - const u8 *session) + const u8 *session, enum batadv_tp_meter_role role) { struct batadv_tp_vars *pos, *tp_vars = NULL; @@ -313,6 +339,9 @@ batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst, if (memcmp(pos->session, session, sizeof(pos->session)) != 0) continue; + if (pos->role != role) + continue; + /* most of the time this function is invoked during the normal * process..it makes sense to pay more when the session is * finished and to speed the process up during the measurement @@ -665,13 +694,10 @@ static void batadv_tp_recv_ack(struct batadv_priv *bat_priv, /* find the tp_vars */ tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, - icmp->session); + icmp->session, BATADV_TP_SENDER); if (unlikely(!tp_vars)) return; - if (unlikely(tp_vars->role != BATADV_TP_SENDER)) - goto out; - if (unlikely(batadv_tp_sender_stopped(tp_vars))) goto out; @@ -980,10 +1006,8 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, return; } - tp_vars = batadv_tp_list_find(bat_priv, dst); - if (tp_vars) { + if (batadv_tp_list_active(bat_priv, dst)) { spin_unlock_bh(&bat_priv->tp_list_lock); - batadv_tp_vars_put(tp_vars); batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Meter: test to or from the same node already ongoing, aborting\n"); batadv_tp_batctl_error_notify(BATADV_TP_REASON_ALREADY_ONGOING, @@ -1104,18 +1128,14 @@ void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst, if (!orig_node) return; - tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig); + tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig, BATADV_TP_SENDER); if (!tp_vars) { batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Meter: trying to interrupt an already over connection\n"); goto out_put_orig_node; } - if (unlikely(tp_vars->role != BATADV_TP_SENDER)) - goto out_put_tp_vars; - batadv_tp_sender_shutdown(tp_vars, return_value); -out_put_tp_vars: batadv_tp_vars_put(tp_vars); out_put_orig_node: batadv_orig_node_put(orig_node); @@ -1371,7 +1391,7 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv, goto out_unlock; tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, - icmp->session); + icmp->session, BATADV_TP_RECEIVER); if (tp_vars) goto out_unlock; @@ -1442,7 +1462,7 @@ static void batadv_tp_recv_msg(struct batadv_priv *bat_priv, } } else { tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, - icmp->session); + icmp->session, BATADV_TP_RECEIVER); if (!tp_vars) { batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Unexpected packet from %pM!\n", @@ -1451,13 +1471,6 @@ static void batadv_tp_recv_msg(struct batadv_priv *bat_priv, } } - if (unlikely(tp_vars->role != BATADV_TP_RECEIVER)) { - batadv_dbg(BATADV_DBG_TP_METER, bat_priv, - "Meter: dropping packet: not expected (role=%u)\n", - tp_vars->role); - goto out; - } - tp_vars->last_recv_time = jiffies; /* if the packet is a duplicate, it may be the case that an ACK has been -- cgit v1.2.3 From 20c2d6a20ca936f5aaa6dd40f73f262ac45c87cc Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 14 May 2026 19:22:02 +0200 Subject: batman-adv: mcast: fix use-after-free in orig_node RCU release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit batadv_mcast_purge_orig() removes entries from RCU-protected hlists but does not wait for an RCU grace period before returning. Concurrent RCU readers may still accesses references to those entries at the point of removal. RCU-protected readers trying to operate on entries like orig->mcast_want_all_ipv6_node will then access already freed memory. Fix this by moving batadv_mcast_purge_orig() to batadv_orig_node_release(), just before the call_rcu() invocation. This ensures RCU readers that were active at purge time have drained before the orig_node memory is reclaimed. Cc: stable@kernel.org Fixes: ab49886e3da7 ("batman-adv: Add IPv4 link-local/IPv6-ll-all-nodes multicast support") Acked-by: Linus Lüssing Signed-off-by: Sven Eckelmann --- net/batman-adv/originator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index b3468ccab535..ad4921b659d9 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -835,8 +835,6 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) orig_node = container_of(rcu, struct batadv_orig_node, rcu); - batadv_mcast_purge_orig(orig_node); - batadv_frag_purge_orig(orig_node, NULL); kfree(orig_node->tt_buff); @@ -887,6 +885,8 @@ void batadv_orig_node_release(struct kref *ref) } spin_unlock_bh(&orig_node->vlan_list_lock); + batadv_mcast_purge_orig(orig_node); + call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu); } -- cgit v1.2.3 From 86ed2d96db1965e9008e919b1936145ae66540e3 Mon Sep 17 00:00:00 2001 From: Chaitanya Kumar Borah Date: Mon, 11 May 2026 11:02:10 +0530 Subject: drm/i915/display: Copy color pipeline from plane in the primary joiner pipe When copying plane color state in a joiner configuration, use the plane in the primary joiner pipe since it carries the pipeline number selected by the user-space. This assumes that all pipes in the joiner are symmetric in their plane color capabilities. Cc: stable@vger.kernel.org # v6.19+ Fixes: a78f1b6baf4d ("drm/i915/color: Add framework to program CSC") Tested-by: Vidya Srinivas Signed-off-by: Chaitanya Kumar Borah Reviewed-by: Uma Shankar Signed-off-by: Ankit Nautiyal Link: https://patch.msgid.link/20260511053213.3122314-2-chaitanya.kumar.borah@intel.com (cherry picked from commit e8308fb5e05ca08ddfb8b46f6d947a6e3fd80cd7) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index 5390ceb21ca4..82f445c83158 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c @@ -373,7 +373,7 @@ intel_plane_color_copy_uapi_to_hw_state(struct intel_plane_state *plane_state, bool changed = false; int i = 0; - iter_colorop = plane_state->uapi.color_pipeline; + iter_colorop = from_plane_state->uapi.color_pipeline; while (iter_colorop) { for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) { -- cgit v1.2.3 From f87abd0c6604fb6cc31cc86fc7ccc6a576924352 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Mon, 11 May 2026 18:02:15 +0530 Subject: drm/i915/dp: Fix readback for target_rr in Adaptive Sync SDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correct the bit-shift logic to properly readback the 10 bit target_rr from DB3 and DB4. v2: Align the style with readback for vtotal. (Ville) Fixes: 12ea89291603 ("drm/i915/dp: Add Read/Write support for Adaptive Sync SDP") Cc: Mitul Golani Cc: Ankit Nautiyal Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://patch.msgid.link/20260511123218.1589830-2-ankit.k.nautiyal@intel.com (cherry picked from commit f7abc4af2b19240a145a221461dfe756cc01d74a) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 50d34b4fdf82..6ef2a0043cda 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5303,7 +5303,7 @@ int intel_dp_as_sdp_unpack(struct drm_dp_as_sdp *as_sdp, as_sdp->length = sdp->sdp_header.HB3 & DP_ADAPTIVE_SYNC_SDP_LENGTH; as_sdp->mode = sdp->db[0] & DP_ADAPTIVE_SYNC_SDP_OPERATION_MODE; as_sdp->vtotal = (sdp->db[2] << 8) | sdp->db[1]; - as_sdp->target_rr = (u64)sdp->db[3] | ((u64)sdp->db[4] & 0x3); + as_sdp->target_rr = ((sdp->db[4] & 0x3) << 8) | sdp->db[3]; as_sdp->target_rr_divider = sdp->db[4] & 0x20 ? true : false; return 0; -- cgit v1.2.3 From fbceb39b536e40c2f7cc47ab42037bb7c2b7ced9 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Fri, 15 May 2026 12:57:53 +0300 Subject: drm/i915/psr: Add defininitions for INTEL_WA_REGISTER_CAPS DPCD register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EDP specification says: "If either VSC SDP is unable to be transmitted 100 ns before the SU region, the Source device may optionally transmit the VSC SDP during the prior video scan line’s HBlank period There is a Intel specific drm dp register currently containing bits related how TCON can support PSR2 with SDP on prior line." Unfortunately many panels are having problems in implementing this. So there is a custom Intel specific DPCD register (INTEL_WA_REGISTER_CAPS) to figure out if this is properly implemented on a panel or if panel doesn't require that 100 ns delay before the SU region. Here are the definitions in this custom DPCD address: 0 = Panel doesn't support SDP on prior line 1 = Panel supports SDP on prior line 2 = Panel doesn't have 100ns requirement 3 = Reserved Add definitions for this new register and it's values into new header intel_dpcd.h. v2: add INTEL_DPCD_ prefix to definitions Bspec: 74741 Signed-off-by: Jouni Högander Reviewed-by: Suraj Kandpal Link: https://patch.msgid.link/20260515095756.2799483-2-jouni.hogander@intel.com (cherry picked from commit 1da1c9294825f08f622c473480d185680c2a3b75) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_dpcd.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 drivers/gpu/drm/i915/display/intel_dpcd.h diff --git a/drivers/gpu/drm/i915/display/intel_dpcd.h b/drivers/gpu/drm/i915/display/intel_dpcd.h new file mode 100644 index 000000000000..4aea5326f2ed --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dpcd.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ + +#ifndef __INTEL_DPCD_H__ +#define __INTEL_DPCD_H__ + +#define INTEL_DPCD_INTEL_WA_REGISTER_CAPS 0x3f0 +# define INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_EARLYSCANLINE_SDP_SUPPORT_MASK REG_GENMASK(1, 0) +# define INTEL_DPCD_INTEL_WA_REGISTER_CAPS_FALL_BACK_TO_PSR1 0 +# define INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_WITH_EARLY_SCANLINE 1 +# define INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_WITHOUT_EARLY_SCANLINE 2 + +#endif /* __INTEL_DPCD_H__ */ -- cgit v1.2.3 From f30bece421a4ae34359254e1dc2a187a42b6af9b Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Fri, 15 May 2026 12:57:54 +0300 Subject: drm/i915/psr: Read Intel DPCD workaround register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Read Intel DPCD workaround register and store it into intel_connector->dp.psr_caps. psr_caps was chosen as currently it contains only PSR workaround for PSR2 SDP on prior scanline implementation. Signed-off-by: Jouni Högander Reviewed-by: Suraj Kandpal Link: https://patch.msgid.link/20260515095756.2799483-3-jouni.hogander@intel.com (cherry picked from commit c48ff24d0f4ab7ad696b2d35ad64ce7e049c668c) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_display_types.h | 1 + drivers/gpu/drm/i915/display/intel_psr.c | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index f6cd0a062090..9c7c357afb09 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -584,6 +584,7 @@ struct intel_connector { struct { u8 dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; + u8 intel_wa_dpcd; bool support; bool su_support; diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 53c10ae76ab5..82eac4048382 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -43,6 +43,7 @@ #include "intel_display_wa.h" #include "intel_dmc.h" #include "intel_dp.h" +#include "intel_dpcd.h" #include "intel_dp_aux.h" #include "intel_dsb.h" #include "intel_frontbuffer.h" @@ -716,8 +717,14 @@ static void _psr_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *co connector->dp.psr_caps.su_support ? "" : "not "); } - if (connector->dp.psr_caps.su_support) + if (connector->dp.psr_caps.su_support) { + ret = drm_dp_dpcd_read_byte(&intel_dp->aux, + INTEL_DPCD_INTEL_WA_REGISTER_CAPS, + &connector->dp.psr_caps.intel_wa_dpcd); + if (ret < 0) + return; _psr_compute_su_granularity(intel_dp, connector); + } } void intel_psr_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *connector) -- cgit v1.2.3 From 4703049f768fc1c1caac754134118bee1a3af189 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Fri, 15 May 2026 12:57:55 +0300 Subject: drm/i915/psr: Apply Intel DPCD workaround when SDP on prior line used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is Intel specific workaround DPCD address containing workaround for case where SDP is on prior line. Apply this workaround according to values in the offset. Fixes: 61e887329e33 ("drm/i915/xelpd: Handle PSR2 SDP indication in the prior scanline") Cc: # v5.15+ Signed-off-by: Jouni Högander Reviewed-by: Suraj Kandpal Link: https://patch.msgid.link/20260515095756.2799483-4-jouni.hogander@intel.com (cherry picked from commit c3fe899fbeac86ea4a5ca9dd845b2cbc0da46249) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_psr.c | 35 ++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 82eac4048382..29904a037575 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1365,9 +1365,35 @@ static bool psr2_granularity_check(struct intel_crtc_state *crtc_state, return true; } -static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_dp, - struct intel_crtc_state *crtc_state) +static bool apply_scanline_indication_wa(struct intel_crtc_state *crtc_state, + struct intel_connector *connector) { + struct intel_dp *intel_dp = intel_attached_dp(connector); + u8 early_scanline_support = connector->dp.psr_caps.intel_wa_dpcd & + INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_EARLYSCANLINE_SDP_SUPPORT_MASK; + + if (intel_dp->edp_dpcd[0] >= DP_EDP_15) + return true; + + switch (early_scanline_support) { + case INTEL_DPCD_INTEL_WA_REGISTER_CAPS_FALL_BACK_TO_PSR1: + crtc_state->req_psr2_sdp_prior_scanline = false; + return false; + case INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_WITH_EARLY_SCANLINE: + return true; + case INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_WITHOUT_EARLY_SCANLINE: + crtc_state->req_psr2_sdp_prior_scanline = false; + return true; + default: + MISSING_CASE(early_scanline_support); + return false; + } +} + +static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_crtc_state *crtc_state, + struct intel_connector *connector) +{ + struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_display *display = to_intel_display(intel_dp); const struct drm_display_mode *adjusted_mode = &crtc_state->uapi.adjusted_mode; u32 hblank_total, hblank_ns, req_ns; @@ -1386,7 +1412,8 @@ static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_d return false; crtc_state->req_psr2_sdp_prior_scanline = true; - return true; + + return apply_scanline_indication_wa(crtc_state, connector); } static int intel_psr_entry_setup_frames(struct intel_dp *intel_dp, @@ -1667,7 +1694,7 @@ static bool intel_sel_update_config_valid(struct intel_crtc_state *crtc_state, conn_state)) goto unsupported; - if (!_compute_psr2_sdp_prior_scanline_indication(intel_dp, crtc_state)) { + if (!_compute_psr2_sdp_prior_scanline_indication(crtc_state, connector)) { drm_dbg_kms(display->drm, "Selective update not enabled, SDP indication do not fit in hblank\n"); goto unsupported; -- cgit v1.2.3 From aa3153bd139a6c48667dcd02608d3b2c80bff02c Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 15 May 2026 22:00:40 +0200 Subject: batman-adv: iv: recover OGM scheduling after forward packet error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When batadv_iv_ogm_schedule_buff() fails to allocate and queue a forward packet for OGM transmission, the work item that drives periodic OGM scheduling is never re-armed. This silently halts transmission of the node's own OGMs on the affected interface — only OGMs from other peers continue to be aggregated and forwarded. Fix this by tracking whether batadv_iv_ogm_queue_add() (and transitively batadv_iv_ogm_aggregate_new()) successfully scheduled a forward packet. When scheduling fails, batadv_iv_ogm_schedule_buff() falls back to queuing a dedicated recovery work item (reschedule_work) that fires after one originator interval and calls batadv_iv_ogm_schedule() again. Cc: stable@kernel.org Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann --- net/batman-adv/bat_iv_ogm.c | 76 +++++++++++++++++++++++++++++++++------------ net/batman-adv/types.h | 3 ++ 2 files changed, 60 insertions(+), 19 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 7ad26128b5f7..b8b1b997960a 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -224,6 +224,8 @@ static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface) hard_iface->bat_iv.ogm_buff = NULL; mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); + + cancel_delayed_work_sync(&hard_iface->bat_iv.reschedule_work); } static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface) @@ -536,8 +538,10 @@ out: * @if_incoming: interface where the packet was received * @if_outgoing: interface for which the retransmission should be considered * @own_packet: true if it is a self-generated ogm + * + * Return: whether forward packet was scheduled */ -static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, +static bool batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, int packet_len, unsigned long send_time, bool direct_link, struct batadv_hard_iface *if_incoming, @@ -561,13 +565,13 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, skb = netdev_alloc_skb_ip_align(NULL, skb_size); if (!skb) - return; + return false; forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing, queue_left, bat_priv, skb); if (!forw_packet_aggr) { kfree_skb(skb); - return; + return false; } forw_packet_aggr->skb->priority = TC_PRIO_CONTROL; @@ -590,6 +594,8 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, batadv_iv_send_outstanding_bat_ogm_packet); batadv_forw_packet_ogmv1_queue(bat_priv, forw_packet_aggr, send_time); + + return true; } /* aggregate a new packet into the existing ogm packet */ @@ -617,8 +623,10 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, * @if_outgoing: interface for which the retransmission should be considered * @own_packet: true if it is a self-generated ogm * @send_time: timestamp (jiffies) when the packet is to be sent + * + * Return: whether forward packet was scheduled */ -static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, +static bool batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, unsigned char *packet_buff, int packet_len, struct batadv_hard_iface *if_incoming, @@ -670,14 +678,16 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, if (!own_packet && atomic_read(&bat_priv->aggregated_ogms)) send_time += max_aggregation_jiffies; - batadv_iv_ogm_aggregate_new(packet_buff, packet_len, - send_time, direct_link, - if_incoming, if_outgoing, - own_packet); + return batadv_iv_ogm_aggregate_new(packet_buff, packet_len, + send_time, direct_link, + if_incoming, if_outgoing, + own_packet); } else { batadv_iv_ogm_aggregate(forw_packet_aggr, packet_buff, packet_len, direct_link); spin_unlock_bh(&bat_priv->forw_bat_list_lock); + + return true; } } @@ -790,6 +800,8 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) u32 seqno; u16 tvlv_len = 0; unsigned long send_time; + bool reschedule = false; + bool scheduled; int ret; lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); @@ -818,11 +830,8 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) ogm_buff_len, BATADV_OGM_HLEN); if (ret < 0) { - /* OGMs must be queued even when the buffer allocation for - * TVLVs failed. just fall back to the non-TVLV version - */ - ret = 0; - *ogm_buff_len = BATADV_OGM_HLEN; + reschedule = true; + goto out; } tvlv_len = ret; @@ -844,8 +853,11 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) /* OGMs from secondary interfaces are only scheduled on their * respective interfaces. */ - batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len, - hard_iface, hard_iface, 1, send_time); + scheduled = batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len, + hard_iface, hard_iface, 1, send_time); + if (!scheduled) + reschedule = true; + goto out; } @@ -857,15 +869,28 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) if (!kref_get_unless_zero(&tmp_hard_iface->refcount)) continue; - batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, - *ogm_buff_len, hard_iface, - tmp_hard_iface, 1, send_time); - + scheduled = batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, + *ogm_buff_len, hard_iface, + tmp_hard_iface, 1, send_time); batadv_hardif_put(tmp_hard_iface); + + if (!scheduled && tmp_hard_iface == hard_iface) + reschedule = true; } rcu_read_unlock(); out: + if (reschedule) { + /* there was a failure scheduling the own forward packet. + * as result, the batadv_iv_send_outstanding_bat_ogm_packet() + * work item is no longer scheduled. it is therefore necessary + * to reschedule it manually + */ + queue_delayed_work(batadv_event_workqueue, + &hard_iface->bat_iv.reschedule_work, + msecs_to_jiffies(atomic_read(&bat_priv->orig_interval))); + } + batadv_hardif_put(primary_if); } @@ -880,6 +905,17 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } +static void batadv_iv_ogm_reschedule(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct batadv_hard_iface *hard_iface; + + hard_iface = container_of(delayed_work, + struct batadv_hard_iface, + bat_iv.reschedule_work); + batadv_iv_ogm_schedule(hard_iface); +} + /** * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over interface * @orig_node: originator which reproadcasted the OGMs directly @@ -2272,6 +2308,8 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1, static void batadv_iv_iface_enabled(struct batadv_hard_iface *hard_iface) { + INIT_DELAYED_WORK(&hard_iface->bat_iv.reschedule_work, batadv_iv_ogm_reschedule); + /* begin scheduling originator messages on that interface */ batadv_iv_ogm_schedule(hard_iface); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index fb0e4cb89d79..821ada05d86a 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -83,6 +83,9 @@ struct batadv_hard_iface_bat_iv { /** @ogm_seqno: OGM sequence number - used to identify each OGM */ atomic_t ogm_seqno; + /** @reschedule_work: recover OGM schedule after schedule error */ + struct delayed_work reschedule_work; + /** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */ struct mutex ogm_buff_mutex; }; -- cgit v1.2.3 From 0459430add32ea41f3e2ef9351610e6d33627a6b Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 10 May 2026 11:43:20 +0200 Subject: batman-adv: bla: fix report_work leak on backbone_gw purge batadv_bla_purge_backbone_gw() removes stale backbone gateway entries, but fails to properly handle their associated report_work: - If report_work is running, the purge must wait for it to finish before freeing the backbone_gw, otherwise the worker may access freed memory (e.g. bat_priv). - If report_work is pending, the purge must cancel it and release the reference held for that pending work item. The previous implementation called hlist_for_each_entry_safe() inside a spin_lock_bh() section, but cancel_work_sync() may sleep and therefore cannot be called from within a spinlock-protected region. Restructure the loop to handle one entry per spinlock critical section: acquire the lock, find the next entry to purge, remove it from the hash list, then release the lock before calling cancel_work_sync() and dropping the hash_entry reference. Repeat until no more entries require purging. Cc: stable@kernel.org Fixes: 23721387c409 ("batman-adv: add basic bridge loop avoidance code") Reviewed-by: Simon Wunderlich Signed-off-by: Sven Eckelmann --- net/batman-adv/bridge_loop_avoidance.c | 54 ++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index cec11f1251d6..df1dfdf4a1a1 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1224,6 +1224,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) struct hlist_head *head; struct batadv_hashtable *hash; spinlock_t *list_lock; /* protects write access to the hash lists */ + bool purged; int i; hash = bat_priv->bla.backbone_hash; @@ -1234,30 +1235,45 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) head = &hash->table[i]; list_lock = &hash->list_locks[i]; - spin_lock_bh(list_lock); - hlist_for_each_entry_safe(backbone_gw, node_tmp, - head, hash_entry) { - if (now) - goto purge_now; - if (!batadv_has_timed_out(backbone_gw->lasttime, - BATADV_BLA_BACKBONE_TIMEOUT)) - continue; + do { + purged = false; - batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, - "%s(): backbone gw %pM timed out\n", - __func__, backbone_gw->orig); + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(backbone_gw, node_tmp, + head, hash_entry) { + if (now) + goto purge_now; + if (!batadv_has_timed_out(backbone_gw->lasttime, + BATADV_BLA_BACKBONE_TIMEOUT)) + continue; + + batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, + "%s(): backbone gw %pM timed out\n", + __func__, backbone_gw->orig); purge_now: - /* don't wait for the pending request anymore */ - if (atomic_read(&backbone_gw->request_sent)) - atomic_dec(&bat_priv->bla.num_requests); + purged = true; - batadv_bla_del_backbone_claims(backbone_gw); + /* don't wait for the pending request anymore */ + if (atomic_read(&backbone_gw->request_sent)) + atomic_dec(&bat_priv->bla.num_requests); - hlist_del_rcu(&backbone_gw->hash_entry); - batadv_backbone_gw_put(backbone_gw); - } - spin_unlock_bh(list_lock); + batadv_bla_del_backbone_claims(backbone_gw); + + hlist_del_rcu(&backbone_gw->hash_entry); + break; + } + spin_unlock_bh(list_lock); + + if (purged) { + /* reference for pending report_work */ + if (cancel_work_sync(&backbone_gw->report_work)) + batadv_backbone_gw_put(backbone_gw); + + /* reference for hash_entry */ + batadv_backbone_gw_put(backbone_gw); + } + } while (purged); } } -- cgit v1.2.3 From 83ab69bd12b80f6ea169c8bea6977701b53a043d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 12 May 2026 09:13:31 +0200 Subject: batman-adv: bla: avoid double decrement of bla.num_requests The bla.num_requests is increased when no request_sent was in progress. And it is decremented in various places (announcement was received, backbone is purged, periodic work). But the check if the request_sent is actually set to a specific state and the atomic_dec/_inc are not safe because they are not atomic (TOCTOU) and multiple such code portions can run concurrently. At the same time, it is necessary to modify request_sent (state) and bla.num_requests atomically. Otherwise batadv_bla_send_request() might set request_sent to 1 and is interrupted. batadv_handle_announce() can then set request_sent back to 0 and decrement num_requests before batadv_bla_send_request() incremented it. The two operations must therefore be locked. And since state (request_sent) and wait_periods are only accessed inside this lock, they can be converted to simpler datatypes. And to avoid that the bla.num_requests is touched by a parallel running context with a valid backbone_gw reference after batadv_bla_purge_backbone_gw() ran, a third state "stopped" is required to correctly signal that a backbone_gw is in the state of being cleaned up. Cc: stable@kernel.org Fixes: 23721387c409 ("batman-adv: add basic bridge loop avoidance code") Signed-off-by: Sven Eckelmann --- net/batman-adv/bridge_loop_avoidance.c | 51 +++++++++++++++++++++++----------- net/batman-adv/mesh-interface.c | 1 + net/batman-adv/types.h | 39 ++++++++++++++++++++------ 3 files changed, 67 insertions(+), 24 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index df1dfdf4a1a1..1bef12e659cb 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -514,8 +514,8 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig, entry->crc = BATADV_BLA_CRC_INIT; entry->bat_priv = bat_priv; spin_lock_init(&entry->crc_lock); - atomic_set(&entry->request_sent, 0); - atomic_set(&entry->wait_periods, 0); + entry->state = BATADV_BLA_BACKBONE_GW_SYNCED; + entry->wait_periods = 0; ether_addr_copy(entry->orig, orig); INIT_WORK(&entry->report_work, batadv_bla_loopdetect_report); kref_init(&entry->refcount); @@ -544,9 +544,13 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig, batadv_bla_send_announce(bat_priv, entry); /* this will be decreased in the worker thread */ - atomic_inc(&entry->request_sent); - atomic_set(&entry->wait_periods, BATADV_BLA_WAIT_PERIODS); - atomic_inc(&bat_priv->bla.num_requests); + spin_lock_bh(&bat_priv->bla.num_requests_lock); + if (entry->state == BATADV_BLA_BACKBONE_GW_SYNCED) { + entry->state = BATADV_BLA_BACKBONE_GW_UNSYNCED; + entry->wait_periods = BATADV_BLA_WAIT_PERIODS; + atomic_inc(&bat_priv->bla.num_requests); + } + spin_unlock_bh(&bat_priv->bla.num_requests_lock); } return entry; @@ -649,10 +653,12 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw) backbone_gw->vid, BATADV_CLAIM_TYPE_REQUEST); /* no local broadcasts should be sent or received, for now. */ - if (!atomic_read(&backbone_gw->request_sent)) { + spin_lock_bh(&backbone_gw->bat_priv->bla.num_requests_lock); + if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_SYNCED) { + backbone_gw->state = BATADV_BLA_BACKBONE_GW_UNSYNCED; atomic_inc(&backbone_gw->bat_priv->bla.num_requests); - atomic_set(&backbone_gw->request_sent, 1); } + spin_unlock_bh(&backbone_gw->bat_priv->bla.num_requests_lock); } /** @@ -873,10 +879,12 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, /* if we have sent a request and the crc was OK, * we can allow traffic again. */ - if (atomic_read(&backbone_gw->request_sent)) { + spin_lock_bh(&bat_priv->bla.num_requests_lock); + if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_UNSYNCED) { + backbone_gw->state = BATADV_BLA_BACKBONE_GW_SYNCED; atomic_dec(&backbone_gw->bat_priv->bla.num_requests); - atomic_set(&backbone_gw->request_sent, 0); } + spin_unlock_bh(&bat_priv->bla.num_requests_lock); } batadv_backbone_gw_put(backbone_gw); @@ -1255,9 +1263,13 @@ purge_now: purged = true; /* don't wait for the pending request anymore */ - if (atomic_read(&backbone_gw->request_sent)) + spin_lock_bh(&bat_priv->bla.num_requests_lock); + if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_UNSYNCED) atomic_dec(&bat_priv->bla.num_requests); + backbone_gw->state = BATADV_BLA_BACKBONE_GW_STOPPED; + spin_unlock_bh(&bat_priv->bla.num_requests_lock); + batadv_bla_del_backbone_claims(backbone_gw); hlist_del_rcu(&backbone_gw->hash_entry); @@ -1508,7 +1520,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) batadv_bla_send_loopdetect(bat_priv, backbone_gw); - /* request_sent is only set after creation to avoid + /* state is only set to unsynced after creation to avoid * problems when we are not yet known as backbone gw * in the backbone. * @@ -1517,14 +1529,21 @@ static void batadv_bla_periodic_work(struct work_struct *work) * some grace time. */ - if (atomic_read(&backbone_gw->request_sent) == 0) - continue; + spin_lock_bh(&bat_priv->bla.num_requests_lock); + if (backbone_gw->state != BATADV_BLA_BACKBONE_GW_UNSYNCED) + goto unlock_next; - if (!atomic_dec_and_test(&backbone_gw->wait_periods)) - continue; + if (backbone_gw->wait_periods > 0) + backbone_gw->wait_periods--; + + if (backbone_gw->wait_periods > 0) + goto unlock_next; + backbone_gw->state = BATADV_BLA_BACKBONE_GW_SYNCED; atomic_dec(&backbone_gw->bat_priv->bla.num_requests); - atomic_set(&backbone_gw->request_sent, 0); + +unlock_next: + spin_unlock_bh(&bat_priv->bla.num_requests_lock); } rcu_read_unlock(); } diff --git a/net/batman-adv/mesh-interface.c b/net/batman-adv/mesh-interface.c index 56ca1c1b83f2..e7aa45bc6b7a 100644 --- a/net/batman-adv/mesh-interface.c +++ b/net/batman-adv/mesh-interface.c @@ -787,6 +787,7 @@ static int batadv_meshif_init_late(struct net_device *dev) atomic_set(&bat_priv->tt.ogm_append_cnt, 0); #ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bla.num_requests, 0); + spin_lock_init(&bat_priv->bla.num_requests_lock); #endif atomic_set(&bat_priv->tp_num, 0); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 821ada05d86a..a01ee46d97f3 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1026,6 +1026,12 @@ struct batadv_priv_bla { /** @num_requests: number of bla requests in flight */ atomic_t num_requests; + /** + * @num_requests_lock: locks update num_requests + + * batadv_backbone_gw::state + batadv_backbone_gw::wait_periods update + */ + spinlock_t num_requests_lock; + /** * @claim_hash: hash table containing mesh nodes this host has claimed */ @@ -1672,6 +1678,27 @@ struct batadv_priv { #ifdef CONFIG_BATMAN_ADV_BLA +enum batadv_bla_backbone_gw_state { + /** + * @BATADV_BLA_BACKBONE_GW_STOPPED: backbone gw is being removed + * and it must not longer work on requests + */ + BATADV_BLA_BACKBONE_GW_STOPPED, + + /** + * @BATADV_BLA_BACKBONE_GW_UNSYNCED: backbone was detected out + * of sync and a request was send. No traffic is forwarded until the + * situation is resolved + */ + BATADV_BLA_BACKBONE_GW_UNSYNCED, + + /** + * @BATADV_BLA_BACKBONE_GW_SYNCED: backbone is consider to be in + * sync. traffic can be forwarded + */ + BATADV_BLA_BACKBONE_GW_SYNCED, +}; + /** * struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN */ @@ -1697,16 +1724,12 @@ struct batadv_bla_backbone_gw { /** * @wait_periods: grace time for bridge forward delays and bla group * forming at bootup phase - no bcast traffic is formwared until it has - * elapsed + * elapsed. Must only be access with num_requests_lock. */ - atomic_t wait_periods; + u8 wait_periods; - /** - * @request_sent: if this bool is set to true we are out of sync with - * this backbone gateway - no bcast traffic is formwared until the - * situation was resolved - */ - atomic_t request_sent; + /** @state: sync state. Must only be access with num_requests_lock. */ + enum batadv_bla_backbone_gw_state state; /** @crc: crc16 checksum over all claims */ u16 crc; -- cgit v1.2.3 From 73d01051e8040c0b1de7fd26b3b8d0c2ffa6895c Mon Sep 17 00:00:00 2001 From: Osama Abdelkader Date: Thu, 30 Apr 2026 21:49:42 +0200 Subject: drm/bridge: chipone-icn6211: use devm_drm_bridge_add in i2c probe Use devm_drm_bridge_add() so the bridge is released if probe fails after registration, and drop drm_bridge_remove() in chipone_i2c_probe. Signed-off-by: Osama Abdelkader Fixes: 8dde6f7452a1 ("drm: bridge: icn6211: Add I2C configuration support") Cc: stable@vger.kernel.org Reviewed-by: Luca Ceresoli Link: https://patch.msgid.link/20260430194944.78119-1-osama.abdelkader@gmail.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/bridge/chipone-icn6211.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/chipone-icn6211.c b/drivers/gpu/drm/bridge/chipone-icn6211.c index 814713c5bea9..553a1df4688d 100644 --- a/drivers/gpu/drm/bridge/chipone-icn6211.c +++ b/drivers/gpu/drm/bridge/chipone-icn6211.c @@ -758,7 +758,9 @@ static int chipone_i2c_probe(struct i2c_client *client) dev_set_drvdata(dev, icn); i2c_set_clientdata(client, icn); - drm_bridge_add(&icn->bridge); + ret = devm_drm_bridge_add(dev, &icn->bridge); + if (ret) + return ret; return chipone_dsi_host_attach(icn); } -- cgit v1.2.3 From 9785df3fd67083ac10f6bde83a316286044a66f1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 7 Apr 2026 14:45:22 +0800 Subject: iommu/vt-d: Simplify calculate_psi_aligned_address() This is doing far too much math for the simple task of finding a power of 2 that fully spans the given range. Use fls directly on the xor which computes the common binary prefix. Signed-off-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/0-v2-895748900b39+5303-iommupt_inv_vtd_jgg@nvidia.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/cache.c | 51 +++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 32 deletions(-) diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index be8410f0e841..fdc88817709f 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -254,37 +254,29 @@ void cache_tag_unassign_domain(struct dmar_domain *domain, } static unsigned long calculate_psi_aligned_address(unsigned long start, - unsigned long end, - unsigned long *_mask) + unsigned long last, + unsigned long *size_order) { - unsigned long pages = aligned_nrpages(start, end - start + 1); - unsigned long aligned_pages = __roundup_pow_of_two(pages); - unsigned long bitmask = aligned_pages - 1; - unsigned long mask = ilog2(aligned_pages); - unsigned long pfn = IOVA_PFN(start); - - /* - * PSI masks the low order bits of the base address. If the - * address isn't aligned to the mask, then compute a mask value - * needed to ensure the target range is flushed. - */ - if (unlikely(bitmask & pfn)) { - unsigned long end_pfn = pfn + pages - 1, shared_bits; - + unsigned int sz_lg2; + + /* Compute a sz_lg2 that spans start and last */ + start &= GENMASK(BITS_PER_LONG - 1, VTD_PAGE_SHIFT); + sz_lg2 = fls_long(start ^ last); + if (sz_lg2 <= 12) { + *size_order = 0; + return start; + } + if (unlikely(sz_lg2 >= BITS_PER_LONG)) { /* - * Since end_pfn <= pfn + bitmask, the only way bits - * higher than bitmask can differ in pfn and end_pfn is - * by carrying. This means after masking out bitmask, - * high bits starting with the first set bit in - * shared_bits are all equal in both pfn and end_pfn. + * MAX_AGAW_PFN_WIDTH triggers full invalidation in all + * downstream users. */ - shared_bits = ~(pfn ^ end_pfn) & ~bitmask; - mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH; + *size_order = MAX_AGAW_PFN_WIDTH; + return 0; } - *_mask = mask; - - return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask); + *size_order = sz_lg2 - VTD_PAGE_SHIFT; + return start & GENMASK(BITS_PER_LONG - 1, sz_lg2); } static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch) @@ -441,12 +433,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, struct cache_tag *tag; unsigned long flags; - if (start == 0 && end == ULONG_MAX) { - addr = 0; - mask = MAX_AGAW_PFN_WIDTH; - } else { - addr = calculate_psi_aligned_address(start, end, &mask); - } + addr = calculate_psi_aligned_address(start, end, &mask); spin_lock_irqsave(&domain->cache_lock, flags); list_for_each_entry(tag, &domain->cache_tags, node) { -- cgit v1.2.3 From f80d3d98d2ff78d9e2fe5d68b1f45948c4f7bd24 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 19 May 2026 09:23:49 +0200 Subject: batman-adv: bla: avoid NULL-ptr deref for claim via dropped interface Without rtnl_lock held, a hardif might be retrieved as primary interface of a meshif, but then (while operating on this interface) getting decoupled from the mesh interface. In this case, the meshif still exists but the pointer from the primary hardif to the meshif is set to NULL. The mesh_iface must be checked first to be non-NULL before continuing to send an ARP request using meshif. Cc: stable@kernel.org Fixes: 23721387c409 ("batman-adv: add basic bridge loop avoidance code") Reported-by: Ido Schimmel Reported-by: syzbot+9fdcc9f05a98a540b816@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=9fdcc9f05a98a540b816 Signed-off-by: Sven Eckelmann --- net/batman-adv/bridge_loop_avoidance.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 1bef12e659cb..ffe854018bd3 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -356,12 +356,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac, sizeof(local_claim_dest)); local_claim_dest.type = claimtype; - mesh_iface = primary_if->mesh_iface; + mesh_iface = READ_ONCE(primary_if->mesh_iface); + if (!mesh_iface) + goto out; skb = arp_create(ARPOP_REPLY, ETH_P_ARP, /* IP DST: 0.0.0.0 */ zeroip, - primary_if->mesh_iface, + mesh_iface, /* IP SRC: 0.0.0.0 */ zeroip, /* Ethernet DST: Broadcast */ -- cgit v1.2.3 From d45d5c819f2cd0b6b5d76a194a537a5f4aeefecb Mon Sep 17 00:00:00 2001 From: Osama Abdelkader Date: Thu, 30 Apr 2026 21:56:59 +0200 Subject: drm/bridge: megachips: remove bridge when irq request fails If devm_request_threaded_irq() fails after drm_bridge_add(), remove the bridge before returning. Keep drm_bridge_add() rather than devm_drm_bridge_add(): registration is tied to the STDP4028 device while ge_b850v3_register() may complete from either I2C probe; devm would not unwind the bridge if the other client's probe fails. Signed-off-by: Osama Abdelkader Fixes: fcfa0ddc18ed ("drm/bridge: Drivers for megachips-stdpxxxx-ge-b850v3-fw (LVDS-DP++)") Cc: stable@vger.kernel.org Reviewed-by: Luca Ceresoli Tested-by: Ian Ray Link: https://patch.msgid.link/20260430195700.80317-1-osama.abdelkader@gmail.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index c9e6505cbd88..2d02cc69f237 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -251,7 +251,6 @@ static void ge_b850v3_lvds_remove(void) goto out; drm_bridge_remove(&ge_b850v3_lvds_ptr->bridge); - ge_b850v3_lvds_ptr = NULL; out: mutex_unlock(&ge_b850v3_lvds_dev_mutex); @@ -261,6 +260,7 @@ static int ge_b850v3_register(void) { struct i2c_client *stdp4028_i2c = ge_b850v3_lvds_ptr->stdp4028_i2c; struct device *dev = &stdp4028_i2c->dev; + int ret; /* drm bridge initialization */ ge_b850v3_lvds_ptr->bridge.ops = DRM_BRIDGE_OP_DETECT | @@ -277,11 +277,15 @@ static int ge_b850v3_register(void) if (!stdp4028_i2c->irq) return 0; - return devm_request_threaded_irq(&stdp4028_i2c->dev, - stdp4028_i2c->irq, NULL, - ge_b850v3_lvds_irq_handler, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, - "ge-b850v3-lvds-dp", ge_b850v3_lvds_ptr); + ret = devm_request_threaded_irq(&stdp4028_i2c->dev, + stdp4028_i2c->irq, NULL, + ge_b850v3_lvds_irq_handler, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + "ge-b850v3-lvds-dp", ge_b850v3_lvds_ptr); + if (ret) + drm_bridge_remove(&ge_b850v3_lvds_ptr->bridge); + + return ret; } static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c) -- cgit v1.2.3 From ea17fc4d7dc2ba6459b1a318962960520201baf1 Mon Sep 17 00:00:00 2001 From: Xiangxu Yin Date: Fri, 27 Feb 2026 20:15:01 +0800 Subject: phy: qcom: qmp-usbc: Fix out-of-bounds array access in dp swing config swing_tbl and pre_emphasis_tbl are 4x4 arrays (valid indices 0-3), but the boundary check uses "> 4" instead of ">= 4", allowing index 4 to cause an out-of-bounds access. Reported-by: Dan Carpenter Fixes: 81791c45c8e0 ("phy: qcom: qmp-usbc: Add QCS615 USB/DP PHY config and DP mode support") Signed-off-by: Xiangxu Yin Reviewed-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Link: https://patch.msgid.link/20260227-master-v1-1-8d91b9407fdb@oss.qualcomm.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usbc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usbc.c b/drivers/phy/qualcomm/phy-qcom-qmp-usbc.c index c342479a3798..dff27d30fc99 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usbc.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usbc.c @@ -794,7 +794,7 @@ static int qmp_v2_configure_dp_swing(struct qmp_usbc *qmp) p_level = max(p_level, dp_opts->pre[i]); } - if (v_level > 4 || p_level > 4) { + if (v_level >= 4 || p_level >= 4) { dev_err(qmp->dev, "Invalid v(%d) | p(%d) level)\n", v_level, p_level); return -EINVAL; -- cgit v1.2.3 From 4af7ad0e6d7aa4403dbb1dac7b9659b0421efcaa Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 May 2026 17:52:48 +0200 Subject: usb: typec: wcove: don't write past struct pd_message in wcove_read_rx_buffer() wcove_read_rx_buffer() copies the PD RX FIFO into the caller's struct pd_message with for (i = 0; i < USBC_RXINFO_RXBYTES(info); i++) regmap_read(wcove->regmap, USBC_RX_DATA + i, msg + i); which has two problems: USBC_RXINFO_RXBYTES() is a 5-bit field (max 31) while struct pd_message is 30 bytes (__le16 header + __le32 payload[PD_MAX_PAYLOAD], packed). The byte count latched in RXINFO is the number of bytes the port partner put on the wire, so a malicious partner that transmits a 31-byte frame can drive the loop one byte past the destination if the WCOVE BMC receiver does not enforce the PD object-count limit in hardware. The existing FIXME flagged this as unverified. Independently, regmap_read() takes an unsigned int * and stores a full unsigned int at the destination. Passing the byte pointer msg + i means each iteration writes four bytes; the high three are zero (val_bits is 8) and are normally overwritten by the next iteration, but the final iteration's high bytes are not. With RXBYTES == 30 the i == 29 iteration already writes three zero bytes past msg, which sits on the IRQ thread's stack in wcove_typec_irq(). Clamp the loop to sizeof(struct pd_message) and read each register into a local before storing only its low byte, so the copy can never exceed the destination regardless of what RXINFO reports. Assisted-by: gkh_clanker_t1000 Cc: stable Reviewed-by: Heikki Krogerus Link: https://patch.msgid.link/2026051347-clustered-deflected-9543@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/wcove.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/usb/typec/tcpm/wcove.c b/drivers/usb/typec/tcpm/wcove.c index 759c982bb16a..0e5a3e277c3e 100644 --- a/drivers/usb/typec/tcpm/wcove.c +++ b/drivers/usb/typec/tcpm/wcove.c @@ -444,9 +444,11 @@ static int wcove_start_toggling(struct tcpc_dev *tcpc, return regmap_write(wcove->regmap, USBC_CONTROL1, usbc_ctrl); } -static int wcove_read_rx_buffer(struct wcove_typec *wcove, void *msg) +static int wcove_read_rx_buffer(struct wcove_typec *wcove, + struct pd_message *msg) { - unsigned int info; + unsigned int info, val, len; + u8 *buf = (u8 *)msg; int ret; int i; @@ -454,12 +456,13 @@ static int wcove_read_rx_buffer(struct wcove_typec *wcove, void *msg) if (ret) return ret; - /* FIXME: Check that USBC_RXINFO_RXBYTES(info) matches the header */ + len = min(USBC_RXINFO_RXBYTES(info), sizeof(*msg)); - for (i = 0; i < USBC_RXINFO_RXBYTES(info); i++) { - ret = regmap_read(wcove->regmap, USBC_RX_DATA + i, msg + i); + for (i = 0; i < len; i++) { + ret = regmap_read(wcove->regmap, USBC_RX_DATA + i, &val); if (ret) return ret; + buf[i] = val; } return regmap_write(wcove->regmap, USBC_RXSTATUS, -- cgit v1.2.3 From 8a18f896e667df491331371b55d4ad644dc51d60 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 May 2026 17:52:49 +0200 Subject: usb: typec: altmodes/displayport: validate count before reading Status Update VDO A broken/malicious device can send the incorrect count for a status update VDO, which will cause the kernel to read uninitialized stack data and send it off elsewhere. Fix this up by correctly verifying the count for the update object. Assisted-by: gkh_clanker_t1000 Cc: stable Reviewed-by: Heikki Krogerus Link: https://patch.msgid.link/2026051350-reacquire-sculpture-4244@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index 35d9c3086990..263a89c5f324 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -405,6 +405,8 @@ static int dp_altmode_vdm(struct typec_altmode *alt, dp->state = DP_STATE_EXIT_PRIME; break; case DP_CMD_STATUS_UPDATE: + if (count < 2) + break; dp->data.status = *vdo; ret = dp_altmode_status_update(dp); break; -- cgit v1.2.3 From aa2f716327be1818e1cb156da8a2844804aaec2f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 May 2026 17:52:50 +0200 Subject: usb: typec: tcpm/tcpci_maxim: validate header NDO against RX_BYTE_CNT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A broken/malicious port can transmit a CRC-valid frame whose header advertises up to seven data objects but whose body carries fewer than that. Check for this, and rightfully reject the message, instead of reading from uninitialized stack memory. Assisted-by: gkh_clanker_t1000 Cc: Heikki Krogerus Cc: "André Draszik" Cc: Badhri Jagan Sridharan Cc: Amit Sunil Dhamne Cc: stable Link: https://patch.msgid.link/2026051350-sitter-canopener-9045@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci_maxim_core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpci_maxim_core.c b/drivers/usb/typec/tcpm/tcpci_maxim_core.c index c0ee7e6959ed..7324139d51c8 100644 --- a/drivers/usb/typec/tcpm/tcpci_maxim_core.c +++ b/drivers/usb/typec/tcpm/tcpci_maxim_core.c @@ -181,6 +181,15 @@ static void process_rx(struct max_tcpci_chip *chip, u16 status) rx_buf_ptr = rx_buf + TCPC_RECEIVE_BUFFER_RX_BYTE_BUF_OFFSET; msg.header = cpu_to_le16(*(u16 *)rx_buf_ptr); rx_buf_ptr = rx_buf_ptr + sizeof(msg.header); + + if (count < TCPC_RECEIVE_BUFFER_RX_BYTE_BUF_OFFSET + sizeof(msg.header) + + pd_header_cnt_le(msg.header) * sizeof(msg.payload[0])) { + max_tcpci_write16(chip, TCPC_ALERT, TCPC_ALERT_RX_STATUS); + dev_err(chip->dev, "Invalid TCPC_RX_BYTE_CNT %d for header cnt %d\n", + count, pd_header_cnt_le(msg.header)); + return; + } + for (payload_index = 0; payload_index < pd_header_cnt_le(msg.header); payload_index++, rx_buf_ptr += sizeof(msg.payload[0])) msg.payload[payload_index] = cpu_to_le32(*(u32 *)rx_buf_ptr); -- cgit v1.2.3 From 8fbc349e8383125dd2d8de1c1e926279d398ab17 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 May 2026 17:52:51 +0200 Subject: usb: typec: tcpm: validate VDO count in Discover Identity ACK handlers Properly validate the count passed from a device when calling svdm_consume_identity() or svdm_consume_identity_sop_prime() as the device-controlled value could index off of the static arrays, which could leak data. Assisted-by: gkh_clanker_t1000 Cc: Heikki Krogerus Cc: stable Reviewed-by: Badhri Jagan Sridharan Link: https://patch.msgid.link/2026051350-plated-salute-0efe@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 55fee96d3342..44dab6c32c33 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -1855,6 +1855,9 @@ static void svdm_consume_identity(struct tcpm_port *port, const u32 *p, int cnt) u32 vdo = p[VDO_INDEX_IDH]; u32 product = p[VDO_INDEX_PRODUCT]; + if (cnt <= VDO_INDEX_PRODUCT) + return; + memset(&port->mode_data, 0, sizeof(port->mode_data)); port->partner_ident.id_header = vdo; @@ -1875,6 +1878,9 @@ static void svdm_consume_identity_sop_prime(struct tcpm_port *port, const u32 *p u32 product = p[VDO_INDEX_PRODUCT]; int svdm_version; + if (cnt <= VDO_INDEX_CABLE_1) + return; + /* * Attempt to consume identity only if cable currently is not set */ @@ -1898,7 +1904,7 @@ static void svdm_consume_identity_sop_prime(struct tcpm_port *port, const u32 *p switch (port->negotiated_rev_prime) { case PD_REV30: port->cable_desc.pd_revision = 0x0300; - if (port->cable_desc.active) + if (port->cable_desc.active && cnt > VDO_INDEX_CABLE_2) port->cable_ident.vdo[1] = p[VDO_INDEX_CABLE_2]; break; case PD_REV20: -- cgit v1.2.3 From 3389c149c68c3fea61910ad5d34f7bf3bff44e32 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 May 2026 17:52:53 +0200 Subject: usb: typec: tcpm: bound altmode_desc[] per iteration in svdm_consume_modes() svdm_consume_modes() checks pmdata->altmodes against the array size once before the loop over the count, but forgot to check the bound at every point in the loop. In the well-behaved SVDM discovery flow this is harmless because each of at most SVID_DISCOVERY_MAX SVIDs contributes at most MODE_DISCOVERY_MAX modes, exactly filling altmode_desc[ALTMODE_DISCOVERY_MAX]. But the CMDT_RSP_ACK handler in tcpm_pd_svdm() does not correlate an incoming ACK with any request the port actually sent. Once port->partner is set, an unsolicited Discover Modes ACK is consumed unconditionally. A broken or malicious port partner can therefore drive altmodes to ALTMODE_DISCOVERY_MAX - 1 via the normal flow, and then send one extra Discover Modes ACK with seven VDOs. Because the pre-loop check passes, the loop could then writes up to five entries past altmode_desc[]. For mode_data_prime the next field in struct tcpm_port is the partner_altmode[] pointer array, which then receives partner-chosen SVID/VDO bytes. Move the bound check inside the loop so the array can never be indexed past ALTMODE_DISCOVERY_MAX regardless of how many VDOs the partner supplies or how the function was reached. Assisted-by: gkh_clanker_t1000 Cc: Badhri Jagan Sridharan Cc: Heikki Krogerus Cc: stable Link: https://patch.msgid.link/2026051351-reshuffle-skillful-90af@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 44dab6c32c33..ed5f745a8231 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -1992,23 +1992,19 @@ static void svdm_consume_modes(struct tcpm_port *port, const u32 *p, int cnt, switch (rx_sop_type) { case TCPC_TX_SOP_PRIME: pmdata = &port->mode_data_prime; - if (pmdata->altmodes >= ARRAY_SIZE(port->plug_prime_altmode)) { - /* Already logged in svdm_consume_svids() */ - return; - } break; case TCPC_TX_SOP: pmdata = &port->mode_data; - if (pmdata->altmodes >= ARRAY_SIZE(port->partner_altmode)) { - /* Already logged in svdm_consume_svids() */ - return; - } break; default: return; } for (i = 1; i < cnt; i++) { + if (pmdata->altmodes >= ALTMODE_DISCOVERY_MAX) { + /* Already logged in svdm_consume_svids() */ + return; + } paltmode = &pmdata->altmode_desc[pmdata->altmodes]; memset(paltmode, 0, sizeof(*paltmode)); -- cgit v1.2.3 From 167dd8d12226587ee554f520aed0256b7769cd5d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 May 2026 17:52:54 +0200 Subject: usb: typec: ucsi: displayport: NAK DP_CMD_CONFIGURE without a payload VDO ucsi_displayport_vdm() handles a DP_CMD_CONFIGURE by copying the first payload VDO from data[], but unlike the equivalent handler in altmodes/displayport.c it does not check that count covers a VDO beyond the header. A header-only Configure VDM (count == 1) would read one u32 past the caller's array. In the normal UCSI path the caller controls count, so this is hardening for non-standard delivery paths. NAK and bail when no configuration VDO is present, matching the generic DP altmode driver's existing guard. Assisted-by: gkh_clanker_t1000 Cc: Pooja Katiyar Cc: Johan Hovold Cc: stable Reviewed-by: Heikki Krogerus Link: https://patch.msgid.link/2026051351-vividly-flattered-eb3d@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/displayport.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index 8aae80b457d7..67a0991a7b76 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -240,6 +240,10 @@ static int ucsi_displayport_vdm(struct typec_altmode *alt, dp->header |= VDO_CMDT(CMDT_RSP_ACK); break; case DP_CMD_CONFIGURE: + if (count < 2) { + dp->header |= VDO_CMDT(CMDT_RSP_NAK); + break; + } dp->data.conf = *data; if (ucsi_displayport_configure(dp)) { dp->header |= VDO_CMDT(CMDT_RSP_NAK); -- cgit v1.2.3 From 288a81a8507052bcfbf884d39a463c44c42c5fd9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 May 2026 17:52:55 +0200 Subject: usb: typec: ucsi: validate connector number in ucsi_connector_change() The connector number in a UCSI CCI notification is a 7-bit field supplied by the PPM. ucsi_connector_change() uses it to index the ucsi->connector[] array without checking it against the number of connectors the PPM reported at init time, so a buggy or malicious PPM (EC firmware, or an I2C-attached UCSI controller on the ccg / stm32g0 / glink transports) can drive schedule_work() on memory past the end of the array. Reject connector numbers that are zero or exceed cap.num_connectors before dereferencing the array. Assisted-by: gkh_clanker_t1000 Cc: Heikki Krogerus Cc: Benson Leung Cc: Jameson Thies Cc: Nathan Rebello Cc: Johan Hovold Cc: Pooja Katiyar Cc: Hsin-Te Yuan Cc: Abel Vesa Cc: stable Reviewed-by: Abel Vesa Reviewed-by: Heikki Krogerus Reviewed-by: Benson Leung Link: https://patch.msgid.link/2026051351-truck-steadfast-df48@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 5b7ad9e99cb9..539dc706798d 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1380,13 +1380,22 @@ out_unlock: */ void ucsi_connector_change(struct ucsi *ucsi, u8 num) { - struct ucsi_connector *con = &ucsi->connector[num - 1]; + struct ucsi_connector *con; if (!(ucsi->ntfy & UCSI_ENABLE_NTFY_CONNECTOR_CHANGE)) { dev_dbg(ucsi->dev, "Early connector change event\n"); return; } + if (!num || num > ucsi->cap.num_connectors) { + dev_warn_ratelimited(ucsi->dev, + "Bogus connector change on %u (max %u)\n", + num, ucsi->cap.num_connectors); + return; + } + + con = &ucsi->connector[num - 1]; + if (!test_and_set_bit(EVENT_PENDING, &ucsi->flags)) schedule_work(&con->work); } -- cgit v1.2.3 From 49f8fcde68898f5033082e8155cd344dd54ef232 Mon Sep 17 00:00:00 2001 From: Hasan Basbunar Date: Tue, 5 May 2026 18:11:02 +0200 Subject: modpost: prevent stack buffer overflow in do_input_entry() and do_dmi_entry() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several functions in scripts/mod/file2alias.c build the module alias string by repeatedly appending into a fixed-size on-stack buffer: char alias[256] = {}; ... sprintf(alias + strlen(alias), "%X,*", i); This pattern is unbounded and silently corrupts the stack when the formatted output exceeds the destination size. Two functions in this file are realistically reachable with input that overflows their buffer: 1. do_input_entry() appends across nine bitmap classes (evbit/keybit/relbit/absbit/mscbit/ledbit/sndbit/ffbit/swbit). The keybit case alone scans bits from INPUT_DEVICE_ID_KEY_MIN_INTERESTING (0x71) to INPUT_DEVICE_ID_KEY_MAX (0x2ff), 655 iterations; if a MODULE_DEVICE_TABLE(input, ...) populates keybit[] densely, the emission reaches ~3132 bytes — overflowing the 256-byte buffer by about 12x. include/linux/mod_devicetable.h declares storage for the full bit range ("keybit[INPUT_DEVICE_ID_KEY_MAX / BITS_PER_LONG + 1]"), so the worst case is reachable per the ABI. 2. do_dmi_entry() emits one ":**" segment per matched DMI field, up to 4 matches per dmi_system_id. Each substr is sized as char[79] in struct dmi_strmatch (mod_devicetable.h:584), and dmi_ascii_filter() copies it verbatim into the alias buffer without bounds. Worst case: 4 × (1 + 3 + 1 + 79 + 1) = 336 bytes into alias[256], an 80-byte overflow. No driver in the current tree triggers either case — every in-tree INPUT_DEVICE_ID_MATCH_KEYBIT user populates keybit[] very sparsely (1-3 bits), and no in-tree dmi_system_id has four maximally-long matches. The concern is defense-in-depth: both unbounded sprintf chains are silent stack-corruption primitives in a host build tool, and the buffer sizes have not been revisited since the corresponding code was first introduced. The other do_*_entry() handlers in this file (do_usb_entry, do_cpu_entry, do_typec_entry, ...) were audited and are bounded by their input field sizes (uint16 IDs, fixed-length keys); their alias buffers do not need this treatment. Reproduced under AddressSanitizer with a stand-alone harness mirroring do_input on a fully-populated keybit: ==18319==ERROR: AddressSanitizer: stack-buffer-overflow WRITE of size 2 at offset 288 in frame [32, 288) 'alias' #6 do_input poc.c:44 Stack-canary build: Abort trap: 6 (strlen(alias)=3134, cap was 256-1) Add a small alias_append() helper around vsnprintf with a remaining- space check and call fatal() on overflow, matching the modpost style for unrecoverable build conditions. do_input() takes the buffer size as a new parameter; do_input_entry() and do_dmi_entry() pass sizeof(alias) at every call site. dmi_ascii_filter() takes the remaining buffer size as well and aborts on truncation. This bounds every write into the on-stack buffers and turns the latent overflow into a clean build error if it is ever reached. Fixes: 1d8f430c15b3 ("[PATCH] Input: add modalias support") Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Hasan Basbunar Link: https://patch.msgid.link/20260505161102.44087-1-basbunarhasan@gmail.com Signed-off-by: Nicolas Schier --- scripts/mod/file2alias.c | 79 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 26 deletions(-) diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 4e99393a35f1..2ad87a74bb03 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -651,7 +651,26 @@ static void do_vio_entry(struct module *mod, void *symval) module_alias_printf(mod, true, "%s", alias); } -static void do_input(char *alias, +static void __attribute__((format(printf, 3, 4))) +alias_append(char *alias, size_t size, const char *fmt, ...) +{ + size_t len = strlen(alias); + va_list args; + int n; + + if (len >= size) + fatal("alias buffer (%zu) overflow before append\n", size); + + va_start(args, fmt); + n = vsnprintf(alias + len, size - len, fmt, args); + va_end(args); + + if (n < 0 || (size_t)n >= size - len) + fatal("alias buffer (%zu) overflow on append (need %d, have %zu)\n", + size, n, size - len); +} + +static void do_input(char *alias, size_t size, kernel_ulong_t *arr, unsigned int min, unsigned int max) { unsigned int i; @@ -659,13 +678,14 @@ static void do_input(char *alias, for (i = min; i <= max; i++) if (get_unaligned_native(arr + i / BITS_PER_LONG) & (1ULL << (i % BITS_PER_LONG))) - sprintf(alias + strlen(alias), "%X,*", i); + alias_append(alias, size, "%X,*", i); } /* input:b0v0p0e0-eXkXrXaXmXlXsXfXwX where X is comma-separated %02X. */ static void do_input_entry(struct module *mod, void *symval) { char alias[256] = {}; + const size_t sizeof_alias = sizeof(alias); DEF_FIELD(symval, input_device_id, flags); DEF_FIELD(symval, input_device_id, bustype); @@ -687,35 +707,35 @@ static void do_input_entry(struct module *mod, void *symval) ADD(alias, "p", flags & INPUT_DEVICE_ID_MATCH_PRODUCT, product); ADD(alias, "e", flags & INPUT_DEVICE_ID_MATCH_VERSION, version); - sprintf(alias + strlen(alias), "-e*"); + alias_append(alias, sizeof_alias, "-e*"); if (flags & INPUT_DEVICE_ID_MATCH_EVBIT) - do_input(alias, *evbit, 0, INPUT_DEVICE_ID_EV_MAX); - sprintf(alias + strlen(alias), "k*"); + do_input(alias, sizeof_alias, *evbit, 0, INPUT_DEVICE_ID_EV_MAX); + alias_append(alias, sizeof_alias, "k*"); if (flags & INPUT_DEVICE_ID_MATCH_KEYBIT) - do_input(alias, *keybit, + do_input(alias, sizeof_alias, *keybit, INPUT_DEVICE_ID_KEY_MIN_INTERESTING, INPUT_DEVICE_ID_KEY_MAX); - sprintf(alias + strlen(alias), "r*"); + alias_append(alias, sizeof_alias, "r*"); if (flags & INPUT_DEVICE_ID_MATCH_RELBIT) - do_input(alias, *relbit, 0, INPUT_DEVICE_ID_REL_MAX); - sprintf(alias + strlen(alias), "a*"); + do_input(alias, sizeof_alias, *relbit, 0, INPUT_DEVICE_ID_REL_MAX); + alias_append(alias, sizeof_alias, "a*"); if (flags & INPUT_DEVICE_ID_MATCH_ABSBIT) - do_input(alias, *absbit, 0, INPUT_DEVICE_ID_ABS_MAX); - sprintf(alias + strlen(alias), "m*"); + do_input(alias, sizeof_alias, *absbit, 0, INPUT_DEVICE_ID_ABS_MAX); + alias_append(alias, sizeof_alias, "m*"); if (flags & INPUT_DEVICE_ID_MATCH_MSCIT) - do_input(alias, *mscbit, 0, INPUT_DEVICE_ID_MSC_MAX); - sprintf(alias + strlen(alias), "l*"); + do_input(alias, sizeof_alias, *mscbit, 0, INPUT_DEVICE_ID_MSC_MAX); + alias_append(alias, sizeof_alias, "l*"); if (flags & INPUT_DEVICE_ID_MATCH_LEDBIT) - do_input(alias, *ledbit, 0, INPUT_DEVICE_ID_LED_MAX); - sprintf(alias + strlen(alias), "s*"); + do_input(alias, sizeof_alias, *ledbit, 0, INPUT_DEVICE_ID_LED_MAX); + alias_append(alias, sizeof_alias, "s*"); if (flags & INPUT_DEVICE_ID_MATCH_SNDBIT) - do_input(alias, *sndbit, 0, INPUT_DEVICE_ID_SND_MAX); - sprintf(alias + strlen(alias), "f*"); + do_input(alias, sizeof_alias, *sndbit, 0, INPUT_DEVICE_ID_SND_MAX); + alias_append(alias, sizeof_alias, "f*"); if (flags & INPUT_DEVICE_ID_MATCH_FFBIT) - do_input(alias, *ffbit, 0, INPUT_DEVICE_ID_FF_MAX); - sprintf(alias + strlen(alias), "w*"); + do_input(alias, sizeof_alias, *ffbit, 0, INPUT_DEVICE_ID_FF_MAX); + alias_append(alias, sizeof_alias, "w*"); if (flags & INPUT_DEVICE_ID_MATCH_SWBIT) - do_input(alias, *swbit, 0, INPUT_DEVICE_ID_SW_MAX); + do_input(alias, sizeof_alias, *swbit, 0, INPUT_DEVICE_ID_SW_MAX); module_alias_printf(mod, false, "input:%s", alias); } @@ -895,12 +915,16 @@ static const struct dmifield { { NULL, DMI_NONE } }; -static void dmi_ascii_filter(char *d, const char *s) +static void dmi_ascii_filter(char *d, size_t avail, const char *s) { /* Filter out characters we don't want to see in the modalias string */ for (; *s; s++) - if (*s > ' ' && *s < 127 && *s != ':') + if (*s > ' ' && *s < 127 && *s != ':') { + if (avail <= 1) + fatal("%s: alias buffer overflow\n", __func__); *(d++) = *s; + avail--; + } *d = 0; } @@ -909,6 +933,8 @@ static void dmi_ascii_filter(char *d, const char *s) static void do_dmi_entry(struct module *mod, void *symval) { char alias[256] = {}; + const size_t sizeof_alias = sizeof(alias); + size_t len; int i, j; DEF_FIELD_ADDR(symval, dmi_system_id, matches); @@ -916,11 +942,12 @@ static void do_dmi_entry(struct module *mod, void *symval) for (j = 0; j < 4; j++) { if ((*matches)[j].slot && (*matches)[j].slot == dmi_fields[i].field) { - sprintf(alias + strlen(alias), ":%s*", - dmi_fields[i].prefix); - dmi_ascii_filter(alias + strlen(alias), + alias_append(alias, sizeof_alias, ":%s*", + dmi_fields[i].prefix); + len = strlen(alias); + dmi_ascii_filter(alias + len, sizeof_alias - len, (*matches)[j].substr); - strcat(alias, "*"); + alias_append(alias, sizeof_alias, "*"); } } } -- cgit v1.2.3 From 202550713128da20d9381d6d2dc0f6b73839f434 Mon Sep 17 00:00:00 2001 From: Viktor Jägersküpper Date: Fri, 15 May 2026 23:58:45 +0200 Subject: kbuild: pacman-pkg: make "rc" releases adhere to pacman versioning scheme MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The package versioning scheme does not enable smooth upgrades from "rc" releases to the corresponding stable releases (e.g. 7.0.0-rc7 -> 7.0.0) because pacman considers that a downgrade due to the underscore in pkgver (e.g. 7.0.0_rc7), see e.g. vercmp(8) for an explanation of the package version comparison used by pacman. Package versions which are derived from said releases (e.g. built from git revisions) are similarly affected. Fix this by modifying pkgver in order to remove the hyphen from kernel versions containing "-rcN", where N is a non-negative integer. Acked-by: Thomas Weißschuh Signed-off-by: Viktor Jägersküpper Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://patch.msgid.link/20260515215913.92481-1-viktor_jaegerskuepper@freenet.de Fixes: c8578539deba ("kbuild: add script and target to generate pacman package") Signed-off-by: Nicolas Schier --- scripts/package/PKGBUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD index 452374d63c24..1213c8e04671 100644 --- a/scripts/package/PKGBUILD +++ b/scripts/package/PKGBUILD @@ -10,7 +10,7 @@ for pkg in $_extrapackages; do pkgname+=("${pkgbase}-${pkg}") done -pkgver="${KERNELRELEASE//-/_}" +pkgver="$(echo "${KERNELRELEASE}" | sed 's/-\(rc[0-9]\+\)/\1/;s/-/_/g')" # The PKGBUILD is evaluated multiple times. # Running scripts/build-version from here would introduce inconsistencies. pkgrel="${KBUILD_REVISION}" -- cgit v1.2.3 From e824e40d0e841fab66ab7897d6c7b14dc81c66a7 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Thu, 14 May 2026 15:04:21 +0100 Subject: net: dsa: mt7530: fix FDB entries not aging out with short timeout The DSA forwarding selftests bridge_vlan_aware.sh and bridge_vlan_unaware.sh configure the bridge with ageing_time set to LOW_AGEING_TIME (1000 centiseconds, i.e. 10 seconds) and then run learning_test() in lib.sh, which expects a learned FDB entry to be removed after ageing_time + 10 seconds. On MT7530/MT7531 the entry persisted past the deadline and the "Found FDB record when should not" assertion failed. With msecs=10000, the algorithm in mt7530_set_ageing_time() finds AGE_CNT=0 and AGE_UNIT=9 as the first exact match (starting the search from tmp_age_count=0). The per-entry aging counter is initialized to AGE_CNT when a MAC address is learned, so with AGE_CNT=0 new entries start with a counter value of 0, which the hardware treats as "already aged" and never removes, effectively disabling aging. Fix this by starting the search from tmp_age_count=1 to ensure entries always have a non-zero initial aging counter. For a 10-second ageing time this yields AGE_CNT=1 and AGE_UNIT=4 instead: the timer ticks every 5 seconds and entries are removed after 2 ticks. Starting the search at AGE_CNT=1 raises the minimum representable ageing time from 1 to 2 seconds. Without bounds, a stale ageing_time of 1 second would now make the loop fall through without setting age_count and age_unit, leaving them uninitialized when written to the MT7530_AAC hardware register. Set ds->ageing_time_min and ds->ageing_time_max so the DSA core validates the range before the callback is invoked, and drop the now-redundant range check from mt7530_set_ageing_time(). Fixes: ea6d5c924e39 ("net: dsa: mt7530: support setting ageing time") Signed-off-by: Daniel Golle Link: https://patch.msgid.link/7788ded12dc07b1bce329ec35fa70f4b45f3f9b7.1778766629.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- drivers/net/dsa/mt7530.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 44d670904ad8..cd311dfd3600 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1023,12 +1023,16 @@ mt7530_set_ageing_time(struct dsa_switch *ds, unsigned int msecs) unsigned int age_count; unsigned int age_unit; - /* Applied timer is (AGE_CNT + 1) * (AGE_UNIT + 1) seconds */ - if (secs < 1 || secs > (AGE_CNT_MAX + 1) * (AGE_UNIT_MAX + 1)) - return -ERANGE; - - /* iterate through all possible age_count to find the closest pair */ - for (tmp_age_count = 0; tmp_age_count <= AGE_CNT_MAX; ++tmp_age_count) { + /* Applied timer is (AGE_CNT + 1) * (AGE_UNIT + 1) seconds. + * The DSA core has already validated the range using + * ds->ageing_time_min and ds->ageing_time_max. + * + * Iterate through all possible age_count values to find the closest + * pair. Start from 1 because the per-entry aging counter is + * initialized to AGE_CNT and a value of 0 means the entry will + * never be aged out. + */ + for (tmp_age_count = 1; tmp_age_count <= AGE_CNT_MAX; ++tmp_age_count) { unsigned int tmp_age_unit = secs / (tmp_age_count + 1) - 1; if (tmp_age_unit <= AGE_UNIT_MAX) { @@ -2428,6 +2432,8 @@ mt7530_setup(struct dsa_switch *ds) ds->assisted_learning_on_cpu_port = true; ds->mtu_enforcement_ingress = true; + ds->ageing_time_min = 2 * 1000; + ds->ageing_time_max = (AGE_CNT_MAX + 1) * (AGE_UNIT_MAX + 1) * 1000; if (priv->id == ID_MT7530) { regulator_set_voltage(priv->core_pwr, 1000000, 1000000); @@ -2617,6 +2623,8 @@ mt7531_setup_common(struct dsa_switch *ds) ds->assisted_learning_on_cpu_port = true; ds->mtu_enforcement_ingress = true; + ds->ageing_time_min = 2 * 1000; + ds->ageing_time_max = (AGE_CNT_MAX + 1) * (AGE_UNIT_MAX + 1) * 1000; mt753x_trap_frames(priv); -- cgit v1.2.3 From 3ac85bcfd404b588298c95c6fba8aad4ad334f57 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Thu, 14 May 2026 15:04:35 +0100 Subject: net: dsa: mt7530: preserve VLAN tags on trapped link-local frames The BPC, RGAC1 and RGAC2 registers control the handling of link-local frames with reserved MAC DAs (01:80:C2:00:00:0x). These frames are correctly trapped to the CPU port, but the egress VLAN tag attribute was set to MT7530_VLAN_EG_UNTAGGED which causes the switch to strip any VLAN tags from trapped frames before they reach the CPU. This causes VLAN-tagged link-local frames (STP BPDUs, LLDP, PTP Peer Delay Requests) to arrive at the CPU without their VLAN tag, so they are delivered to the base network interface instead of the VLAN sub-interface. The DSA local_termination selftest confirms this: all link-local protocol tests on VLAN upper interfaces fail. Set the EG_TAG attribute to MT7530_VLAN_EG_DISABLED (system default) so that the switch does not modify VLAN tags in trapped frames. This way VLAN-tagged frames retain their original tag and are delivered to the correct VLAN sub-interface, matching the behavior of non-trapped frames which pass through without VLAN tag modification. Fixes: 69ddba9d170b ("net: dsa: mt7530: fix handling of all link-local frames") Signed-off-by: Daniel Golle Acked-by: Chester A. Unal Link: https://patch.msgid.link/891e0cd34db2a5fe20ceb73283a81fb5f71427ca.1778766629.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- drivers/net/dsa/mt7530.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index cd311dfd3600..4f657ef6aa65 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1300,37 +1300,40 @@ static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface) static void mt753x_trap_frames(struct mt7530_priv *priv) { - /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them - * VLAN-untagged. + /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress + * them with the EG_TAG attribute set to disabled (system default) + * so that any VLAN tags in the frame are not modified by the + * switch egress VLAN tag processing. This preserves VLAN tags + * for reception on VLAN sub-interfaces. */ mt7530_rmw(priv, MT753X_BPC, PAE_BPDU_FR | PAE_EG_TAG_MASK | PAE_PORT_FW_MASK | BPDU_EG_TAG_MASK | BPDU_PORT_FW_MASK, - PAE_BPDU_FR | PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + PAE_BPDU_FR | PAE_EG_TAG(MT7530_VLAN_EG_DISABLED) | PAE_PORT_FW(TO_CPU_FW_CPU_ONLY) | - BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + BPDU_EG_TAG(MT7530_VLAN_EG_DISABLED) | TO_CPU_FW_CPU_ONLY); - /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress - * them VLAN-untagged. + /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and + * egress them with EG_TAG disabled. */ mt7530_rmw(priv, MT753X_RGAC1, R02_BPDU_FR | R02_EG_TAG_MASK | R02_PORT_FW_MASK | R01_BPDU_FR | R01_EG_TAG_MASK | R01_PORT_FW_MASK, - R02_BPDU_FR | R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + R02_BPDU_FR | R02_EG_TAG(MT7530_VLAN_EG_DISABLED) | R02_PORT_FW(TO_CPU_FW_CPU_ONLY) | R01_BPDU_FR | - R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + R01_EG_TAG(MT7530_VLAN_EG_DISABLED) | TO_CPU_FW_CPU_ONLY); - /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress - * them VLAN-untagged. + /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and + * egress them with EG_TAG disabled. */ mt7530_rmw(priv, MT753X_RGAC2, R0E_BPDU_FR | R0E_EG_TAG_MASK | R0E_PORT_FW_MASK | R03_BPDU_FR | R03_EG_TAG_MASK | R03_PORT_FW_MASK, - R0E_BPDU_FR | R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + R0E_BPDU_FR | R0E_EG_TAG(MT7530_VLAN_EG_DISABLED) | R0E_PORT_FW(TO_CPU_FW_CPU_ONLY) | R03_BPDU_FR | - R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) | + R03_EG_TAG(MT7530_VLAN_EG_DISABLED) | TO_CPU_FW_CPU_ONLY); } -- cgit v1.2.3 From 2c4c76cacc9d5553f4c3342eb332d7123a4c3f14 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Thu, 14 May 2026 15:04:50 +0100 Subject: net: dsa: mt7530: fix CPU port VLAN not being reset to unaware After a VLAN-aware bridge is destroyed, creating any VLAN-unaware bridge loses all connectivity. The VID 0 VLAN table entry used by VLAN-unaware ports in FALLBACK mode gets corrupted during VLAN-aware operation: mt7530_hw_vlan_add() overwrites its EG_CON flag with VTAG_EN and bridge teardown removes ports from its PORT_MEM. The cleanup code that should restore it never runs because the current port's dp->vlan_filtering flag is still true when checked (DSA updates it only after the driver callback returns). Even when restored, the deferred VLAN deletion events from the switchdev workqueue can corrupt VID 0 again after the restoration. Skip the current port in the all_user_ports_removed check, call mt7530_setup_vlan0() to restore the VID 0 entry, and protect VID 0 from being modified by bridge VLAN operations in port_vlan_add and port_vlan_del since it is managed exclusively by mt7530_setup_vlan0(). Remove the CPU port PCR and PVC register writes which were clobbering PORT_VLAN mode and VLAN_ATTR with wrong values. Fixes: 83163f7dca56 ("net: dsa: mediatek: add VLAN support for MT7530") Signed-off-by: Daniel Golle Link: https://patch.msgid.link/da8bdaf08b2427a9057e6cb33e26d41f8a8d5000.1778766629.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- drivers/net/dsa/mt7530.c | 111 ++++++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 49 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 4f657ef6aa65..752ba92b0851 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1623,6 +1623,49 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port, return 0; } +static int +mt7530_vlan_cmd(struct mt7530_priv *priv, enum mt7530_vlan_cmd cmd, u16 vid) +{ + struct mt7530_dummy_poll p; + u32 val; + int ret; + + val = VTCR_BUSY | VTCR_FUNC(cmd) | vid; + mt7530_write(priv, MT7530_VTCR, val); + + INIT_MT7530_DUMMY_POLL(&p, priv, MT7530_VTCR); + ret = readx_poll_timeout(_mt7530_read, &p, val, + !(val & VTCR_BUSY), 20, 20000); + if (ret < 0) { + dev_err(priv->dev, "poll timeout\n"); + return ret; + } + + val = mt7530_read(priv, MT7530_VTCR); + if (val & VTCR_INVALID) { + dev_err(priv->dev, "read VTCR invalid\n"); + return -EINVAL; + } + + return 0; +} + +static int +mt7530_setup_vlan0(struct mt7530_priv *priv) +{ + u32 val; + + /* Validate the entry with independent learning, keep the original + * ingress tag attribute. + */ + val = IVL_MAC | EG_CON | PORT_MEM(MT7530_ALL_MEMBERS) | FID(FID_BRIDGED) | + VLAN_VALID; + mt7530_write(priv, MT7530_VAWD1, val); + mt7530_write(priv, MT7530_VAWD2, 0); + + return mt7530_vlan_cmd(priv, MT7530_VTCR_WR_VID, 0); +} + static void mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port) { @@ -1648,6 +1691,8 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port) G0_PORT_VID_DEF); for (i = 0; i < priv->ds->num_ports; i++) { + if (i == port) + continue; if (dsa_is_user_port(ds, i) && dsa_port_is_vlan_filtering(dsa_to_port(ds, i))) { all_user_ports_removed = false; @@ -1659,13 +1704,9 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port) * the CPU port get out of VLAN filtering mode. */ if (all_user_ports_removed) { - struct dsa_port *dp = dsa_to_port(ds, port); - struct dsa_port *cpu_dp = dp->cpu_dp; - - mt7530_write(priv, MT7530_PCR_P(cpu_dp->index), - PCR_MATRIX(dsa_user_ports(priv->ds))); - mt7530_write(priv, MT7530_PVC_P(cpu_dp->index), PORT_SPEC_TAG - | PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); + mutex_lock(&priv->reg_mutex); + mt7530_setup_vlan0(priv); + mutex_unlock(&priv->reg_mutex); } } @@ -1853,33 +1894,6 @@ mt7530_port_mdb_del(struct dsa_switch *ds, int port, return ret; } -static int -mt7530_vlan_cmd(struct mt7530_priv *priv, enum mt7530_vlan_cmd cmd, u16 vid) -{ - struct mt7530_dummy_poll p; - u32 val; - int ret; - - val = VTCR_BUSY | VTCR_FUNC(cmd) | vid; - mt7530_write(priv, MT7530_VTCR, val); - - INIT_MT7530_DUMMY_POLL(&p, priv, MT7530_VTCR); - ret = readx_poll_timeout(_mt7530_read, &p, val, - !(val & VTCR_BUSY), 20, 20000); - if (ret < 0) { - dev_err(priv->dev, "poll timeout\n"); - return ret; - } - - val = mt7530_read(priv, MT7530_VTCR); - if (val & VTCR_INVALID) { - dev_err(priv->dev, "read VTCR invalid\n"); - return -EINVAL; - } - - return 0; -} - static int mt7530_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, struct netlink_ext_ack *extack) @@ -1984,21 +1998,6 @@ mt7530_hw_vlan_update(struct mt7530_priv *priv, u16 vid, mt7530_vlan_cmd(priv, MT7530_VTCR_WR_VID, vid); } -static int -mt7530_setup_vlan0(struct mt7530_priv *priv) -{ - u32 val; - - /* Validate the entry with independent learning, keep the original - * ingress tag attribute. - */ - val = IVL_MAC | EG_CON | PORT_MEM(MT7530_ALL_MEMBERS) | FID(FID_BRIDGED) | - VLAN_VALID; - mt7530_write(priv, MT7530_VAWD1, val); - - return mt7530_vlan_cmd(priv, MT7530_VTCR_WR_VID, 0); -} - static int mt7530_port_vlan_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, @@ -2011,9 +2010,18 @@ mt7530_port_vlan_add(struct dsa_switch *ds, int port, mutex_lock(&priv->reg_mutex); + /* VID 0 is managed exclusively by mt7530_setup_vlan0() for + * VLAN-unaware bridge operation. Don't let the bridge overwrite + * its EG_CON flag with VTAG_EN and corrupt PORT_MEM. + */ + if (vlan->vid == 0) + goto skip_vlan_table; + mt7530_hw_vlan_entry_init(&new_entry, port, untagged); mt7530_hw_vlan_update(priv, vlan->vid, &new_entry, mt7530_hw_vlan_add); +skip_vlan_table: + if (pvid) { priv->ports[port].pvid = vlan->vid; @@ -2053,10 +2061,15 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port, mutex_lock(&priv->reg_mutex); + /* VID 0 is managed exclusively by mt7530_setup_vlan0(). */ + if (vlan->vid == 0) + goto skip_vlan_table; + mt7530_hw_vlan_entry_init(&target_entry, port, 0); mt7530_hw_vlan_update(priv, vlan->vid, &target_entry, mt7530_hw_vlan_del); +skip_vlan_table: /* PVID is being restored to the default whenever the PVID port * is being removed from the VLAN. */ -- cgit v1.2.3 From 4cb3cd670b2a29e52dd3cfd6463e44121674c9b8 Mon Sep 17 00:00:00 2001 From: Edward Parker Date: Thu, 14 May 2026 15:05:12 +0100 Subject: net: dsa: mt7530: untag VLAN-aware bridge PVID With bridge VLAN filtering enabled on a port configured as untagged member of the bridge PVID, ingress untagged frames do not reach the corresponding bridge VLAN upper interface (br-lan.). ARP and similar traffic is visible on the physical port but not delivered to the VLAN sub-interface. The MT7530/MT7531 forwards frames to the CPU port with the user port's PVID tag applied even when the frame ingressed untagged on the wire, because the CPU port is set to MT7530_VLAN_EG_CONSISTENT and is a tagged member of the VLAN entry created for the bridge VLAN. The DSA core then sees a hwaccel-tagged frame whose VID matches the port's PVID, which the bridge does not treat as the untagged-on-the-wire frame that the user expects. Set ds->untag_vlan_aware_bridge_pvid in the mt7530 and mt7531 setup paths so the DSA core strips that hwaccel tag in software when the parsed VID matches the bridge port's PVID, restoring the on-the-wire frame as the bridge expects to see it. Link: https://github.com/openwrt/openwrt/issues/18576 Fixes: 83163f7dca56 ("net: dsa: mediatek: add VLAN support for MT7530") Signed-off-by: Edward Parker [daniel@makrotopia.org: improve commit message] Signed-off-by: Daniel Golle Link: https://patch.msgid.link/85d25ea1b26d3c907f815649f2e0bde6560282a3.1778766629.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- drivers/net/dsa/mt7530.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 752ba92b0851..3c2a3029b10c 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2447,6 +2447,7 @@ mt7530_setup(struct dsa_switch *ds) } ds->assisted_learning_on_cpu_port = true; + ds->untag_vlan_aware_bridge_pvid = true; ds->mtu_enforcement_ingress = true; ds->ageing_time_min = 2 * 1000; ds->ageing_time_max = (AGE_CNT_MAX + 1) * (AGE_UNIT_MAX + 1) * 1000; @@ -2638,6 +2639,7 @@ mt7531_setup_common(struct dsa_switch *ds) int ret, i; ds->assisted_learning_on_cpu_port = true; + ds->untag_vlan_aware_bridge_pvid = true; ds->mtu_enforcement_ingress = true; ds->ageing_time_min = 2 * 1000; ds->ageing_time_max = (AGE_CNT_MAX + 1) * (AGE_UNIT_MAX + 1) * 1000; -- cgit v1.2.3 From 023453cb7eb0f53c5dc36babed8e706c1b0b0187 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sat, 5 May 2018 07:57:10 -0500 Subject: i2c: smbus: fix a potential uninitialization bug In i2c_smbus_xfer_emulated(), there are two buffers: msgbuf0 and msgbuf1, which are used to save a series of messages, as mentioned in the comment. According to the value of the variable 'size', msgbuf0 is initialized to various values. In contrast, msgbuf1 is left uninitialized until the function i2c_transfer() is invoked. However, msgbuf1 is not always initialized on all possible execution paths (implementation) of i2c_transfer(). Thus, it is possible that msgbuf1 may still be uninitialized even after the invocation of the function i2c_transfer(), especially when the return value of i2c_transfer() is not checked properly. In the following execution, the uninitialized msgbuf1 will be used, such as for security checks. Since uninitialized values can be random and arbitrary, this will cause undefined behaviors or even check bypass. For example, it is expected that if the value of 'size' is I2C_SMBUS_BLOCK_PROC_CALL, the value of data->block[0] should not be larger than I2C_SMBUS_BLOCK_MAX. This patch initializes the first byte of msgbuf1 with 0 to avoid such undefined behaviors or security issues. Signed-off-by: Wenwen Wang [wsa: reworded commit message a little] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-smbus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c index ad6acb5ebadc..fa63bee0b345 100644 --- a/drivers/i2c/i2c-core-smbus.c +++ b/drivers/i2c/i2c-core-smbus.c @@ -353,6 +353,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, && size != I2C_SMBUS_I2C_BLOCK_DATA); msgbuf0[0] = command; + msgbuf1[0] = 0; switch (size) { case I2C_SMBUS_QUICK: msg[0].len = 0; -- cgit v1.2.3 From 35f0f0a2536a4d604b4dbad92c85c4a8fdebb870 Mon Sep 17 00:00:00 2001 From: Erni Sri Satya Vennela Date: Thu, 14 May 2026 12:41:51 -0700 Subject: net: mana: Fix TOCTOU double-fetch of hwc_msg_id from DMA buffer In mana_hwc_rx_event_handler(), resp->response.hwc_msg_id is read from DMA-coherent memory and bounds-checked, then mana_hwc_handle_resp() re-reads the same field from the same DMA buffer for test_bit() and pointer arithmetic. DMA-coherent memory is mapped uncacheable on x86 and is shared, unencrypted, in Confidential VMs (SEV-SNP/TDX), so each load goes directly to host-visible memory. A H/W can modify the value between the check and the use, bypassing the bounds validation. Fix this by reading hwc_msg_id exactly once using READ_ONCE() into a stack-local variable in mana_hwc_rx_event_handler(), and passing the validated value as a parameter to mana_hwc_handle_resp(). Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") Signed-off-by: Erni Sri Satya Vennela Link: https://patch.msgid.link/20260514194156.466823-1-ernis@linux.microsoft.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/hw_channel.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index dbbde0fa57e7..fd8b324d7fb6 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -77,21 +77,19 @@ static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, } static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, - struct hwc_work_request *rx_req) + struct hwc_work_request *rx_req, u16 msg_id) { const struct gdma_resp_hdr *resp_msg = rx_req->buf_va; struct hwc_caller_ctx *ctx; int err; - if (!test_bit(resp_msg->response.hwc_msg_id, - hwc->inflight_msg_res.map)) { - dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", - resp_msg->response.hwc_msg_id); + if (!test_bit(msg_id, hwc->inflight_msg_res.map)) { + dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", msg_id); mana_hwc_post_rx_wqe(hwc->rxq, rx_req); return; } - ctx = hwc->caller_ctx + resp_msg->response.hwc_msg_id; + ctx = hwc->caller_ctx + msg_id; err = mana_hwc_verify_resp_msg(ctx, resp_msg, resp_len); if (err) goto out; @@ -251,6 +249,7 @@ static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id, struct gdma_sge *sge; u64 rq_base_addr; u64 rx_req_idx; + u16 msg_id; u8 *wqe; if (WARN_ON_ONCE(hwc_rxq->gdma_wq->id != gdma_rxq_id)) @@ -269,13 +268,17 @@ static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id, rx_req = &hwc_rxq->msg_buf->reqs[rx_req_idx]; resp = (struct gdma_resp_hdr *)rx_req->buf_va; - if (resp->response.hwc_msg_id >= hwc->num_inflight_msg) { - dev_err(hwc->dev, "HWC RX: wrong msg_id=%u\n", - resp->response.hwc_msg_id); + /* Read msg_id once from DMA buffer to prevent TOCTOU: + * DMA memory is shared/unencrypted in CVMs - host can + * modify it between reads. + */ + msg_id = READ_ONCE(resp->response.hwc_msg_id); + if (msg_id >= hwc->num_inflight_msg) { + dev_err(hwc->dev, "HWC RX: wrong msg_id=%u\n", msg_id); return; } - mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, rx_req); + mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, rx_req, msg_id); /* Can no longer use 'resp', because the buffer is posted to the HW * in mana_hwc_handle_resp() above. -- cgit v1.2.3 From 0488073a6c84571dd3cffe581a4a73a5fceb099d Mon Sep 17 00:00:00 2001 From: Oliver White Date: Thu, 9 Apr 2026 15:43:47 +1200 Subject: platform/surface: aggregator_registry: omit battery & AC nodes on Surface Laptop 7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surface Laptop 7 exposes battery and AC status via Qualcomm PMIC GLINK qcom_battmgr. Registering the standard SSAM battery and AC client devices on this platform causes duplicate power-supply devices to appear. Drop the SSAM battery and AC nodes from the Surface Laptop 7 registry group so that only the qcom_battmgr power supplies are instantiated. Fixes: b27622f13172 ("platform/surface: Add OF support") Signed-off-by: Oliver White Link: https://patch.msgid.link/20260409034347.17381-1-oliverjwhite07@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/surface_aggregator_registry.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 0599d5adf02e..f0881edfb616 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -295,8 +295,6 @@ static const struct software_node *ssam_node_group_sl6[] = { /* Devices for Surface Laptop 7. */ static const struct software_node *ssam_node_group_sl7[] = { &ssam_node_root, - &ssam_node_bat_ac, - &ssam_node_bat_main, &ssam_node_tmp_perf_profile_with_fan, &ssam_node_fan_speed, &ssam_node_hid_sam_keyboard, -- cgit v1.2.3 From 50c2d91c5dfa0e465826ec1f8dbad9cdc254bd85 Mon Sep 17 00:00:00 2001 From: Shardul Bankar Date: Fri, 15 May 2026 06:27:32 +0200 Subject: mptcp: do not drop partial packets When a packet arrives with map_seq < ack_seq < end_seq, the beginning of the packet has already been acknowledged but the end contains new data. Currently the entire packet is dropped as "old data," forcing the sender to retransmit. Instead, skip the already-acked bytes by adjusting the skb offset and enqueue only the new portion. Update bytes_received and ack_seq to reflect the new data consumed. A previous attempt at this fix has been sent by Paolo Abeni [1], but had issues [2]: it also added a zero-window check and changed rcv_wnd_sent initialization, which caused test regressions. This version addresses only the partial packet handling without modifying receive window accounting. Fixes: ab174ad8ef76 ("mptcp: move ooo skbs into msk out of order queue.") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/c9b426a4e163aa3c4fe8b80c79f1a610f47ae7d8.1763075056.git.pabeni@redhat.com [1] Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/600 [2] Signed-off-by: Shardul Bankar [pabeni@redhat.com: update map] Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260515-net-mptcp-misc-fixes-7-1-rc4-v2-1-701e96419f2f@kernel.org Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 4546a8b09884..859df49e16dc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -397,12 +397,26 @@ static bool __mptcp_move_skb(struct sock *sk, struct sk_buff *skb) return false; } - /* old data, keep it simple and drop the whole pkt, sender - * will retransmit as needed, if needed. + /* Completely old data? */ + if (!after64(MPTCP_SKB_CB(skb)->end_seq, msk->ack_seq)) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA); + mptcp_drop(sk, skb); + return false; + } + + /* Partial packet: map_seq < ack_seq < end_seq. + * Skip the already-acked bytes and enqueue the new data. */ - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA); - mptcp_drop(sk, skb); - return false; + copy_len = MPTCP_SKB_CB(skb)->end_seq - msk->ack_seq; + MPTCP_SKB_CB(skb)->offset += msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq; + MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq - + MPTCP_SKB_CB(skb)->map_seq; + msk->bytes_received += copy_len; + WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len); + + skb_set_owner_r(skb, sk); + __skb_queue_tail(&sk->sk_receive_queue, skb); + return true; } static void mptcp_stop_rtx_timer(struct sock *sk) -- cgit v1.2.3 From 51e398a3b8961b26a8c0a4ba9a777c5339791707 Mon Sep 17 00:00:00 2001 From: Li Xiasong Date: Fri, 15 May 2026 06:27:33 +0200 Subject: mptcp: pm: fix ADD_ADDR timer infinite retry on option space insufficient When TCP option space is insufficient (e.g., when sending ADD_ADDR with an IPv6 address and port while tcp_timestamps is enabled), the original code jumped to out_unlock without clearing the addr_signal flag. This caused mptcp_pm_add_timer to keep rescheduling indefinitely, not sending ADD_ADDR, preventing subsequent addresses in the endpoint list from being announced. Handle this case by clearing the ADD_ADDR signal and skipping the matching ADD_ADDR retransmission entry. The skip path cancels the matching timer (with id check) and advances PM state progression, preserving forward progress to subsequent PM work. This cancellation is inherently best-effort. A concurrent add_timer callback may already be running and may acquire pm.lock before the cancel path updates entry state. In that case, one final ADD_ADDR transmit attempt can still be executed. Once the cancel path sets entry->retrans_times to ADD_ADDR_RETRANS_MAX, the callback-side retrans_times check suppresses further ADD_ADDR retransmissions. Note that when an ADD_ADDR is being prepared, a pure-ACK is queued. On the output side, it means that it is fine to skip non-pure-ACK packets, when drop_other_suboptions is set: a pure-ACK will be processed soon after. Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") Cc: stable@vger.kernel.org Signed-off-by: Li Xiasong Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260515-net-mptcp-misc-fixes-7-1-rc4-v2-2-701e96419f2f@kernel.org Signed-off-by: Paolo Abeni --- net/mptcp/pm.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 3c152bf66cd5..3e770c7407e1 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -364,7 +364,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer) spin_lock_bh(&msk->pm.lock); - if (!mptcp_pm_should_add_signal_addr(msk)) { + /* The cancel path (mptcp_pm_del_add_timer()) can race with this + * callback. Once cancel updates retrans_times to MAX, suppress further + * retransmissions here. If this callback acquires pm.lock first, one + * final transmit attempt is still possible. + */ + if (entry->retrans_times < ADD_ADDR_RETRANS_MAX && + !mptcp_pm_should_add_signal_addr(msk)) { pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id); mptcp_pm_announce_addr(msk, &entry->addr, false); mptcp_pm_add_addr_send_ack(msk); @@ -414,8 +420,12 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, /* Note: entry might have been removed by another thread. * We hold rcu_read_lock() to ensure it is not freed under us. */ - if (stop_timer) - sk_stop_timer_sync(sk, &entry->add_timer); + if (stop_timer) { + if (check_id) + sk_stop_timer(sk, &entry->add_timer); + else + sk_stop_timer_sync(sk, &entry->add_timer); + } rcu_read_unlock(); return entry; @@ -882,6 +892,7 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, struct mptcp_addr_info *addr, bool *echo, bool *drop_other_suboptions) { + bool skip_add_addr = false; int ret = false; u8 add_addr; u8 family; @@ -903,24 +914,49 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, } *echo = mptcp_pm_should_add_signal_echo(msk); - port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port); - - family = *echo ? msk->pm.remote.family : msk->pm.local.family; - if (remaining < mptcp_add_addr_len(family, *echo, port)) - goto out_unlock; - if (*echo) { *addr = msk->pm.remote; add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO); + port = !!msk->pm.remote.port; + family = msk->pm.remote.family; } else { *addr = msk->pm.local; add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL); + port = !!msk->pm.local.port; + family = msk->pm.local.family; } - WRITE_ONCE(msk->pm.addr_signal, add_addr); + + if (remaining < mptcp_add_addr_len(family, *echo, port)) { + struct net *net = sock_net((struct sock *)msk); + + if (!*drop_other_suboptions) + goto out_unlock; + + if (*echo) { + MPTCP_INC_STATS(net, MPTCP_MIB_ECHOADDTXDROP); + } else { + skip_add_addr = true; + MPTCP_INC_STATS(net, MPTCP_MIB_ADDADDRTXDROP); + } + goto drop_signal_mark; + } + ret = true; +drop_signal_mark: + WRITE_ONCE(msk->pm.addr_signal, add_addr); + out_unlock: spin_unlock_bh(&msk->pm.lock); + + /* On pure-ACK option-space exhaustion, stop retrying this ADD_ADDR: + * clear the signal bit, cancel the matching retransmission timer, and + * let the PM state machine progress. + */ + if (skip_add_addr) { + mptcp_pm_del_add_timer(msk, addr, true); + mptcp_pm_subflow_established(msk); + } return ret; } -- cgit v1.2.3 From fc5ef4331810b160427ad2d0165dff713e968e9b Mon Sep 17 00:00:00 2001 From: Li Xiasong Date: Fri, 15 May 2026 06:27:34 +0200 Subject: selftests: mptcp: join: cover ADD_ADDR tx drop and list progress Extend add_addr_ports_tests with IPv6 signaling cases that exercise ADD_ADDR tx-space shortage when tcp_timestamps are enabled. Add one case to verify PM still progresses to later signal endpoints after the first one is dropped. This covers both failure accounting and the non-blocking behavior of the announce list after a tx-space drop on pure ACK. Signed-off-by: Li Xiasong Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260515-net-mptcp-misc-fixes-7-1-rc4-v2-3-701e96419f2f@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 31 +++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index beec41f6662a..5acd12021e6e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1828,6 +1828,22 @@ chk_add_tx_nr() fi } +chk_add_drop_tx_nr() +{ + local drop_tx_nr=$1 + local count + + print_check "add addr tx drop" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTxDrop") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$drop_tx_nr" ]; then + fail_test "got $count ADD_ADDR drop[s] TX, expected $drop_tx_nr" + else + print_ok + fi +} + chk_rm_nr() { local rm_addr_nr=$1 @@ -3278,6 +3294,21 @@ add_addr_ports_tests() chk_mpc_endp_attempt ${retl} 1 fi + + # first signal address drops, second one still progresses + if reset "signal addr list progresses after tx drop"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 0 + ip netns exec $ns1 sysctl -q net.ipv4.tcp_timestamps=1 + ip netns exec $ns2 sysctl -q net.ipv4.tcp_timestamps=1 + + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal port 10100 + pm_nl_add_endpoint $ns1 dead:beef:3::1 flags signal + run_tests $ns1 $ns2 dead:beef:1::1 + chk_add_drop_tx_nr 1 + chk_add_tx_nr 1 1 + chk_add_nr 1 1 0 + fi } bind_tests() -- cgit v1.2.3 From 0981f90e1a05773a4c29c6e720f5ea1e3c8f1876 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 15 May 2026 06:27:35 +0200 Subject: mptcp: reset rcv wnd on disconnect If the MPTCP socket fallback to TCP before the MP handshake completion, the IASN remain 0, and the rcv_wnd_sent field is not explicitly initialized, just incremented over time with the data transfer. At disconnect time such value is not cleared. If the next connection falls back to TCP before the MP handshake completion, the data transfer will keep incrementing the receive window end sequence starting from the last value used in the previous connection: the announced window will be unrelated from the actual receiver buffer size and likely too big. Address the issue zeroing the field at disconnect time. Fixes: b29fcfb54cd7 ("mptcp: full disconnect implementation") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260515-net-mptcp-misc-fixes-7-1-rc4-v2-4-701e96419f2f@kernel.org Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 859df49e16dc..a72a6ad6ee8b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3487,6 +3487,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) /* for fallback's sake */ WRITE_ONCE(msk->ack_seq, 0); + atomic64_set(&msk->rcv_wnd_sent, 0); WRITE_ONCE(sk->sk_shutdown, 0); sk_error_report(sk); -- cgit v1.2.3 From 3a543ae0e2092d5c2085d5f21f7a7dbafdffea3c Mon Sep 17 00:00:00 2001 From: Gang Yan Date: Fri, 15 May 2026 06:27:36 +0200 Subject: mptcp: update window_clamp on subflows when SO_RCVBUF is set Add __mptcp_subflow_set_rcvbuf() helper to write the subflow sk_rcvbuf, but also to call the recently added tcp_set_rcvbuf() helper to update window_clamp. This is needed because the window clap is updated when scaling_ratio changes, in tcp_measure_rcv_mss(). Until scaling_ratio changes, the subflow is stuck with the old window clamp which may be based on a small initial buffer. Use this new helper in both mptcp_sol_socket_sync_intval() (setsockopt path) and sync_socket_options() (new subflow creation path). Note that this patch depends on commit b025461303d8 ("tcp: update window_clamp when SO_RCVBUF is set"): it fixes the issue on TCP side, but the same fix is needed on MPTCP side as well. Fixes: a2cbb1603943 ("tcp: Update window clamping condition") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/619 Signed-off-by: Gang Yan Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260515-net-mptcp-misc-fixes-7-1-rc4-v2-5-701e96419f2f@kernel.org Signed-off-by: Paolo Abeni --- net/mptcp/sockopt.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 1cf608e7357b..87b5796d0135 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -67,6 +67,12 @@ static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, return 0; } +static void __mptcp_subflow_set_rcvbuf(struct sock *ssk, int val) +{ + WRITE_ONCE(ssk->sk_rcvbuf, val); + tcp_set_rcvbuf(ssk, val); +} + static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) { struct mptcp_subflow_context *subflow; @@ -100,7 +106,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in case SO_RCVBUF: case SO_RCVBUFFORCE: ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; - WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); + __mptcp_subflow_set_rcvbuf(ssk, sk->sk_rcvbuf); break; case SO_MARK: if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { @@ -1560,7 +1566,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; } if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) - WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); + __mptcp_subflow_set_rcvbuf(ssk, sk->sk_rcvbuf); } if (sock_flag(sk, SOCK_LINGER)) { -- cgit v1.2.3 From 01ff78e4b3d98689184c52d97f9575dfbdc3b10f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 May 2026 06:27:37 +0200 Subject: selftests: mptcp: drop nanoseconds width specifier Using the format specifier +%s%3N with GNU date is honoured, and only prints 3 digits of the nanoseconds portion of the seconds since epoch, which corresponds to the milliseconds. The uutils implementation of date currently does not honour this, and always prints all 9 digits. This is a known issue [1], but can be worked around by adapting this test to use nanoseconds instead of microseconds, and then divide it by 1e6. This fix is similar to what has been done on systemd side [2], and it is needed to run the selftests on Ubuntu 26.04, containing uutils 0.8.0. Note that the Fixes tag is there even if this patch doesn't fix an issue in the kernel selftests, but it is useful for those using uutils 0.8.0. Fixes: 048d19d444be ("mptcp: add basic kselftest for mptcp") Cc: stable@vger.kernel.org Link: https://github.com/uutils/coreutils/issues/11658 [1] Link: https://github.com/systemd/systemd/pull/41627 [2] Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260515-net-mptcp-misc-fixes-7-1-rc4-v2-6-701e96419f2f@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 6 +++--- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index a6447f7a31fe..d158678fa6ab 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -401,7 +401,7 @@ do_transfer() mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" local start - start=$(date +%s%3N) + start=$(date +%s%N) ip netns exec ${connector_ns} \ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ $extra_args $connect_addr < "$cin" > "$cout" & @@ -423,7 +423,7 @@ do_transfer() fi local stop - stop=$(date +%s%3N) + stop=$(date +%s%N) if $capture; then sleep 1 @@ -439,7 +439,7 @@ do_transfer() fi local duration - duration=$((stop-start)) + duration=$(((stop-start) / 1000000)) printf "(duration %05sms) " "${duration}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 989a5975dcea..5ef6033775c8 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -28,7 +28,7 @@ declare -rx MPTCP_LIB_AF_INET6=10 MPTCP_LIB_SUBTESTS=() MPTCP_LIB_SUBTESTS_DUPLICATED=0 MPTCP_LIB_SUBTEST_FLAKY=0 -MPTCP_LIB_SUBTESTS_LAST_TS_MS= +MPTCP_LIB_SUBTESTS_LAST_TS_NS= MPTCP_LIB_TEST_COUNTER=0 MPTCP_LIB_TEST_FORMAT="%02u %-50s" MPTCP_LIB_IP_MPTCP=0 @@ -236,7 +236,7 @@ mptcp_lib_kversion_ge() { } mptcp_lib_subtests_last_ts_reset() { - MPTCP_LIB_SUBTESTS_LAST_TS_MS="$(date +%s%3N)" + MPTCP_LIB_SUBTESTS_LAST_TS_NS="$(date +%s%N)" } mptcp_lib_subtests_last_ts_reset @@ -255,7 +255,7 @@ __mptcp_lib_result_check_duplicated() { __mptcp_lib_result_add() { local result="${1}" local time="time=" - local ts_prev_ms + local ts_prev_ns shift local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1)) @@ -265,9 +265,9 @@ __mptcp_lib_result_add() { # not to add two '#' [[ "${*}" != *"#"* ]] && time="# ${time}" - ts_prev_ms="${MPTCP_LIB_SUBTESTS_LAST_TS_MS}" + ts_prev_ns="${MPTCP_LIB_SUBTESTS_LAST_TS_NS}" mptcp_lib_subtests_last_ts_reset - time+="$((MPTCP_LIB_SUBTESTS_LAST_TS_MS - ts_prev_ms))ms" + time+="$(((MPTCP_LIB_SUBTESTS_LAST_TS_NS - ts_prev_ns) / 1000000))ms" MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*} ${time}") } -- cgit v1.2.3 From e7537735028c3ad4b0bfc02ff8fa2a1a28aa04fe Mon Sep 17 00:00:00 2001 From: Heechan Kang Date: Sun, 17 May 2026 15:22:32 +0900 Subject: fwctl: pds: Validate RPC input size before parsing The fwctl core allocates the device-specific RPC input buffer with fwctl_rpc.in_len and passes that buffer to the driver callback. pdsfc_fw_rpc() casts the buffer to struct fwctl_rpc_pds and then calls pdsfc_validate_rpc(), which reads fields from that structure before checking that the input buffer is large enough to contain it. A short in_len can make pds_fwctl read beyond the allocation. Reject pds RPC buffers that are smaller than struct fwctl_rpc_pds before parsing any pds-specific fields. Fixes: 92c66ee829b9 ("pds_fwctl: add rpc and query support") Link: https://patch.msgid.link/r/20260517062232.1858747-1-gganji11@naver.com Cc: stable@vger.kernel.org # v6.15+ Signed-off-by: Heechan Kang Reviewed-by: Dave Jiang Signed-off-by: Jason Gunthorpe --- drivers/fwctl/pds/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/fwctl/pds/main.c b/drivers/fwctl/pds/main.c index 08872ee8422f..68fe254dd10a 100644 --- a/drivers/fwctl/pds/main.c +++ b/drivers/fwctl/pds/main.c @@ -362,6 +362,9 @@ static void *pdsfc_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope, void *out = NULL; int err; + if (in_len < sizeof(*rpc)) + return ERR_PTR(-EINVAL); + err = pdsfc_validate_rpc(pdsfc, rpc, scope); if (err) return ERR_PTR(err); -- cgit v1.2.3 From 00c9753435e8a800761feeeea029a83c4c4847c4 Mon Sep 17 00:00:00 2001 From: Коненко Андрей Викторович Date: Tue, 19 May 2026 16:37:42 +0300 Subject: hp-wmi: fix support for thermal profile Omen 16-с0xxx laptops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HP Omen 16-c0xxx (board ID: 8902) has the same WMI interface as other Victus S boards, but requires additional quirks for correctly switching thermal profile. Add the DMI board name to victus_s_thermal_profile_boards[] table and map it to the omen_v1_legacy_thermal_params quirk. Testing on board 8902 confirmed that platform profile is registered successfully and fan RPMs are readable and controllable. Signed-off-by: Konenko Andrey Viktorovich Link: https://patch.msgid.link/T3DTKbKwQzOgk_0eUG-kMg@aquinas.su Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-wmi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index 6950bec2a9d8..f63bc00d9a9b 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -189,6 +189,10 @@ static const char * const victus_thermal_profile_boards[] = { /* DMI Board names of Victus 16-r and Victus 16-s laptops */ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst = { + { + .matches = { DMI_MATCH(DMI_BOARD_NAME, "8902") }, + .driver_data = (void *)&omen_v1_legacy_thermal_params, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "8A44") }, .driver_data = (void *)&omen_v1_legacy_thermal_params, -- cgit v1.2.3 From e7a9a6ea40e352cd7977f6a8c80bdeadf65ad838 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 17:11:49 +0200 Subject: platform/x86: adv_swbutton: Check ACPI_HANDLE() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_HANDLE() check against NULL to the platform/x86 adv_swbutton driver. Fixes: 3d904005f686 ("platform/x86: add support for Advantech software defined button") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/5115425.31r3eYUQgx@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/adv_swbutton.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/adv_swbutton.c b/drivers/platform/x86/adv_swbutton.c index 6fa60f3fc53c..8f7a26e6de81 100644 --- a/drivers/platform/x86/adv_swbutton.c +++ b/drivers/platform/x86/adv_swbutton.c @@ -48,10 +48,14 @@ static int adv_swbutton_probe(struct platform_device *device) { struct adv_swbutton *button; struct input_dev *input; - acpi_handle handle = ACPI_HANDLE(&device->dev); + acpi_handle handle; acpi_status status; int error; + handle = ACPI_HANDLE(&device->dev); + if (!handle) + return -ENODEV; + button = devm_kzalloc(&device->dev, sizeof(*button), GFP_KERNEL); if (!button) return -ENOMEM; -- cgit v1.2.3 From abfbe5ee8ae89f1f5449790423d5dd3e423545bd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 17:12:40 +0200 Subject: platform/x86: hp_accel: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the platform/x86 hp_accel driver. Fixes: 8ebcb6c94c71 ("platform/x86: hp_accel: Convert to be a platform driver") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/2425918.ElGaqSPkdT@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp_accel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/hp/hp_accel.c b/drivers/platform/x86/hp/hp_accel.c index 10d5af18d639..39b73dc473f1 100644 --- a/drivers/platform/x86/hp/hp_accel.c +++ b/drivers/platform/x86/hp/hp_accel.c @@ -300,6 +300,9 @@ static int lis3lv02d_probe(struct platform_device *device) int ret; lis3_dev.bus_priv = ACPI_COMPANION(&device->dev); + if (!lis3_dev.bus_priv) + return -ENODEV; + lis3_dev.init = lis3lv02d_acpi_init; lis3_dev.read = lis3lv02d_acpi_read; lis3_dev.write = lis3lv02d_acpi_write; -- cgit v1.2.3 From 5c69e090ae5dd93d910f70db0796357080707d26 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 17:13:28 +0200 Subject: platform/x86: intel-hid: Check ACPI_HANDLE() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_HANDLE() check against NULL to the platform/x86 intel-hid driver. Fixes: ecc83e52b28c ("intel-hid: new hid event driver for hotkeys") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/1971512.tdWV9SEqCh@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/hid.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 2ddd8af8c1ce..085093506dda 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -688,12 +688,16 @@ static bool button_array_present(struct platform_device *device) static int intel_hid_probe(struct platform_device *device) { - acpi_handle handle = ACPI_HANDLE(&device->dev); unsigned long long mode, dummy; struct intel_hid_priv *priv; + acpi_handle handle; acpi_status status; int err; + handle = ACPI_HANDLE(&device->dev); + if (!handle) + return -ENODEV; + intel_hid_init_dsm(handle); if (!intel_hid_evaluate_method(handle, INTEL_HID_DSM_HDMM_FN, &mode)) { -- cgit v1.2.3 From 2765f16c12af7c2533763e46b8113b727354012d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 17:15:32 +0200 Subject: platform/x86: intel_sar: Check ACPI_HANDLE() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_HANDLE() check against NULL to the platform/x86 intel_sar driver. Fixes: dcfbd31ef4bc ("platform/x86: BIOS SAR driver for Intel M.2 Modem") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/14023870.uLZWGnKmhe@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/int1092/intel_sar.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/int1092/intel_sar.c b/drivers/platform/x86/intel/int1092/intel_sar.c index 88822023a149..849f7b415c1e 100644 --- a/drivers/platform/x86/intel/int1092/intel_sar.c +++ b/drivers/platform/x86/intel/int1092/intel_sar.c @@ -245,15 +245,20 @@ static void sar_get_data(int reg, struct wwan_sar_context *context) static int sar_probe(struct platform_device *device) { struct wwan_sar_context *context; + acpi_handle handle; int reg; int result; + handle = ACPI_HANDLE(&device->dev); + if (!handle) + return -ENODEV; + context = kzalloc_obj(*context); if (!context) return -ENOMEM; context->sar_device = device; - context->handle = ACPI_HANDLE(&device->dev); + context->handle = handle; dev_set_drvdata(&device->dev, context); result = guid_parse(SAR_DSM_UUID, &context->guid); -- cgit v1.2.3 From a9f305c5a355efeb240d406d378491d9eec02d07 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 17:16:22 +0200 Subject: platform/x86: intel-vbtn: Check ACPI_HANDLE() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_HANDLE() check against NULL to the platform/x86 intel-vbtn driver. Fixes: 26173179fae1 ("platform/x86: intel-vbtn: Eval VBDL after registering our notifier") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/3426431.aeNJFYEL58@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/vbtn.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 9ca87e707582..874023c38fd1 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -275,12 +275,16 @@ static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel) static int intel_vbtn_probe(struct platform_device *device) { - acpi_handle handle = ACPI_HANDLE(&device->dev); bool dual_accel, has_buttons, has_switches; struct intel_vbtn_priv *priv; + acpi_handle handle; acpi_status status; int err; + handle = ACPI_HANDLE(&device->dev); + if (!handle) + return -ENODEV; + dual_accel = dual_accel_detect(); has_buttons = acpi_has_method(handle, "VBDL"); has_switches = intel_vbtn_has_switches(handle, dual_accel); -- cgit v1.2.3 From 5798b46271e229dab05e47537ac30b76f81728da Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 18:30:26 +0200 Subject: platform/surface: surfacepro3_button: Check ACPI_COMPANION() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the surfacepro3_button driver. Fixes: d913a5a12b40 ("platform/surface: surfacepro3_button: Convert to a platform driver") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Reviewed-by: Chen Yu Link: https://patch.msgid.link/23119222.EfDdHjke4D@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/surfacepro3_button.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/platform/surface/surfacepro3_button.c b/drivers/platform/surface/surfacepro3_button.c index 0293bc517b54..388a3e1a488c 100644 --- a/drivers/platform/surface/surfacepro3_button.c +++ b/drivers/platform/surface/surfacepro3_button.c @@ -185,12 +185,15 @@ static bool surface_button_check_MSHW0040(struct device *dev, acpi_handle handle static int surface_button_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); struct surface_button *button; + struct acpi_device *device; struct input_dev *input; - const char *hid = acpi_device_hid(device); int error; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + if (strncmp(acpi_device_bid(device), SURFACE_BUTTON_OBJ_NAME, strlen(SURFACE_BUTTON_OBJ_NAME))) return -ENODEV; @@ -210,7 +213,8 @@ static int surface_button_probe(struct platform_device *pdev) } strscpy(acpi_device_name(device), SURFACE_BUTTON_DEVICE_NAME); - snprintf(button->phys, sizeof(button->phys), "%s/buttons", hid); + snprintf(button->phys, sizeof(button->phys), "%s/buttons", + acpi_device_hid(device)); input->name = acpi_device_name(device); input->phys = button->phys; -- cgit v1.2.3 From c12cc42dadd85dea210d5699d4f21def827382eb Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 13 May 2026 01:21:38 +0200 Subject: platform/x86: uniwill-laptop: Properly initialize charging threshold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EC might initialize the charge threshold with 0 to signal that said threshold is uninitialized. Detect this and replace said value with 100 to signal the EC that we want to take control of battery charging. Also set the threshold to 100 if the EC-provided value is invalid. Fixes: d050479693bb ("platform/x86: Add Uniwill laptop driver") Reviewed-by: Werner Sembach Signed-off-by: Armin Wolf Link: https://patch.msgid.link/20260512232145.329260-2-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/uniwill/uniwill-acpi.c | 35 ++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/uniwill/uniwill-acpi.c b/drivers/platform/x86/uniwill/uniwill-acpi.c index 945df5092637..d9c202fc8c71 100644 --- a/drivers/platform/x86/uniwill/uniwill-acpi.c +++ b/drivers/platform/x86/uniwill/uniwill-acpi.c @@ -1359,6 +1359,16 @@ static int uniwill_led_init(struct uniwill_data *data) &init_data); } +static unsigned int uniwill_sanitize_battery_threshold(unsigned int value) +{ + /* 0 means "charging threshold not active" */ + if (!value) + return 100; + + /* Guard against invalid values */ + return min(value, 100); +} + static int uniwill_get_property(struct power_supply *psy, const struct power_supply_ext *ext, void *drvdata, enum power_supply_property psp, union power_supply_propval *val) @@ -1405,7 +1415,8 @@ static int uniwill_get_property(struct power_supply *psy, const struct power_sup if (ret < 0) return ret; - val->intval = clamp_val(FIELD_GET(CHARGE_CTRL_MASK, regval), 0, 100); + regval = FIELD_GET(CHARGE_CTRL_MASK, regval); + val->intval = uniwill_sanitize_battery_threshold(regval); return 0; default: return -EINVAL; @@ -1500,11 +1511,33 @@ static int uniwill_remove_battery(struct power_supply *battery, struct acpi_batt static int uniwill_battery_init(struct uniwill_data *data) { + unsigned int value, threshold, sanitized; int ret; if (!uniwill_device_supports(data, UNIWILL_FEATURE_BATTERY)) return 0; + ret = regmap_read(data->regmap, EC_ADDR_CHARGE_CTRL, &value); + if (ret < 0) + return ret; + + /* + * The charge control threshold might be initialized with 0 by + * the EC to signal that said threshold is uninitialized. We thus + * need to replace this placeholder value with a valid one (100) + * to signal that we want to take control of battery charging. + * For the sake of completeness we also apply this to other + * invalid threshold values. + */ + threshold = FIELD_GET(CHARGE_CTRL_MASK, value); + sanitized = uniwill_sanitize_battery_threshold(threshold); + if (threshold != sanitized) { + FIELD_MODIFY(CHARGE_CTRL_MASK, &value, sanitized); + ret = regmap_write(data->regmap, EC_ADDR_CHARGE_CTRL, value); + if (ret < 0) + return ret; + } + ret = devm_mutex_init(data->dev, &data->battery_lock); if (ret < 0) return ret; -- cgit v1.2.3 From c16a4823cc60a32b891f7a148bb30c0f51d12cf4 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 13 May 2026 01:21:39 +0200 Subject: platform/x86: uniwill-laptop: Accept charging threshold of 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The power supply sysfs ABI states that: Not all hardware is capable of setting this to an arbitrary percentage. Drivers will round written values to the nearest supported value. Reading back the value will show the actual threshold set by the driver. The driver currently violates this ABI by rejecting a charging threshold of 0. Fix this by clamping this value to 1. Fixes: d050479693bb ("platform/x86: Add Uniwill laptop driver") Reviewed-by: Werner Sembach Reviewed-by: Ilpo Järvinen Signed-off-by: Armin Wolf Link: https://patch.msgid.link/20260512232145.329260-3-W_Armin@gmx.de Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/uniwill/uniwill-acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/uniwill/uniwill-acpi.c b/drivers/platform/x86/uniwill/uniwill-acpi.c index d9c202fc8c71..1f9e9f61d387 100644 --- a/drivers/platform/x86/uniwill/uniwill-acpi.c +++ b/drivers/platform/x86/uniwill/uniwill-acpi.c @@ -1431,11 +1431,11 @@ static int uniwill_set_property(struct power_supply *psy, const struct power_sup switch (psp) { case POWER_SUPPLY_PROP_CHARGE_CONTROL_END_THRESHOLD: - if (val->intval < 1 || val->intval > 100) + if (val->intval < 0 || val->intval > 100) return -EINVAL; return regmap_update_bits(data->regmap, EC_ADDR_CHARGE_CTRL, CHARGE_CTRL_MASK, - val->intval); + max(val->intval, 1)); default: return -EINVAL; } -- cgit v1.2.3 From fb4b67c44557cb4cbb15900083d4e1af22320339 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 13 May 2026 01:21:40 +0200 Subject: platform/x86: uniwill-laptop: Fix behavior of "force" module param MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users might want to force-enable all possible features even on machines with a valid device descriptor. Until now the "force" module param was ignored on such machines. Fix this to make it easier to test for support of new features. Fixes: d050479693bb ("platform/x86: Add Uniwill laptop driver") Reviewed-by: Werner Sembach Reviewed-by: Ilpo Järvinen Signed-off-by: Armin Wolf Link: https://patch.msgid.link/20260512232145.329260-4-W_Armin@gmx.de Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/uniwill/uniwill-acpi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/uniwill/uniwill-acpi.c b/drivers/platform/x86/uniwill/uniwill-acpi.c index 1f9e9f61d387..481c4cf46e63 100644 --- a/drivers/platform/x86/uniwill/uniwill-acpi.c +++ b/drivers/platform/x86/uniwill/uniwill-acpi.c @@ -2489,8 +2489,6 @@ static int __init uniwill_init(void) if (!force) return -ENODEV; - /* Assume that the device supports all features */ - device_descriptor.features = UINT_MAX; pr_warn("Loading on a potentially unsupported device\n"); } else { /* @@ -2508,6 +2506,12 @@ static int __init uniwill_init(void) device_descriptor = *descriptor; } + if (force) { + /* Assume that the device supports all features */ + device_descriptor.features = UINT_MAX; + pr_warn("Enabling potentially unsupported features\n"); + } + ret = platform_driver_register(&uniwill_driver); if (ret < 0) return ret; -- cgit v1.2.3 From 26cbe119f99c86dcb4a0136d2bc73c0c716d80e4 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 13 May 2026 01:21:41 +0200 Subject: platform/x86: uniwill-laptop: Do not enable the charging limit even when forced MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems that on some older models (~2020) the battery charging limit can permanently damage the battery. Prevent users from enabling this feature thru the "force" module parameter to avoid causing permanent hardware damage on such devices. Fixes: d050479693bb ("platform/x86: Add Uniwill laptop driver") Link: https://www.reddit.com/r/XMG_gg/comments/ld9yyf/battery_limit_hidden_function_discovered_on/ Reviewed-by: Werner Sembach Reviewed-by: Ilpo Järvinen Signed-off-by: Armin Wolf Link: https://patch.msgid.link/20260512232145.329260-5-W_Armin@gmx.de Signed-off-by: Ilpo Järvinen --- Documentation/admin-guide/laptops/uniwill-laptop.rst | 10 ++++++++++ drivers/platform/x86/uniwill/uniwill-acpi.c | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/laptops/uniwill-laptop.rst b/Documentation/admin-guide/laptops/uniwill-laptop.rst index 561334865feb..1f3ca84c7d88 100644 --- a/Documentation/admin-guide/laptops/uniwill-laptop.rst +++ b/Documentation/admin-guide/laptops/uniwill-laptop.rst @@ -43,6 +43,11 @@ Support for changing the platform performance mode is currently not implemented. Battery Charging Control ------------------------ +.. warning:: Some devices do not properly implement the charging threshold interface. Forcing + the driver to enable access to said interface on such devices might damage the + battery [1]_. Because of this the driver will not enable said feature even when + using the ``force`` module parameter. + The ``uniwill-laptop`` driver supports controlling the battery charge limit. This happens over the standard ``charge_control_end_threshold`` power supply sysfs attribute. All values between 1 and 100 percent are supported. @@ -70,3 +75,8 @@ The ``uniwill-laptop`` driver allows to set the configurable TGP for devices wit allow it. See Documentation/ABI/testing/sysfs-driver-uniwill-laptop for details. + +References +========== + +.. [1] https://www.reddit.com/r/XMG_gg/comments/ld9yyf/battery_limit_hidden_function_discovered_on/ diff --git a/drivers/platform/x86/uniwill/uniwill-acpi.c b/drivers/platform/x86/uniwill/uniwill-acpi.c index 481c4cf46e63..8cc01bec77b9 100644 --- a/drivers/platform/x86/uniwill/uniwill-acpi.c +++ b/drivers/platform/x86/uniwill/uniwill-acpi.c @@ -2507,8 +2507,8 @@ static int __init uniwill_init(void) } if (force) { - /* Assume that the device supports all features */ - device_descriptor.features = UINT_MAX; + /* Assume that the device supports all features except the charge limit */ + device_descriptor.features = UINT_MAX & ~UNIWILL_FEATURE_BATTERY; pr_warn("Enabling potentially unsupported features\n"); } -- cgit v1.2.3 From 2ccd8ff980b50e842481bae71102fa3883fc4377 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Fri, 15 May 2026 14:37:29 +0100 Subject: arm64: probes: Handle probes on hinted conditional branch instructions BC.cond instructions introduced by FEAT_HBC cannot be executed out-of-line, like other branch instructions. However, they can be simulated in the same way as B.cond instructions. Extend the B.cond decoder mask to match BC.cond instructions as well, and handle them using the existing B.cond simulation path. Fixes: 7f86d128e437 ("arm64: add HWCAP for FEAT_HBC (hinted conditional branches)") Cc: Signed-off-by: Vladimir Murzin Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/insn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index f463a654a2bb..cc0702fa64a7 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -409,7 +409,7 @@ __AARCH64_INSN_FUNCS(cbz, 0x7F000000, 0x34000000) __AARCH64_INSN_FUNCS(cbnz, 0x7F000000, 0x35000000) __AARCH64_INSN_FUNCS(tbz, 0x7F000000, 0x36000000) __AARCH64_INSN_FUNCS(tbnz, 0x7F000000, 0x37000000) -__AARCH64_INSN_FUNCS(bcond, 0xFF000010, 0x54000000) +__AARCH64_INSN_FUNCS(bcond, 0xFF000000, 0x54000000) __AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001) __AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002) __AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003) -- cgit v1.2.3 From 348ccc754d8939e21ca5956ff45720b81d6e407f Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 14 May 2026 07:40:42 +0200 Subject: platform/x86/intel/vsec: Fix enable_cnt imbalance on PCIe error recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a PCIe Uncorrectable Error has been reported by a device with Intel Vendor Specific Extended Capabilities and has been recovered through a Secondary Bus Reset, its driver calls intel_vsec_pci_probe() to rescan and reinitialize VSECs. intel_vsec_pci_probe() invokes pcim_enable_device() and thereby adds another devm action which calls pcim_disable_device() on driver unbind. So once the driver unbinds, pcim_disable_device() will be called as many times as an Uncorrectable Error occurred, plus one. This will lead to an enable_cnt imbalance on driver unbind. Additionally, since commit dc957ab6aa05 ("platform/x86/intel/vsec: Add private data for per-device data"), a devm_kzalloc() allocation is leaked on every Uncorrectable Error. Avoid by splitting the VSEC rescan out of intel_vsec_pci_probe() into a separate helper and calling that on PCIe error recovery. Fixes: 936874b77dd0 ("platform/x86/intel/vsec: Add PCI error recovery support to Intel PMT") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v6.0+ Link: https://patch.msgid.link/bd594d09fa866dc51dddc9a447c3b23f9b1402cc.1778736835.git.lukas@wunner.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/vsec.c | 54 ++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c index 7d5dbc1c1d05..18e4a892bf0f 100644 --- a/drivers/platform/x86/intel/vsec.c +++ b/drivers/platform/x86/intel/vsec.c @@ -649,29 +649,13 @@ static void intel_vsec_skip_missing_dependencies(struct pci_dev *pdev) } } -static int intel_vsec_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +static int intel_vsec_pci_init(struct pci_dev *pdev) { - const struct intel_vsec_platform_info *info; - struct vsec_priv *priv; - int num_caps, ret; + struct vsec_priv *priv = pci_get_drvdata(pdev); + const struct intel_vsec_platform_info *info = priv->info; int run_once = 0; bool found_any = false; - - ret = pcim_enable_device(pdev); - if (ret) - return ret; - - pci_save_state(pdev); - info = (const struct intel_vsec_platform_info *)id->driver_data; - if (!info) - return -EINVAL; - - priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - priv->info = info; - pci_set_drvdata(pdev, priv); + int num_caps; num_caps = hweight_long(info->caps); while (num_caps--) { @@ -692,6 +676,31 @@ static int intel_vsec_pci_probe(struct pci_dev *pdev, const struct pci_device_id return 0; } +static int intel_vsec_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + const struct intel_vsec_platform_info *info; + struct vsec_priv *priv; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + pci_save_state(pdev); + info = (const struct intel_vsec_platform_info *)id->driver_data; + if (!info) + return -EINVAL; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->info = info; + pci_set_drvdata(pdev, priv); + + return intel_vsec_pci_init(pdev); +} + int intel_vsec_set_mapping(struct oobmsm_plat_info *plat_info, struct intel_vsec_device *vsec_dev) { @@ -832,7 +841,6 @@ static pci_ers_result_t intel_vsec_pci_slot_reset(struct pci_dev *pdev) { struct intel_vsec_device *intel_vsec_dev; pci_ers_result_t status = PCI_ERS_RESULT_DISCONNECT; - const struct pci_device_id *pci_dev_id; unsigned long index; dev_info(&pdev->dev, "Resetting PCI slot\n"); @@ -853,10 +861,8 @@ static pci_ers_result_t intel_vsec_pci_slot_reset(struct pci_dev *pdev) devm_release_action(&pdev->dev, intel_vsec_remove_aux, &intel_vsec_dev->auxdev); } - pci_disable_device(pdev); pci_restore_state(pdev); - pci_dev_id = pci_match_id(intel_vsec_pci_ids, pdev); - intel_vsec_pci_probe(pdev, pci_dev_id); + intel_vsec_pci_init(pdev); out: return status; -- cgit v1.2.3 From d2d2e7c8fb37b27301ee5c8343b2f7037efc6ea6 Mon Sep 17 00:00:00 2001 From: Ahmed Yaseen Date: Sun, 17 May 2026 18:30:11 +0000 Subject: platform/x86: asus-armoury: fix mini-LED mode get/set on MODE2 devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mini-LED current_value attribute does not work on devices that use ASUS_WMI_DEVID_MINI_LED_MODE2 (2024 and newer models). Reading is broken: mini_led_mode_current_value_show() fetches the mode from the device but then decodes a literal 0 instead of the value it just read: mode = FIELD_GET(ASUS_MINI_LED_MODE_MASK, 0); So mode is always 0, and the attribute always reports the same thing regardless of the real hardware state. Writing is broken too. The number a user writes is an index; the value the firmware actually wants is looked up from that index in mini_led_mode_map[]. mini_led_mode_current_value_store() skips that lookup and passes the raw index straight to armoury_attr_uint_store(). On 2024 devices the firmware numbers its modes differently from the index, so some writes are rejected with -EINVAL and the rest send the wrong mode to the hardware. Fix both paths: decode the value actually read from the device when reading, and look up the firmware value before sending it when writing. Older (MODE1) devices were unaffected because there the index and the firmware value are the same. Fixes: f99eb098090e ("platform/x86: asus-armoury: move existing tunings to asus-armoury module") Signed-off-by: Ahmed Yaseen Reviewed-by: Denis Benato Link: https://patch.msgid.link/20260517182957.11069-1-yaseen@ghoul.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-armoury.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/asus-armoury.c b/drivers/platform/x86/asus-armoury.c index 5b0987ccc270..495dc1e31d40 100644 --- a/drivers/platform/x86/asus-armoury.c +++ b/drivers/platform/x86/asus-armoury.c @@ -370,7 +370,7 @@ static ssize_t mini_led_mode_current_value_show(struct kobject *kobj, if (err) return err; - mode = FIELD_GET(ASUS_MINI_LED_MODE_MASK, 0); + mode = FIELD_GET(ASUS_MINI_LED_MODE_MASK, mode); for (i = 0; i < mini_led_mode_map_size; i++) if (mode == mini_led_mode_map[i]) @@ -386,6 +386,7 @@ static ssize_t mini_led_mode_current_value_store(struct kobject *kobj, { u32 *mini_led_mode_map; size_t mini_led_mode_map_size; + char mapped_value[12]; u32 mode; int err; @@ -414,9 +415,16 @@ static ssize_t mini_led_mode_current_value_store(struct kobject *kobj, return -ENODEV; } - return armoury_attr_uint_store(kobj, attr, buf, count, - 0, mini_led_mode_map[mode], - NULL, asus_armoury.mini_led_dev_id); + /* + * armoury_attr_uint_store() parses and sends the value from the + * passed buffer; hand it the mapped firmware value so the device + * receives the translated mode instead of the raw index. + */ + snprintf(mapped_value, sizeof(mapped_value), "%u", mini_led_mode_map[mode]); + + return armoury_attr_uint_store(kobj, attr, mapped_value, count, 0, + mini_led_mode_map[mode], NULL, + asus_armoury.mini_led_dev_id); } static ssize_t mini_led_mode_possible_values_show(struct kobject *kobj, -- cgit v1.2.3 From 9ac7c8dc07af88f5169f217fec431c992b4eb550 Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Sun, 17 May 2026 22:00:02 +0000 Subject: platform/x86: asus-armoury: add support for FX607VU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TDP data for laptop model FX607VU. Signed-off-by: Denis Benato Link: https://patch.msgid.link/20260517220005.4594-2-denis.benato@linux.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-armoury.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h index c30d2b451e01..bd394ef94264 100644 --- a/drivers/platform/x86/asus-armoury.h +++ b/drivers/platform/x86/asus-armoury.h @@ -886,6 +886,33 @@ static const struct dmi_system_id power_limits[] = { .requires_fan_curve = true, }, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "FX607VU"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 28, + .ppt_pl1_spl_def = 115, + .ppt_pl1_spl_max = 135, + .ppt_pl2_sppt_min = 28, + .ppt_pl2_sppt_max = 135, + .nv_dynamic_boost_min = 5, + .nv_dynamic_boost_max = 25, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 25, + .ppt_pl1_spl_max = 45, + .ppt_pl2_sppt_min = 35, + .ppt_pl2_sppt_max = 60, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + .requires_fan_curve = true, + }, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "GA401Q"), -- cgit v1.2.3 From 239c7acb9e3f73ada3ccce06302f6f3d2d89762d Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Sun, 17 May 2026 22:00:03 +0000 Subject: platform/x86: asus-armoury: add support for G614FR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TDP data for laptop model G614FR. Signed-off-by: Denis Benato Link: https://patch.msgid.link/20260517220005.4594-3-denis.benato@linux.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-armoury.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h index bd394ef94264..6c8f46d289d8 100644 --- a/drivers/platform/x86/asus-armoury.h +++ b/drivers/platform/x86/asus-armoury.h @@ -1786,6 +1786,40 @@ static const struct dmi_system_id power_limits[] = { .requires_fan_curve = true, }, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "G614FR"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 30, + .ppt_pl1_spl_max = 120, + .ppt_pl2_sppt_min = 65, + .ppt_pl2_sppt_def = 140, + .ppt_pl2_sppt_max = 162, + .ppt_pl3_fppt_min = 65, + .ppt_pl3_fppt_def = 140, + .ppt_pl3_fppt_max = 162, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + .nv_dynamic_boost_min = 5, + .nv_dynamic_boost_max = 25, + .nv_tgp_min = 65, + .nv_tgp_max = 115, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 25, + .ppt_pl1_spl_max = 65, + .ppt_pl2_sppt_min = 25, + .ppt_pl2_sppt_max = 65, + .ppt_pl3_fppt_min = 35, + .ppt_pl3_fppt_max = 75, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + .requires_fan_curve = true, + }, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "G614J"), -- cgit v1.2.3 From df1e0b209fa09676585402d5437ee190a424302b Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Sun, 17 May 2026 22:00:04 +0000 Subject: platform/x86: asus-armoury: add support for FA401EA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TDP data for laptop model FA401EA. Signed-off-by: Denis Benato Link: https://patch.msgid.link/20260517220005.4594-4-denis.benato@linux.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-armoury.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h index 6c8f46d289d8..71df2ca061a7 100644 --- a/drivers/platform/x86/asus-armoury.h +++ b/drivers/platform/x86/asus-armoury.h @@ -346,6 +346,29 @@ struct power_data { * _def is not required and will be assumed to be default == max if missing. */ static const struct dmi_system_id power_limits[] = { + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "FA401EA"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 15, + .ppt_pl1_spl_max = 95, + .ppt_pl2_sppt_min = 35, + .ppt_pl2_sppt_max = 100, + .ppt_pl3_fppt_min = 35, + .ppt_pl3_fppt_max = 115, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 15, + .ppt_pl1_spl_max = 71, + .ppt_pl2_sppt_min = 35, + .ppt_pl2_sppt_max = 71, + .ppt_pl3_fppt_min = 35, + .ppt_pl3_fppt_max = 71, + }, + }, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "FA401UM"), -- cgit v1.2.3 From 841ff62aa5f85b13771b33e7b9c102cbaf87432f Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Sun, 17 May 2026 22:00:05 +0000 Subject: platform/x86: asus-armoury: add support for GU605CP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TDP data for laptop model GU605CP. Signed-off-by: Denis Benato Link: https://patch.msgid.link/20260517220005.4594-5-denis.benato@linux.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-armoury.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/platform/x86/asus-armoury.h b/drivers/platform/x86/asus-armoury.h index 71df2ca061a7..692978b61959 100644 --- a/drivers/platform/x86/asus-armoury.h +++ b/drivers/platform/x86/asus-armoury.h @@ -1303,6 +1303,35 @@ static const struct dmi_system_id power_limits[] = { }, }, }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GU605CP"), + }, + .driver_data = &(struct power_data) { + .ac_data = &(struct power_limits) { + .ppt_pl1_spl_min = 45, + .ppt_pl1_spl_max = 75, + .ppt_pl2_sppt_min = 56, + .ppt_pl2_sppt_max = 95, + .nv_dynamic_boost_min = 5, + .nv_dynamic_boost_max = 15, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + .nv_tgp_min = 55, + .nv_tgp_def = 75, + .nv_tgp_max = 95, + }, + .dc_data = &(struct power_limits) { + .ppt_pl1_spl_min = 25, + .ppt_pl1_spl_max = 75, + .ppt_pl2_sppt_min = 32, + .ppt_pl2_sppt_max = 95, + .nv_temp_target_min = 75, + .nv_temp_target_max = 87, + }, + .requires_fan_curve = true, + }, + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "GU605CR"), -- cgit v1.2.3 From b088fe35019433541225d315263d8477899e0657 Mon Sep 17 00:00:00 2001 From: Guilherme Giacomo Simoes Date: Sun, 3 May 2026 16:16:09 -0300 Subject: x86/vdso: Fix incorrect size in munmap() on map_vdso() failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In map_vdso(), if a failure occurs during the installation of the VVAR mappings, the error path attempts to clean up previously allocated mappings using do_munmap(). However, the cleanup for the VVAR mapping is incorrectly using image->size (the size of the vDSO text) instead of the actual size allocated for the VVAR area. Replace the incorrect do_munmap() image->size parameter with the constant VDSO_NR_PAGES * PAGE_SIZE. Ensure the unmap size exactly matches the size used during the vdso_install_vvar_mapping() phase to provide a symmetrical and complete teardown of the memory region. Fixes: e93d2521b27f ("x86/vdso: Split virtual clock pages into dedicated mapping") Signed-off-by: Guilherme Giacomo Simoes Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Weißschuh Link: https://patch.msgid.link/20260503191609.551817-1-trintaeoitogc@gmail.com --- arch/x86/entry/vdso/vma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index a6bfcc8243cd..d903bce24f15 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -178,7 +178,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) if (IS_ERR(vma)) { ret = PTR_ERR(vma); do_munmap(mm, text_start, image->size, NULL); - do_munmap(mm, addr, image->size, NULL); + do_munmap(mm, addr, VDSO_NR_PAGES * PAGE_SIZE, NULL); goto up_fail; } -- cgit v1.2.3 From 298a43b54432fbc3a32949a94c72544ee18c8c00 Mon Sep 17 00:00:00 2001 From: Robertus Diawan Chris Date: Tue, 19 May 2026 12:40:24 +0700 Subject: ASoC: soc-utils: Add missing va_end in snd_soc_ret() The default case in snd_soc_ret() use va_start without va_end to cleanup "args" object which can cause undefined behavior. So, add missing va_end to cleanup "args" object. This is reported by Coverity Scan as "Missing varargs init or cleanup". Fixes: 943116ba2a6a ("ASoC: add common snd_soc_ret() and use it") Signed-off-by: Robertus Diawan Chris Link: https://patch.msgid.link/20260519054024.274741-1-robertusdchris@gmail.com Signed-off-by: Mark Brown --- sound/soc/soc-utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c index c8adfff826bd..9cb7567e263e 100644 --- a/sound/soc/soc-utils.c +++ b/sound/soc/soc-utils.c @@ -36,6 +36,7 @@ int snd_soc_ret(const struct device *dev, int ret, const char *fmt, ...) vaf.va = &args; dev_err(dev, "ASoC error (%d): %pV", ret, &vaf); + va_end(args); } return ret; -- cgit v1.2.3 From 13c6da02e767152c9ac4330962247a5e47011035 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 19 May 2026 10:03:00 +0200 Subject: efi: Allocate runtime workqueue before ACPI init Since commit 5894cf571e14 ("acpi/prmt: Use EFI runtime sandbox to invoke PRM handlers") ACPI PRM calls are delegated to a workqueue which runs in a kernel thread, making it easier to detect and mitigate faulting memory accesses performed by the firmware. Rafael reports that such PRM accesses may occur before efisubsys_init() executes, which is where the workqueue is allocated, leading to NULL pointer dereferences. Since acpi_init() [which triggers the early PRM accesses] executes as a subsys_initcall() as well, and has its own dependencies that may be sensitive to initcall ordering, deferring acpi_init() is not an option. So instead, split off the workqueue allocation into its own postcore initcall, as this is the only missing piece to allow EFI runtime calls to be made. This ensures that EFI runtime call (including PRM calls) are accessible to all code running at subsys_initcall() level. Cc: Fixes: 5894cf571e14 ("acpi/prmt: Use EFI runtime sandbox to invoke PRM handlers") Reviewed-by: Rafael J. Wysocki (Intel) Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index d04be38f1750..318d1cc9a066 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -402,21 +402,11 @@ static void __init efi_debugfs_init(void) static inline void efi_debugfs_init(void) {} #endif -/* - * We register the efi subsystem with the firmware subsystem and the - * efivars subsystem with the efi subsystem, if the system was booted with - * EFI. - */ -static int __init efisubsys_init(void) +static int __init efipostcore_init(void) { - int error; - if (!efi_enabled(EFI_RUNTIME_SERVICES)) efi.runtime_supported_mask = 0; - if (!efi_enabled(EFI_BOOT)) - return 0; - if (efi.runtime_supported_mask) { /* * Since we process only one efi_runtime_service() at a time, an @@ -428,9 +418,23 @@ static int __init efisubsys_init(void) pr_err("Creating efi_rts_wq failed, EFI runtime services disabled.\n"); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); efi.runtime_supported_mask = 0; - return 0; } } + return 0; +} +postcore_initcall(efipostcore_init); + +/* + * We register the efi subsystem with the firmware subsystem and the + * efivars subsystem with the efi subsystem, if the system was booted with + * EFI. + */ +static int __init efisubsys_init(void) +{ + int error; + + if (!efi_enabled(EFI_BOOT)) + return 0; if (efi_rt_services_supported(EFI_RT_SUPPORTED_TIME_SERVICES)) platform_device_register_simple("rtc-efi", 0, NULL, 0); -- cgit v1.2.3 From 8939562b16052c75b908d3c5f968bffb526fc6e9 Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Mon, 18 May 2026 15:02:08 +0800 Subject: efi: efi.h: Remove extra semicolon Remove extra semicolons from comments. Signed-off-by: Rong Tao Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/efi.h b/include/linux/efi.h index 72e76ec54641..ccbc35479684 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -61,7 +61,7 @@ typedef void *efi_handle_t; /* * The UEFI spec and EDK2 reference implementation both define EFI_GUID as - * struct { u32 a; u16; b; u16 c; u8 d[8]; }; and so the implied alignment + * struct { u32 a; u16 b; u16 c; u8 d[8]; }; and so the implied alignment * is 32 bits not 8 bits like our guid_t. In some cases (i.e., on 32-bit ARM), * this means that firmware services invoked by the kernel may assume that * efi_guid_t* arguments are 32-bit aligned, and use memory accessors that -- cgit v1.2.3 From d8809f6931065cbbf3554647a50a65a471ab5983 Mon Sep 17 00:00:00 2001 From: Marius Hoch Date: Sun, 17 May 2026 21:23:40 +0200 Subject: efi: sysfb_efi: Extend quirk to cover IdeaPad Duet 3 10IGL5-LTE The LTE enabled version of the IdeaPad Duet 3 10IGL5 needs the same quirk as the non-LTE version. As these are the only two IdeaPad Duet 3 10IGL5 versions, we can safely use non exact matching. Tested on a IdeaPad Duet 3 10IGL5-LTE. Signed-off-by: Marius Hoch Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/sysfb_efi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/sysfb_efi.c b/drivers/firmware/efi/sysfb_efi.c index 4c3986ddcd54..685283bb7327 100644 --- a/drivers/firmware/efi/sysfb_efi.c +++ b/drivers/firmware/efi/sysfb_efi.c @@ -311,11 +311,14 @@ static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = { .callback = efifb_swap_width_height, }, { - /* Lenovo IdeaPad Duet 3 10IGL5 with 1200x1920 portrait screen */ + /* + * Lenovo IdeaPad Duet 3 10IGL5 and 10IGL5-LTE with + * 1200x1920 portrait screen + */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, - "IdeaPad Duet 3 10IGL5"), + /* Non exact match to also match the LTE version */ + DMI_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Duet 3 10IGL5"), }, .callback = efifb_swap_width_height, }, -- cgit v1.2.3 From 00907da2126ed785451b2a2f0fef282246dad104 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Mon, 18 May 2026 12:16:40 -0700 Subject: drm/xe/multi_queue: Fix secondary queue error case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If xe_lrc_create() fails, the secondary queue added to the multi-queue group list is not removed before freeing the queue. Fix error path handling for secondary queues by removing it from the multi-queue group list at the right place. Reported-by: Sebastian Österlund Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/work_items/7979 Fixes: d716a5088c88 ("drm/xe/multi_queue: Handle tearing down of a multi queue") Cc: stable@vger.kernel.org # v7.0+ Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Auld Link: https://patch.msgid.link/20260518191639.320890-2-niranjana.vishwanathapura@intel.com (cherry picked from commit d2d23c12789cf69eddc35b8d38cd8eaabd0168f1) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_submit.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 10556156eaad..912182dc7704 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1673,6 +1673,14 @@ static void guc_exec_queue_fini(struct xe_exec_queue *q) struct xe_guc_exec_queue *ge = q->guc; struct xe_guc *guc = exec_queue_to_guc(q); + if (xe_exec_queue_is_multi_queue_secondary(q)) { + struct xe_exec_queue_group *group = q->multi_queue.group; + + mutex_lock(&group->list_lock); + list_del(&q->multi_queue.link); + mutex_unlock(&group->list_lock); + } + release_guc_id(guc, q); xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); @@ -1694,14 +1702,6 @@ static void __guc_exec_queue_destroy_async(struct work_struct *w) guard(xe_pm_runtime)(guc_to_xe(guc)); trace_xe_exec_queue_destroy(q); - if (xe_exec_queue_is_multi_queue_secondary(q)) { - struct xe_exec_queue_group *group = q->multi_queue.group; - - mutex_lock(&group->list_lock); - list_del(&q->multi_queue.link); - mutex_unlock(&group->list_lock); - } - /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&ge->sched.base.work_tdr); -- cgit v1.2.3 From 4d8690dace005a38e6dbde9ecce2da3ad85c7c41 Mon Sep 17 00:00:00 2001 From: Henrique Carvalho Date: Thu, 14 May 2026 20:18:25 -0300 Subject: smb: client: protect tc_count increment in smb2_find_smb_sess_tcon_unlocked() Commit 96c4af418586 ("cifs: Fix locking usage for tcon fields") refactored cifs code to change cifs_tcp_ses_lock for tc_lock around tc_count changes. There was missing lock around tc_count increment inside smb2_find_smb_sess_tcon_unlocked(). Cc: stable@vger.kernel.org Fixes: 96c4af418586 ("cifs: Fix locking usage for tcon fields") Reviewed-by: Shyam Prasad N Signed-off-by: Henrique Carvalho Signed-off-by: Steve French --- fs/smb/client/smb2transport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index e8eeff9e50d6..1143ee52470a 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -169,7 +169,9 @@ smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32 tid) list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid != tid) continue; + spin_lock(&tcon->tc_lock); ++tcon->tc_count; + spin_unlock(&tcon->tc_lock); trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, netfs_trace_tcon_ref_get_find_sess_tcon); return tcon; -- cgit v1.2.3 From 3da1fdf4efbc490041eb4f836bf596201203f8f2 Mon Sep 17 00:00:00 2001 From: Asim Viladi Oglu Manizada Date: Sat, 16 May 2026 21:15:39 +0000 Subject: smb: client: reject userspace cifs.spnego descriptions cifs.spnego key descriptions contain authority-bearing fields such as pid, uid, creduid, and upcall_target that cifs.upcall treats as kernel-originating inputs. However, userspace can also create keys of this type through request_key(2) or add_key(2), allowing those fields to be supplied without CIFS origin. Only accept cifs.spnego descriptions while CIFS is using its private spnego_cred to request the key. Fixes: f1d662a7d5e5 ("[CIFS] Add upcall files for cifs to use spnego/kerberos") Assisted-by: avom-custom-harness:gpt-5.5-qwen3.6-mod-mix Reviewed-by: David Howells Signed-off-by: Asim Viladi Oglu Manizada Signed-off-by: Steve French --- fs/smb/client/cifs_spnego.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index 3a41bbada04c..44c407275680 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -40,12 +41,27 @@ cifs_spnego_key_destroy(struct key *key) kfree(key->payload.data[0]); } +static int +cifs_spnego_key_vet_description(const char *description) +{ + /* + * cifs.spnego descriptions are authority-bearing inputs to cifs.upcall. + * They are only valid when produced by CIFS while using the private + * spnego_cred installed below. Do not let userspace create this type + * of key through request_key(2)/add_key(2), since the helper treats + * pid/uid/creduid/upcall_target as kernel-originating fields. + */ + if (current_cred() != spnego_cred) + return -EPERM; + return 0; +} /* * keytype for CIFS spnego keys */ struct key_type cifs_spnego_key_type = { .name = "cifs.spnego", + .vet_description = cifs_spnego_key_vet_description, .instantiate = cifs_spnego_key_instantiate, .destroy = cifs_spnego_key_destroy, .describe = user_describe, -- cgit v1.2.3 From 457b046b7dfc0fd5b3437766a4ec43d16d02482c Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Mon, 18 May 2026 08:57:05 -0700 Subject: accel/amdxdna: Remove mmap and export support for ubuf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ubuf pages should not be mmaped or exported. Remove the ubuf mmap callback and return -EOPNOTSUPP when exporting ubuf objects. ubuf vmap is also removed for there is not a real use case yet. Fixes: bd72d4acda10 ("accel/amdxdna: Support user space allocated buffer") Reviewed-by: Christian König Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260518155706.937461-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/amdxdna_gem.c | 9 ++++++- drivers/accel/amdxdna/amdxdna_gem.h | 2 ++ drivers/accel/amdxdna/amdxdna_ubuf.c | 50 ------------------------------------ 3 files changed, 10 insertions(+), 51 deletions(-) diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c index 238ee244d4a6..6e367ddb9e1b 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.c +++ b/drivers/accel/amdxdna/amdxdna_gem.c @@ -490,6 +490,9 @@ static struct dma_buf *amdxdna_gem_prime_export(struct drm_gem_object *gobj, int struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + if (abo->private_buffer) + return ERR_PTR(-EOPNOTSUPP); + if (abo->dma_buf) { get_dma_buf(abo->dma_buf); return abo->dma_buf; @@ -685,6 +688,7 @@ amdxdna_gem_create_ubuf_object(struct drm_device *dev, struct amdxdna_drm_create { struct amdxdna_dev *xdna = to_xdna_dev(dev); struct amdxdna_drm_va_tbl va_tbl; + struct amdxdna_gem_obj *abo; struct drm_gem_object *gobj; struct dma_buf *dma_buf; @@ -711,7 +715,10 @@ amdxdna_gem_create_ubuf_object(struct drm_device *dev, struct amdxdna_drm_create dma_buf_put(dma_buf); - return to_xdna_obj(gobj); + abo = to_xdna_obj(gobj); + abo->private_buffer = true; + + return abo; } struct drm_gem_object * diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h index 4fc48a1189d2..957305ccb485 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.h +++ b/drivers/accel/amdxdna/amdxdna_gem.h @@ -54,6 +54,8 @@ struct amdxdna_gem_obj { /* True, if BO is managed by XRT, not application */ bool internal; + /* True, if BO is not exportable */ + bool private_buffer; }; #define to_gobj(obj) (&(obj)->base.base) diff --git a/drivers/accel/amdxdna/amdxdna_ubuf.c b/drivers/accel/amdxdna/amdxdna_ubuf.c index fb999aa25318..85390e3cc9f9 100644 --- a/drivers/accel/amdxdna/amdxdna_ubuf.c +++ b/drivers/accel/amdxdna/amdxdna_ubuf.c @@ -69,60 +69,10 @@ static void amdxdna_ubuf_release(struct dma_buf *dbuf) kfree(ubuf); } -static vm_fault_t amdxdna_ubuf_vm_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - struct amdxdna_ubuf_priv *ubuf; - unsigned long pfn; - pgoff_t pgoff; - - ubuf = vma->vm_private_data; - pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT; - - pfn = page_to_pfn(ubuf->pages[pgoff]); - return vmf_insert_pfn(vma, vmf->address, pfn); -} - -static const struct vm_operations_struct amdxdna_ubuf_vm_ops = { - .fault = amdxdna_ubuf_vm_fault, -}; - -static int amdxdna_ubuf_mmap(struct dma_buf *dbuf, struct vm_area_struct *vma) -{ - struct amdxdna_ubuf_priv *ubuf = dbuf->priv; - - vma->vm_ops = &amdxdna_ubuf_vm_ops; - vma->vm_private_data = ubuf; - vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); - - return 0; -} - -static int amdxdna_ubuf_vmap(struct dma_buf *dbuf, struct iosys_map *map) -{ - struct amdxdna_ubuf_priv *ubuf = dbuf->priv; - void *kva; - - kva = vmap(ubuf->pages, ubuf->nr_pages, VM_MAP, PAGE_KERNEL); - if (!kva) - return -EINVAL; - - iosys_map_set_vaddr(map, kva); - return 0; -} - -static void amdxdna_ubuf_vunmap(struct dma_buf *dbuf, struct iosys_map *map) -{ - vunmap(map->vaddr); -} - static const struct dma_buf_ops amdxdna_ubuf_dmabuf_ops = { .map_dma_buf = amdxdna_ubuf_map, .unmap_dma_buf = amdxdna_ubuf_unmap, .release = amdxdna_ubuf_release, - .mmap = amdxdna_ubuf_mmap, - .vmap = amdxdna_ubuf_vmap, - .vunmap = amdxdna_ubuf_vunmap, }; struct dma_buf *amdxdna_get_ubuf(struct drm_device *dev, -- cgit v1.2.3 From eb359cc314365f50272883805adfe96b1e4ecefc Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 8 May 2026 12:21:20 +0530 Subject: drm/amdgpu/userq: use drm_exec in amdgpu_userq_fence_read_wptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To access the bo from vm mapping first lock the root bo and then the object bo of the mapping to make sure both locks are taken safely. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 3aab50410653fe7eb35eb6f9c2b27e3549ab09e6) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 55 ++++++++++++------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 53a8944bab05..a41fb72dba94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -370,51 +370,48 @@ static int amdgpu_userq_fence_read_wptr(struct amdgpu_device *adev, { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo *bo; + struct drm_exec exec; u64 addr, *ptr; - int r; - - r = amdgpu_bo_reserve(queue->vm->root.bo, false); - if (r) - return r; + int ret; addr = queue->userq_prop->wptr_gpu_addr; addr &= AMDGPU_GMC_HOLE_MASK; - mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT); - if (!mapping) { - amdgpu_bo_unreserve(queue->vm->root.bo); - DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n"); - return -EINVAL; - } + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(queue->vm, &exec, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto lock_error; - bo = amdgpu_bo_ref(mapping->bo_va->base.bo); - amdgpu_bo_unreserve(queue->vm->root.bo); - r = amdgpu_bo_reserve(bo, true); - if (r) { - amdgpu_bo_unref(&bo); - DRM_ERROR("Failed to reserve userqueue wptr bo"); - return r; + mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT); + if (!mapping) { + ret = -EINVAL; + goto lock_error; + } + + ret = drm_exec_lock_obj(&exec, &mapping->bo_va->base.bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto lock_error; } - r = amdgpu_bo_kmap(bo, (void **)&ptr); - if (r) { + bo = mapping->bo_va->base.bo; + ret = amdgpu_bo_kmap(bo, (void **)&ptr); + if (ret) { DRM_ERROR("Failed mapping the userqueue wptr bo"); - goto map_error; + goto lock_error; } *wptr = le64_to_cpu(*ptr); amdgpu_bo_kunmap(bo); - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - + drm_exec_fini(&exec); return 0; -map_error: - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - - return r; +lock_error: + drm_exec_fini(&exec); + return ret; } static void -- cgit v1.2.3 From be045c5c8305fca1214ebdd4b8433ac80635339b Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 8 May 2026 15:58:09 +0530 Subject: drm/amdgpu/userq: pin mqd and fw object bo to avoid eviction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mqd and fw objects are queue core objects which should remain valid and never be unmapped and evicted for user queues to work properly. During eviction if these buffers are evicted the hw continue to use the invalid addresses and caused page faults and system hung. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit a3bbf32a336939a1d21b9561f8e53333b684b7ef) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 70d74f04d2dd..8841955927bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -504,16 +504,20 @@ int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, goto free_obj; } + r = amdgpu_bo_pin(userq_obj->obj, AMDGPU_GEM_DOMAIN_GTT); + if (r) + goto unresv; + r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo); if (r) { drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r); - goto unresv; + goto unpin_bo; } r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr); if (r) { drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r); - goto unresv; + goto unpin_bo; } userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj); @@ -521,11 +525,13 @@ int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, memset(userq_obj->cpu_ptr, 0, size); return 0; +unpin_bo: + amdgpu_bo_unpin(userq_obj->obj); unresv: amdgpu_bo_unreserve(userq_obj->obj); - free_obj: amdgpu_bo_unref(&userq_obj->obj); + return r; } @@ -533,6 +539,7 @@ void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userq_obj *userq_obj) { amdgpu_bo_kunmap(userq_obj->obj); + amdgpu_bo_unpin(userq_obj->obj); amdgpu_bo_unref(&userq_obj->obj); } -- cgit v1.2.3 From c8ed2de0f2ee842d108ef96c125e931ea82b453c Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 21 Apr 2026 12:39:54 +0200 Subject: drm/amdgpu: rework userq reset work handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is illegal to schedule reset work from another reset work! Fix this by scheduling the userq reset work directly on the work queue of the reset domain. Not fully tested, I leave that to the IGT test cases. Signed-off-by: Christian König Reviewed-by: Prike Liang Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher (cherry picked from commit fd9200ccefab94f27877d1943761d6b0ccbd89c8) --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 84 ++++++++++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 16 +++++- 4 files changed, 60 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8bc591deb546..fd50da4c7b18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1190,7 +1190,6 @@ struct amdgpu_device { bool apu_prefer_gtt; bool userq_halt_for_enforce_isolation; - struct work_struct userq_reset_work; struct amdgpu_uid *uid_info; struct amdgpu_uma_carveout_info uma_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 66ca043658ff..1424c98d2006 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3787,7 +3787,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, } INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); - INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work); amdgpu_coredump_init(adev); @@ -5478,7 +5477,7 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) cancel_work(&adev->reset_work); #endif - cancel_work(&adev->userq_reset_work); + amdgpu_userq_mgr_cancel_reset_work(adev); if (adev->kfd.dev) cancel_work(&adev->kfd.reset_work); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 8841955927bb..aa6d4c71fba6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -82,19 +82,11 @@ static bool amdgpu_userq_is_reset_type_supported(struct amdgpu_device *adev, return false; } -static void amdgpu_userq_gpu_reset(struct amdgpu_device *adev) -{ - if (amdgpu_device_should_recover_gpu(adev)) { - amdgpu_reset_domain_schedule(adev->reset_domain, - &adev->userq_reset_work); - /* Wait for the reset job to complete */ - flush_work(&adev->userq_reset_work); - } -} - -static int -amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) +static void amdgpu_userq_mgr_reset_work(struct work_struct *work) { + struct amdgpu_userq_mgr *uq_mgr = + container_of(work, struct amdgpu_userq_mgr, + reset_work); struct amdgpu_device *adev = uq_mgr->adev; const int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE, @@ -103,12 +95,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) }; const int num_queue_types = ARRAY_SIZE(queue_types); bool gpu_reset = false; - int r = 0; - int i; + int i, r; if (unlikely(adev->debug_disable_gpu_ring_reset)) { dev_err(adev->dev, "userq reset disabled by debug mask\n"); - return 0; + return; } /* @@ -116,7 +107,7 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) * skip all reset detection logic */ if (!amdgpu_gpu_recovery) - return 0; + return; /* * Iterate through all queue types to detect and reset problematic queues @@ -141,10 +132,19 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) } } - if (gpu_reset) - amdgpu_userq_gpu_reset(adev); + if (gpu_reset) { + struct amdgpu_reset_context reset_context; - return r; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + reset_context.src = AMDGPU_RESET_SRC_USERQ; + set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/ + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); + } } static void amdgpu_userq_hang_detect_work(struct work_struct *work) @@ -153,7 +153,11 @@ static void amdgpu_userq_hang_detect_work(struct work_struct *work) container_of(work, struct amdgpu_usermode_queue, hang_detect_work.work); - amdgpu_userq_detect_and_reset_queues(queue->userq_mgr); + /* + * Don't schedule the work here! Scheduling or queue work from one reset + * handler to another is illegal if you don't take extra precautions! + */ + amdgpu_userq_mgr_reset_work(&queue->userq_mgr->reset_work); } /* @@ -182,8 +186,8 @@ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue) break; } - schedule_delayed_work(&queue->hang_detect_work, - msecs_to_jiffies(timeout_ms)); + queue_delayed_work(adev->reset_domain->wq, &queue->hang_detect_work, + msecs_to_jiffies(timeout_ms)); } void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell) @@ -1259,28 +1263,13 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) if (ret) { drm_file_err(uq_mgr->file, "Couldn't unmap all the queues, eviction failed ret=%d\n", ret); - amdgpu_userq_detect_and_reset_queues(uq_mgr); + amdgpu_reset_domain_schedule(uq_mgr->adev->reset_domain, + &uq_mgr->reset_work); + flush_work(&uq_mgr->reset_work); } return ret; } -void amdgpu_userq_reset_work(struct work_struct *work) -{ - struct amdgpu_device *adev = container_of(work, struct amdgpu_device, - userq_reset_work); - struct amdgpu_reset_context reset_context; - - memset(&reset_context, 0, sizeof(reset_context)); - - reset_context.method = AMD_RESET_METHOD_NONE; - reset_context.reset_req_dev = adev; - reset_context.src = AMDGPU_RESET_SRC_USERQ; - set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/ - - amdgpu_device_gpu_recover(adev, NULL, &reset_context); -} - static void amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) { @@ -1314,9 +1303,24 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f userq_mgr->file = file_priv; INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker); + INIT_WORK(&userq_mgr->reset_work, amdgpu_userq_mgr_reset_work); return 0; } +void amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev) +{ + struct xarray *xa = &adev->userq_doorbell_xa; + struct amdgpu_usermode_queue *queue; + unsigned long flags, queue_id; + + xa_lock_irqsave(xa, flags); + xa_for_each(xa, queue_id, queue) { + cancel_delayed_work(&queue->hang_detect_work); + cancel_work(&queue->userq_mgr->reset_work); + } + xa_unlock_irqrestore(xa, flags); +} + void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr) { cancel_delayed_work_sync(&userq_mgr->resume_work); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 85f460e7c31b..49b33e2d6932 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -84,7 +84,13 @@ struct amdgpu_usermode_queue { u32 xcp_id; int priority; struct dentry *debugfs_queue; - struct delayed_work hang_detect_work; + + /** + * @hang_detect_work: + * + * Delayed work which runs when userq_fences time out. + */ + struct delayed_work hang_detect_work; struct kref refcount; struct list_head userq_va_list; @@ -116,6 +122,13 @@ struct amdgpu_userq_mgr { struct amdgpu_device *adev; struct delayed_work resume_work; struct drm_file *file; + + /** + * @reset_work: + * + * Reset work which is used when eviction fails. + */ + struct work_struct reset_work; atomic_t userq_count[AMDGPU_RING_TYPE_MAX]; }; @@ -134,6 +147,7 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, struct amdgpu_device *adev); +void amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev); void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr); void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr); -- cgit v1.2.3 From 291df3dc7d10855c26841b8f42843bc996d097cf Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 12 May 2026 14:52:40 +0530 Subject: drm/amdgpu/userq: cancel reset work while tear down in progress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While tear down of a userq_mgr is happening when all the queues are free we should cancel any reset work if pending before exiting. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 160164609f71f774c4f661227a9b7a370a86b112) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index aa6d4c71fba6..49e9c75d3151 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -1346,6 +1346,14 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) } xa_destroy(&userq_mgr->userq_xa); + + /* + * Drain any in-flight reset_work. By this point all queues are freed + * and userq_count is 0, so if reset_work starts now it exits early. + * We still need to wait in case it was already executing gpu_recover. + */ + cancel_work_sync(&userq_mgr->reset_work); + mutex_destroy(&userq_mgr->userq_mutex); } -- cgit v1.2.3 From 0be97436bf249503b2c5898e7459744962b70f15 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 12 May 2026 16:00:18 +0530 Subject: drm/amdgpu/userq: update the vm task info during signal ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pagefaults does not have process information correctly populated as vm->task is not set during vm_init but should be updated while real submission. So setting that up during signal_ioctl to get the correct submission process details. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit a9b14d88b4d83e21ab965f23d1fb7b07b87e0517) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 49e9c75d3151..f79e54e0a04a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -802,6 +802,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto clean_fence_driver; } + /* Update VM owner at userq submit-time for page-fault attribution. */ + amdgpu_vm_set_task_info(&fpriv->vm); + amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); /* don't map the queue if scheduling is halted */ -- cgit v1.2.3 From b6074630a461b1322a814988779005cbc43612ea Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Fri, 1 May 2026 12:35:48 +0800 Subject: drm/amdgpu/vpe: Force collaborate sync after TRAP VPE1 could possibly hang and fail to power off at the end of commands in collaboration mode. This workaround adds a COLLAB_SYNC after TRAP to force instances synchronized to avoid VPE1 fail to power off. Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Alan liu Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/5171 Signed-off-by: Alex Deucher (cherry picked from commit a8b749c5c5afb7e5daa2bfb95d958fb3c6b8f055) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index fd881388d612..f27f917e3cdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -562,6 +562,11 @@ static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr, amdgpu_ring_write(ring, 0); } + /* WA: Force sync after TRAP to avoid VPE1 fail to power off */ + if (ring->adev->vpe.collaborate_mode) { + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COLLAB_SYNC, 0)); + amdgpu_ring_write(ring, 0xabcd); + } } static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -968,7 +973,7 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = { .emit_frame_size = 5 + /* vpe_ring_init_cond_exec */ 6 + /* vpe_ring_emit_pipeline_sync */ - 10 + 10 + 10 + /* vpe_ring_emit_fence */ + 12 + 12 + 12 + /* vpe_ring_emit_fence */ /* vpe_ring_emit_vm_flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6, -- cgit v1.2.3 From 2c34c7b88ba4f04b3a862fac879bfd8567f06541 Mon Sep 17 00:00:00 2001 From: Amir Shetaia Date: Thu, 7 May 2026 13:24:55 -0400 Subject: drm/amdgpu: reject non-user addresses early in GEM_USERPTR ioctl amdgpu_gem_userptr_ioctl() currently accepts any value of args->addr and only discovers an out-of-range pointer much later, inside amdgpu_gem_object_create() and the HMM mirror registration path. Userspace can drive that path with kernel-side virtual addresses; the get_user_pages() layer rejects them, but only after the driver has already allocated a GEM object and started wiring up notifier state that then has to be torn down on failure. Add an access_ok() guard at the top of the ioctl, right after the existing page-alignment check and before flag validation, so any address that does not lie within the calling task's user address range is rejected with -EFAULT before any allocation occurs. No legitimate ROCm/HSA userspace passes kernel-mode pointers through this interface, so this is defense-in-depth rather than a behaviour change for valid callers; -EFAULT matches the convention already used by other uaccess-style rejections in the kernel. Also add an explicit #include ; access_ok() is otherwise only available transitively through other headers in this translation unit. Signed-off-by: Amir Shetaia Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 7a076df36397d780d7e4fb595287b4980451a7f5) --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 5376035d32fe..23f2304ee7e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -508,6 +509,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (offset_in_page(args->addr | args->size)) return -EINVAL; + if (!access_ok((void __user *)(uintptr_t)args->addr, args->size)) + return -EFAULT; + /* reject unknown flag values */ if (args->flags & ~(AMDGPU_GEM_USERPTR_READONLY | AMDGPU_GEM_USERPTR_ANONONLY | AMDGPU_GEM_USERPTR_VALIDATE | -- cgit v1.2.3 From d892a6eca7a847dd57ede6e55d7494a01b43fa5f Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 12 May 2026 22:29:48 +0530 Subject: drm/amdgpu: remove va cursors for all mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit va_cursor struct needs to be cleaned even if the mapping has been removed already. Also simplify it by make it a void function as return value check isn't needed as its called during tear down. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 4d35a45c9b4c1ac5b6e3219f83c3db706b675fa2) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index f79e54e0a04a..6111f6858d2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -303,13 +303,14 @@ static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue) static void amdgpu_userq_buffer_va_list_del(struct amdgpu_bo_va_mapping *mapping, struct amdgpu_userq_va_cursor *va_cursor) { - atomic_set(&mapping->bo_va->userq_va_mapped, 0); + if (mapping) + atomic_set(&mapping->bo_va->userq_va_mapped, 0); list_del(&va_cursor->list); kfree(va_cursor); } -static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, - struct amdgpu_usermode_queue *queue) +static void amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *queue) { struct amdgpu_userq_va_cursor *va_cursor, *tmp; struct amdgpu_bo_va_mapping *mapping; @@ -319,15 +320,11 @@ static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) { mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, va_cursor->gpu_addr); - if (!mapping) { - return -EINVAL; - } - dev_dbg(adev->dev, "delete the userq:%p va:%llx\n", - queue, va_cursor->gpu_addr); + if (mapping) + dev_dbg(adev->dev, "delete the userq:%p va:%llx\n", + queue, va_cursor->gpu_addr); amdgpu_userq_buffer_va_list_del(mapping, va_cursor); } - - return 0; } static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue) -- cgit v1.2.3 From 9baf02bf88a8228248c546a3be7bbe3bfe74b7d5 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 12 May 2026 20:29:52 +0530 Subject: drm/amdgpu: Fix discovery offset check under VF Discovery table may be kept at offset 0 by host driver. Remove the validation check. Fixes: 01bdc7e219c4 ("drm/amdgpu: New interface to get IP discovery binary v3") Signed-off-by: Lijo Lazar Reviewed-by: Ellen Pan Signed-off-by: Alex Deucher (cherry picked from commit d3f5bbd007133c64a20e81ef290a93e46c75df40) --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 8d99bfaa498f..80efeca0ab73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -304,7 +304,7 @@ static int amdgpu_discovery_get_tmr_info(struct amdgpu_device *adev, adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset; adev->discovery.size = adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb << 10; - if (!adev->discovery.offset || !adev->discovery.size) + if (!adev->discovery.size) return -EINVAL; } else { goto out; -- cgit v1.2.3 From d796558def777f9a9cc274861e06b8b61851b409 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Sat, 9 May 2026 15:20:39 +0800 Subject: drm/amd/pm: fix memleak of dpm_policies on smu v15 In smu_v15_0_fini_smc_tables, dpm_policies was not freed or NULLed, causing a memory leak. Add kfree() and NULL assignment to properly release memory and avoid dangling pointers. Fixes: 2beedc3a92b7 ("drm/amd/pm: Add initial support for smu v15_0_8"); Signed-off-by: Yang Wang Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher (cherry picked from commit 014f329074f688b9b49383e8b70e79e9ef99359e) --- drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c index c3cb36813806..940b43105817 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c @@ -435,10 +435,12 @@ int smu_v15_0_fini_smc_tables(struct smu_context *smu) smu_table->watermarks_table = NULL; smu_table->metrics_time = 0; + kfree(smu_dpm->dpm_policies); kfree(smu_dpm->dpm_context); kfree(smu_dpm->golden_dpm_context); kfree(smu_dpm->dpm_current_power_state); kfree(smu_dpm->dpm_request_power_state); + smu_dpm->dpm_policies = NULL; smu_dpm->dpm_context = NULL; smu_dpm->golden_dpm_context = NULL; smu_dpm->dpm_context_size = 0; -- cgit v1.2.3 From 48b13bfbdf94e683cc5b8c5cb35b5af4221e657f Mon Sep 17 00:00:00 2001 From: Sunday Clement Date: Wed, 13 May 2026 11:22:19 -0400 Subject: drm/amdkfd: Fix OOB memory exposure in get_wave_state() The get_wave_state() function for v9 trusts cp_hqd_cntl_stack_size and cp_hqd_cntl_stack_offset values read directly from the MQD, which are written by GPU microcode and fully attacker-controlled on the CRIU-restore path (via AMDKFD_IOC_RESTORE_PROCESS with H3). this leads to an unbounded copy_to_user() that can leak adjacent GTT/kernel memory. If offset > size, integer underflow produces a ~4 GiB read length, if size is set to 1 MiB against a 4 KiB allocation, we leak 1 MiB of adjacent kernel memory (other queues' MQDs, ring buffers, KASLR pointers). Fix by clamping both cp_hqd_cntl_stack_size to the actual allocated buffer size (q->ctl_stack_size) and cp_hqd_cntl_stack_offset to the clamped size before performing arithmetic and copy_to_user(). This ensures we never read beyond the allocated kernel BO regardless of attacker-supplied MQD field values. Signed-off-by: Sunday Clement Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 7ef144458f48d5589e36f1b3d83e83db2e5c5ba5) --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index e8f97de9d6e4..f6d9d81003dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -364,11 +364,15 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, { struct v9_mqd *m; struct kfd_context_save_area_header header; + u32 cntl_stack_size; + u32 cntl_stack_offset; /* Control stack is located one page after MQD. */ void *mqd_ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE); m = get_mqd(mqd); + cntl_stack_size = min_t(u32, m->cp_hqd_cntl_stack_size, q->ctl_stack_size); + cntl_stack_offset = min_t(u32, m->cp_hqd_cntl_stack_offset, cntl_stack_size); *ctl_stack_used_size = m->cp_hqd_cntl_stack_size - m->cp_hqd_cntl_stack_offset; @@ -384,9 +388,10 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state))) return -EFAULT; - if (copy_to_user(ctl_stack + m->cp_hqd_cntl_stack_offset, - mqd_ctl_stack + m->cp_hqd_cntl_stack_offset, - *ctl_stack_used_size)) + *ctl_stack_used_size = cntl_stack_size - cntl_stack_offset; + + if (copy_to_user(ctl_stack + cntl_stack_offset, mqd_ctl_stack + cntl_stack_offset, + *ctl_stack_used_size)) return -EFAULT; return 0; -- cgit v1.2.3 From 4d798ea0712fddbd35b439cef32b8ac735eb76f9 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Wed, 13 May 2026 22:04:08 +0200 Subject: drm/amdgpu: Align amdgpu_gtt_mgr entries to TLB size on Tahiti (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TLB is organized in groups of 8 entries, each one is 4K. On Tahiti, the HW requires these GART entries to be 32K-aligned. This fixes a VCE 1 firmware validation failure that can happen after suspend/resume since we use amdgpu_gtt_mgr for VCE 1. v2: - Change variable declaration order - Add comment about "V bit HW bug" Fixes: 698fa62f56aa ("drm/amdgpu: Add helper to alloc GART entries") Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 530411b465ef0b2c0cc18c2e3d7e38422b1117d1) --- drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 620fddde4c4d..a5d26b943f6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -199,11 +199,18 @@ int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr, enum drm_mm_insert_mode mode) { struct amdgpu_device *adev = container_of(mgr, typeof(*adev), mman.gtt_mgr); + u32 alignment = 0; int r; + /* Align to TLB L2 cache entry size to work around "V bit HW bug" */ + if (adev->asic_type == CHIP_TAHITI) { + alignment = 32 * 1024 / AMDGPU_GPU_PAGE_SIZE; + num_pages = ALIGN(num_pages, alignment); + } + spin_lock(&mgr->lock); r = drm_mm_insert_node_in_range(&mgr->mm, mm_node, num_pages, - 0, GART_ENTRY_WITHOUT_BO_COLOR, 0, + alignment, GART_ENTRY_WITHOUT_BO_COLOR, 0, adev->gmc.gart_size >> PAGE_SHIFT, mode); spin_unlock(&mgr->lock); -- cgit v1.2.3 From 9f907adb66d8369dd45412794a04845011503fa8 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Wed, 13 May 2026 22:04:09 +0200 Subject: drm/amdgpu/vce1: Check that the GPU address is < 128 MiB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When ensuring the low 32-bit address, make sure it is less than 128 MiB, otherwise the VCE seems to fail to initialize. This seems to be an undocumented limitation of the firmware validation mechanism. Note that in case of VCE1 the BAR address is zero and we can't change it also due to the firmware validator. When programming the mmVCE_VCPU_CACHE_OFFSETn registers, don't AND them with a mask. This is incorrect because the register mask is actually 0x0fffffff and useless because we already ensure the addresses are below the limit. Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit e729ae5f3ac73c861c062080ac8c3d666c972404) --- drivers/gpu/drm/amd/amdgpu/vce_v1_0.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c index 5b7b46d242c6..edabec442cb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c @@ -313,17 +313,17 @@ static int vce_v1_0_mc_resume(struct amdgpu_device *adev) offset = adev->vce.gpu_addr + AMDGPU_VCE_FIRMWARE_OFFSET; size = VCE_V1_0_FW_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); + WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset); WREG32(mmVCE_VCPU_CACHE_SIZE0, size); offset += size; size = VCE_V1_0_STACK_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff); + WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset); WREG32(mmVCE_VCPU_CACHE_SIZE1, size); offset += size; size = VCE_V1_0_DATA_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff); + WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset); WREG32(mmVCE_VCPU_CACHE_SIZE2, size); WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100); @@ -527,11 +527,15 @@ static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block) * To accomodate that, we put GART to the LOW address range * and reserve some GART pages where we map the VCPU BO, * so that it gets a 32-bit address. + * + * The BAR address is zero and we can't change it + * due to the firmware validation mechanism. + * It seems that it fails to initialize if the address is >= 128 MiB. */ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev) { u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo); - u64 max_vcpu_bo_addr = 0xffffffff - bo_size; + u64 max_vcpu_bo_addr = 0x07ffffff - bo_size; u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE; u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo); u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID; -- cgit v1.2.3 From d993851b6db9abf0840e8b100e33df232bcc879e Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Wed, 13 May 2026 22:04:10 +0200 Subject: drm/amdgpu/vce1: Remove superfluous address check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The same thing is already checked a few lines above. Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit c1dc555e760dbfc4a4710f7270f525a03d433af8) --- drivers/gpu/drm/amd/amdgpu/vce_v1_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c index edabec442cb6..884f24be3685 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c @@ -557,8 +557,6 @@ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev) amdgpu_gart_map_vram_range(adev, pa, adev->vce.gart_node.start, num_pages, flags, adev->gart.ptr); adev->vce.gpu_addr = adev->gmc.gart_start + vce_gart_start_offs; - if (adev->vce.gpu_addr > max_vcpu_bo_addr) - return -EINVAL; return 0; } -- cgit v1.2.3 From 12b60cf345e84aa7546dd225b3ce3380c9ab97f5 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Wed, 13 May 2026 22:04:11 +0200 Subject: drm/amdgpu/vce1: Check if VRAM address is lower than GART. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, I had assumed this was not possible so it was OK to not handle it, but now we got a report from a user who has a board that is configured this way. When the VCPU BO is already located in a low 32-bit address in VRAM (eg. when VRAM is mapped to the low address space), don't do the workaround. Fixes: 71aec08f80e7 ("amdgpu/vce: use amdgpu_gtt_mgr_alloc_entries") Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit f370ec9b164698a9ca1a7b59bfbea07f70df769d) --- drivers/gpu/drm/amd/amdgpu/vce_v1_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c index 884f24be3685..a49f11be74b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c @@ -542,6 +542,9 @@ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev) u64 vce_gart_start_offs; int r; + if (adev->gmc.vram_start < adev->gmc.gart_start) + return amdgpu_bo_gpu_offset(adev->vce.vcpu_bo) <= max_vcpu_bo_addr ? 0 : -EINVAL; + r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, &adev->vce.gart_node, num_pages, DRM_MM_INSERT_LOW); -- cgit v1.2.3 From 3ebcab11320588fd9bb17a26027dddce4419ae43 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Wed, 13 May 2026 22:04:12 +0200 Subject: drm/amdgpu/vce1: Don't repeat GTT MGR node allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only allocate entries from the GTT manager when the VCE GTT node is not allocated yet. This prevents the possibility of allocating them multiple times, which causes issues during GPU reset and suspend/resume. Fixes: 71aec08f80e7 ("amdgpu/vce: use amdgpu_gtt_mgr_alloc_entries") Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 8d2a20c1721cb17e22821e1b4ecbb02d475d91c5) --- drivers/gpu/drm/amd/amdgpu/vce_v1_0.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c index a49f11be74b2..92c3cf3fce4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c @@ -545,11 +545,13 @@ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev) if (adev->gmc.vram_start < adev->gmc.gart_start) return amdgpu_bo_gpu_offset(adev->vce.vcpu_bo) <= max_vcpu_bo_addr ? 0 : -EINVAL; - r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, - &adev->vce.gart_node, num_pages, - DRM_MM_INSERT_LOW); - if (r) - return r; + if (!drm_mm_node_allocated(&adev->vce.gart_node)) { + r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, + &adev->vce.gart_node, num_pages, + DRM_MM_INSERT_LOW); + if (r) + return r; + } vce_gart_start_offs = amdgpu_gtt_node_to_byte_offset(&adev->vce.gart_node); -- cgit v1.2.3 From 3e5a1d5bb2ff061e64c7992f8e5404dfd4c2d0f3 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Wed, 13 May 2026 22:04:13 +0200 Subject: drm/amdgpu/vce1: Fix VCE 1 firmware size and offsets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VCPU BO contains the actual FW at an offset, but it was not calculated into the VCPU BO size. Subtract this from the FW size to make sure there is no out of bounds access. Make sure the stack and data offsets are aligned to the 32K TLB size. Check that the FW microcode actually fits in the space that is reserved for it. Fixes: d4a640d4b9f3 ("drm/amdgpu/vce1: Implement VCE1 IP block (v2)") Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit c16fe59f622a080fc457a57b3e8f14c780699449) --- drivers/gpu/drm/amd/amdgpu/vce_v1_0.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c index 92c3cf3fce4f..32ee6452f95d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c @@ -42,9 +42,10 @@ #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" +#define VCE_V1_0_ALIGNMENT (32 * 1024) #define VCE_V1_0_FW_SIZE (256 * 1024) #define VCE_V1_0_STACK_SIZE (64 * 1024) -#define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1)) +#define VCE_V1_0_DATA_SIZE (ALIGN(7808 * (AMDGPU_MAX_VCE_HANDLES + 1), VCE_V1_0_ALIGNMENT)) #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev); @@ -189,17 +190,22 @@ static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev) { const struct common_firmware_header *hdr; struct vce_v1_0_fw_signature *sign; - unsigned int ucode_offset; + u32 ucode_offset; + u32 ucode_size; uint32_t chip_id; u32 *cpu_addr; int i; hdr = (const struct common_firmware_header *)adev->vce.fw->data; ucode_offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + ucode_size = hdr->ucode_size_bytes - sizeof(struct vce_v1_0_fw_signature *); cpu_addr = adev->vce.cpu_addr; sign = (void *)adev->vce.fw->data + ucode_offset; + if (ucode_size > VCE_V1_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET) + return -EINVAL; + switch (adev->asic_type) { case CHIP_TAHITI: chip_id = 0x01000014; @@ -231,7 +237,7 @@ static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev) cpu_addr[4] = cpu_to_le32(le32_to_cpu(sign->length) + 64); memset_io(&cpu_addr[5], 0, 44); - memcpy_toio(&cpu_addr[16], &sign[1], hdr->ucode_size_bytes - sizeof(*sign)); + memcpy_toio(&cpu_addr[16], &sign[1], ucode_size); cpu_addr += (le32_to_cpu(sign->length) + 64) / 4; memcpy_toio(&cpu_addr[0], &sign->val[i].sigval[0], 16); @@ -312,17 +318,22 @@ static int vce_v1_0_mc_resume(struct amdgpu_device *adev) WREG32(mmVCE_VCPU_SCRATCH7, AMDGPU_MAX_VCE_HANDLES); offset = adev->vce.gpu_addr + AMDGPU_VCE_FIRMWARE_OFFSET; - size = VCE_V1_0_FW_SIZE; + size = VCE_V1_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET; WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset); WREG32(mmVCE_VCPU_CACHE_SIZE0, size); offset += size; size = VCE_V1_0_STACK_SIZE; + WARN_ON(!IS_ALIGNED(offset, VCE_V1_0_ALIGNMENT)); + WARN_ON(!IS_ALIGNED(size, VCE_V1_0_ALIGNMENT)); WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset); WREG32(mmVCE_VCPU_CACHE_SIZE1, size); offset += size; size = VCE_V1_0_DATA_SIZE; + WARN_ON(!IS_ALIGNED(offset, VCE_V1_0_ALIGNMENT)); + WARN_ON(!IS_ALIGNED(size, VCE_V1_0_ALIGNMENT)); + WARN_ON((offset + size - adev->vce.gpu_addr) > amdgpu_bo_size(adev->vce.vcpu_bo)); WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset); WREG32(mmVCE_VCPU_CACHE_SIZE2, size); -- cgit v1.2.3 From f5a247e0377cd30d658d52ae8a70c82978cc37df Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Wed, 13 May 2026 22:04:14 +0200 Subject: drm/amdgpu/vce1: Stop using amdgpu_vce_resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VCE1 firmware works slightly differently and is already loaded by vce_v1_0_load_fw(). It doesn't actually need to call amdgpu_vce_resume(). Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 33d8951405e2dd81ac61edebc680e2dfb6b4fc9f) --- drivers/gpu/drm/amd/amdgpu/vce_v1_0.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c index 32ee6452f95d..93253db5e2de 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c @@ -178,7 +178,7 @@ static void vce_v1_0_init_cg(struct amdgpu_device *adev) } /** - * vce_v1_0_load_fw_signature - load firmware signature into VCPU BO + * vce_v1_0_load_fw() - load firmware signature into VCPU BO * * @adev: amdgpu_device pointer * @@ -186,7 +186,7 @@ static void vce_v1_0_init_cg(struct amdgpu_device *adev) * This function finds the signature appropriate for the current * ASIC and writes that into the VCPU BO. */ -static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev) +static int vce_v1_0_load_fw(struct amdgpu_device *adev) { const struct common_firmware_header *hdr; struct vce_v1_0_fw_signature *sign; @@ -232,6 +232,8 @@ static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev) return -EINVAL; } + memset_io(&cpu_addr[0], 0, amdgpu_bo_size(adev->vce.vcpu_bo)); + cpu_addr += (256 - 64) / 4; memcpy_toio(&cpu_addr[0], &sign->val[i].nonce[0], 16); cpu_addr[4] = cpu_to_le32(le32_to_cpu(sign->length) + 64); @@ -592,10 +594,7 @@ static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - r = amdgpu_vce_resume(adev); - if (r) - return r; - r = vce_v1_0_load_fw_signature(adev); + r = vce_v1_0_load_fw(adev); if (r) return r; r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev); @@ -714,10 +713,7 @@ static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; - r = amdgpu_vce_resume(adev); - if (r) - return r; - r = vce_v1_0_load_fw_signature(adev); + r = vce_v1_0_load_fw(adev); if (r) return r; r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev); -- cgit v1.2.3 From 5dc3d16cd072a3b8595f430b6683d688d1d62f37 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Wed, 13 May 2026 22:04:15 +0200 Subject: drm/amdgpu/vce2: Fix VCE 2 firmware size and offsets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VCPU BO contains the actual FW at an offset, but it was not calculated into the VCPU BO size. Subtract this from the FW size to make sure there is no out of bounds access. Additionally, increase the VCE_V2_0_DATA_SIZE to have extra space after the VCE handles. Also increase the data size used for each VCE handle. The FW needs 23744 bytes, use 24K to be safe. This fixes VM faults when using VCE 2. Cc: John Olender Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/4802 Fixes: e98226221467 ("drm/amdgpu: recalculate VCE firmware BO size") Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit a20d21df625548c1738c0745f753c5d6eb823bc3) --- drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index db149eda6204..3a6fc8604108 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -37,9 +37,14 @@ #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" + +/* Use 24K to be safe. The FW supposedly only requires 23744 bytes. */ +#define VCE_V2_0_DATA_ENTRY_SIZE (24 * 1024) + #define VCE_V2_0_FW_SIZE (256 * 1024) #define VCE_V2_0_STACK_SIZE (64 * 1024) -#define VCE_V2_0_DATA_SIZE (23552 * AMDGPU_MAX_VCE_HANDLES) +#define VCE_V2_0_DATA_SIZE (VCE_V2_0_DATA_ENTRY_SIZE * (AMDGPU_MAX_VCE_HANDLES + 1)) + #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev); @@ -183,7 +188,7 @@ static void vce_v2_0_mc_resume(struct amdgpu_device *adev) WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8)); offset = AMDGPU_VCE_FIRMWARE_OFFSET; - size = VCE_V2_0_FW_SIZE; + size = VCE_V2_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET; WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); WREG32(mmVCE_VCPU_CACHE_SIZE0, size); -- cgit v1.2.3 From 0c61a9732a35b0a96213119c8212349da9cda117 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Wed, 13 May 2026 22:04:16 +0200 Subject: drm/amdgpu/vce3: Fix VCE 3 firmware size and offsets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VCPU BO contains the actual FW at an offset, but it was not calculated into the VCPU BO size. Subtract this from the FW size to make sure there is no out of bounds access. This may fix VM faults when using VCE 3. Cc: John Olender Fixes: e98226221467 ("drm/amdgpu: recalculate VCE firmware BO size") Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 15c369257bd85f47a514744f960c5a51c867716f) --- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 03d79e464f04..c69f7d82060f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -574,7 +574,7 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx) } else WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8)); offset = AMDGPU_VCE_FIRMWARE_OFFSET; - size = VCE_V3_0_FW_SIZE; + size = VCE_V3_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET; WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); WREG32(mmVCE_VCPU_CACHE_SIZE0, size); -- cgit v1.2.3 From a1d4b228e3dc5134c4bd06e55e81dbb604c8cadb Mon Sep 17 00:00:00 2001 From: David Francis Date: Tue, 12 May 2026 15:15:33 -0400 Subject: drm/amdkfd: Check bounds on allocate_doorbell allocated_doorbell has an option to set the doorbell id to a specific value (used by CRIU). This value was not bounds checked. Check to confirm it's less than KFD_MAX_NUM_OF_QUEUES_PER_PROCESS. Signed-off-by: David Francis Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher (cherry picked from commit 1f087bb8cf9e8797633da35c85435e557ef74d06) --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 9185ebe4c079..c9239728afd6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -475,6 +475,9 @@ static int allocate_doorbell(struct qcm_process_device *qpd, } else { /* For CP queues on SOC15 */ if (restore_id) { + if (*restore_id >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) + return -EINVAL; + /* make sure that ID is free */ if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap)) return -EINVAL; -- cgit v1.2.3 From 0978406224d21bd35f9230a25534849ec06bf74c Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Thu, 14 May 2026 12:31:00 +0530 Subject: drm/amdgpu: use atomic operation to achieve lockless serialization In amdgpu_seq64_alloc there is a possibility that two difference cores from two separate NODES can try to and could get the same free slot. So this fixes that race here using atomic test_and_set clear operations. Signed-off-by: Sunil Khatri Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 4d50a14d346141e03a7c3905e496d91e048bc30c) --- drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index a0b479d5fff1..f4be19223588 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -175,11 +175,14 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, { unsigned long bit_pos; - bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem); - if (bit_pos >= adev->seq64.num_sem) - return -ENOSPC; + for (;;) { + bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem); + if (bit_pos >= adev->seq64.num_sem) + return -ENOSPC; - __set_bit(bit_pos, adev->seq64.used); + if (!test_and_set_bit(bit_pos, adev->seq64.used)) + break; + } *va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev); @@ -205,7 +208,7 @@ void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va) bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64); if (bit_pos < adev->seq64.num_sem) - __clear_bit(bit_pos, adev->seq64.used); + clear_bit(bit_pos, adev->seq64.used); } /** -- cgit v1.2.3 From 6dc2c49a705195c89b09b134d0bc4dc5e42d1fea Mon Sep 17 00:00:00 2001 From: David Francis Date: Tue, 12 May 2026 15:18:18 -0400 Subject: drm/amdkfd: Check bounds for allocate_sdma_queue restore_sdma_id allocate_sdma_queue has an option where the sdma queue id can be specified (used by CRIU). We weren't bounds-checking that value. Confirm it's less than the maximum number of queues. Signed-off-by: David Francis Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher (cherry picked from commit bfe9a7545b2a7be1c543f1741e16f2d5ec4116ae) --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c9239728afd6..e0a31e11f0ff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1590,6 +1590,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, } if (restore_sdma_id) { + if (*restore_sdma_id >= get_num_sdma_queues(dqm)) + return -EINVAL; + /* Re-use existing sdma_id */ if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) { dev_err(dev, "SDMA queue already in use\n"); @@ -1616,6 +1619,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, return -ENOMEM; } if (restore_sdma_id) { + if (*restore_sdma_id >= get_num_xgmi_sdma_queues(dqm)) + return -EINVAL; + /* Re-use existing sdma_id */ if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) { dev_err(dev, "SDMA queue already in use\n"); -- cgit v1.2.3 From cd86529ec61474a38c3837fb7823790a7c3f8cce Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Mon, 4 May 2026 11:14:45 -0400 Subject: drm/amd/display: Fix integer overflow in bios_get_image() [Why&How] The bounds check in bios_get_image() computes 'offset + size' using unsigned 32-bit arithmetic before comparing against bios_size. If a VBIOS image contains a near-UINT32_MAX offset the addition wraps to a small value, the comparison passes, and the function returns a wild pointer past the VBIOS mapping. Additionally, the comparison uses '<' (strict), which incorrectly rejects the valid exact-fit case where offset + size == bios_size. Fix both issues by restructuring the check to avoid the addition entirely: first reject if offset alone exceeds bios_size, then check size against the remaining space (bios_size - offset). This eliminates the overflow and correctly permits exact-fit accesses. Assisted-by: GitHub Copilot:claude-opus-4.6 Reviewed-by: Alex Hung Signed-off-by: Harry Wentland Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit d40fb392af659c4a02b560319f226842f6ec1a95) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c index 8d2cf95ae739..e00dc05c2d9d 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c @@ -37,10 +37,13 @@ uint8_t *bios_get_image(struct dc_bios *bp, uint32_t offset, uint32_t size) { - if (bp->bios && offset + size < bp->bios_size) - return bp->bios + offset; - else + if (!bp->bios) return NULL; + + if (offset > bp->bios_size || size > bp->bios_size - offset) + return NULL; + + return bp->bios + offset; } #include "reg_helper.h" -- cgit v1.2.3 From 86d2b20644b11d21fe52c596e6e922b4590a3e3f Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Mon, 4 May 2026 16:14:11 -0400 Subject: drm/amd/display: Validate GPIO pin LUT table size before iterating [Why&How] The GPIO pin table parsers in get_gpio_i2c_info() and bios_parser_get_gpio_pin_info() derive an element count from the VBIOS table_header.structuresize field, then iterate over gpio_pin[] entries. However, GET_IMAGE() only validates that the table header itself fits within the BIOS image. If the VBIOS reports a structuresize larger than the actual mapped data, the loop reads past the end of the BIOS image, causing an out-of-bounds read. Fix this by calling bios_get_image() to validate that the full claimed structuresize is accessible within the BIOS image before entering the loop in both functions. Assisted-by: GitHub Copilot:claude-opus-4-6 Reviewed-by: Alex Hung Signed-off-by: Harry Wentland Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit ba5e95b43b773ae1bf1f66ee6b31eb774e65afe3) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index a1c08e1cc411..c51c4b2c6fae 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -493,6 +493,10 @@ static enum bp_result get_gpio_i2c_info( - sizeof(struct atom_common_table_header)) / sizeof(struct atom_gpio_pin_assignment); + if (!bios_get_image(&bp->base, DATA_TABLES(gpio_pin_lut), + le16_to_cpu(header->table_header.structuresize))) + return BP_RESULT_BADBIOSTABLE; + pin = (struct atom_gpio_pin_assignment *) header->gpio_pin; for (table_index = 0; table_index < count; table_index++) { @@ -681,6 +685,11 @@ static enum bp_result bios_parser_get_gpio_pin_info( count = (le16_to_cpu(header->table_header.structuresize) - sizeof(struct atom_common_table_header)) / sizeof(struct atom_gpio_pin_assignment); + + if (!bios_get_image(&bp->base, DATA_TABLES(gpio_pin_lut), + le16_to_cpu(header->table_header.structuresize))) + return BP_RESULT_BADBIOSTABLE; + for (i = 0; i < count; ++i) { if (header->gpio_pin[i].gpio_id != gpio_id) continue; -- cgit v1.2.3 From 6c92f6d9600efa3ef0d9e560a2b52776d9803c29 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Thu, 7 May 2026 16:26:31 -0400 Subject: drm/amd/display: Validate payload length and link_index in dc_process_dmub_aux_transfer_async [Why&How] dc_process_dmub_aux_transfer_async() copies payload->length bytes into a 16-byte stack buffer (dpaux.data[16]) guarded only by an ASSERT(), which is a no-op in release builds. If a caller ever passes length > 16 this results in a stack buffer overflow via memcpy. Additionally, link_index is used to dereference dc->links[] without bounds checking against dc->link_count, risking an out-of-bounds access. Replace the ASSERT with a hard runtime check that returns false when payload->length exceeds the destination buffer size, and add a bounds check for link_index before it is used. Assisted-by: GitHub Copilot:Claude claude-4-opus Reviewed-by: Alex Hung Signed-off-by: Harry Wentland Signed-off-by: Ivan Lipski Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit ba4caa9fecdf7a38f98c878ad05a8a64148b6881) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 419f894c87b0..b3530fbf32f7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -6071,7 +6071,11 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc, uint8_t action; union dmub_rb_cmd cmd = {0}; - ASSERT(payload->length <= 16); + if (link_index >= dc->link_count || !dc->links[link_index]) + return false; + + if (payload->length > sizeof(cmd.dp_aux_access.aux_control.dpaux.data)) + return false; cmd.dp_aux_access.header.type = DMUB_CMD__DP_AUX_ACCESS; cmd.dp_aux_access.header.payload_bytes = 0; -- cgit v1.2.3 From 353f7430d1eccd481cc089decd1fc377d4312f4a Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Mon, 11 May 2026 22:14:23 +0800 Subject: drm/amdgpu: unmap all user mappings of framebuffer and doorbell before mode1 reset During Mode 1 reset, the ASIC undergoes a reset cycle and becomes temporarily inaccessible via PCIe. Any attempt to access framebuffer or MMIO registers during this window can result in uncompleted PCIe transactions, leading to NMI panics or system hangs. To prevent this, Unmap all of the applications mappings of the framebuffer and doorbell BARs before mode1 reset. Also prevent new mappings from coming in during the reset process. v2: remove inode in kfd_dev (Christian) v3: correct unmap offset (Felix), remove prevent new mappings part to avoid deadlock (Christian) Reviewed-by: Felix Kuehling Signed-off-by: Yifan Zhang Signed-off-by: Alex Deucher (cherry picked from commit 70cadefcc6160c575b04f763ada34c20e868d577) --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++++++ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + 5 files changed, 55 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index d9e283f3b57d..9783a3cefb04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -36,6 +36,9 @@ #include "amdgpu_ras.h" #include "amdgpu_umc.h" #include "amdgpu_reset.h" +#if IS_ENABLED(CONFIG_HSA_AMD) +#include "kfd_priv.h" +#endif /* Total memory size in system memory and all GPU VRAM. Used to * estimate worst case amount of memory to reserve for page tables @@ -320,6 +323,28 @@ void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev) (void)amdgpu_reset_domain_schedule(adev->reset_domain, &adev->kfd.reset_work); } +void amdgpu_amdkfd_clear_kfd_mapping(struct amdgpu_device *adev) +{ +#if IS_ENABLED(CONFIG_HSA_AMD) + struct kfd_dev *kfd = adev->kfd.dev; + unsigned int i; + + if (!kfd) + return; + + for (i = 0; i < kfd->num_nodes; i++) { + struct kfd_node *node = kfd->nodes[i]; + + kfd_dev_unmap_mapping_range(KFD_MMAP_TYPE_DOORBELL | + KFD_MMAP_GPU_ID(node->id), + kfd_doorbell_process_slice(kfd)); + kfd_dev_unmap_mapping_range(KFD_MMAP_TYPE_MMIO | + KFD_MMAP_GPU_ID(node->id), + PAGE_SIZE); + } +#endif +} + int amdgpu_amdkfd_alloc_kernel_mem(struct amdgpu_device *adev, size_t size, u32 domain, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool cp_mqd_gfx9) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index cdbab7f8cee8..2b4108f83f48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -358,6 +358,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); +void amdgpu_amdkfd_clear_kfd_mapping(struct amdgpu_device *adev); u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1424c98d2006..feab90e3efd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5835,6 +5835,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* We need to lock reset domain only once both for XGMI and single device */ amdgpu_device_recovery_get_reset_lock(adev, &device_list); + /* unmap all the mappings of doorbell and framebuffer to prevent user space from + * accessing them + */ + unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); + amdgpu_amdkfd_clear_kfd_mapping(adev); + amdgpu_device_halt_activities(adev, job, reset_context, &device_list, hive, need_emergency_restart); if (need_emergency_restart) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f95bf6d95534..03b266b26738 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -67,6 +67,21 @@ static const struct class kfd_class = { .name = kfd_dev_name, }; +/* + * Cache the address space of the chardev on first open so that the reset + * path can drop all userspace mappings of doorbell and MMIO ranges via + * unmap_mapping_range(). + */ +static struct address_space *kfd_dev_mapping; + +void kfd_dev_unmap_mapping_range(loff_t const holebegin, loff_t const holelen) +{ + struct address_space *mapping = READ_ONCE(kfd_dev_mapping); + + if (mapping) + unmap_mapping_range(mapping, holebegin, holelen, 1); +} + static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id) { struct kfd_process_device *pdd; @@ -133,6 +148,13 @@ static int kfd_open(struct inode *inode, struct file *filep) if (iminor(inode) != 0) return -ENODEV; + /* + * /dev/kfd is a single chardev so all opens share one inode. Cache + * its address_space on the first open for use by the reset path. + */ + if (!READ_ONCE(kfd_dev_mapping)) + cmpxchg(&kfd_dev_mapping, NULL, inode->i_mapping); + is_32bit_user_mode = in_compat_syscall(); if (is_32bit_user_mode) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 7b5b12206919..d5b07789eda4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -395,6 +395,7 @@ enum kfd_mempool { /* Character device interface */ int kfd_chardev_init(void); void kfd_chardev_exit(void); +void kfd_dev_unmap_mapping_range(loff_t const holebegin, loff_t const holelen); /** * enum kfd_unmap_queues_filter - Enum for queue filters. -- cgit v1.2.3 From 893fea60f8393fc99fa522f1718690421a5f9951 Mon Sep 17 00:00:00 2001 From: Xiang Liu Date: Mon, 11 May 2026 21:28:59 +0800 Subject: drm/amd/ras: Fix UMC error address allocation leak amdgpu_umc_handle_bad_pages() allocates err_data->err_addr before querying UMC error information. In the direct and firmware query paths, the pointer is reassigned to a fresh allocation before the original buffer is released, so the initial allocation is leaked on each handled event. Free the existing buffer before replacing it in those query paths so the function exit cleanup only owns the active allocation. Signed-off-by: Xiang Liu Reviewed-by: Stanley.Yang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher (cherry picked from commit 911b1bdd22c3712a22b60fcc58f7b9f2d07b0803) --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 0238c2798de4..b8ed931f8a40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -130,6 +130,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && adev->umc.ras->ras_block.hw_ops->query_ras_error_address && adev->umc.max_ras_err_cnt_per_query) { + kfree(err_data->err_addr); err_data->err_addr = kzalloc_objs(struct eeprom_table_record, adev->umc.max_ras_err_cnt_per_query); @@ -160,6 +161,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if (adev->umc.ras && adev->umc.ras->ecc_info_query_ras_error_address && adev->umc.max_ras_err_cnt_per_query) { + kfree(err_data->err_addr); err_data->err_addr = kzalloc_objs(struct eeprom_table_record, adev->umc.max_ras_err_cnt_per_query); -- cgit v1.2.3 From cd7cfcdb4dd45c79ef6825a3dc918782f604f5ed Mon Sep 17 00:00:00 2001 From: Ce Sun Date: Mon, 11 May 2026 18:04:57 +0800 Subject: drm/amdgpu: avoid integer overflow in VA range check The original addition operation in 64-bit unsigned type may encounter overflow situations. To prevent such issues and safely reject invalid inputs, the check_add_overflow() function is used. Signed-off-by: Ce Sun Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher (cherry picked from commit cc768f4dd0bb9083c813683eeec44fc23921f771) --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 23f2304ee7e0..123d4a09114d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -825,7 +825,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_syncobj *timeline_syncobj = NULL; struct dma_fence_chain *timeline_chain = NULL; struct drm_exec exec; - uint64_t vm_size; + uint64_t vm_size, tmp; int r = 0; /* Validate virtual address range against reserved regions. */ @@ -849,7 +849,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; vm_size -= AMDGPU_VA_RESERVED_TOP; - if (args->va_address + args->map_size > vm_size) { + if (check_add_overflow(args->va_address, args->map_size, &tmp) || tmp > vm_size) { dev_dbg(dev->dev, "va_address 0x%llx is in top reserved area 0x%llx\n", args->va_address + args->map_size, vm_size); -- cgit v1.2.3 From b6a28b77b88e776a8cc7739005718e03c4c9be57 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 13 May 2026 13:29:35 +0530 Subject: drm/amdgpu: userq_va_mapped should remain true once done MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multiple queues needs these bo_va objects belonging to the same uq_mgr. So once they are mapped lets not unmap them as at any point of time any of the queues might be using it. Also userq_va_mapped should be a boolean than atomic. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 5c02889ea22575c3bcfdf212e65fac316cbc6c6a) --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 16 ++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 912c9afaf9e1..4d68732d6223 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -96,7 +96,8 @@ struct amdgpu_bo_va { * if non-zero, cannot unmap from GPU because user queues may still access it */ unsigned int queue_refcount; - atomic_t userq_va_mapped; + /* Indicates if this buffer is mapped for any user queue. Once set, never reset. */ + bool userq_va_mapped; }; struct amdgpu_bo { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 6111f6858d2d..f677a9f2d4ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -227,7 +227,7 @@ static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue, INIT_LIST_HEAD(&va_cursor->list); va_cursor->gpu_addr = addr; - atomic_set(&va_map->bo_va->userq_va_mapped, 1); + va_map->bo_va->userq_va_mapped = true; list_add(&va_cursor->list, &queue->userq_va_list); return 0; @@ -274,7 +274,7 @@ static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr) dma_resv_assert_held(vm->root.bo->tbo.base.resv); mapping = amdgpu_vm_bo_lookup_mapping(vm, addr); - if (!IS_ERR_OR_NULL(mapping) && atomic_read(&mapping->bo_va->userq_va_mapped)) + if (!IS_ERR_OR_NULL(mapping) && mapping->bo_va->userq_va_mapped) r = true; else r = false; @@ -300,15 +300,6 @@ static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue) return false; } -static void amdgpu_userq_buffer_va_list_del(struct amdgpu_bo_va_mapping *mapping, - struct amdgpu_userq_va_cursor *va_cursor) -{ - if (mapping) - atomic_set(&mapping->bo_va->userq_va_mapped, 0); - list_del(&va_cursor->list); - kfree(va_cursor); -} - static void amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue) { @@ -323,7 +314,8 @@ static void amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, if (mapping) dev_dbg(adev->dev, "delete the userq:%p va:%llx\n", queue, va_cursor->gpu_addr); - amdgpu_userq_buffer_va_list_del(mapping, va_cursor); + list_del(&va_cursor->list); + kfree(va_cursor); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9ba9de16a27a..fccd758b6699 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2002,7 +2002,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, * during user requests GEM unmap IOCTL except for forcing the unmap * from user space. */ - if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0)) + if (unlikely(bo_va->userq_va_mapped)) amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr); list_del(&mapping->list); -- cgit v1.2.3 From d093c01d30fc32cb82458731c33fa2a787ff1e7b Mon Sep 17 00:00:00 2001 From: Vitaliy Triang3l Kuzmin Date: Sat, 16 May 2026 00:48:32 +0300 Subject: drm/radeon/evergreen_cs: Add missing NULL prefix check in surface check 'evergreen_surface_check' is called with a NULL warning prefix when handling potentially recoverable issues or just to compute the alignment requirements, and 'evergreen_surface_check' is called again in case of failure (with the correct prefix, as opposed to NULL), therefore, the initial check must not print a warning, because the surface may be accepted successfully after having been corrected, however if it isn't, the final check will print the warning anyway. The surface check functions specific to array modes already implement this behavior, but the 'evergreen_surface_check' function itself doesn't. This is also supposed to fix the "'%s' directive argument is null [-Werror=format-overflow=]" compiler warning. Fixes: 285484e2d55e ("drm/radeon: add support for evergreen/ni tiling informations v11") Reported-by: Arnd Bergmann Signed-off-by: Vitaliy Triang3l Kuzmin Signed-off-by: Alex Deucher (cherry picked from commit e20ea411c99f6968af35fd03e9ee21f70d799144) --- drivers/gpu/drm/radeon/evergreen_cs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 3142ef4da7f4..9196f85db9ce 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -312,8 +312,10 @@ static int evergreen_surface_check(struct radeon_cs_parser *p, case ARRAY_2D_TILED_THIN1: return evergreen_surface_check_2d(p, surf, prefix); default: - dev_warn(p->dev, "%s:%d %s invalid array mode %d\n", - __func__, __LINE__, prefix, surf->mode); + if (prefix) { + dev_warn(p->dev, "%s:%d %s invalid array mode %d\n", + __func__, __LINE__, prefix, surf->mode); + } return -EINVAL; } return -EINVAL; -- cgit v1.2.3 From d4d76c9ee1997cc8c977a63f6c43551c253c1066 Mon Sep 17 00:00:00 2001 From: Jeremy Erazo Date: Fri, 15 May 2026 19:31:41 +0000 Subject: smb: client: use data_len for SMB2 READ encrypted folioq copy In handle_read_data() the encrypted/folioq branch (buf_len <= data_offset, reached via receive_encrypted_read for transform PDUs > CIFSMaxBufSize + MAX_HEADER_SIZE) copies the READ payload using buffer_len rather than data_len: rdata->result = cifs_copy_folioq_to_iter(buffer, buffer_len, cur_off, &rdata->subreq.io_iter); ... rdata->got_bytes = buffer_len; buffer_len comes from the SMB3 transform header OriginalMessageSize field (OriginalMessageSize - read_rsp_size); it represents the size of the decrypted message after the SMB2 header. data_len comes from the SMB2 READ response DataLength field; it represents the actual READ payload size and may be smaller than buffer_len when the decrypted message contains padding or other trailing bytes after the READ payload. The existing check `data_len > buffer_len - pad_len` only enforces an upper bound, so a server that emits OriginalMessageSize larger than read_rsp_size + pad_len + data_len passes the check and the kernel copies buffer_len bytes per response, ignoring the server-asserted DataLength. Two observable failures with a crafted server (DataLength=4, buffer_len=20000): - the kernel returns 20000 bytes per sub-request to userspace and sets got_bytes = buffer_len, even though the response claimed only 4 bytes of payload; - on a partial netfs sub-request whose iterator is sized to data_len, the over-large copy_folio_to_iter() short-reads, cifs_copy_folioq_to_iter() returns -EIO via the n != len path, and the entire netfs read collapses to -EIO even though the leading sub-requests succeeded. Use data_len for the copy length and for got_bytes so the kernel honours the server-asserted READ payload size. For well-formed servers (where buffer_len == pad_len + data_len) the change is behaviour-equivalent. Cc: stable@vger.kernel.org Signed-off-by: Jeremy Erazo Acked-by: David Howells Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 3738204984f5..ee83700269cf 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4826,7 +4826,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, } /* Copy the data to the output I/O iterator. */ - rdata->result = cifs_copy_folioq_to_iter(buffer, buffer_len, + rdata->result = cifs_copy_folioq_to_iter(buffer, data_len, cur_off, &rdata->subreq.io_iter); if (rdata->result != 0) { if (is_offloaded) @@ -4835,7 +4835,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, dequeue_mid(server, mid, rdata->result); return 0; } - rdata->got_bytes = buffer_len; + rdata->got_bytes = data_len; } else if (!check_add_overflow(data_offset, data_len, &end_off) && buf_len >= end_off) { -- cgit v1.2.3 From b6fe4ff340560ecf39e10733366f85832550699a Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 27 Apr 2026 16:31:31 +0200 Subject: drm/amdgpu: fix handling in amdgpu_userq_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Well mostly the same issues the other code had as well: 1. Memory allocation while holding the userq_mutex lock is forbidden! 2. Things were created/started/published in the wrong order. 3. The reset lock was taken in the wrong order and seems to be unecessary in the first place. 4. Error messages on invalid input parameters can spam the logs. 5. Error messages on memory allocation failures are usually superflous as well. Signed-off-by: Christian König Reviewed-by: Sunil Khatri Reviewed-by: Prike Liang Signed-off-by: Alex Deucher (cherry picked from commit 89e50de5654dbe7a137e03d78629542e17ba7202) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 118 +++++++++++++----------------- 1 file changed, 52 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index f677a9f2d4ad..f070ea37d918 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -708,14 +708,14 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) const struct amdgpu_userq_funcs *uq_funcs; struct amdgpu_usermode_queue *queue; struct amdgpu_db_info db_info; - bool skip_map_queue; - u32 qid; uint64_t index; - int r = 0; - int priority = - (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >> - AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; + int priority; + u32 qid; + int r; + priority = + (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) + >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; r = amdgpu_userq_priority_permit(filp, priority); if (r) return r; @@ -728,40 +728,43 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) uq_funcs = adev->userq_funcs[args->in.ip_type]; if (!uq_funcs) { - drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n", - args->in.ip_type); r = -EINVAL; goto err_pm_runtime; } queue = kzalloc_obj(struct amdgpu_usermode_queue); if (!queue) { - drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n"); r = -ENOMEM; goto err_pm_runtime; } + kref_init(&queue->refcount); INIT_LIST_HEAD(&queue->userq_va_list); queue->doorbell_handle = args->in.doorbell_handle; queue->queue_type = args->in.ip_type; queue->vm = &fpriv->vm; queue->priority = priority; - - db_info.queue_type = queue->queue_type; - db_info.doorbell_handle = queue->doorbell_handle; - db_info.db_obj = &queue->db_obj; - db_info.doorbell_offset = args->in.doorbell_offset; - queue->userq_mgr = uq_mgr; + INIT_DELAYED_WORK(&queue->hang_detect_work, + amdgpu_userq_hang_detect_work); - /* Validate the userq virtual address.*/ - r = amdgpu_bo_reserve(fpriv->vm.root.bo, false); + mutex_init(&queue->fence_drv_lock); + xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); + r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv); if (r) goto free_queue; - if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, args->in.queue_size) || - amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) || - amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) { + /* Make sure the queue can actually run with those virtual addresses. */ + r = amdgpu_bo_reserve(fpriv->vm.root.bo, false); + if (r) + goto free_fence_drv; + + if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, + args->in.queue_size) || + amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, + AMDGPU_GPU_PAGE_SIZE) || + amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, + AMDGPU_GPU_PAGE_SIZE)) { r = -EINVAL; amdgpu_bo_unreserve(fpriv->vm.root.bo); goto clean_mapping; @@ -769,6 +772,10 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) amdgpu_bo_unreserve(fpriv->vm.root.bo); /* Convert relative doorbell offset into absolute doorbell index */ + db_info.queue_type = queue->queue_type; + db_info.doorbell_handle = queue->doorbell_handle; + db_info.db_obj = &queue->db_obj; + db_info.doorbell_offset = args->in.doorbell_offset; index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp); if (index == (uint64_t)-EINVAL) { drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); @@ -777,85 +784,64 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) } queue->doorbell_index = index; - mutex_init(&queue->fence_drv_lock); - xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); - r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv); - if (r) { - drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n"); - goto clean_mapping; - } - r = uq_funcs->mqd_create(queue, &args->in); if (r) { drm_file_err(uq_mgr->file, "Failed to create Queue\n"); - goto clean_fence_driver; + goto clean_mapping; } /* Update VM owner at userq submit-time for page-fault attribution. */ amdgpu_vm_set_task_info(&fpriv->vm); + r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, + GFP_KERNEL)); + if (r) + goto clean_mqd; + amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); /* don't map the queue if scheduling is halted */ - if (adev->userq_halt_for_enforce_isolation && - ((queue->queue_type == AMDGPU_HW_IP_GFX) || - (queue->queue_type == AMDGPU_HW_IP_COMPUTE))) - skip_map_queue = true; - else - skip_map_queue = false; - if (!skip_map_queue) { + if (!adev->userq_halt_for_enforce_isolation || + ((queue->queue_type != AMDGPU_HW_IP_GFX) && + (queue->queue_type != AMDGPU_HW_IP_COMPUTE))) { r = amdgpu_userq_map_helper(queue); if (r) { drm_file_err(uq_mgr->file, "Failed to map Queue\n"); - goto clean_mqd; + mutex_unlock(&uq_mgr->userq_mutex); + goto clean_doorbell; } } - /* drop this refcount during queue destroy */ - kref_init(&queue->refcount); - - /* Wait for mode-1 reset to complete */ - down_read(&adev->reset_domain->sem); + atomic_inc(&uq_mgr->userq_count[queue->queue_type]); + mutex_unlock(&uq_mgr->userq_mutex); r = xa_alloc(&uq_mgr->userq_xa, &qid, queue, - XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL); - if (r) { - if (!skip_map_queue) - amdgpu_userq_unmap_helper(queue); - r = -ENOMEM; - goto clean_reset_domain; - } - - r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL)); + XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), + GFP_KERNEL); if (r) { - xa_erase(&uq_mgr->userq_xa, qid); - if (!skip_map_queue) - amdgpu_userq_unmap_helper(queue); - goto clean_reset_domain; + /* + * This drops the last reference which should take care of + * all cleanup. + */ + amdgpu_userq_put(queue); + return r; } - up_read(&adev->reset_domain->sem); amdgpu_debugfs_userq_init(filp, queue, qid); - INIT_DELAYED_WORK(&queue->hang_detect_work, - amdgpu_userq_hang_detect_work); - args->out.queue_id = qid; - atomic_inc(&uq_mgr->userq_count[queue->queue_type]); - mutex_unlock(&uq_mgr->userq_mutex); return 0; -clean_reset_domain: - up_read(&adev->reset_domain->sem); +clean_doorbell: + xa_erase_irq(&adev->userq_doorbell_xa, index); clean_mqd: - mutex_unlock(&uq_mgr->userq_mutex); uq_funcs->mqd_destroy(queue); -clean_fence_driver: - amdgpu_userq_fence_driver_free(queue); clean_mapping: amdgpu_bo_reserve(fpriv->vm.root.bo, true); amdgpu_userq_buffer_vas_list_cleanup(adev, queue); amdgpu_bo_unreserve(fpriv->vm.root.bo); mutex_destroy(&queue->fence_drv_lock); +free_fence_drv: + amdgpu_userq_fence_driver_free(queue); free_queue: kfree(queue); err_pm_runtime: -- cgit v1.2.3 From 431e40042d3599559e588b8946bb28bd440b4f65 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Wed, 13 May 2026 12:29:21 -0600 Subject: bio-integrity-fs: pass data iter to bio_integrity_verify() bio_integrity_verify() expects the passed struct bvec_iter to be an iterator over bio data, not integrity. So construct a separate data bvec_iter without the bio_integrity_bytes() conversion and pass it to bio_integrity_verify() instead of bip_iter. Fixes: 0bde8a12b554 ("block: add fs_bio_integrity helpers") Signed-off-by: Caleb Sander Mateos Reviewed-by: Anuj Gupta Reviewed-by: Christoph Hellwig Link: https://patch.msgid.link/20260513182924.1753582-1-csander@purestorage.com Signed-off-by: Jens Axboe --- block/bio-integrity-fs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/bio-integrity-fs.c b/block/bio-integrity-fs.c index acb1e5f270d2..0daa42d9ead7 100644 --- a/block/bio-integrity-fs.c +++ b/block/bio-integrity-fs.c @@ -55,6 +55,10 @@ int fs_bio_integrity_verify(struct bio *bio, sector_t sector, unsigned int size) { struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); struct bio_integrity_payload *bip = bio_integrity(bio); + struct bvec_iter data_iter = { + .bi_sector = sector, + .bi_size = size, + }; /* * Reinitialize bip->bip_iter. @@ -65,7 +69,7 @@ int fs_bio_integrity_verify(struct bio *bio, sector_t sector, unsigned int size) memset(&bip->bip_iter, 0, sizeof(bip->bip_iter)); bip->bip_iter.bi_sector = sector; bip->bip_iter.bi_size = bio_integrity_bytes(bi, size >> SECTOR_SHIFT); - return blk_status_to_errno(bio_integrity_verify(bio, &bip->bip_iter)); + return blk_status_to_errno(bio_integrity_verify(bio, &data_iter)); } static int __init fs_bio_integrity_init(void) -- cgit v1.2.3 From 0701c9e17bd903d95b2ddf7dd2e1d8be5027f331 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 May 2026 11:18:29 +0200 Subject: x86/kvm/vmx: Move IRQ/NMI dispatch from KVM into x86 core Move the VMX interrupt dispatch magic into the x86 core code. This isolates KVM from the FRED/IDT decisions and reduces the amount of EXPORT_SYMBOL_FOR_KVM(). Suggested-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: "Verma, Vishal L" Tested-by: Zhao Liu Tested-by: Zhao Liu Tested-by: Sean Christopherson Reviewed-by: Binbin Wu Acked-by: Sean Christopherson Link: https://patch.msgid.link/20260508091829.GO3126523@noisy.programming.kicks-ass.net --- arch/x86/entry/Makefile | 2 +- arch/x86/entry/common.c | 48 +++++++++++++++++++++++++++++++++++++ arch/x86/entry/entry.S | 46 +++++++++++++++++++++++++++++++++++ arch/x86/entry/entry_64_fred.S | 1 - arch/x86/include/asm/desc.h | 4 ++++ arch/x86/include/asm/desc_defs.h | 2 +- arch/x86/include/asm/entry-common.h | 2 ++ arch/x86/include/asm/fred.h | 1 - arch/x86/kernel/idt.c | 15 ++++++++++++ arch/x86/kernel/nmi.c | 1 - arch/x86/kvm/vmx/vmenter.S | 46 ----------------------------------- arch/x86/kvm/vmx/vmx.c | 19 ++------------- 12 files changed, 119 insertions(+), 68 deletions(-) create mode 100644 arch/x86/entry/common.c diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 72cae8e0ce85..83b4762d6ecb 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -13,7 +13,7 @@ CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE) CFLAGS_syscall_32.o += -fno-stack-protector CFLAGS_syscall_64.o += -fno-stack-protector -obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o +obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o common.o obj-y += vdso/ obj-y += vsyscall/ diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c new file mode 100644 index 000000000000..b62ac824c1e9 --- /dev/null +++ b/arch/x86/entry/common.c @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include + +#if IS_ENABLED(CONFIG_KVM_INTEL) +/* + * On VMX, NMIs and IRQs (as configured by KVM) are acknowledged by hardware as + * part of the VM-Exit, i.e. the event itself is consumed as part the VM-Exit. + * x86_entry_from_kvm() is invoked by KVM to effectively forward NMIs and IRQs + * to the kernel for servicing. On SVM, a.k.a. AMD, the NMI/IRQ VM-Exit is + * purely a signal that an NMI/IRQ is pending, i.e. the event that triggered + * the VM-Exit is held pending until it's unblocked in the host. + */ +noinstr void x86_entry_from_kvm(unsigned int event_type, unsigned int vector) +{ + if (event_type == EVENT_TYPE_EXTINT) { +#ifdef CONFIG_X86_64 + /* + * Use FRED dispatch, even when running IDT. The dispatch + * tables are kept in sync between FRED and IDT, and the FRED + * dispatch works well with CFI. + */ + fred_entry_from_kvm(event_type, vector); +#else + idt_entry_from_kvm(vector); +#endif + return; + } + + WARN_ON_ONCE(event_type != EVENT_TYPE_NMI); + +#ifdef CONFIG_X86_64 + if (cpu_feature_enabled(X86_FEATURE_FRED)) + return fred_entry_from_kvm(event_type, vector); +#endif + + /* + * Notably, we must use IDT dispatch for NMI when running in IDT mode. + * The FRED NMI context is significantly different and will not work + * right (specifically FRED fixed the NMI recursion issue). + */ + idt_entry_from_kvm(vector); +} +EXPORT_SYMBOL_FOR_KVM(x86_entry_from_kvm); +#endif diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index 6ba2b3adcef0..a56e043b266d 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -75,3 +75,49 @@ THUNK warn_thunk_thunk, __warn_thunk #if defined(CONFIG_STACKPROTECTOR) && defined(CONFIG_SMP) EXPORT_SYMBOL(__ref_stack_chk_guard); #endif + +#if IS_ENABLED(CONFIG_KVM_INTEL) +.macro IDT_DO_EVENT_IRQOFF call_insn call_target + /* + * Unconditionally create a stack frame, getting the correct RSP on the + * stack (for x86-64) would take two instructions anyways, and RBP can + * be used to restore RSP to make objtool happy (see below). + */ + push %_ASM_BP + mov %_ASM_SP, %_ASM_BP + +#ifdef CONFIG_X86_64 + /* + * Align RSP to a 16-byte boundary (to emulate CPU behavior) before + * creating the synthetic interrupt stack frame for the IRQ/NMI. + */ + and $-16, %rsp + push $__KERNEL_DS + push %rbp +#endif + pushf + push $__KERNEL_CS + \call_insn \call_target + + /* + * "Restore" RSP from RBP, even though IRET has already unwound RSP to + * the correct value. objtool doesn't know the callee will IRET and, + * without the explicit restore, thinks the stack is getting walloped. + * Using an unwind hint is problematic due to x86-64's dynamic alignment. + */ + leave + RET +.endm + +.pushsection .text, "ax" +SYM_FUNC_START(idt_do_interrupt_irqoff) + IDT_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1 +SYM_FUNC_END(idt_do_interrupt_irqoff) +.popsection + +.pushsection .noinstr.text, "ax" +SYM_FUNC_START(idt_do_nmi_irqoff) + IDT_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx +SYM_FUNC_END(idt_do_nmi_irqoff) +.popsection +#endif diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S index 894f7f16eb80..0d2768ab836c 100644 --- a/arch/x86/entry/entry_64_fred.S +++ b/arch/x86/entry/entry_64_fred.S @@ -147,5 +147,4 @@ SYM_FUNC_START(asm_fred_entry_from_kvm) RET SYM_FUNC_END(asm_fred_entry_from_kvm) -EXPORT_SYMBOL_FOR_KVM(asm_fred_entry_from_kvm); #endif diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index ec95fe44fa3a..00aeae843529 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -438,6 +438,10 @@ extern void idt_setup_traps(void); extern void idt_setup_apic_and_irq_gates(void); extern bool idt_is_f00f_address(unsigned long address); +extern void idt_do_interrupt_irqoff(unsigned long address); +extern void idt_do_nmi_irqoff(void); +extern void idt_entry_from_kvm(unsigned int vector); + #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); #else diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index 7e6b9314758a..2f2ce8aadf07 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -145,7 +145,7 @@ struct gate_struct { typedef struct gate_struct gate_desc; #ifndef _SETUP -static inline unsigned long gate_offset(const gate_desc *g) +static __always_inline unsigned long gate_offset(const gate_desc *g) { #ifdef CONFIG_X86_64 return g->offset_low | ((unsigned long)g->offset_middle << 16) | diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 7535131c711b..eca24b5e07f4 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -97,4 +97,6 @@ static __always_inline void arch_exit_to_user_mode(void) } #define arch_exit_to_user_mode arch_exit_to_user_mode +extern void x86_entry_from_kvm(unsigned int entry_type, unsigned int vector); + #endif diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h index 2bb65677c079..18a2f811c358 100644 --- a/arch/x86/include/asm/fred.h +++ b/arch/x86/include/asm/fred.h @@ -110,7 +110,6 @@ static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { ret static inline void cpu_init_fred_exceptions(void) { } static inline void cpu_init_fred_rsps(void) { } static inline void fred_complete_exception_setup(void) { } -static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { } static inline void fred_sync_rsp0(unsigned long rsp0) { } static inline void fred_update_rsp0(void) { } #endif /* CONFIG_X86_FRED */ diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 260456588756..7bcf1decc034 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -268,6 +268,21 @@ void __init idt_setup_early_pf(void) } #endif +#if IS_ENABLED(CONFIG_KVM_INTEL) +noinstr void idt_entry_from_kvm(unsigned int vector) +{ + if (vector == NMI_VECTOR) + return idt_do_nmi_irqoff(); + + /* + * Only the NMI path requires noinstr. + */ + instrumentation_begin(); + idt_do_interrupt_irqoff(gate_offset(idt_table + vector)); + instrumentation_end(); +} +#endif + static void __init idt_map_in_cea(void) { /* diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 3d239ed12744..52a3afb1b79e 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -614,7 +614,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi_kvm_vmx) { exc_nmi(regs); } -EXPORT_SYMBOL_FOR_KVM(asm_exc_nmi_kvm_vmx); #endif #ifdef CONFIG_NMI_CHECK_CPU diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 8a481dae9cae..ff1f254a0ef4 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -31,38 +31,6 @@ #define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE #endif -.macro VMX_DO_EVENT_IRQOFF call_insn call_target - /* - * Unconditionally create a stack frame, getting the correct RSP on the - * stack (for x86-64) would take two instructions anyways, and RBP can - * be used to restore RSP to make objtool happy (see below). - */ - push %_ASM_BP - mov %_ASM_SP, %_ASM_BP - -#ifdef CONFIG_X86_64 - /* - * Align RSP to a 16-byte boundary (to emulate CPU behavior) before - * creating the synthetic interrupt stack frame for the IRQ/NMI. - */ - and $-16, %rsp - push $__KERNEL_DS - push %rbp -#endif - pushf - push $__KERNEL_CS - \call_insn \call_target - - /* - * "Restore" RSP from RBP, even though IRET has already unwound RSP to - * the correct value. objtool doesn't know the callee will IRET and, - * without the explicit restore, thinks the stack is getting walloped. - * Using an unwind hint is problematic due to x86-64's dynamic alignment. - */ - leave - RET -.endm - .section .noinstr.text, "ax" /** @@ -320,10 +288,6 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) SYM_FUNC_END(__vmx_vcpu_run) -SYM_FUNC_START(vmx_do_nmi_irqoff) - VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx -SYM_FUNC_END(vmx_do_nmi_irqoff) - #ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT /** @@ -375,13 +339,3 @@ SYM_FUNC_START(vmread_error_trampoline) RET SYM_FUNC_END(vmread_error_trampoline) #endif - -.section .text, "ax" - -#ifndef CONFIG_X86_FRED - -SYM_FUNC_START(vmx_do_interrupt_irqoff) - VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1 -SYM_FUNC_END(vmx_do_interrupt_irqoff) - -#endif diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 49feecb286b2..b9103de01428 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7117,9 +7117,6 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); } -void vmx_do_interrupt_irqoff(unsigned long entry); -void vmx_do_nmi_irqoff(void); - static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu) { /* @@ -7161,17 +7158,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu, "unexpected VM-Exit interrupt info: 0x%x", intr_info)) return; - /* - * Invoke the kernel's IRQ handler for the vector. Use the FRED path - * when it's available even if FRED isn't fully enabled, e.g. even if - * FRED isn't supported in hardware, in order to avoid the indirect - * CALL in the non-FRED path. - */ kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ); - if (IS_ENABLED(CONFIG_X86_FRED)) - fred_entry_from_kvm(EVENT_TYPE_EXTINT, vector); - else - vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector)); + x86_entry_from_kvm(EVENT_TYPE_EXTINT, vector); kvm_after_interrupt(vcpu); vcpu->arch.at_instruction_boundary = true; @@ -7481,10 +7469,7 @@ noinstr void vmx_handle_nmi(struct kvm_vcpu *vcpu) return; kvm_before_interrupt(vcpu, KVM_HANDLING_NMI); - if (cpu_feature_enabled(X86_FEATURE_FRED)) - fred_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR); - else - vmx_do_nmi_irqoff(); + x86_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR); kvm_after_interrupt(vcpu); } -- cgit v1.2.3 From 5fcc48d521877c5d83828d715c81f4d169ef97f3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Apr 2026 17:56:13 +0200 Subject: x86/kvm/vmx: Fix VMX vs hrtimer_rearm_deferred() Vishal reported that KVM unit test 'x2apic' started failing after commit 0e98eb14814e ("entry: Prepare for deferred hrtimer rearming"). The reason is that KVM/VMX is injecting interrupts while it has interrupts disabled, for a context that will enable interrupts, this means that regs->flags.X86_EFLAGS_IF == 0 and irqentry_exit() will not do the right thing. Notably, irqentry_exit() must not call hrtimer_rearm_deferred() when the return context does not have IF set, because this will cause problems vs NMIs. Therefore, fix up the state after the injection. Fixes: 0e98eb14814e ("entry: Prepare for deferred hrtimer rearming") Reported-by: "Verma, Vishal L" Suggested-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: "Verma, Vishal L" Tested-by: David Woodhouse Tested-by: Zhao Liu Tested-by: Sean Christopherson Reviewed-by: Binbin Wu Link: https://patch.msgid.link/20260423155936.957351833@infradead.org Closes: https://lore.kernel.org/r/70cd3e97fbb796e2eb2ff8cd4b7614ada05a5f24.camel%40intel.com --- arch/x86/entry/common.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index b62ac824c1e9..06c7c6ebd6f9 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -27,6 +28,18 @@ noinstr void x86_entry_from_kvm(unsigned int event_type, unsigned int vector) #else idt_entry_from_kvm(vector); #endif + /* + * Strictly speaking, only the NMI path requires noinstr. + */ + instrumentation_begin(); + /* + * KVM/VMX will dispatch from IRQ-disabled but for a context + * that will have IRQs-enabled. This confuses the entry code + * and it will not have reprogrammed the timer. Do so now. + */ + hrtimer_rearm_deferred(); + instrumentation_end(); + return; } -- cgit v1.2.3 From c35cb4fc7231702d1e9952aec1a442f3e27df6f5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 15 May 2026 19:03:59 +0200 Subject: ACPI: battery: Fix system wakeup on critical battery status Commit 0a869409a981 ("ACPI: battery: Convert the driver to a platform one") changed the parent of the battery wakeup source to the platform device used for driver binding, but it forgot to update the acpi_pm_wakeup_event() call in acpi_battery_update() accordingly. Do it now to unbreak waking up the system on critical battery status during suspend-to-idle and during transitions to ACPI S3/S4. Fixes: 0a869409a981 ("ACPI: battery: Convert the driver to a platform one") Signed-off-by: Rafael J. Wysocki Cc: 7.0+ # 7.0+ Link: https://patch.msgid.link/12898712.O9o76ZdvQC@rafael.j.wysocki --- drivers/acpi/battery.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index d4ae21a71007..b82dd67d98c9 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -94,6 +94,7 @@ struct acpi_battery { struct power_supply *bat; struct power_supply_desc bat_desc; struct acpi_device *device; + struct device *phys_dev; struct notifier_block pm_nb; struct list_head list; unsigned long update_time; @@ -1033,7 +1034,7 @@ static int acpi_battery_update(struct acpi_battery *battery, bool resume) if ((battery->state & ACPI_BATTERY_STATE_CRITICAL) || (test_bit(ACPI_BATTERY_ALARM_PRESENT, &battery->flags) && (battery->capacity_now <= battery->alarm))) - acpi_pm_wakeup_event(&battery->device->dev); + acpi_pm_wakeup_event(battery->phys_dev); return result; } @@ -1231,6 +1232,7 @@ static int acpi_battery_probe(struct platform_device *pdev) platform_set_drvdata(pdev, battery); + battery->phys_dev = &pdev->dev; battery->device = device; result = devm_mutex_init(&pdev->dev, &battery->update_lock); -- cgit v1.2.3 From 01f99f8c4a0adec6875f192702a57c5e88978af5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 May 2026 14:33:23 -0300 Subject: RDMA/core: Move the _ib_copy_validate_udata* functions to ib_core_uverbs It was incorrect to place them in uverbs_ioctl because that makes every driver depends on ib_uverbs.ko, which is undesired. ib_core_uverbs.c is for functions used by alot of drivers that are linked into ib_core instead. Fixes: 1de9287ece44 ("RDMA: Add ib_copy_validate_udata_in()") Link: https://patch.msgid.link/r/1-v1-045258567bd6+9fe-ib_uverbs_support_ko_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ib_core_uverbs.c | 89 ++++++++++++++++++++++ drivers/infiniband/core/uverbs.h | 35 +++++++++ drivers/infiniband/core/uverbs_ioctl.c | 122 ------------------------------- 3 files changed, 124 insertions(+), 122 deletions(-) diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c index 1f7a5c119cc9..685030e0c60f 100644 --- a/drivers/infiniband/core/ib_core_uverbs.c +++ b/drivers/infiniband/core/ib_core_uverbs.c @@ -9,6 +9,7 @@ #include #include "uverbs.h" #include "core_priv.h" +#include "rdma_core.h" MODULE_IMPORT_NS("DMA_BUF"); @@ -416,3 +417,91 @@ struct ib_device *rdma_udata_to_dev(struct ib_udata *udata) } EXPORT_SYMBOL(rdma_udata_to_dev); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +uverbs_api_ioctl_handler_fn uverbs_get_handler_fn(struct ib_udata *udata) +{ + struct uverbs_attr_bundle *bundle = + rdma_udata_to_uverbs_attr_bundle(udata); + struct bundle_priv *pbundle = + container_of(&bundle->hdr, struct bundle_priv, bundle); + + lockdep_assert_held(&bundle->ufile->device->disassociate_srcu); + + return srcu_dereference(pbundle->method_elm->handler, + &bundle->ufile->device->disassociate_srcu); +} + +int _ib_copy_validate_udata_in(struct ib_udata *udata, void *req, + size_t kernel_size, size_t minimum_size) +{ + int err; + + if (udata->inlen < minimum_size) { + ibdev_dbg( + rdma_udata_to_dev(udata), + "System call driver input udata too small (%zu < %zu) for ioctl %ps called by %pSR\n", + udata->inlen, minimum_size, + uverbs_get_handler_fn(udata), + __builtin_return_address(0)); + return -EINVAL; + } + + err = copy_struct_from_user(req, kernel_size, udata->inbuf, + udata->inlen); + if (err) { + if (err == -E2BIG) { + ibdev_dbg( + rdma_udata_to_dev(udata), + "System call driver input udata not zero from %zu -> %zu for ioctl %ps called by %pSR\n", + minimum_size, udata->inlen, + uverbs_get_handler_fn(udata), + __builtin_return_address(0)); + return -EOPNOTSUPP; + } + ibdev_dbg( + rdma_udata_to_dev(udata), + "System call driver input udata EFAULT for ioctl %ps called by %pSR\n", + uverbs_get_handler_fn(udata), + __builtin_return_address(0)); + return err; + } + return 0; +} +EXPORT_SYMBOL(_ib_copy_validate_udata_in); + +int _ib_copy_validate_udata_cm_fail(struct ib_udata *udata, u64 req_cm, + u64 valid_cm) +{ + ibdev_dbg( + rdma_udata_to_dev(udata), + "System call driver input udata has unsupported comp_mask %llx & ~%llx = %llx for ioctl %ps called by %pSR\n", + req_cm, valid_cm, req_cm & ~valid_cm, + uverbs_get_handler_fn(udata), __builtin_return_address(0)); + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(_ib_copy_validate_udata_cm_fail); + +int _ib_respond_udata(struct ib_udata *udata, const void *src, size_t len) +{ + size_t copy_len; + + /* 0 length copy_len is a NOP for copy_to_user() and doesn't fail. */ + copy_len = min(len, udata->outlen); + if (copy_to_user(udata->outbuf, src, copy_len)) + goto err_fault; + if (copy_len < udata->outlen) { + if (clear_user(udata->outbuf + copy_len, + udata->outlen - copy_len)) + goto err_fault; + } + return 0; +err_fault: + ibdev_dbg( + rdma_udata_to_dev(udata), + "System call driver out udata has EFAULT (%zu into %zu) for ioctl %ps called by %pSR\n", + len, udata->outlen, uverbs_get_handler_fn(udata), + __builtin_return_address(0)); + return -EFAULT; +} +EXPORT_SYMBOL(_ib_respond_udata); +#endif diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 6d4295277e0e..a74a2dff1301 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -229,6 +229,41 @@ int uverbs_dealloc_mw(struct ib_mw *mw); void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj); +struct bundle_alloc_head { + struct_group_tagged(bundle_alloc_head_hdr, hdr, + struct bundle_alloc_head *next; + ); + u8 data[]; +}; + +struct bundle_priv { + /* Must be first */ + struct bundle_alloc_head_hdr alloc_head; + struct bundle_alloc_head *allocated_mem; + size_t internal_avail; + size_t internal_used; + + struct radix_tree_root *radix; + const struct uverbs_api_ioctl_method *method_elm; + void __rcu **radix_slots; + unsigned long radix_slots_len; + u32 method_key; + + struct ib_uverbs_attr __user *user_attrs; + struct ib_uverbs_attr *uattrs; + + DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN); + DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN); + DECLARE_BITMAP(uobj_hw_obj_valid, UVERBS_API_ATTR_BKEY_LEN); + + /* + * Must be last. bundle ends in a flex array which overlaps + * internal_buffer. + */ + struct uverbs_attr_bundle_hdr bundle; + u64 internal_buffer[32]; +}; + long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); struct ib_uverbs_flow_spec { diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index b61af625e679..33feb88d652b 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -35,54 +35,6 @@ #include "rdma_core.h" #include "uverbs.h" -struct bundle_alloc_head { - struct_group_tagged(bundle_alloc_head_hdr, hdr, - struct bundle_alloc_head *next; - ); - u8 data[]; -}; - -struct bundle_priv { - /* Must be first */ - struct bundle_alloc_head_hdr alloc_head; - struct bundle_alloc_head *allocated_mem; - size_t internal_avail; - size_t internal_used; - - struct radix_tree_root *radix; - const struct uverbs_api_ioctl_method *method_elm; - void __rcu **radix_slots; - unsigned long radix_slots_len; - u32 method_key; - - struct ib_uverbs_attr __user *user_attrs; - struct ib_uverbs_attr *uattrs; - - DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN); - DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN); - DECLARE_BITMAP(uobj_hw_obj_valid, UVERBS_API_ATTR_BKEY_LEN); - - /* - * Must be last. bundle ends in a flex array which overlaps - * internal_buffer. - */ - struct uverbs_attr_bundle_hdr bundle; - u64 internal_buffer[32]; -}; - -uverbs_api_ioctl_handler_fn uverbs_get_handler_fn(struct ib_udata *udata) -{ - struct uverbs_attr_bundle *bundle = - rdma_udata_to_uverbs_attr_bundle(udata); - struct bundle_priv *pbundle = - container_of(&bundle->hdr, struct bundle_priv, bundle); - - lockdep_assert_held(&bundle->ufile->device->disassociate_srcu); - - return srcu_dereference(pbundle->method_elm->handler, - &bundle->ufile->device->disassociate_srcu); -} - /* * Each method has an absolute minimum amount of memory it needs to allocate, * precompute that amount and determine if the onstack memory can be used or @@ -860,77 +812,3 @@ void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *bundle, pbundle->uobj_hw_obj_valid); } EXPORT_SYMBOL(uverbs_finalize_uobj_create); - -int _ib_copy_validate_udata_in(struct ib_udata *udata, void *req, - size_t kernel_size, size_t minimum_size) -{ - int err; - - if (udata->inlen < minimum_size) { - ibdev_dbg( - rdma_udata_to_dev(udata), - "System call driver input udata too small (%zu < %zu) for ioctl %ps called by %pSR\n", - udata->inlen, minimum_size, - uverbs_get_handler_fn(udata), - __builtin_return_address(0)); - return -EINVAL; - } - - err = copy_struct_from_user(req, kernel_size, udata->inbuf, - udata->inlen); - if (err) { - if (err == -E2BIG) { - ibdev_dbg( - rdma_udata_to_dev(udata), - "System call driver input udata not zero from %zu -> %zu for ioctl %ps called by %pSR\n", - minimum_size, udata->inlen, - uverbs_get_handler_fn(udata), - __builtin_return_address(0)); - return -EOPNOTSUPP; - } - ibdev_dbg( - rdma_udata_to_dev(udata), - "System call driver input udata EFAULT for ioctl %ps called by %pSR\n", - uverbs_get_handler_fn(udata), - __builtin_return_address(0)); - return err; - } - return 0; -} -EXPORT_SYMBOL(_ib_copy_validate_udata_in); - -int _ib_copy_validate_udata_cm_fail(struct ib_udata *udata, u64 req_cm, - u64 valid_cm) -{ - ibdev_dbg( - rdma_udata_to_dev(udata), - "System call driver input udata has unsupported comp_mask %llx & ~%llx = %llx for ioctl %ps called by %pSR\n", - req_cm, valid_cm, req_cm & ~valid_cm, - uverbs_get_handler_fn(udata), __builtin_return_address(0)); - return -EOPNOTSUPP; -} -EXPORT_SYMBOL(_ib_copy_validate_udata_cm_fail); - -int _ib_respond_udata(struct ib_udata *udata, const void *src, size_t len) -{ - size_t copy_len; - - /* 0 length copy_len is a NOP for copy_to_user() and doesn't fail. */ - copy_len = min(len, udata->outlen); - if (copy_to_user(udata->outbuf, src, copy_len)) - goto err_fault; - if (copy_len < udata->outlen) { - if (clear_user(udata->outbuf + copy_len, - udata->outlen - copy_len)) - goto err_fault; - } - return 0; -err_fault: - ibdev_dbg( - rdma_udata_to_dev(udata), - "System call driver out udata has EFAULT (%zu into %zu) for ioctl %ps called by %pSR\n", - len, udata->outlen, uverbs_get_handler_fn(udata), - __builtin_return_address(0)); - return -EFAULT; -} -EXPORT_SYMBOL(_ib_respond_udata); -- cgit v1.2.3 From 7122ff96068a03595bde2fbafaca82ca2ed8084e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 May 2026 12:00:16 -0300 Subject: RDMA/core: Do not read wild stack memory in uverbs_get_handler_fn() Sashiko points out the legacy write path in ib_uverbs_write() does allocate a struct uverbs_attr_bundle, but it doesn't wrap it in a bundle_priv so downcasting here isn't safe. Instead lift the method_elm out of the bundle_priv and use it for the debug function. The legacy write path will leave it set as NULL since the write method_elm uses a different type. Cc: stable@vger.kernel.org Fixes: 1de9287ece44 ("RDMA: Add ib_copy_validate_udata_in()") Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/ib_core_uverbs.c | 4 +--- drivers/infiniband/core/uverbs.h | 1 - drivers/infiniband/core/uverbs_ioctl.c | 26 ++++++++++++++------------ include/rdma/uverbs_ioctl.h | 1 + 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c index 685030e0c60f..8a0e6fa2a528 100644 --- a/drivers/infiniband/core/ib_core_uverbs.c +++ b/drivers/infiniband/core/ib_core_uverbs.c @@ -422,12 +422,10 @@ uverbs_api_ioctl_handler_fn uverbs_get_handler_fn(struct ib_udata *udata) { struct uverbs_attr_bundle *bundle = rdma_udata_to_uverbs_attr_bundle(udata); - struct bundle_priv *pbundle = - container_of(&bundle->hdr, struct bundle_priv, bundle); lockdep_assert_held(&bundle->ufile->device->disassociate_srcu); - return srcu_dereference(pbundle->method_elm->handler, + return srcu_dereference(bundle->method_elm->handler, &bundle->ufile->device->disassociate_srcu); } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index a74a2dff1301..f2e192b51e60 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -244,7 +244,6 @@ struct bundle_priv { size_t internal_used; struct radix_tree_root *radix; - const struct uverbs_api_ioctl_method *method_elm; void __rcu **radix_slots; unsigned long radix_slots_len; u32 method_key; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 33feb88d652b..2552a7efe2fb 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -397,13 +397,13 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, struct uverbs_attr_bundle *bundle = container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr); size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs); - unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey; + unsigned int destroy_bkey = bundle->method_elm->destroy_bkey; unsigned int i; int ret; /* See uverbs_disassociate_api() */ handler = srcu_dereference( - pbundle->method_elm->handler, + bundle->method_elm->handler, &pbundle->bundle.ufile->device->disassociate_srcu); if (!handler) return -EIO; @@ -421,12 +421,12 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, } /* User space did not provide all the mandatory attributes */ - if (unlikely(!bitmap_subset(pbundle->method_elm->attr_mandatory, + if (unlikely(!bitmap_subset(bundle->method_elm->attr_mandatory, pbundle->bundle.attr_present, - pbundle->method_elm->key_bitmap_len))) + bundle->method_elm->key_bitmap_len))) return -EINVAL; - if (pbundle->method_elm->has_udata) + if (bundle->method_elm->has_udata) uverbs_fill_udata(bundle, &pbundle->bundle.driver_udata, UVERBS_ATTR_UHW_IN, UVERBS_ATTR_UHW_OUT); else @@ -451,7 +451,7 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, * assume that the driver wrote to its UHW_OUT and flag userspace * appropriately. */ - if (!ret && pbundle->method_elm->has_udata) { + if (!ret && bundle->method_elm->has_udata) { const struct uverbs_attr *attr = uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT); @@ -472,7 +472,7 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, static void bundle_destroy(struct bundle_priv *pbundle, bool commit) { - unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len; + unsigned int key_bitmap_len = pbundle->bundle.method_elm->key_bitmap_len; struct uverbs_attr_bundle *bundle = container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr); struct bundle_alloc_head *memblock; @@ -560,7 +560,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, } /* Space for the pbundle->bundle.attrs flex array */ - pbundle->method_elm = method_elm; + pbundle->bundle.method_elm = method_elm; pbundle->method_key = attrs_iter.index; pbundle->bundle.ufile = ufile; pbundle->bundle.context = NULL; /* only valid if bundle has uobject */ @@ -569,10 +569,12 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, pbundle->radix_slots_len = radix_tree_chunk_size(&attrs_iter); pbundle->user_attrs = user_attrs; - pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len * - sizeof(*container_of(&pbundle->bundle, - struct uverbs_attr_bundle, hdr)->attrs), - sizeof(*pbundle->internal_buffer)); + pbundle->internal_used = ALIGN( + pbundle->bundle.method_elm->key_bitmap_len * + sizeof(*container_of(&pbundle->bundle, + struct uverbs_attr_bundle, hdr) + ->attrs), + sizeof(*pbundle->internal_buffer)); memset(pbundle->bundle.attr_present, 0, sizeof(pbundle->bundle.attr_present)); memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize)); diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index e2af17da3e32..c89428030d61 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -635,6 +635,7 @@ struct uverbs_attr_bundle { struct ib_uverbs_file *ufile; struct ib_ucontext *context; struct ib_uobject *uobject; + const struct uverbs_api_ioctl_method *method_elm; DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN); ); struct uverbs_attr attrs[]; -- cgit v1.2.3 From c9a40f6531b81baa9619bcc2697ff86896afcce7 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 12 May 2026 02:42:09 -0700 Subject: RDMA/mana_ib: Report max_msg_sz in mana_ib_query_port Report max_msg_sz for mana_ib, which is 16MB. Fixes: 4bda1d5332ec ("RDMA/mana_ib: Implement port parameters") Signed-off-by: Shiraz Saleem Signed-off-by: Konstantin Taranov Link: https://patch.msgid.link/20260512094209.264955-1-kotaranov@linux.microsoft.com Reviewed-by: Long Li Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mana/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index ac5e75dd3494..afc2fc124fee 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -606,6 +606,7 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 port, if (mana_ib_is_rnic(dev)) { props->gid_tbl_len = 16; props->ip_gids = true; + props->max_msg_sz = SZ_16M; if (port == 1) props->port_cap_flags = IB_PORT_CM_SUP; } -- cgit v1.2.3 From 5b74373390113fba798a76b483837029ab010fef Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Thu, 14 May 2026 19:38:34 +0800 Subject: RDMA/rtrs: Fix use-after-free in path file creation cleanup In the error path of rtrs_srv_create_path_files(), the sysfs root folders may already have been created and srv_path->kobj may already have been initialized. If a later step fails, the cleanup currently calls kobject_put(&srv_path->kobj) before rtrs_srv_destroy_once_sysfs_root_folders(srv_path). kobject_put() may drop the last reference to srv_path->kobj and invoke the release callback, rtrs_srv_release(), which frees srv_path. The following call to rtrs_srv_destroy_once_sysfs_root_folders(srv_path) then dereferences srv_path internally to access srv_path->srv, resulting in a use-after-free. This failure path is reached before rtrs_srv_create_path_files() returns success, so the successful-path lifetime handling is not involved. Fix this by destroying the sysfs root folders before calling kobject_put(&srv_path->kobj), so srv_path is still valid while the helper accesses it. This issue was found by a static analysis tool I am developing. Fixes: ae4c81644e91 ("RDMA/rtrs-srv: Rename rtrs_srv_sess to rtrs_srv_path") Signed-off-by: Guangshuo Li Link: https://patch.msgid.link/20260514113834.865530-1-lgs201920130244@gmail.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 51727c7d710c..9dd9141c86a5 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -295,8 +295,8 @@ remove_group: put_kobj: kobject_del(&srv_path->kobj); destroy_root: - kobject_put(&srv_path->kobj); rtrs_srv_destroy_once_sysfs_root_folders(srv_path); + kobject_put(&srv_path->kobj); return err; } -- cgit v1.2.3 From 33d35975cbead3fa6b738ee57e5e45e14fbe0886 Mon Sep 17 00:00:00 2001 From: Jonas Jelonek Date: Fri, 15 May 2026 14:31:03 +0000 Subject: net: pse-pd: fix sign on -ENOENT check in of_load_pse_pis() of_count_phandle_with_args() returns the count on success and a negative errno on failure, including -ENOENT when the "pairsets" property is absent. The existing comparison in of_load_pse_pis() checks against ENOENT (positive 2) instead of -ENOENT, so the branch is taken for any error return: legitimate DTs that omit "pairsets" trigger a spurious "wrong number of pairsets" error and probe fails with -EINVAL. Compare against -ENOENT so a missing "pairsets" property is correctly treated as "this PI has no pairsets, continue". Fixes: 9be9567a7c59 ("net: pse-pd: Add support for PSE PIs") Cc: stable@vger.kernel.org Signed-off-by: Jonas Jelonek Acked-by: Oleksij Rempel Link: https://patch.msgid.link/20260515143103.1721888-1-jelonek.jonas@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/pse_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c index 87aa4f4e9724..69dbdbde9d71 100644 --- a/drivers/net/pse-pd/pse_core.c +++ b/drivers/net/pse-pd/pse_core.c @@ -210,7 +210,7 @@ static int of_load_pse_pis(struct pse_controller_dev *pcdev) ret = of_load_pse_pi_pairsets(node, &pi, ret); if (ret) goto out; - } else if (ret != ENOENT) { + } else if (ret != -ENOENT) { dev_err(pcdev->dev, "error: wrong number of pairsets. Should be 1 or 2, got %d (%pOF)\n", ret, node); -- cgit v1.2.3 From 9b244c242bec48b37e82b89787afd6a4c43457e1 Mon Sep 17 00:00:00 2001 From: Dawei Feng Date: Fri, 15 May 2026 23:18:26 +0800 Subject: octeontx2-pf: avoid double free of pool->stack on AQ init failure otx2_pool_aq_init() frees pool->stack when mailbox sync or retry allocation fails, but leaves the pointer unchanged. Later, otx2_sq_aura_pool_init() unwinds the partial setup through otx2_aura_pool_free(), which frees pool->stack again. The CN20K-specific cn20k_pool_aq_init() implementation has the same bug in its corresponding error path. Set pool->stack to NULL immediately after the local free so the shared cleanup path does not free the same stack again while cleaning up partially initialized pool state. The bug was first flagged by an experimental analysis tool we are developing for kernel memory-management bugs while analyzing v6.13-rc1. The tool is still under development and is not yet publicly available. Manual inspection confirms that the bug is still present in v7.1-rc3. Runtime validation was not performed because reproducing this path requires OcteonTX2/CN20K hardware. Fixes: caa2da34fd25 ("octeontx2-pf: Initialize and config queues") Fixes: d322fbd17203 ("octeontx2-pf: Initialize cn20k specific aura and pool contexts") Cc: stable@vger.kernel.org Signed-off-by: Zilin Guan Signed-off-by: Dawei Feng Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260515151826.1005397-1-dawei.feng@seu.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c | 2 ++ drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c index a5a8f4558717..dbf173196608 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c @@ -619,11 +619,13 @@ static int cn20k_pool_aq_init(struct otx2_nic *pfvf, u16 pool_id, err = otx2_sync_mbox_msg(&pfvf->mbox); if (err) { qmem_free(pfvf->dev, pool->stack); + pool->stack = NULL; return err; } aq = otx2_mbox_alloc_msg_npa_cn20k_aq_enq(&pfvf->mbox); if (!aq) { qmem_free(pfvf->dev, pool->stack); + pool->stack = NULL; return -ENOMEM; } } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 971fcab1c248..3d253132a17f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1482,11 +1482,13 @@ int otx2_pool_aq_init(struct otx2_nic *pfvf, u16 pool_id, err = otx2_sync_mbox_msg(&pfvf->mbox); if (err) { qmem_free(pfvf->dev, pool->stack); + pool->stack = NULL; return err; } aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); if (!aq) { qmem_free(pfvf->dev, pool->stack); + pool->stack = NULL; return -ENOMEM; } } -- cgit v1.2.3 From 4df78ff02629c7729168f0696a7a2123c389818d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 17 May 2026 15:11:21 +0300 Subject: bridge: mcast: Fix a possible use-after-free when removing a bridge port When per-VLAN multicast snooping is enabled, the bridge iterates over all the bridge ports, disables the per-port multicast context on each port and enables the per-{port, VLAN} multicast contexts instead. The reverse happens when per-VLAN multicast snooping is disabled. When global multicast snooping is enabled, the bridge iterates over all the bridge ports and enables the per-port multicast context on each port. The reverse happens when multicast snooping is disabled. The above scheme can result in a situation where both types of contexts (per-port and per-{port, VLAN}) are enabled on a single bridge port: # ip link add name br1 up type bridge mcast_snooping 1 mcast_querier 1 vlan_filtering 1 # ip link add name dummy1 up master br1 type dummy # ip link set dev br1 type bridge mcast_vlan_snooping 1 # ip link set dev br1 type bridge mcast_snooping 0 # ip link set dev br1 type bridge mcast_snooping 1 This is not intended and it is a problem since the commit cited below. Prior to this commit, when removing a bridge port, br_multicast_disable_port() would disable the per-port multicast context and the per-{port, VLAN} multicast contexts would get disabled when flushing VLANs. After this commit, br_multicast_disable_port() only disables the per-port multicast context if per-VLAN multicast snooping is disabled. If both types of contexts were enabled on the port when it was removed, the per-port multicast context would remain enabled when freeing the bridge port, leading to a use-after-free [1]. Fix by preventing the bridge from enabling / disabling the per-port multicast contexts when toggling global multicast snooping if per-VLAN multicast snooping is enabled. [1] ODEBUG: free active (active state 0) object: ffff88810f8bda78 object type: timer_list hint: br_ip6_multicast_port_query_expired (net/bridge/br_multicast.c:1927) WARNING: lib/debugobjects.c:629 at debug_print_object+0x1b1/0x3e0, CPU#5: swapper/5/0 [...] Call Trace: __debug_check_no_obj_freed (lib/debugobjects.c:1116) kfree (mm/slub.c:2620 mm/slub.c:6250 mm/slub.c:6565) kobject_cleanup (lib/kobject.c:689) rcu_do_batch (kernel/rcu/tree.c:2617) rcu_core (kernel/rcu/tree.c:2869) handle_softirqs (kernel/softirq.c:622) __irq_exit_rcu (kernel/softirq.c:656 kernel/softirq.c:496 kernel/softirq.c:735) irq_exit_rcu (kernel/softirq.c:752) sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1061 (discriminator 47) arch/x86/kernel/apic/apic.c:1061 (discriminator 47)) Fixes: 4b30ae9adb04 ("net: bridge: mcast: re-implement br_multicast_{enable, disable}_port functions") Reported-by: syzbot+ae231e0552fa77b26ea1@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/87qznowlfs.ffs@tglx/ Reported-by: Thomas Gleixner Acked-by: Nikolay Aleksandrov Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20260517121122.188333-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 881d866d687a..2eef4f3345cd 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4640,10 +4640,24 @@ static void br_multicast_start_querier(struct net_bridge_mcast *brmctx, rcu_read_unlock(); } -static void br_multicast_del_grps(struct net_bridge *br) +static void br_multicast_enable_all_ports(struct net_bridge *br) { struct net_bridge_port *port; + if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) + return; + + list_for_each_entry(port, &br->port_list, list) + __br_multicast_enable_port_ctx(&port->multicast_ctx); +} + +static void br_multicast_disable_all_ports(struct net_bridge *br) +{ + struct net_bridge_port *port; + + if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) + return; + list_for_each_entry(port, &br->port_list, list) __br_multicast_disable_port_ctx(&port->multicast_ctx); } @@ -4651,7 +4665,6 @@ static void br_multicast_del_grps(struct net_bridge *br) int br_multicast_toggle(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - struct net_bridge_port *port; bool change_snoopers = false; int err = 0; @@ -4668,7 +4681,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val, br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val); if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) { change_snoopers = true; - br_multicast_del_grps(br); + br_multicast_disable_all_ports(br); goto unlock; } @@ -4676,8 +4689,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val, goto unlock; br_multicast_open(br); - list_for_each_entry(port, &br->port_list, list) - __br_multicast_enable_port_ctx(&port->multicast_ctx); + br_multicast_enable_all_ports(br); change_snoopers = true; -- cgit v1.2.3 From ae743a8ca8dbd66fb67c461a27460b2b21c376ab Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 17 May 2026 15:11:22 +0300 Subject: selftests: bridge_vlan_mcast: Test toggling of multicast snooping Test toggling of multicast snooping when per-VLAN multicast snooping is enabled. The test always passes, but without "bridge: mcast: Fix possible use-after-free when removing a bridge port" it results in a splat. Acked-by: Nikolay Aleksandrov Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20260517121122.188333-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/bridge_vlan_mcast.sh | 30 +++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh index e8031f68200a..ebdb4c790a5d 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh @@ -4,7 +4,7 @@ ALL_TESTS="vlmc_control_test vlmc_querier_test vlmc_igmp_mld_version_test \ vlmc_last_member_test vlmc_startup_query_test vlmc_membership_test \ vlmc_querier_intvl_test vlmc_query_intvl_test vlmc_query_response_intvl_test \ - vlmc_router_port_test vlmc_filtering_test" + vlmc_router_port_test vlmc_filtering_test vlmc_mcast_toggle_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -537,6 +537,34 @@ vlmc_filtering_test() log_test "Disable multicast vlan snooping when vlan filtering is disabled" } +vlmc_mcast_toggle_test() +{ + RET=0 + + ip link add name br1-mcast up type bridge mcast_snooping 1 mcast_querier 1 vlan_filtering 1 + ip link add name dummy1-mcast up master br1-mcast type dummy + + # Enabling per-VLAN multicast snooping should disable the per-port + # multicast context on "dummy1-mcast". + ip link set dev br1-mcast type bridge mcast_vlan_snooping 1 + + # Toggling multicast snooping on the bridge should not affect the + # per-port multicast context on "dummy1-mcast" given that per-VLAN + # multicast snooping is enabled. + ip link set dev br1-mcast type bridge mcast_snooping 0 + ip link set dev br1-mcast type bridge mcast_snooping 1 + + # If both the per-port and per-{port, VLAN} multicast contexts are + # enabled on "dummy1-mcast", removing it from the bridge will result + # in a splat. + ip link set dev dummy1-mcast nomaster + + log_test "Toggling mcast snooping with per-VLAN mcast snooping enabled" + + ip link del dev dummy1-mcast + ip link del dev br1-mcast +} + trap cleanup EXIT setup_prepare -- cgit v1.2.3 From 960e77ce14a83ef7f226e8e4b4d75765633ba48b Mon Sep 17 00:00:00 2001 From: Nerijus Bendžiūnas Date: Sat, 16 May 2026 18:02:51 +0300 Subject: net: phy: skip EEE advertisement write when autoneg is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit genphy_c45_an_config_eee_aneg() writes the EEE advertisement to the auto-negotiation device's MMD register space (MDIO_MMD_AN, register MDIO_AN_EEE_ADV). These registers are read by the link partner only during auto-negotiation, so writing them while autoneg is disabled cannot influence the link. On some PHYs (e.g. Broadcom BCM54213PE) the write nevertheless reaches the chip and disturbs the receive datapath. Concretely, running ethtool -s eth0 speed 100 duplex full autoneg off ethtool --set-eee eth0 eee off leaves eth0 with TX working and RX completely silent on a Raspberry Pi 4 / CM4 board (bcmgenet + BCM54213PE in rgmii-rxid). Switching back to autoneg recovers the link. Prior to commit f26a29a038ee ("net: phy: ensure that genphy_c45_an_config_eee_aneg() sees new value of phydev->eee_cfg.eee_enabled"), the disable path was effectively a no-op because the helper read the stale eee_cfg.eee_enabled, so the underlying PHY behavior never surfaced. Bisected on rpi-6.12.y between commits 83943264 (good) and effcbc88 (bad) to f26a29a038ee. Fixes: f26a29a038ee ("net: phy: ensure that genphy_c45_an_config_eee_aneg() sees new value of phydev->eee_cfg.eee_enabled") Cc: stable@vger.kernel.org Signed-off-by: Nerijus Bendžiūnas Reviewed-by: Nicolai Buchwitz Tested-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260516150251.879680-1-nerijus.bendziunas@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy-c45.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index d48aa7231b37..126951741428 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -940,6 +940,14 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_eee_abilities); */ int genphy_c45_an_config_eee_aneg(struct phy_device *phydev) { + /* Writing MMD AN advertisements while autoneg is disabled has no + * effect on link-partner negotiation, but on some PHYs (e.g. the + * Broadcom BCM54213PE) the write itself disturbs the receive + * datapath. Skip it. + */ + if (phydev->autoneg == AUTONEG_DISABLE) + return 0; + if (!phydev->eee_cfg.eee_enabled) { __ETHTOOL_DECLARE_LINK_MODE_MASK(adv) = {}; -- cgit v1.2.3 From 3655063e083889ed4b79b7dda9cec65478dce09a Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Mon, 18 May 2026 10:23:09 +0200 Subject: net: phy: honor eee_disabled_modes in phy_support_eee() phy_support_eee() copies supported_eee into advertising_eee unconditionally, overwriting any filtering applied during phy_probe() based on DT eee-broken-* properties or driver-populated eee_disabled_modes. MAC drivers that call phy_support_eee() after probe (e.g. bcmgenet, fec, lan743x, lan78xx, r8169) then cause the PHY to advertise EEE for modes the user marked as broken. The symptom is that ethtool --show-eee on the local interface reports "not supported" (supported & ~eee_disabled_modes is empty) while the link partner sees EEE negotiated and active. phy_probe() already filters advertising_eee via eee_disabled_modes after calling of_set_phy_eee_broken(). Apply the same mask in phy_support_eee() so the filtering survives the copy. Fixes: 49168d1980e2 ("net: phy: Add phy_support_eee() indicating MAC support EEE") Signed-off-by: Nicolai Buchwitz Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260518-devel-phy-support-eee-fix-v2-1-05b52626fa68@tipi-net.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index c2cdf1ae3542..83b074bc4a8f 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2903,7 +2903,8 @@ EXPORT_SYMBOL_GPL(phy_advertise_eee_all); */ void phy_support_eee(struct phy_device *phydev) { - linkmode_copy(phydev->advertising_eee, phydev->supported_eee); + linkmode_andnot(phydev->advertising_eee, phydev->supported_eee, + phydev->eee_disabled_modes); phydev->eee_cfg.tx_lpi_enabled = true; phydev->eee_cfg.eee_enabled = true; -- cgit v1.2.3 From 8baa7506d793f0636e3f6f01b01ef7be19674d06 Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Mon, 18 May 2026 10:23:10 +0200 Subject: net: phy: honor eee_disabled_modes in phy_advertise_eee_all() phy_advertise_eee_all() copies supported_eee into advertising_eee unconditionally, overwriting any filtering applied during phy_probe() based on DT eee-broken-* properties or driver-populated eee_disabled_modes. genphy_c45_ethtool_set_eee() calls this helper when user space passes an empty advertisement, undoing the filtering. Apply the same eee_disabled_modes mask in phy_advertise_eee_all() so the filtering survives the copy, matching the pattern in phy_probe() and phy_support_eee(). Fixes: b64691274f5d ("net: phy: add helper phy_advertise_eee_all") Signed-off-by: Nicolai Buchwitz Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260518-devel-phy-support-eee-fix-v2-2-05b52626fa68@tipi-net.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 83b074bc4a8f..3370eb822017 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2877,7 +2877,8 @@ EXPORT_SYMBOL(phy_advertise_supported); */ void phy_advertise_eee_all(struct phy_device *phydev) { - linkmode_copy(phydev->advertising_eee, phydev->supported_eee); + linkmode_andnot(phydev->advertising_eee, phydev->supported_eee, + phydev->eee_disabled_modes); } EXPORT_SYMBOL_GPL(phy_advertise_eee_all); -- cgit v1.2.3 From d4ea0dfd75011b78cebf3808f98ac4c4f51a6fb9 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Sun, 17 May 2026 20:30:59 +0200 Subject: ipv6: ioam: add NULL check for idev in ipv6_hop_ioam() Reported by Sashiko: The function ipv6_hop_ioam() accesses __in6_dev_get(skb->dev)->cnf.ioam6_enabled without validating the returned idev pointer. Because addrconf_ifdown() can concurrently clear dev->ip6_ptr via RCU, __in6_dev_get() can return NULL during interface teardown, which could cause a NULL pointer dereference when processing an IOAM Hop-by-Hop option. Let's add a check and use SKB_DROP_REASON_IPV6DISABLED accordingly. Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace") Cc: stable@vger.kernel.org Signed-off-by: Justin Iurman Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260517183059.29140-1-justin.iurman@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/exthdrs.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 03cbce842c1a..47c5502a34a2 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -910,16 +910,27 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) { + enum skb_drop_reason drop_reason; struct ioam6_trace_hdr *trace; struct ioam6_namespace *ns; + struct inet6_dev *idev; struct ioam6_hdr *hdr; + drop_reason = SKB_DROP_REASON_IP_INHDR; + /* Bad alignment (must be 4n-aligned) */ if (optoff & 3) goto drop; + /* Does the device still have IPv6 configuration? */ + idev = __in6_dev_get(skb->dev); + if (!idev) { + drop_reason = SKB_DROP_REASON_IPV6DISABLED; + goto drop; + } + /* Ignore if IOAM is not enabled on ingress */ - if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled)) + if (!READ_ONCE(idev->cnf.ioam6_enabled)) goto ignore; /* Truncated Option header */ @@ -972,7 +983,7 @@ ignore: return true; drop: - kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); + kfree_skb_reason(skb, drop_reason); return false; } -- cgit v1.2.3 From be309f8eae8b474a4a617eaae01324da996fc719 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 18 May 2026 18:51:30 +0200 Subject: af_unix: Fix UAF read of tail->len in unix_stream_data_wait() unix_stream_data_wait() does skb_peek_tail(&sk->sk_receive_queue) without holding any lock that prevents SKBs on that queue from being dequeued and freed. This has been the case since commit 79f632c71bea ("unix/stream: fix peeking with an offset larger than data in queue"). The first consequence of this is that the pointer comparison `tail != last` can be false even if `last` semantically refers to an already-freed SKB while `tail` is a new SKB allocated at the same address; which can cause unix_stream_data_wait() to wrongly keep blocking after new data has arrived, but only in a weird scenario where a peeking recv() and a normal recv() on the same socket are racing, which is probably not a real problem. But since commit 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets"), `tail` is actually dereferenced, which can cause UAF in the following race scenario (where test_setup() runs single-threaded, and afterwards, test_thread1() and test_thread2() run concurrently in two threads: ``` static int socks[2]; void test_setup(void) { socketpair(AF_UNIX, SOCK_STREAM, 0, socks); send(socks[1], "A", 1, 0); int peekoff = 1; setsockopt(socks[0], SOL_SOCKET, SO_PEEK_OFF, &peekoff, sizeof(peekoff)); } void test_thread1(void) { char dummy; recv(socks[0], &dummy, 1, MSG_PEEK); } void test_thread2(void) { char dummy; recv(socks[0], &dummy, 1, 0); shutdown(socks[1], SHUT_WR); } ``` when racing like this: ``` thread1 thread2 unix_stream_read_generic mutex_lock(&u->iolock) skb_peek(&sk->sk_receive_queue) skb_peek_next(skb, &sk->sk_receive_queue) mutex_unlock(&u->iolock) unix_stream_read_generic unix_state_lock(sk) skb_peek(&sk->sk_receive_queue) unix_state_unlock(sk) unix_stream_data_wait unix_state_lock(sk) tail = skb_peek_tail(&sk->sk_receive_queue) spin_lock(&sk->sk_receive_queue.lock) __skb_unlink(skb, &sk->sk_receive_queue) spin_unlock(&sk->sk_receive_queue.lock) consume_skb(skb) [frees the SKB] `tail != last`: false `tail`: true `tail->len != last_len` ***UAF*** ``` Fix the UAF by removing the read of tail->len; checking tail->len would only make sense if SKBs in the receive queue of a UNIX socket could grow, which can no longer happen. Kuniyuki explained: > When commit 869e7c62486e ("net: af_unix: implement stream sendpage > support") added sendpage() support, data could be appended to the last > skb in the receiver's queue. > > That's why we needed to check if the length of the last skb was changed > while waiting for new data in unix_stream_data_wait(). > > However, commit a0dbf5f818f9 ("af_unix: Support MSG_SPLICE_PAGES") and > commit 57d44a354a43 ("unix: Convert unix_stream_sendpage() to use > MSG_SPLICE_PAGES") refactored sendmsg(), and now data is always added > to a new skb. That means this fix is not suitable for kernels before 6.5. Fixes: 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets") Cc: stable@vger.kernel.org # 6.5.x Signed-off-by: Jann Horn Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260518-b4-unix-recv-wait-hotfix-v2-1-83e29ce8ad31@google.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1cbf36ea043b..dc71ed79be4a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2711,8 +2711,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) * Sleep until more data has arrived. But check for races.. */ static long unix_stream_data_wait(struct sock *sk, long timeo, - struct sk_buff *last, unsigned int last_len, - bool freezable) + struct sk_buff *last, bool freezable) { unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE; struct sk_buff *tail; @@ -2725,7 +2724,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, tail = skb_peek_tail(&sk->sk_receive_queue); if (tail != last || - (tail && tail->len != last_len) || sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current) || @@ -2921,7 +2919,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, int flags = state->flags; bool check_creds = false; struct scm_cookie scm; - unsigned int last_len; struct unix_sock *u; int copied = 0; int err = 0; @@ -2967,7 +2964,6 @@ redo: goto unlock; } last = skb = skb_peek(&sk->sk_receive_queue); - last_len = last ? last->len : 0; again: #if IS_ENABLED(CONFIG_AF_UNIX_OOB) @@ -3001,8 +2997,7 @@ again: mutex_unlock(&u->iolock); - timeo = unix_stream_data_wait(sk, timeo, last, - last_len, freezable); + timeo = unix_stream_data_wait(sk, timeo, last, freezable); if (signal_pending(current)) { err = sock_intr_errno(timeo); @@ -3019,7 +3014,6 @@ unlock: while (skip >= unix_skb_len(skb)) { skip -= unix_skb_len(skb); last = skb; - last_len = skb->len; skb = skb_peek_next(skb, &sk->sk_receive_queue); if (!skb) goto again; @@ -3094,7 +3088,6 @@ unlock: skip = 0; last = skb; - last_len = skb->len; unix_state_lock(sk); skb = skb_peek_next(skb, &sk->sk_receive_queue); if (skb) -- cgit v1.2.3 From 0cb5a74faa3bdcfa3b18735d554e12c0f615e35d Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Mon, 18 May 2026 15:44:57 +0200 Subject: net: airoha: Fix NPU RX DMA descriptor bits In an internal review from Airoha, it was notice that the RX DMA descriptor bits and mask are wrong. These values probably refer to an old NPU firmware never published. The previous value works correctly but it was reported that in some specific condition in mixed scenario with both Ethernet and WiFi offload it's possible that RX DMA descriptor signal wrong value with the problem to the RX ring or packets getting dropped. To handle these specific scenario, apply the new suggested bits mask from Airoha. Correct functionality of both AN7581 NPU and MT7996 variant were verified and confirmed working. Fixes: a7fc8c641cab ("net: airoha: Fix npu rx DMA definitions") Signed-off-by: Christian Marangi Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260518134530.3683-1-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/soc/airoha/airoha_offload.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h index d01ef4a6b3d7..7589fccfeef6 100644 --- a/include/linux/soc/airoha/airoha_offload.h +++ b/include/linux/soc/airoha/airoha_offload.h @@ -71,9 +71,9 @@ static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev, #define NPU_RX1_DESC_NUM 512 /* CTRL */ -#define NPU_RX_DMA_DESC_LAST_MASK BIT(27) -#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(26, 14) -#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(13, 1) +#define NPU_RX_DMA_DESC_LAST_MASK BIT(29) +#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(28, 15) +#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(14, 1) #define NPU_RX_DMA_DESC_DONE_MASK BIT(0) /* INFO */ #define NPU_RX_DMA_PKT_COUNT_MASK GENMASK(31, 29) -- cgit v1.2.3 From 0e46b6635b03d29807f810c3b415c4755a3f958d Mon Sep 17 00:00:00 2001 From: "Nikhil P. Rao" Date: Fri, 15 May 2026 21:29:05 +0000 Subject: pds_core: fix error handling in pdsc_devcmd_wait Fix two cases where pdsc_devcmd_wait() returns stale success from the completion register instead of an error: 1. FW crash: If firmware stops running, the wait loop breaks early with running=false. The condition "if ((!done || timeout) && running)" is false, so error handling is bypassed and stale status is returned. Check !running first and return -ENXIO. 2. Timeout: If a command times out, err is set to -ETIMEDOUT but then overwritten by pdsc_err_to_errno(status) which reads stale status. Return -ETIMEDOUT immediately after cleaning up. Both errors now propagate to pdsc_devcmd_locked() which queues health_work for recovery. Fixes: 45d76f492938 ("pds_core: set up device and adminq") Signed-off-by: Nikhil P. Rao Link: https://patch.msgid.link/20260515212907.998028-1-nikhil.rao@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/dev.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index 2e1d0d01d03a..bded6b33289c 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -162,12 +162,19 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds) dev_dbg(dev, "DEVCMD %d %s after %ld secs\n", opcode, pdsc_devcmd_str(opcode), duration / HZ); - if ((!done || timeout) && running) { + if (!running) { + dev_err(dev, "DEVCMD %d %s fw not running\n", + opcode, pdsc_devcmd_str(opcode)); + pdsc_devcmd_clean(pdsc); + return -ENXIO; + } + + if (!done || timeout) { dev_err(dev, "DEVCMD %d %s timeout, done %d timeout %d max_seconds=%d\n", opcode, pdsc_devcmd_str(opcode), done, timeout, max_seconds); - err = -ETIMEDOUT; pdsc_devcmd_clean(pdsc); + return -ETIMEDOUT; } status = pdsc_devcmd_status(pdsc); -- cgit v1.2.3 From dc416e32baaeb620b9809e9e25fc7b30889686e9 Mon Sep 17 00:00:00 2001 From: "Nikhil P. Rao" Date: Fri, 15 May 2026 21:29:07 +0000 Subject: pds_core: fix debugfs_lookup dentry leak and error handling debugfs_lookup() returns a dentry with an elevated reference count that must be released with dput(). The current code discards the returned dentry without calling dput(), causing a reference leak on every firmware reset recovery. Additionally, when CONFIG_DEBUG_FS is disabled, debugfs_lookup() returns ERR_PTR(-ENODEV), not NULL. The current check passes for error pointers and would call dput() on an invalid pointer, causing a crash. Fixes: bc90fbe0c318 ("pds_core: Rework teardown/setup flow to be more common") Signed-off-by: Nikhil P. Rao Link: https://patch.msgid.link/20260515212907.998028-3-nikhil.rao@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/debugfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c index 04c5e3abd8d7..810a0cd9bcac 100644 --- a/drivers/net/ethernet/amd/pds_core/debugfs.c +++ b/drivers/net/ethernet/amd/pds_core/debugfs.c @@ -64,9 +64,14 @@ DEFINE_SHOW_ATTRIBUTE(identity); void pdsc_debugfs_add_ident(struct pdsc *pdsc) { + struct dentry *dentry; + /* This file will already exist in the reset flow */ - if (debugfs_lookup("identity", pdsc->dentry)) + dentry = debugfs_lookup("identity", pdsc->dentry); + if (!IS_ERR_OR_NULL(dentry)) { + dput(dentry); return; + } debugfs_create_file("identity", 0400, pdsc->dentry, pdsc, &identity_fops); -- cgit v1.2.3 From 49b18315be4eecfc36b75f4aecb4d40a87d68a20 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 20 May 2026 04:40:59 +0200 Subject: bpf: Reject NULL data/sig in bpf_verify_pkcs7_signature __bpf_dynptr_data() can return NULL (FILE dynptrs, any non-contiguous backing). bpf_verify_pkcs7_signature() forwards the pointer to verify_pkcs7_signature() unchecked, causing a NULL deref in asn1_ber_decoder() reachable from a sleepable BPF LSM at lsm.s/bpf. NULL-check both pointers and reject with -EINVAL. Mirrors the guards already in kernel/bpf/crypto.c. Fixes: 865b0566d8f1 ("bpf: Add bpf_verify_pkcs7_signature() kfunc") Reported-by: Xianrui Dong Signed-off-by: KP Singh Reviewed-by: Amery Hung Acked-by: Song Liu Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20260520024059.313468-1-kpsingh@kernel.org Signed-off-by: Kumar Kartikeya Dwivedi --- kernel/bpf/helpers.c | 5 +++++ tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 2bb60200c266..b5314c9fed3c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -4241,8 +4241,13 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, data_len = __bpf_dynptr_size(data_ptr); data = __bpf_dynptr_data(data_ptr, data_len); + if (!data) + return -EINVAL; + sig_len = __bpf_dynptr_size(sig_ptr); sig = __bpf_dynptr_data(sig_ptr, sig_len); + if (!sig) + return -EINVAL; return verify_pkcs7_signature(data, data_len, sig, sig_len, trusted_keyring->key, diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c index 8cd298b78e44..04aaf4c9cf5e 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c @@ -14,7 +14,7 @@ static struct { const char *prog_name; int expected_runtime_err; } kfunc_dynptr_tests[] = { - {"dynptr_data_null", -EBADMSG}, + {"dynptr_data_null", -EINVAL}, }; static bool kfunc_not_supported; -- cgit v1.2.3 From e36a88b33cbe3dcbb90ac2245ba6149dd5793370 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 19 May 2026 14:11:52 +0200 Subject: ALSA: hda: Move irq pending work into hda-intel stream Currently, the delayed IRQ handling for PCM streams is managed in a single work embedded in hda_intel, but this is basically a per-stream thing. Due to the single work, we can't cancel the work properly at closing each stream, for example. For making the IRQ pending work to be stream-based, this patch changes the following: - An extended version of azx_dev (i.e. the hd-audio stream object) is defined for snd-hda-intel - The irq_pending flag and irq_pending_work are moved to hda_intel_stream, so that they can be hda-intel stream specific - The stream creation and assignment are refactored so that snd-hda-intel can handle individually; the snd-hda-intel specific workaround for stream tags is also moved to snd-hda-intel itself instead of the common code - The irq pending work is canceled properly at free / shutdown While we're at it, changed the bit field flag to bool, as the bit field doesn't help much in our case. Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260519121157.28477-1-tiwai@suse.de --- sound/hda/common/controller.c | 26 +++-------- sound/hda/common/hda_controller.h | 12 ++++- sound/hda/controllers/intel.c | 92 ++++++++++++++++++++++++++------------- sound/hda/controllers/intel.h | 15 +++++-- 4 files changed, 89 insertions(+), 56 deletions(-) diff --git a/sound/hda/common/controller.c b/sound/hda/common/controller.c index 5934e5cdfdfd..89e63a53d683 100644 --- a/sound/hda/common/controller.c +++ b/sound/hda/common/controller.c @@ -1264,19 +1264,17 @@ int azx_codec_configure(struct azx *chip) } EXPORT_SYMBOL_GPL(azx_codec_configure); -static int stream_direction(struct azx *chip, unsigned char index) +void azx_add_stream(struct azx *chip, struct azx_dev *azx_dev, int idx, int tag) { - if (index >= chip->capture_index_offset && - index < chip->capture_index_offset + chip->capture_streams) - return SNDRV_PCM_STREAM_CAPTURE; - return SNDRV_PCM_STREAM_PLAYBACK; + snd_hdac_stream_init(azx_bus(chip), azx_stream(azx_dev), idx, + azx_stream_direction(chip, idx), tag); } +EXPORT_SYMBOL_GPL(azx_add_stream); /* initialize SD streams */ int azx_init_streams(struct azx *chip) { int i; - int stream_tags[2] = { 0, 0 }; /* initialize each stream (aka device) * assign the starting bdl address to each stream (device) @@ -1284,24 +1282,10 @@ int azx_init_streams(struct azx *chip) */ for (i = 0; i < chip->num_streams; i++) { struct azx_dev *azx_dev = kzalloc_obj(*azx_dev); - int dir, tag; if (!azx_dev) return -ENOMEM; - - dir = stream_direction(chip, i); - /* stream tag must be unique throughout - * the stream direction group, - * valid values 1...15 - * use separate stream tag if the flag - * AZX_DCAPS_SEPARATE_STREAM_TAG is used - */ - if (chip->driver_caps & AZX_DCAPS_SEPARATE_STREAM_TAG) - tag = ++stream_tags[dir]; - else - tag = i + 1; - snd_hdac_stream_init(azx_bus(chip), azx_stream(azx_dev), - i, dir, tag); + azx_add_stream(chip, azx_dev, i, i + 1); } return 0; diff --git a/sound/hda/common/hda_controller.h b/sound/hda/common/hda_controller.h index 7434f38038a0..bc8ee4cc2401 100644 --- a/sound/hda/common/hda_controller.h +++ b/sound/hda/common/hda_controller.h @@ -57,13 +57,12 @@ enum { struct azx_dev { struct hdac_stream core; - unsigned int irq_pending:1; /* * For VIA: * A flag to ensure DMA position is 0 * when link position is not greater than FIFO size */ - unsigned int insufficient:1; + bool insufficient; }; #define azx_stream(dev) (&(dev)->core) @@ -206,6 +205,15 @@ int azx_bus_init(struct azx *chip, const char *model); int azx_probe_codecs(struct azx *chip, unsigned int max_slots); int azx_codec_configure(struct azx *chip); int azx_init_streams(struct azx *chip); +void azx_add_stream(struct azx *chip, struct azx_dev *s, int idx, int tag); void azx_free_streams(struct azx *chip); +static inline int azx_stream_direction(struct azx *chip, unsigned char index) +{ + if (index >= chip->capture_index_offset && + index < chip->capture_index_offset + chip->capture_streams) + return SNDRV_PCM_STREAM_CAPTURE; + return SNDRV_PCM_STREAM_PLAYBACK; +} + #endif /* __SOUND_HDA_CONTROLLER_H */ diff --git a/sound/hda/controllers/intel.c b/sound/hda/controllers/intel.c index c87d75dbd8aa..01477bd4fcb9 100644 --- a/sound/hda/controllers/intel.c +++ b/sound/hda/controllers/intel.c @@ -615,17 +615,17 @@ static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev); /* called from IRQ */ static int azx_position_check(struct azx *chip, struct azx_dev *azx_dev) { - struct hda_intel *hda = container_of(chip, struct hda_intel, chip); + struct hda_intel_stream *istream = azx_dev_to_istream(azx_dev); int ok; ok = azx_position_ok(chip, azx_dev); if (ok == 1) { - azx_dev->irq_pending = 0; + istream->irq_pending = false; return ok; } else if (ok == 0) { /* bogus IRQ, process it later */ - azx_dev->irq_pending = 1; - schedule_work(&hda->irq_pending_work); + istream->irq_pending = true; + schedule_work(&istream->irq_pending_work); } return 0; } @@ -721,11 +721,13 @@ static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev) */ static void azx_irq_pending_work(struct work_struct *work) { - struct hda_intel *hda = container_of(work, struct hda_intel, irq_pending_work); + struct hda_intel_stream *istream = + container_of(work, struct hda_intel_stream, irq_pending_work); + struct azx_dev *azx_dev = &istream->azx_dev; + struct hda_intel *hda = istream->hda; struct azx *chip = &hda->chip; struct hdac_bus *bus = azx_bus(chip); - struct hdac_stream *s; - int pending, ok; + int ok; if (!hda->irq_pending_warned) { dev_info(chip->card->dev, @@ -735,28 +737,25 @@ static void azx_irq_pending_work(struct work_struct *work) } for (;;) { - pending = 0; - spin_lock_irq(&bus->reg_lock); - list_for_each_entry(s, &bus->stream_list, list) { - struct azx_dev *azx_dev = stream_to_azx_dev(s); - if (!azx_dev->irq_pending || - !s->substream || - !s->running) - continue; + scoped_guard(spinlock_irq, &bus->reg_lock) { + if (!istream->irq_pending || + !azx_dev->core.substream || + !azx_dev->core.running) { + return; + } + ok = azx_position_ok(chip, azx_dev); - if (ok > 0) { - azx_dev->irq_pending = 0; - spin_unlock(&bus->reg_lock); - snd_pcm_period_elapsed(s->substream); - spin_lock(&bus->reg_lock); - } else if (ok < 0) { - pending = 0; /* too early */ - } else - pending++; + if (ok < 0) + return; /* too early */ + if (ok > 0) + istream->irq_pending = false; } - spin_unlock_irq(&bus->reg_lock); - if (!pending) + + if (ok) { + snd_pcm_period_elapsed(azx_dev->core.substream); return; + } + msleep(1); } } @@ -767,10 +766,11 @@ static void azx_clear_irq_pending(struct azx *chip) struct hdac_bus *bus = azx_bus(chip); struct hdac_stream *s; - guard(spinlock_irq)(&bus->reg_lock); list_for_each_entry(s, &bus->stream_list, list) { struct azx_dev *azx_dev = stream_to_azx_dev(s); - azx_dev->irq_pending = 0; + struct hda_intel_stream *istream = azx_dev_to_istream(azx_dev); + istream->irq_pending = false; + cancel_work_sync(&istream->irq_pending_work); } } @@ -1797,7 +1797,6 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, if (jackpoll_ms[dev] >= 50 && jackpoll_ms[dev] <= 60000) chip->jackpoll_interval = msecs_to_jiffies(jackpoll_ms[dev]); INIT_LIST_HEAD(&chip->pcm_list); - INIT_WORK(&hda->irq_pending_work, azx_irq_pending_work); INIT_LIST_HEAD(&hda->list); init_vga_switcheroo(chip); init_completion(&hda->probe_wait); @@ -1846,6 +1845,39 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, return 0; } +/* create and assign streams */ +static int hda_init_streams(struct azx *chip) +{ + int i; + int stream_tags[2] = { 0, 0 }; + + for (i = 0; i < chip->num_streams; i++) { + struct hda_intel_stream *s = kzalloc_obj(*s); + int tag, dir; + + if (!s) + return -ENOMEM; + + s->hda = container_of(chip, struct hda_intel, chip); + INIT_WORK(&s->irq_pending_work, azx_irq_pending_work); + + /* stream tag must be unique throughout + * the stream direction group, + * valid values 1...15 + * use separate stream tag if the flag + * AZX_DCAPS_SEPARATE_STREAM_TAG is used + */ + dir = azx_stream_direction(chip, i); + if (chip->driver_caps & AZX_DCAPS_SEPARATE_STREAM_TAG) + tag = ++stream_tags[dir]; + else + tag = i + 1; + azx_add_stream(chip, &s->azx_dev, i, tag); + } + + return 0; +} + static int azx_first_init(struct azx *chip) { int dev = chip->dev_index; @@ -2000,7 +2032,7 @@ static int azx_first_init(struct azx *chip) } /* initialize streams */ - err = azx_init_streams(chip); + err = hda_init_streams(chip); if (err < 0) return err; diff --git a/sound/hda/controllers/intel.h b/sound/hda/controllers/intel.h index 2d1725f86ef1..4efb3b0fc2d8 100644 --- a/sound/hda/controllers/intel.h +++ b/sound/hda/controllers/intel.h @@ -9,9 +9,6 @@ struct hda_intel { struct azx chip; - /* for pending irqs */ - struct work_struct irq_pending_work; - /* sync probing */ struct completion probe_wait; struct delayed_work probe_work; @@ -35,4 +32,16 @@ struct hda_intel { int probe_retry; /* being probe-retry */ }; +struct hda_intel_stream { + struct azx_dev azx_dev; + + /* for pending irqs */ + struct hda_intel *hda; + struct work_struct irq_pending_work; + bool irq_pending; +}; + +#define azx_dev_to_istream(azx_dev) \ + container_of(azx_dev, struct hda_intel_stream, azx_dev) + #endif -- cgit v1.2.3 From 33d3b6f0d86b539680bbda3300b2581cd62a3f18 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 19 May 2026 14:11:53 +0200 Subject: ALSA: hda/intel: Make sure to cancel irq-pending work at closing PCM stream The pending irq work might be still floating while the assigned stream has been already closed, which may lead to UAF, especially when another async work for fasync is involved. For addressing this, extend the hda_controller_ops for allowing the extra cleanup procedure that is specific to the controller driver, and make sure to cancel and sync the pending irq work at each PCM close before releasing the resources. Reported-by: Jake Lamberson Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260519121157.28477-2-tiwai@suse.de --- sound/hda/common/controller.c | 2 ++ sound/hda/common/hda_controller.h | 2 ++ sound/hda/controllers/intel.c | 20 ++++++++++++++++---- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/sound/hda/common/controller.c b/sound/hda/common/controller.c index 89e63a53d683..a847546753db 100644 --- a/sound/hda/common/controller.c +++ b/sound/hda/common/controller.c @@ -97,6 +97,8 @@ static int azx_pcm_close(struct snd_pcm_substream *substream) trace_azx_pcm_close(chip, azx_dev); scoped_guard(mutex, &chip->open_mutex) { + if (chip->ops->pcm_close) + chip->ops->pcm_close(chip, azx_dev); azx_release_device(azx_dev); if (hinfo->ops.close) hinfo->ops.close(hinfo, apcm->codec, substream); diff --git a/sound/hda/common/hda_controller.h b/sound/hda/common/hda_controller.h index bc8ee4cc2401..38227f82e704 100644 --- a/sound/hda/common/hda_controller.h +++ b/sound/hda/common/hda_controller.h @@ -78,6 +78,8 @@ struct hda_controller_ops { int (*position_check)(struct azx *chip, struct azx_dev *azx_dev); /* enable/disable the link power */ int (*link_power)(struct azx *chip, bool enable); + /* additional hook for PCM */ + void (*pcm_close)(struct azx *chip, struct azx_dev *azx_dev); }; struct azx_pcm { diff --git a/sound/hda/controllers/intel.c b/sound/hda/controllers/intel.c index 01477bd4fcb9..4b03c64e72ab 100644 --- a/sound/hda/controllers/intel.c +++ b/sound/hda/controllers/intel.c @@ -761,16 +761,27 @@ static void azx_irq_pending_work(struct work_struct *work) } /* clear irq_pending flags and assure no on-going workq */ +static void hda_intel_stream_clear_irq_pending(struct azx_dev *azx_dev) +{ + struct hda_intel_stream *istream = azx_dev_to_istream(azx_dev); + + istream->irq_pending = false; + cancel_work_sync(&istream->irq_pending_work); +} + +/* called at PCM close */ +static void hda_intel_pcm_close(struct azx *chip, struct azx_dev *azx_dev) +{ + hda_intel_stream_clear_irq_pending(azx_dev); +} + static void azx_clear_irq_pending(struct azx *chip) { struct hdac_bus *bus = azx_bus(chip); struct hdac_stream *s; list_for_each_entry(s, &bus->stream_list, list) { - struct azx_dev *azx_dev = stream_to_azx_dev(s); - struct hda_intel_stream *istream = azx_dev_to_istream(azx_dev); - istream->irq_pending = false; - cancel_work_sync(&istream->irq_pending_work); + hda_intel_stream_clear_irq_pending(stream_to_azx_dev(s)); } } @@ -2131,6 +2142,7 @@ static const struct dmi_system_id driver_denylist_dmi[] = { static const struct hda_controller_ops pci_hda_ops = { .disable_msi_reset_irq = disable_msi_reset_irq, .position_check = azx_position_check, + .pcm_close = hda_intel_pcm_close, }; static DECLARE_BITMAP(probed_devs, SNDRV_CARDS); -- cgit v1.2.3 From 12b1b4f5653d9a14980c0d881f4ac1af2e1d6b05 Mon Sep 17 00:00:00 2001 From: Marius Hoch Date: Tue, 19 May 2026 16:01:29 +0200 Subject: ALSA: hda/realtek: Add LED quirk for HP ProBook 430 G6 Like the HP ProBook 440 G6, the HP ProBook 430 G6 needs the ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk for its mute and microphone mute LEDs. Tested on a HP ProBook 430 G6. Signed-off-by: Marius Hoch Link: https://patch.msgid.link/20260519140248.4211-2-mail@mariushoch.de Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index be1bbde8be5a..f180d6a72021 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6955,6 +6955,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), + SND_PCI_QUIRK(0x103c, 0x8536, "HP ProBook 430 G6", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8537, "HP ProBook 440 G6", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8548, "HP EliteBook x360 830 G6", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x854a, "HP EliteBook 830 G6", ALC285_FIXUP_HP_GPIO_LED), -- cgit v1.2.3 From a69b677e47a80319ce148d61cc29a2b57006e78d Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Tue, 19 May 2026 11:46:19 -0300 Subject: ALSA: scarlett2: Allow flash writes ending at segment boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scarlett2_hwdep_write() rejects writes when offset + count is greater than or equal to the selected flash segment size. That incorrectly treats a write ending exactly at the end of the segment as out of space, although the last byte written is still within the segment. Split invalid argument checks from the segment-space check, keep zero-length writes as no-ops, and compare count against the remaining segment size. This permits exact-end writes and avoids relying on offset + count before deciding whether the request is in bounds. Fixes: 1abfbd3c9527 ("ALSA: scarlett2: Add support for uploading new firmware") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260519-alsa-scarlett2-flash-write-boundary-v1-1-b550480e92da@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer_scarlett2.c b/sound/usb/mixer_scarlett2.c index 0f83f8981213..8e80a7165faf 100644 --- a/sound/usb/mixer_scarlett2.c +++ b/sound/usb/mixer_scarlett2.c @@ -9187,12 +9187,15 @@ static long scarlett2_hwdep_write(struct snd_hwdep *hw, flash_size = private->flash_segment_blocks[segment_id] * SCARLETT2_FLASH_BLOCK_SIZE; - if (count < 0 || *offset < 0 || *offset + count >= flash_size) - return -ENOSPC; + if (count < 0 || *offset < 0) + return -EINVAL; if (!count) return 0; + if (*offset >= flash_size || count > flash_size - *offset) + return -ENOSPC; + /* Limit the *req size to SCARLETT2_FLASH_RW_MAX */ if (count > max_data_size) count = max_data_size; -- cgit v1.2.3 From 649932fc3815eda2f24eb4de4b3a5e94886ee0b9 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 28 Apr 2026 12:34:31 +0800 Subject: erofs: fix managed cache race for unaligned extents After unaligned compressed extents were introduced, the following race could occur: [Thread 1] [Thread 2] (z_erofs_fill_bio_vec) ... filemap_add_folio (1) (z_erofs_bind_cache) .. .. folio_attach_private (2) filemap_add_folio (3) again Since (1) is executed but (2) hasn't been executed yet, it's possible that another thread finds the same managed folio in z_erofs_bind_cache() for a different pcluster and calls filemap_add_folio() again since folio->private is still Z_EROFS_PREALLOCATED_FOLIO. Fix this by explicitly clearing folio->private before making the folio visible in the managed cache so that another pcluster can simply wait on the locked managed folio as what we did for other shared cases [1]. This only impacts unaligned data compression (`-E48bit` with zstd, for example). [1] Commit 9e2f9d34dd12 ("erofs: handle overlapped pclusters out of crafted images properly") was originally introduced to handle crafted overlapped extents, but it addresses unaligned extents as well. Fixes: 7361d1e3763b ("erofs: support unaligned encoded data") Reported-by: Arseniy Krasnov Closes: https://lore.kernel.org/r/4a2f3801-fac1-42fe-ae75-da315822e088@salutedevices.com Tested-by: Arseniy Krasnov Signed-off-by: Gao Xiang --- fs/erofs/zdata.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 43bb5a6a9924..27ab7bd844ec 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1509,8 +1509,15 @@ repeat: DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page)); folio = page_folio(zbv.page); - /* For preallocated managed folios, add them to page cache here */ + /* + * Preallocated folios are added to the managed cache here rather than + * in z_erofs_bind_cache() in order to keep these folios locked in + * increasing (physical) address order. + * Clear folio->private before these folios become visible to others in + * the managed cache to avoid duplicate additions for unaligned extents. + */ if (folio->private == Z_EROFS_PREALLOCATED_FOLIO) { + folio->private = NULL; tocache = true; goto out_tocache; } @@ -1546,14 +1553,8 @@ repeat: } return; } - /* - * Already linked with another pcluster, which only appears in - * crafted images by fuzzers for now. But handle this anyway. - */ - tocache = false; /* use temporary short-lived pages */ } else { DBG_BUGON(1); /* referenced managed folios can't be truncated */ - tocache = true; } folio_unlock(folio); folio_put(folio); -- cgit v1.2.3 From 79b09c54c6563df9846ca3094bcfd72082c3e1d7 Mon Sep 17 00:00:00 2001 From: Jia Zhu Date: Wed, 20 May 2026 12:46:07 +0800 Subject: erofs: fix metabuf leak in inode xattr initialization commit bb88e8da0025 ("erofs: use meta buffers for xattr operations") converted xattr operations to use on-stack erofs_buf instances. erofs_init_inode_xattrs() uses such a metabuf while reading the inline xattr header and shared xattr id array. Some error paths after erofs_read_metabuf() leave through out_unlock without dropping the metabuf, so the folio reference can leak. Consolidate the cleanup at out_unlock. erofs_put_metabuf() is a no-op if no folio has been acquired, and this keeps all paths after taking EROFS_I_BL_XATTR_BIT covered by a single cleanup site. Fixes: bb88e8da0025 ("erofs: use meta buffers for xattr operations") Signed-off-by: Jia Zhu Reviewed-by: Gao Xiang Fixes: bb88e8da0025 ("erofs: use meta buffers for xattr operations") Signed-off-by: Gao Xiang --- fs/erofs/xattr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index 41e311019a25..df7ea019526d 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -89,13 +89,11 @@ static int erofs_init_inode_xattrs(struct inode *inode) vi->xattr_isize - sizeof(struct erofs_xattr_ibody_header)) { erofs_err(sb, "invalid h_shared_count %u @ nid %llu", vi->xattr_shared_count, vi->nid); - erofs_put_metabuf(&buf); ret = -EFSCORRUPTED; goto out_unlock; } vi->xattr_shared_xattrs = kmalloc_objs(uint, vi->xattr_shared_count); if (!vi->xattr_shared_xattrs) { - erofs_put_metabuf(&buf); ret = -ENOMEM; goto out_unlock; } @@ -112,12 +110,12 @@ static int erofs_init_inode_xattrs(struct inode *inode) } vi->xattr_shared_xattrs[i] = le32_to_cpu(*xattr_id); } - erofs_put_metabuf(&buf); /* paired with smp_mb() at the beginning of the function. */ smp_mb(); set_bit(EROFS_I_EA_INITED_BIT, &vi->flags); out_unlock: + erofs_put_metabuf(&buf); clear_and_wake_up_bit(EROFS_I_BL_XATTR_BIT, &vi->flags); return ret; } -- cgit v1.2.3 From 9ce754ed8e7ab4e3999767ce1505f85c449ccb07 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Tue, 19 May 2026 09:25:19 -0400 Subject: KVM: arm64: vgic-its: Reject restored DTE with out-of-range num_eventid_bits Userspace can restore an ITS Device Table Entry whose Size field encodes more EventID bits than the virtual ITS supports. The live MAPD path rejects that state, but vgic_its_restore_dte() accepts it and stores the out-of-range value in dev->num_eventid_bits. Reject restored DTEs with num_eventid_bits > VITS_TYPER_IDBITS before allocating the device. This mirrors the MAPD check and prevents the restored state from reaching vgic_its_restore_itt(), where the unchecked value can be converted into an oversized scan_its_table() range. Fixes: 57a9a117154c ("KVM: arm64: vgic-its: Device table save/restore") Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Link: https://lore.kernel.org/r/20260519132519.2142458-1-michael.bommarito@gmail.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/kvm/vgic/vgic-its.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 2ea9f1c7ebcd..1d7e5d560af4 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2307,6 +2307,10 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id, /* dte entry is valid */ offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT; + /* Mimic the MAPD behaviour and reject invalid EID bits. */ + if (num_eventid_bits > VITS_TYPER_IDBITS) + return -EINVAL; + if (!vgic_its_check_id(its, baser, id, NULL)) return -EINVAL; -- cgit v1.2.3 From f19c354dbd457759dfcf1195ab4bdba2bb568323 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Tue, 19 May 2026 09:50:42 -0400 Subject: KVM: arm64: vgic: Free private_irqs when init fails after allocation Companion to commit 250f25367b58 ("KVM: arm64: Tear down vGIC on failed vCPU creation"), which added the missing kvm_vgic_vcpu_destroy() call to the kvm_share_hyp() failure path in kvm_arch_vcpu_create(). The kvm_vgic_vcpu_init() failure path immediately above it has the same shape and still needs the same cleanup. Call kvm_vgic_vcpu_destroy() when kvm_vgic_vcpu_init() fails so private IRQs allocated before a redistributor iodev registration failure are released before the failed vCPU is freed. Fixes: 03b3d00a70b5 ("KVM: arm64: vgic: Allocate private interrupts on demand") Cc: stable@vger.kernel.org Cc: Will Deacon Reviewed-by: Yuan Yao Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Link: https://lore.kernel.org/r/20260519135042.2219239-1-michael.bommarito@gmail.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 34c9950884d5..9453321ef8c6 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -555,8 +555,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kvm_destroy_mpidr_data(vcpu->kvm); err = kvm_vgic_vcpu_init(vcpu); - if (err) + if (err) { + kvm_vgic_vcpu_destroy(vcpu); return err; + } err = kvm_share_hyp(vcpu, vcpu + 1); if (err) -- cgit v1.2.3 From 1702da76e017ae0fbe1a92b07bc332972c293e89 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Thu, 14 May 2026 17:26:24 +0100 Subject: KVM: arm64: Fix nVHE/pKVM hyp tracing error on invalid desc pKVM must validate the host-provided tracing buffer descriptor. However, if an error is found, the hypervisor would just return 0 to the host. Fix the return value on validation failure. While at it, rename the function to hyp_trace_desc_is_valid() and skip validation for the nVHE mode as we trust host-provided data in that case. Signed-off-by: Vincent Donnefort Fixes: 680a04c333fa ("KVM: arm64: Add tracing capability for the nVHE/pKVM hyp") Link: https://lore.kernel.org/r/20260514162624.3477857-1-vdonnefort@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/trace.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/trace.c b/arch/arm64/kvm/hyp/nvhe/trace.c index a6ca27b18e15..e7e150ab265f 100644 --- a/arch/arm64/kvm/hyp/nvhe/trace.c +++ b/arch/arm64/kvm/hyp/nvhe/trace.c @@ -164,13 +164,16 @@ static int hyp_trace_buffer_load(struct hyp_trace_buffer *trace_buffer, return ret; } -static bool hyp_trace_desc_validate(struct hyp_trace_desc *desc, size_t desc_size) +static bool hyp_trace_desc_is_valid(struct hyp_trace_desc *desc, size_t desc_size) { struct ring_buffer_desc *rb_desc; unsigned int cpu; size_t nr_bpages; void *desc_end; + if (!is_protected_kvm_enabled()) + return true; + /* * Both desc_size and bpages_backing_size are untrusted host-provided * values. We rely on __pkvm_host_donate_hyp() to enforce their validity. @@ -212,8 +215,10 @@ int __tracing_load(unsigned long desc_hva, size_t desc_size) if (ret) return ret; - if (!hyp_trace_desc_validate(desc, desc_size)) + if (!hyp_trace_desc_is_valid(desc, desc_size)) { + ret = -EINVAL; goto err_release_desc; + } hyp_spin_lock(&trace_buffer.lock); -- cgit v1.2.3 From 540f4a4f6ef806a28e794001bb4beac4840a6090 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Wed, 22 Apr 2026 16:17:16 +0200 Subject: s390/topology: Use zero-based numbering for containing entities Start the numbering scheme for higher-level topology structures (like socket, book, drawer) at zero, matching the convention for other hardware identifiers like e.g. CPU numbers. Hardware documentation, the Hardware Management Console and other tools like zmemtopo also use zero-based numbering for these containing entities. Aligning the numbering in sysfs, procfs, and tools like lscpu improves user experience by making it easier to correlate topology information across different interfaces. If available, Linux on s390 derives this physical topology information from the stsi function code 15 store_topology instruction, which is defined to start at 1 for the lowest numbered container id. Subtract one, so drawer_id, book_id and socket_id in cpu_topology[] start with 0 for the lowest numbered entity; and /proc/cpuinfo and tools like 'lscpu -ye' display the expected values. Display only, no functional change intended. Example: In a partition with 3 cores in a system with 8 cores per socket; 2 sockets per book; 4 books per dawer; and 4 drawers: Before this fix: $ lscpu -ye CPU NODE DRAWER BOOK SOCKET CORE L1d:L1i:L2 ONLINE CONFIGURED POLARIZATION ADDRESS 0 0 2 4 1 0 0:0:0 yes yes vert-high 0 1 0 2 4 1 0 1:1:1 yes yes vert-high 1 2 0 2 4 1 1 2:2:2 yes yes vert-medium 2 3 0 2 4 1 1 3:3:3 yes yes vert-medium 3 4 0 2 4 2 3 4:4:4 yes yes vert-low 4 5 0 2 4 2 3 5:5:5 yes yes vert-low 5 After this fix: $ lscpu -ye CPU NODE DRAWER BOOK SOCKET CORE L1d:L1i:L2 ONLINE CONFIGURED POLARIZATION ADDRESS 0 0 1 3 0 0 0:0:0 yes yes vert-high 0 1 0 1 3 0 0 1:1:1 yes yes vert-high 1 2 0 1 3 0 1 2:2:2 yes yes vert-medium 2 3 0 1 3 0 1 3:3:3 yes yes vert-medium 3 4 0 1 3 1 3 4:4:4 yes yes vert-low 4 5 0 1 3 1 3 5:5:5 yes yes vert-low 5 For KVM guests, qemu emulates the stsi FC15 store_topology instruction. This emulation currently erroneously starts id numbering at 0. A qemu fix is proposed that makes this emulation compliant to the stsi architecture. In case a guest with this patch is running on a qemu without the other fix, it can happen that ids of 255 are displayed erroneously. z/VM currently does not provide or emulate physical topology information to its guests. So this patch does not change anything for z/VM guests. Fixes: 10d385895055 ("[S390] topology: expose core identifier") Signed-off-by: Alexandra Winter Acked-by: Heiko Carstens Acked-by: Christian Borntraeger Acked-by: Hendrik Brueckner Signed-off-by: Alexander Gordeev --- arch/s390/kernel/topology.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 1913a5566ac2..1377c6f3f670 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -192,17 +192,21 @@ static void tl_to_masks(struct sysinfo_15_1_x *info) end = (union topology_entry *)((unsigned long)info + info->length); while (tle < end) { switch (tle->nl) { + /* + * Adjust drawer_id, book_id, and socked_id so they match the + * numbering scheme of e.g. the hardware management console. + */ case 3: drawer = drawer->next; - drawer->id = tle->container.id; + drawer->id = tle->container.id - 1; break; case 2: book = book->next; - book->id = tle->container.id; + book->id = tle->container.id - 1; break; case 1: socket = socket->next; - socket->id = tle->container.id; + socket->id = tle->container.id - 1; break; case 0: add_cpus_to_mask(&tle->cpu, drawer, book, socket); -- cgit v1.2.3 From f718506edd2d9c6a308ded9d13c632bf7b7d5a2c Mon Sep 17 00:00:00 2001 From: Alexandru Hossu Date: Fri, 15 May 2026 12:29:08 +0200 Subject: wifi: mac80211: bounds-check link_id in ieee80211_ml_epcs IEEE80211_MLE_STA_EPCS_CONTROL_LINK_ID is 0x000f, so link_id extracted from a PRIO_ACCESS ML element PER_STA_PROFILE subelement can be 0..15. sdata->link[] has IEEE80211_MLD_MAX_NUM_LINKS (15) entries (indices 0..14), making index 15 out-of-bounds. A connected WiFi 7 AP can trigger this by sending an EPCS Enable Response action frame with a PER_STA_PROFILE subelement where link_id = 15. The unsolicited-notification path (dialog_token = 0) is reachable any time EPCS is already enabled, without any prior client request. sdata->link[15] reads into the first word of sdata->activate_links_work (a wiphy_work whose embedded list_head is non-NULL after INIT_LIST_HEAD), so the NULL check on the result does not catch the invalid access. The garbage pointer is then passed to ieee80211_sta_wmm_params(), which dereferences link->sdata and crashes the kernel. The same class of bug was fixed for ieee80211_ml_reconfiguration() by commit 162d331d833d ("wifi: mac80211: bounds-check link_id in ieee80211_ml_reconfiguration"). Fixes: de86c5f60839 ("wifi: mac80211: Add support for EPCS configuration") Signed-off-by: Alexandru Hossu Link: https://patch.msgid.link/20260515102908.1653088-1-hossu.alexandru@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0a0f27836d57..ca1d29daf019 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -11232,6 +11232,9 @@ static void ieee80211_ml_epcs(struct ieee80211_sub_if_data *sdata, control = get_unaligned_le16(pos); link_id = control & IEEE80211_MLE_STA_EPCS_CONTROL_LINK_ID; + if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + continue; + link = sdata_dereference(sdata->link[link_id], sdata); if (!link) continue; -- cgit v1.2.3 From e1e83feb8eae82cc9cc676db4c70f52fedc4735d Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 13 May 2026 17:06:27 +0300 Subject: wifi: mac80211: don't override max_amsdu_subframes In client mode, the extended capabilities are handled by the kernel looking at the association frame. When the supplicant installs the keys it calls sta_apply_parameters and it doesn't include the extended capabilities since those can't change after association. As a result, we overrode the max_amsdu_subframes that we set after association. Check that the ext_capa coming from the user space is valid before looking at it. If the ext_capa is NULL, it really means that the extended capabilities are not changed (as opposed to cleared). The default value for max_amsdu_subframes is 0, which means there is no limit. This value is valid and in case the association response frame does not have extended capabilities, this is the value we should use. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=221079 Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260513170623.828dbb58c782.Ifd2bfc190c26140e919127adb02ffddd7b551499@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7b77d57c9f96..f9ee9947a94d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2344,8 +2344,9 @@ static int sta_apply_parameters(struct ieee80211_local *local, sta->sta.max_sp = params->max_sp; } - ieee80211_sta_set_max_amsdu_subframes(sta, params->ext_capab, - params->ext_capab_len); + if (params->ext_capab) + ieee80211_sta_set_max_amsdu_subframes(sta, params->ext_capab, + params->ext_capab_len); /* * cfg80211 validates this (1-2007) and allows setting the AID -- cgit v1.2.3 From a74e893f30db64cdce0fc7a96d3baa417bcd55f5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 8 May 2026 09:10:31 +0200 Subject: wifi: mac80211: fix MLE defragmentation If either reconf or EPCS multi-link element (MLE) is contained in a non-transmitted profile, the defragmentation routine is called with a pointer to the defragmented copy, but the original elements. This is incorrect for two reasons: - if the original defragmentation was needed, it will not find the correct data - if the original frame is at a higher address, the parsing will potentially overrun the heap data (though given the layout of the buffers, only into the new defragmentation buffer, and then it has to stop and fail once that's filled with copied data. Fix it by tracking the container along with the pointer and in doing so also unify the two almost identical defragmentation routines. Fixes: 4d70e9c5488d ("wifi: mac80211: defragment reconfiguration MLE when parsing") Reviewed-by: Miriam Rachel Korenblit Reviewed-by: Ilan Peer Link: https://patch.msgid.link/20260508091031.8a6c34613178.I4de16ebbce2d27f2f8f98fc49949c7a376c2fe8d@changeid Signed-off-by: Johannes Berg --- net/mac80211/parse.c | 71 +++++++++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 2b3632c6008a..666cdd5fd0ea 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -34,6 +34,13 @@ #include "led.h" #include "wep.h" +struct ieee80211_elem_defrag { + const struct element *elem; + /* container start/len */ + const u8 *start; + size_t len; +}; + struct ieee80211_elems_parse { /* must be first for kfree to work */ struct ieee802_11_elems elems; @@ -41,11 +48,7 @@ struct ieee80211_elems_parse { /* The basic Multi-Link element in the original elements */ const struct element *ml_basic_elem; - /* The reconfiguration Multi-Link element in the original elements */ - const struct element *ml_reconf_elem; - - /* The EPCS Multi-Link element in the original elements */ - const struct element *ml_epcs_elem; + struct ieee80211_elem_defrag ml_reconf, ml_epcs; bool multi_link_inner; bool skip_vendor; @@ -162,10 +165,14 @@ ieee80211_parse_extension_element(u32 *crc, } break; case IEEE80211_ML_CONTROL_TYPE_RECONF: - elems_parse->ml_reconf_elem = elem; + elems_parse->ml_reconf.elem = elem; + elems_parse->ml_reconf.start = params->start; + elems_parse->ml_reconf.len = params->len; break; case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS: - elems_parse->ml_epcs_elem = elem; + elems_parse->ml_epcs.elem = elem; + elems_parse->ml_epcs.start = params->start; + elems_parse->ml_epcs.len = params->len; break; default: break; @@ -990,46 +997,27 @@ ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse, sub->start, sub->len); } -static void -ieee80211_mle_defrag_reconf(struct ieee80211_elems_parse *elems_parse) -{ - struct ieee802_11_elems *elems = &elems_parse->elems; - ssize_t ml_len; - - ml_len = cfg80211_defragment_element(elems_parse->ml_reconf_elem, - elems->ie_start, - elems->total_len, - elems_parse->scratch_pos, - elems_parse->scratch + - elems_parse->scratch_len - - elems_parse->scratch_pos, - WLAN_EID_FRAGMENT); - if (ml_len < 0) - return; - elems->ml_reconf = (void *)elems_parse->scratch_pos; - elems->ml_reconf_len = ml_len; - elems_parse->scratch_pos += ml_len; -} - -static void -ieee80211_mle_defrag_epcs(struct ieee80211_elems_parse *elems_parse) +static const void * +ieee80211_mle_defrag(struct ieee80211_elems_parse *elems_parse, + struct ieee80211_elem_defrag *defrag, + size_t *out_len) { - struct ieee802_11_elems *elems = &elems_parse->elems; + const void *ret; ssize_t ml_len; - ml_len = cfg80211_defragment_element(elems_parse->ml_epcs_elem, - elems->ie_start, - elems->total_len, + ml_len = cfg80211_defragment_element(defrag->elem, + defrag->start, defrag->len, elems_parse->scratch_pos, elems_parse->scratch + elems_parse->scratch_len - elems_parse->scratch_pos, WLAN_EID_FRAGMENT); if (ml_len < 0) - return; - elems->ml_epcs = (void *)elems_parse->scratch_pos; - elems->ml_epcs_len = ml_len; + return NULL; + ret = elems_parse->scratch_pos; + *out_len = ml_len; elems_parse->scratch_pos += ml_len; + return ret; } struct ieee802_11_elems * @@ -1109,9 +1097,12 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) _ieee802_11_parse_elems_full(&sub, elems_parse, NULL); } - ieee80211_mle_defrag_reconf(elems_parse); - - ieee80211_mle_defrag_epcs(elems_parse); + elems->ml_reconf = ieee80211_mle_defrag(elems_parse, + &elems_parse->ml_reconf, + &elems->ml_reconf_len); + elems->ml_epcs = ieee80211_mle_defrag(elems_parse, + &elems_parse->ml_epcs, + &elems->ml_epcs_len); if (elems->tim && !elems->parse_error) { const struct ieee80211_tim_ie *tim_ie = elems->tim; -- cgit v1.2.3 From fe2d61a5d2849ee75dd4deeb2fe35f78d80721f8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 8 May 2026 09:10:32 +0200 Subject: wifi: mac80211: fix multi-link element inheritance When parsing a beacon, mac80211 erroneously inherits any reconfiguration or EPCS multi-link elements from the outer elements into the multi-BSSID profile that's requested, if connected to a non-transmitted BSS, unless that profile has a non-inheritance element. This also happens if parsing a multi-BSSID profile that doesn't have a non-inheritance element. Fix this by having an empty non-inheritance element so cfg80211_is_element_inherited() is invoked in these cases and causes the parser to skip the elements that should never be inherited. Fixes: cf36cdef10e2 ("wifi: mac80211: Add support for parsing Reconfiguration Multi Link element") Fixes: 24711d60f849 ("wifi: mac80211: Support parsing EPCS ML element") Reviewed-by: Ilan Peer Reviewed-by: Benjamin Berg Link: https://patch.msgid.link/20260508091032.92184c0a3f08.I3c43b0b63d2cef8a4ddddaef1c2faaeb1de711ad@changeid Signed-off-by: Johannes Berg --- net/mac80211/parse.c | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 666cdd5fd0ea..77894d997113 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -34,6 +34,15 @@ #include "led.h" #include "wep.h" +static const u8 empty_non_inheritance[] = { + WLAN_EID_EXTENSION, 1, WLAN_EID_EXT_NON_INHERITANCE, + /* + * cfg80211_is_element_inherited() hardcodes elements that + * cannot be inherited, so we just need an empty one to be + * calling it at all. + */ +}; + struct ieee80211_elem_defrag { const struct element *elem; /* container start/len */ @@ -923,7 +932,7 @@ ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse, { struct ieee802_11_elems *elems = &elems_parse->elems; struct ieee80211_mle_per_sta_profile *prof; - const struct element *tmp; + const struct element *tmp, *ret; ssize_t ml_len; const u8 *end; @@ -993,8 +1002,17 @@ ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse, sub->from_ap = params->from_ap; sub->link_id = -1; - return cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - sub->start, sub->len); + ret = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + sub->start, sub->len); + if (ret) + return ret; + + /* + * Since we know we want and found a profile, apply an empty + * non-inheritance if the profile didn't have one, so that any + * element that shouldn't be inherited by spec isn't. + */ + return (const void *)empty_non_inheritance; } static const void * @@ -1030,6 +1048,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) size_t scratch_len = 3 * params->len; bool multi_link_inner = false; + BUILD_BUG_ON(sizeof(empty_non_inheritance) != empty_non_inheritance[1] + 2); BUILD_BUG_ON(offsetof(typeof(*elems_parse), elems) != 0); /* cannot parse for both a specific link and non-transmitted BSS */ @@ -1077,6 +1096,17 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, sub.start, nontx_len); + /* + * If it's a non-transmitted BSS, we shouldn't pick + * any elements in the outer parsing that shouldn't + * be inherited. If the profile has a non-inheritance + * element this automatically happens, but if not then + * provide an empty one so that the hard-coded elements + * in cfg80211_is_element_inherited() are ignored, but + * it must be called. + */ + if (params->bss->transmitted_bss && !non_inherit) + non_inherit = (const void *)empty_non_inheritance; } else { /* must always parse to get elems_parse->ml_basic_elem */ non_inherit = ieee80211_prep_mle_link_parse(elems_parse, params, -- cgit v1.2.3 From d71c841be5d9e586ee7f36c0dc8ed4db0d9a1349 Mon Sep 17 00:00:00 2001 From: Zhao Li Date: Sat, 9 May 2026 12:34:28 +0800 Subject: wifi: mac80211: capture fast-RX rate before mesh reuses skb->cb ieee80211_invoke_fast_rx() reads RX status through IEEE80211_SKB_RXCB(skb), which aliases the same skb->cb storage that ieee80211_rx_mesh_data() reuses as IEEE80211_TX_INFO. In the unicast forward path, mesh_data does: info = IEEE80211_SKB_CB(fwd_skb); memset(info, 0, sizeof(*info)); on the same skb the caller still names via rx->skb, then either queues the skb for TX (success) or kfree_skb()'s it (no-route) before returning RX_QUEUED. The caller's RX_QUEUED arm then calls sta_stats_encode_rate(status) on memory that is either zeroed (success path) or freed (no-route path). The latter is KASAN slab-use-after-free in ieee80211_prepare_and_rx_handle. Fix by encoding the rate from status before invoking ieee80211_rx_mesh_data(), so the RX_QUEUED arm consumes a value captured while status was still backed by valid memory. Fixes: 3468e1e0c639 ("wifi: mac80211: add mesh fast-rx support") Cc: stable@vger.kernel.org Signed-off-by: Zhao Li Link: https://patch.msgid.link/20260509043427.60322-2-enderaoelyther@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d18e962126ce..3fb40449c6c5 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4984,6 +4984,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, u8 sa[ETH_ALEN]; } addrs __aligned(2); struct ieee80211_sta_rx_stats *stats; + u32 encoded_rate; /* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write * to a common data structure; drivers can implement that per queue @@ -5091,11 +5092,14 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, /* push the addresses in front */ memcpy(skb_push(skb, sizeof(addrs)), &addrs, sizeof(addrs)); + /* capture before mesh forward may memset or free skb->cb */ + encoded_rate = sta_stats_encode_rate(status); + res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb); switch (res) { case RX_QUEUED: stats->last_rx = jiffies; - stats->last_rate = sta_stats_encode_rate(status); + stats->last_rate = encoded_rate; return true; case RX_CONTINUE: break; -- cgit v1.2.3 From dd7b6a8671939708cc4b7a46786d8c11297e8f69 Mon Sep 17 00:00:00 2001 From: Shitalkumar Gandhi Date: Mon, 11 May 2026 09:57:32 +0530 Subject: wifi: wilc1000: fix dma_buffer leak on bus acquire failure wilc_wlan_firmware_download() allocates dma_buffer with kmalloc() at the top of the function and uses a 'fail:' label to free it via kfree(dma_buffer) on error. All later error paths correctly use 'goto fail' to route through this cleanup. However, the early failure path after the first acquire_bus() call uses a bare 'return ret;', which leaks dma_buffer whenever the bus acquire fails. Replace the early return with goto fail so the existing cleanup path runs. Found via a custom Coccinelle semantic patch hunting for kmalloc'd locals leaked on early-return error paths in driver firmware-download code. Fixes: 1241c5650ff7 ("wifi: wilc1000: Fill in missing error handling") Signed-off-by: Shitalkumar Gandhi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260511042732.998311-1-shitalkumar.gandhi@cambiumnetworks.com Signed-off-by: Johannes Berg --- drivers/net/wireless/microchip/wilc1000/wlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.c b/drivers/net/wireless/microchip/wilc1000/wlan.c index 3fa8592eb250..4b116fe6f9ea 100644 --- a/drivers/net/wireless/microchip/wilc1000/wlan.c +++ b/drivers/net/wireless/microchip/wilc1000/wlan.c @@ -1265,7 +1265,7 @@ int wilc_wlan_firmware_download(struct wilc *wilc, const u8 *buffer, ret = acquire_bus(wilc, WILC_BUS_ACQUIRE_AND_WAKEUP); if (ret) - return ret; + goto fail; wilc->hif_func->hif_read_reg(wilc, WILC_GLB_RESET_0, ®); reg &= ~BIT(10); -- cgit v1.2.3 From a6e6ccd5bd07155c2add6c74ce1a5e68ad3b95ea Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Fri, 15 May 2026 11:17:18 -0400 Subject: wifi: mac80211: consume only present negotiated TTLM maps ieee80211_tid_to_link_map_size_ok() validates negotiated TTLM elements against the number of link-map entries indicated by link_map_presence. ieee80211_parse_neg_ttlm() must consume the same layout. The parser advanced its cursor for every TID, including TIDs whose presence bit is clear and therefore have no map bytes in the element. A sparse map can then make a later present TID read past the validated element. The bad bytes land in neg_ttlm->{up,down}link[tid] but are gated by valid_links before being applied to driver state, so a peer cannot turn the read into a policy change. Under KUnit + KASAN with an exact-sized element allocation the OOB read is reported as a slab-out-of-bounds; whether the same trigger fires under the production RX path depends on surrounding allocator state. Advance the cursor only when the current TID has a map present. Fixes: 8f500fbc6c65 ("wifi: mac80211: process and save negotiated TID to Link mapping request") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260515151719.1317659-2-michael.bommarito@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ca1d29daf019..b98ddfa3003e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8164,6 +8164,7 @@ ieee80211_parse_neg_ttlm(struct ieee80211_sub_if_data *sdata, "No active links for TID %d", tid); return -EINVAL; } + pos += map_size; } else { map = 0; } @@ -8182,7 +8183,6 @@ ieee80211_parse_neg_ttlm(struct ieee80211_sub_if_data *sdata, default: return -EINVAL; } - pos += map_size; } return 0; } -- cgit v1.2.3 From dc14686f27df6454b13b16ad1c9203ab3e9b0375 Mon Sep 17 00:00:00 2001 From: Kartik Nair Date: Mon, 11 May 2026 01:54:37 +0530 Subject: wifi: cfg80211: wext: validate chandef in monitor mode cfg80211_wext_siwfreq() constructs a channel definition for monitor mode but passes it to cfg80211_set_monitor_channel() without first validating it with cfg80211_chandef_valid(). This causes a WARN_ON in cfg80211_chandef_dfs_required() when it receives an invalid chandef. Add the missing cfg80211_chandef_valid() check before calling cfg80211_set_monitor_channel() to return -EINVAL early on invalid channel definitions, consistent with how other callers handle this. Reported-by: syzbot+02a1a03b8622d3c7d1c9@syzkaller.appspotmail.com Signed-off-by: Kartik Nair Link: https://patch.msgid.link/20260510202437.7857-1-contact.kartikn@gmail.com [clarify subject] Signed-off-by: Johannes Berg --- net/wireless/wext-compat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 22d9d9bae8f5..63d145b524c9 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -789,6 +789,8 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; + if (!cfg80211_chandef_valid(&chandef)) + return -EINVAL; return cfg80211_set_monitor_channel(rdev, dev, &chandef); case NL80211_IFTYPE_MESH_POINT: freq = cfg80211_wext_freq(wextfreq); -- cgit v1.2.3 From dc334b9bf9d28070f7a3e413fead759abdce19ba Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:03:21 +0200 Subject: platform/x86: acer-wireless: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the platform/x86 acer-wireless driver. Fixes: f7e648027d7e ("platform/x86: acer-wireless: Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/4746824.LvFx2qVVIh@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/acer-wireless.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/acer-wireless.c b/drivers/platform/x86/acer-wireless.c index f464b13a58af..fae8e5ad0f97 100644 --- a/drivers/platform/x86/acer-wireless.c +++ b/drivers/platform/x86/acer-wireless.c @@ -37,9 +37,14 @@ static void acer_wireless_notify(acpi_handle handle, u32 event, void *data) static int acer_wireless_probe(struct platform_device *pdev) { + struct acpi_device *adev; struct input_dev *idev; int ret; + adev = ACPI_COMPANION(&pdev->dev); + if (!adev) + return -ENODEV; + idev = devm_input_allocate_device(&pdev->dev); if (!idev) return -ENOMEM; @@ -57,8 +62,7 @@ static int acer_wireless_probe(struct platform_device *pdev) if (ret) return ret; - return acpi_dev_install_notify_handler(ACPI_COMPANION(&pdev->dev), - ACPI_DEVICE_NOTIFY, + return acpi_dev_install_notify_handler(adev, ACPI_DEVICE_NOTIFY, acer_wireless_notify, &pdev->dev); } -- cgit v1.2.3 From 08f29dc34d1cf39696d04bc1ff31538dc2d74c72 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:04:23 +0200 Subject: platform/x86: asus-laptop: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the platform/x86 asus-laptop driver. Fixes: ba19eb10170b ("platform/x86: asus-laptop: Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/5083741.GXAFRqVoOG@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-laptop.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index dbbb6292cd11..140ac8a10537 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -1826,10 +1826,14 @@ static bool asus_device_present; static int asus_acpi_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_device *device; struct asus_laptop *asus; int result; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + pr_notice("Asus Laptop Support version %s\n", ASUS_LAPTOP_VERSION); asus = kzalloc_obj(struct asus_laptop); -- cgit v1.2.3 From 8a17c62b02fc4baa02300b57a2ad882c7a06a984 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:05:07 +0200 Subject: platform/x86: dell/dell-rbtn: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the platform/x86 dell-rbtn driver. Fixes: 19ebacfb442b ("platform/x86: dell/dell-rbtn: Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/2276487.irdbgypaU6@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/dell-rbtn.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/dell/dell-rbtn.c b/drivers/platform/x86/dell/dell-rbtn.c index 34af9f4ff741..180b8c6720e6 100644 --- a/drivers/platform/x86/dell/dell-rbtn.c +++ b/drivers/platform/x86/dell/dell-rbtn.c @@ -396,11 +396,15 @@ static void rbtn_cleanup(struct device *dev) static int rbtn_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); struct rbtn_data *rbtn_data; + struct acpi_device *device; enum rbtn_type type; int ret = 0; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + type = rbtn_check(device); if (type == RBTN_UNKNOWN) { dev_info(&pdev->dev, "Unknown device type\n"); -- cgit v1.2.3 From 814830394764cef79c506629edd55f072e0207d1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:05:47 +0200 Subject: platform/x86: eeepc-laptop: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the platform/x86 eeepc-laptop driver. Fixes: 079b59fd2d79 ("platform/x86: eeepc-laptop: Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/3056852.e9J7NaK4W3@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/eeepc-laptop.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 02a71095920e..d18a80907611 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -1363,10 +1363,14 @@ static bool eeepc_device_present; static int eeepc_acpi_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_device *device; struct eeepc_laptop *eeepc; int result; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + pr_notice(EEEPC_LAPTOP_NAME "\n"); eeepc = kzalloc_obj(struct eeepc_laptop); if (!eeepc) -- cgit v1.2.3 From e99829a15989d58ffe1ee3e283e0a5eb8e41ee8b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:08:58 +0200 Subject: platform/x86: fujitsu: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add requisite ACPI_COMPANION() checks against NULL to the platform/x86 fujitsu-laptop driver. Fixes: 6da22b031a3c ("platform/x86: fujitsu: Convert laptop driver to a platform one") Fixes: d5c9212ccfaa ("platform/x86: fujitsu: Convert backlight driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Acked-by: Jonathan Woithe Link: https://patch.msgid.link/3430329.44csPzL39Z@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/fujitsu-laptop.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 2e265be2267e..54d0b9cec4d3 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -530,10 +530,14 @@ static void acpi_fujitsu_bl_notify(acpi_handle handle, u32 event, void *data) static int acpi_fujitsu_bl_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_device *device; struct fujitsu_bl *priv; int ret; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + if (acpi_video_get_backlight_type() != acpi_backlight_vendor) return -ENODEV; @@ -993,10 +997,14 @@ static void acpi_fujitsu_laptop_notify(acpi_handle handle, u32 event, void *data static int acpi_fujitsu_laptop_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); struct fujitsu_laptop *priv; + struct acpi_device *device; int ret, i = 0; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; -- cgit v1.2.3 From ab743c6d7b118235f6d48a513a6560afbc5e615a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:09:58 +0200 Subject: platform/x86: fujitsu-tablet: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add requisite ACPI_COMPANION() checks against NULL to the platform/x86 fujitsu-tablet driver. Fixes: bd13b265d386 ("platform/x86: fujitsu-tablet: Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/10861611.nUPlyArG6x@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/fujitsu-tablet.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/fujitsu-tablet.c b/drivers/platform/x86/fujitsu-tablet.c index 8319df28e9b8..2f8c1b89cbca 100644 --- a/drivers/platform/x86/fujitsu-tablet.c +++ b/drivers/platform/x86/fujitsu-tablet.c @@ -445,10 +445,14 @@ static acpi_status fujitsu_walk_resources(struct acpi_resource *res, void *data) static int acpi_fujitsu_probe(struct platform_device *pdev) { - struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); + struct acpi_device *adev; acpi_status status; int error; + adev = ACPI_COMPANION(&pdev->dev); + if (!adev) + return -ENODEV; + status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS, fujitsu_walk_resources, NULL); if (ACPI_FAILURE(status) || !fujitsu.irq || !fujitsu.io_base) -- cgit v1.2.3 From 51e91ad0a0ccb0906306c653f9e8b97a180d8608 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:11:00 +0200 Subject: platform/x86: intel/rst: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the platform/x86 intel/rst driver. Fixes: 163a68a31f74 ("platform/x86: intel/rst: Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/2051525.PYKUYFuaPT@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/rst.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/rst.c b/drivers/platform/x86/intel/rst.c index 4bd10927aad9..bb19f0d89305 100644 --- a/drivers/platform/x86/intel/rst.c +++ b/drivers/platform/x86/intel/rst.c @@ -102,9 +102,13 @@ static struct device_attribute irst_timeout_attr = { static int irst_probe(struct platform_device *pdev) { - struct acpi_device *acpi = ACPI_COMPANION(&pdev->dev); + struct acpi_device *acpi; int error; + acpi = ACPI_COMPANION(&pdev->dev); + if (!acpi) + return -ENODEV; + error = device_create_file(&acpi->dev, &irst_timeout_attr); if (unlikely(error)) return error; -- cgit v1.2.3 From 922952f2bbcfe21375973d0ea0e662a1a4837b10 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:11:43 +0200 Subject: platform/x86: intel/smartconnect: Check ACPI_HANDLE() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_HANDLE() check against NULL to the platform/x86 intel/smartconnect driver. Fixes: 8a44bd3ffdb2 ("platform/x86: intel/smartconnect: Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/7956676.EvYhyI6sBW@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/smartconnect.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/smartconnect.c b/drivers/platform/x86/intel/smartconnect.c index 4d866b6366d6..71e91ac60e5d 100644 --- a/drivers/platform/x86/intel/smartconnect.c +++ b/drivers/platform/x86/intel/smartconnect.c @@ -12,10 +12,14 @@ MODULE_LICENSE("GPL"); static int smartconnect_acpi_probe(struct platform_device *pdev) { - acpi_handle handle = ACPI_HANDLE(&pdev->dev); unsigned long long value; + acpi_handle handle; acpi_status status; + handle = ACPI_HANDLE(&pdev->dev); + if (!handle) + return -ENODEV; + status = acpi_evaluate_integer(handle, "GAOS", NULL, &value); if (ACPI_FAILURE(status)) return -EINVAL; -- cgit v1.2.3 From 7e169326c2263ebc4878baae536c956fa3118eff Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:12:27 +0200 Subject: platform/x86: lg-laptop: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the platform/x86 lg-laptop driver. Fixes: 2d9cb20610f7 ("platform/x86: lg-laptop: Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/3706551.iIbC2pHGDl@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/lg-laptop.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c index 9681412d694b..a8f2f465ef3f 100644 --- a/drivers/platform/x86/lg-laptop.c +++ b/drivers/platform/x86/lg-laptop.c @@ -761,12 +761,11 @@ static void lg_laptop_remove_address_space_handler(void *data) static int acpi_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); struct platform_device_info pdev_info = { - .fwnode = acpi_fwnode_handle(device), .name = PLATFORM_NAME, .id = PLATFORM_DEVID_NONE, }; + struct acpi_device *device; acpi_status status; int ret; const char *product; @@ -775,6 +774,12 @@ static int acpi_probe(struct platform_device *pdev) if (pf_device) return 0; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + + pdev_info.fwnode = acpi_fwnode_handle(device), + status = acpi_install_address_space_handler(device->handle, LG_ADDRESS_SPACE_ID, &lg_laptop_address_space_handler, NULL, &pdev->dev); -- cgit v1.2.3 From f15b0a3043f193677903be52d7dbef3d5c8df282 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:13:13 +0200 Subject: platform/x86: panasonic-laptop: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the platform/x86 panasonic-laptop driver. Fixes: de6837243af0 ("platform/x86: panasonic-laptop: Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/3353471.5fSG56mABF@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/panasonic-laptop.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index 1337f7c49805..b83113c26f88 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -981,11 +981,15 @@ static int acpi_pcc_hotkey_resume(struct device *dev) static int acpi_pcc_hotkey_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); struct backlight_properties props; + struct acpi_device *device; struct pcc_acpi *pcc; int num_sifr, result; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + num_sifr = acpi_pcc_get_sqty(device); /* -- cgit v1.2.3 From f2ec69363fb52fbb2010e5edbec2b8ca0951aadf Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:13:59 +0200 Subject: platform/x86: sony-laptop: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add requisite ACPI_COMPANION() checks against NULL to the platform/x86 sony-laptop driver. Fixes: 138db7ee58c0 ("platform/x86: sony-laptop: Convert PIC driver to a platform one") Fixes: 14004dd31caa ("platform/x86: sony-laptop: Convert NC driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/1871155.VLH7GnMWUR@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/sony-laptop.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index b18f00e9082f..67370967df6f 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -3147,11 +3147,15 @@ static void sony_nc_backlight_cleanup(void) static int sony_nc_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); + struct acpi_device *device; acpi_status status; int result = 0; struct sony_nc_value *item; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + sony_nc_acpi_device = device; strscpy(acpi_device_class(device), "sony/hotkey"); @@ -4509,11 +4513,15 @@ static void sony_pic_remove(struct platform_device *pdev) static int sony_pic_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); struct sony_pic_ioport *io, *tmp_io; struct sony_pic_irq *irq, *tmp_irq; + struct acpi_device *device; int result; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + spic_dev.acpi_dev = device; strscpy(acpi_device_class(device), "sony/hotkey"); sony_pic_detect_device_type(&spic_dev); -- cgit v1.2.3 From 840bcd6bd5bcbc807b679e367ae7a148a1f07370 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:14:43 +0200 Subject: platform/x86: system76: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the platform/x86 system76 driver. Fixes: 80b8f68b94ab ("platform/x86: system76: Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/2072699.usQuhbGJ8B@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/system76_acpi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/system76_acpi.c b/drivers/platform/x86/system76_acpi.c index 693cbb461382..dd7b1b07c316 100644 --- a/drivers/platform/x86/system76_acpi.c +++ b/drivers/platform/x86/system76_acpi.c @@ -674,10 +674,14 @@ static void system76_notify(acpi_handle handle, u32 event, void *context) // Probe a System76 platform device static int system76_probe(struct platform_device *pdev) { - struct acpi_device *acpi_dev = ACPI_COMPANION(&pdev->dev); + struct acpi_device *acpi_dev; struct system76_data *data; int err; + acpi_dev = ACPI_COMPANION(&pdev->dev); + if (!acpi_dev) + return -ENODEV; + data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; -- cgit v1.2.3 From 0978824a64f160f1d2e8dc821a522ad91e7b4215 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:15:31 +0200 Subject: platform/x86: toshiba_acpi: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the platform/x86 toshiba_acpi driver. Fixes: 246d6cefe525 ("platform/x86: toshiba_acpi: Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/1973170.CQOukoFCf9@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/toshiba_acpi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 35d899c01740..7cecb3a70b9c 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -3374,7 +3374,7 @@ static const struct dmi_system_id toshiba_dmi_quirks[] __initconst = { static int toshiba_acpi_probe(struct platform_device *pdev) { - struct acpi_device *acpi_dev = ACPI_COMPANION(&pdev->dev); + struct acpi_device *acpi_dev; struct toshiba_acpi_dev *dev; const char *hci_method; u32 dummy; @@ -3383,6 +3383,10 @@ static int toshiba_acpi_probe(struct platform_device *pdev) if (toshiba_acpi) return -EBUSY; + acpi_dev = ACPI_COMPANION(&pdev->dev); + if (!acpi_dev) + return -ENODEV; + pr_info("Toshiba Laptop ACPI Extras version %s\n", TOSHIBA_ACPI_VERSION); -- cgit v1.2.3 From 48b06fffb16901237dc0acbb0474a54b3cc5730f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:16:18 +0200 Subject: platform/x86: toshiba_bluetooth: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the platform/x86 toshiba_bluetooth driver. Fixes: 553b2ac59fbb ("platform/x86: toshiba_bluetooth: Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/2715450.Lt9SDvczpP@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/toshiba_bluetooth.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/toshiba_bluetooth.c b/drivers/platform/x86/toshiba_bluetooth.c index e50d4fc1e603..e00abba58c7c 100644 --- a/drivers/platform/x86/toshiba_bluetooth.c +++ b/drivers/platform/x86/toshiba_bluetooth.c @@ -230,10 +230,14 @@ static int toshiba_bt_resume(struct device *dev) static int toshiba_bt_rfkill_probe(struct platform_device *pdev) { - struct acpi_device *device = ACPI_COMPANION(&pdev->dev); struct toshiba_bluetooth_dev *bt_dev; + struct acpi_device *device; int result; + device = ACPI_COMPANION(&pdev->dev); + if (!device) + return -ENODEV; + result = toshiba_bluetooth_present(device->handle); if (result) return result; -- cgit v1.2.3 From 4840f8bb3e9aad183e707950f24f829fd220eb07 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:17:00 +0200 Subject: platform/x86: toshiba_haps: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the platform/x86 toshiba_haps driver. Fixes: 3a96c7915d93 ("platform/x86: toshiba_haps: Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/2285136.Mh6RI2rZIc@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/toshiba_haps.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/toshiba_haps.c b/drivers/platform/x86/toshiba_haps.c index 1486252b5983..8d12241924df 100644 --- a/drivers/platform/x86/toshiba_haps.c +++ b/drivers/platform/x86/toshiba_haps.c @@ -182,13 +182,17 @@ static int toshiba_haps_available(acpi_handle handle) static int toshiba_haps_probe(struct platform_device *pdev) { - struct acpi_device *acpi_dev = ACPI_COMPANION(&pdev->dev); struct toshiba_haps_dev *haps; + struct acpi_device *acpi_dev; int ret; if (toshiba_haps) return -EBUSY; + acpi_dev = ACPI_COMPANION(&pdev->dev); + if (!acpi_dev) + return -ENODEV; + if (!toshiba_haps_available(acpi_dev->handle)) return -ENODEV; -- cgit v1.2.3 From 53a8f95cbb407608ef77a864ad4a59f25ddd906c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 12 May 2026 16:17:44 +0200 Subject: platform/x86: wireless-hotkey: Check ACPI_COMPANION() against NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every platform driver can be forced to match a device that doesn't match its list of device IDs because of device_match_driver_override(), so platform drivers that rely on the existence of a device's ACPI companion object need to verify its presence. Accordingly, add a requisite ACPI_COMPANION() check against NULL to the platform/x86 wireless-hotkey driver. Fixes: 8507277ef132 ("platform/x86: wireless-hotkey: Convert ACPI driver to a platform one") Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/3899916.MHq7AAxBmi@rafael.j.wysocki Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/wireless-hotkey.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/wireless-hotkey.c b/drivers/platform/x86/wireless-hotkey.c index f680d8ff8e87..3151844d1699 100644 --- a/drivers/platform/x86/wireless-hotkey.c +++ b/drivers/platform/x86/wireless-hotkey.c @@ -89,9 +89,14 @@ static void wl_notify(acpi_handle handle, u32 event, void *data) static int wl_probe(struct platform_device *pdev) { + struct acpi_device *adev; struct wl_button *button; int err; + adev = ACPI_COMPANION(&pdev->dev); + if (!adev) + return -ENODEV; + button = kzalloc_obj(struct wl_button); if (!button) return -ENOMEM; @@ -104,8 +109,8 @@ static int wl_probe(struct platform_device *pdev) kfree(button); return err; } - err = acpi_dev_install_notify_handler(ACPI_COMPANION(&pdev->dev), - ACPI_DEVICE_NOTIFY, wl_notify, button); + err = acpi_dev_install_notify_handler(adev, ACPI_DEVICE_NOTIFY, + wl_notify, button); if (err) { pr_err("Failed to install ACPI notify handler\n"); wireless_input_destroy(&pdev->dev); -- cgit v1.2.3 From 60a1969fae6209644698fca91c185d153674f631 Mon Sep 17 00:00:00 2001 From: Zhang Cen Date: Wed, 20 May 2026 18:32:49 +0800 Subject: ALSA: seq: Serialize UMP output teardown with event_input seq_ump_process_event() borrows client->out_rfile.output without synchronizing with the first-open and last-close transition in seq_ump_client_open() and seq_ump_client_close(). The last output unuse can therefore drop opened[STR_OUT] to zero and release the rawmidi file while an in-flight event_input callback is still inside snd_rawmidi_kernel_write(). That leaves the rawmidi substream runtime exposed to teardown before the write path has taken its own buffer reference. Add a per-client rwlock for the event_input-visible output file. Publish a newly opened output file under the write side, and hold the read side from the output lookup through snd_rawmidi_kernel_write(). The last output close copies and clears the visible output file under the write side, then drops the lock and releases the saved rawmidi file. Use IRQ-safe rwlock guards because event_input can also be reached from atomic sequencer delivery. The buggy scenario involves two paths, with each column showing the order within that path: path A label: event_input path path B label: last unuse path 1. seq_ump_process_event() reads 1. seq_ump_client_close() client->out_rfile.output. drops opened[STR_OUT] to zero. 2. snd_rawmidi_kernel_write1() 2. snd_rawmidi_kernel_release() has not yet pinned runtime. closes the output file. 3. The writer continues using 3. close_substream() frees the borrowed substream. substream->runtime. This keeps the output substream and runtime alive for the full event_input write while keeping rawmidi release outside the rwlock. KASAN reproduced this as a slab-use-after-free in snd_rawmidi_kernel_write1(), with allocation through seq_ump_use()/snd_seq_port_connect() and free through seq_ump_unuse()/snd_seq_port_disconnect(). Suggested-by: Takashi Iwai Validation reproduced this kernel report: KASAN slab-use-after-free in snd_rawmidi_kernel_write1+0x9d/0x400 RIP: 0033:0x7f5528af837f Read of size 8 Call trace: dump_stack_lvl+0x73/0xb0 (?:?) print_report+0xd1/0x650 (?:?) srso_alias_return_thunk+0x5/0xfbef5 (?:?) __virt_addr_valid+0x1a7/0x340 (?:?) kasan_complete_mode_report_info+0x64/0x200 (?:?) kasan_report+0xf7/0x130 (?:?) snd_rawmidi_kernel_write1+0x9d/0x400 (?:?) __asan_load8+0x82/0xb0 (?:?) update_stack_state+0x1ef/0x2d0 (?:?) snd_rawmidi_kernel_write+0x1a/0x20 (?:?) seq_ump_process_event+0xd4/0x120 (sound/core/seq/seq_ump_client.c:82) __snd_seq_deliver_single_event+0x8a/0xe0 (?:?) snd_seq_deliver_from_ump+0x2b2/0xd60 (?:?) lock_acquire+0x14e/0x2e0 (?:?) find_held_lock+0x31/0x90 (?:?) snd_seq_port_use_ptr+0xa6/0xe0 (?:?) __kasan_check_write+0x18/0x20 (?:?) do_raw_read_unlock+0x32/0xa0 (?:?) _raw_read_unlock+0x26/0x50 (?:?) snd_seq_deliver_single_event+0x45c/0x4b0 (?:?) snd_seq_deliver_event+0x10d/0x1b0 (?:?) snd_seq_client_enqueue_event+0x192/0x240 (?:?) snd_seq_write+0x2cd/0x450 (?:?) apparmor_file_permission+0x20/0x30 (?:?) security_file_permission+0x51/0x60 (?:?) vfs_write+0x1ce/0x850 (?:?) __fget_files+0x12b/0x220 (?:?) lock_release+0xc8/0x2a0 (?:?) __rcu_read_unlock+0x74/0x2d0 (?:?) __fget_files+0x135/0x220 (?:?) ksys_write+0x15a/0x180 (?:?) rcu_is_watching+0x24/0x60 (?:?) __x64_sys_write+0x46/0x60 (?:?) x64_sys_call+0x7d/0x20d0 (?:?) do_syscall_64+0xc1/0x360 (arch/x86/entry/syscall_64.c:87) entry_SYSCALL_64_after_hwframe+0x77/0x7f (?:?) Fixes: 81fd444aa371 ("ALSA: seq: Bind UMP device") Signed-off-by: Zhang Cen Link: https://patch.msgid.link/20260520103249.3048345-1-rollkingzzc@gmail.com Signed-off-by: Takashi Iwai --- sound/core/seq/seq_ump_client.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/sound/core/seq/seq_ump_client.c b/sound/core/seq/seq_ump_client.c index 9079ccfdc866..ccd93599b493 100644 --- a/sound/core/seq/seq_ump_client.c +++ b/sound/core/seq/seq_ump_client.c @@ -37,6 +37,7 @@ struct seq_ump_client { struct snd_ump_endpoint *ump; /* assigned endpoint */ int seq_client; /* sequencer client id */ int opened[2]; /* current opens for each direction */ + rwlock_t output_lock; /* protects out_rfile output access */ struct snd_rawmidi_file out_rfile; /* rawmidi for output */ struct seq_ump_input_buffer input; /* input parser context */ void *ump_info[SNDRV_UMP_MAX_BLOCKS + 1]; /* shadow of seq client ump_info */ @@ -88,6 +89,7 @@ static int seq_ump_process_event(struct snd_seq_event *ev, int direct, unsigned char type; int len; + guard(read_lock_irqsave)(&client->output_lock); substream = client->out_rfile.output; if (!substream) return -ENODEV; @@ -106,6 +108,7 @@ static int seq_ump_process_event(struct snd_seq_event *ev, int direct, static int seq_ump_client_open(struct seq_ump_client *client, int dir) { struct snd_ump_endpoint *ump = client->ump; + struct snd_rawmidi_file rfile = {}; int err; guard(mutex)(&ump->open_mutex); @@ -113,9 +116,11 @@ static int seq_ump_client_open(struct seq_ump_client *client, int dir) err = snd_rawmidi_kernel_open(&ump->core, 0, SNDRV_RAWMIDI_LFLG_OUTPUT | SNDRV_RAWMIDI_LFLG_APPEND, - &client->out_rfile); + &rfile); if (err < 0) return err; + scoped_guard(write_lock_irqsave, &client->output_lock) + client->out_rfile = rfile; } client->opened[dir]++; return 0; @@ -125,11 +130,19 @@ static int seq_ump_client_open(struct seq_ump_client *client, int dir) static int seq_ump_client_close(struct seq_ump_client *client, int dir) { struct snd_ump_endpoint *ump = client->ump; + struct snd_rawmidi_file rfile = {}; guard(mutex)(&ump->open_mutex); - if (!--client->opened[dir]) - if (dir == STR_OUT) - snd_rawmidi_kernel_release(&client->out_rfile); + if (!--client->opened[dir]) { + if (dir == STR_OUT) { + scoped_guard(write_lock_irqsave, &client->output_lock) { + rfile = client->out_rfile; + client->out_rfile = (struct snd_rawmidi_file){}; + } + if (rfile.rmidi) + snd_rawmidi_kernel_release(&rfile); + } + } return 0; } @@ -467,6 +480,7 @@ static int snd_seq_ump_probe(struct snd_seq_device *dev) INIT_WORK(&client->group_notify_work, handle_group_notify); client->ump = ump; + rwlock_init(&client->output_lock); client->seq_client = snd_seq_create_kernel_client(card, ump->core.device, -- cgit v1.2.3 From 4ce058df2ee02cc2a0f0fd5cd64ce6f1482a0b65 Mon Sep 17 00:00:00 2001 From: Zhang Cen Date: Tue, 19 May 2026 19:11:50 +0800 Subject: USB: serial: belkin_sa: validate interrupt status length The Belkin interrupt callback treats interrupt data as a four-byte status report and reads LSR/MSR fields at offsets 2 and 3. The interrupt-in buffer length is derived from endpoint wMaxPacketSize, and short interrupt transfers may complete successfully with a smaller actual_length. Check the completed interrupt packet length before parsing status fields so short interrupt endpoints and short successful packets are ignored instead of causing out-of-bounds or stale status-byte reads. KASAN report as below: BUG: KASAN: slab-out-of-bounds in belkin_sa_read_int_callback() Read of size 1 Call trace: belkin_sa_read_int_callback() (drivers/usb/serial/belkin_sa.c:202) __usb_hcd_giveback_urb() (drivers/usb/core/hcd.c:1630) dummy_timer() (?:?) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Assisted-by: Codex:gpt-5.5 Signed-off-by: Zhang Cen Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/belkin_sa.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c index 38ac910b1082..7bbd9523d4e9 100644 --- a/drivers/usb/serial/belkin_sa.c +++ b/drivers/usb/serial/belkin_sa.c @@ -194,6 +194,9 @@ static void belkin_sa_read_int_callback(struct urb *urb) usb_serial_debug_data(&port->dev, __func__, urb->actual_length, data); + if (urb->actual_length < BELKIN_SA_MSR_INDEX + 1) + goto exit; + /* Handle known interrupt data */ /* ignore data[0] and data[1] */ -- cgit v1.2.3 From cb3560e8eab1dfa1cac1ed52631adf8ec6ff2cd5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 20 May 2026 16:26:22 +0200 Subject: USB: serial: digi_acceleport: fix memory corruption with small endpoints Add the missing bulk-out buffer size sanity checks to avoid out-of-bounds memory accesses or slab corruption should a malicious device report smaller buffers than expected. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/digi_acceleport.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index d515df045c4c..c481208255eb 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -1229,15 +1229,34 @@ static int digi_port_init(struct usb_serial_port *port, unsigned port_num) static int digi_startup(struct usb_serial *serial) { struct digi_serial *serial_priv; + int oob_port_num; int ret; + int i; + + /* + * The port bulk-out buffers must be large enough for header and + * buffered data. + */ + for (i = 0; i < serial->type->num_ports; i++) { + if (serial->port[i]->bulk_out_size < DIGI_OUT_BUF_SIZE + 2) + return -EINVAL; + } + + /* + * The OOB port bulk-out buffer must be large enough for the two + * commands in digi_set_modem_signals(). + */ + oob_port_num = serial->type->num_ports; + if (serial->port[oob_port_num]->bulk_out_size < 8) + return -EINVAL; serial_priv = kzalloc_obj(*serial_priv); if (!serial_priv) return -ENOMEM; spin_lock_init(&serial_priv->ds_serial_lock); - serial_priv->ds_oob_port_num = serial->type->num_ports; - serial_priv->ds_oob_port = serial->port[serial_priv->ds_oob_port_num]; + serial_priv->ds_oob_port_num = oob_port_num; + serial_priv->ds_oob_port = serial->port[oob_port_num]; ret = digi_port_init(serial_priv->ds_oob_port, serial_priv->ds_oob_port_num); -- cgit v1.2.3 From ab8336a7e414f018430aa1af3a46944032f7ff96 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 20 May 2026 16:26:48 +0200 Subject: USB: serial: keyspan: fix missing indat transfer sanity check Add the missing sanity check on the size of usa49wg indat transfers to avoid parsing stale or uninitialised slab data. Fixes: 0ca1268e109a ("USB Serial Keyspan: add support for USA-49WG & USA-28XG") Cc: stable@vger.kernel.org # 2.6.23 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/keyspan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index 46448843541a..28b80607cebd 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -1187,6 +1187,10 @@ static void usa49wg_indat_callback(struct urb *urb) len = 0; while (i < urb->actual_length) { + if (urb->actual_length - i < 3) { + dev_warn_ratelimited(&urb->dev->dev, "malformed indat packet\n"); + break; + } /* Check port number from message */ if (data[i] >= serial->num_ports) { -- cgit v1.2.3 From 915b36d701950503c4ea0f6e314b10868e59fce3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 20 May 2026 16:27:00 +0200 Subject: USB: serial: mct_u232: fix memory corruption with small endpoint The driver overrides the maximum transfer size for a specific device which only accepts 16 byte packets for its 32 byte bulk-out endpoint. Make sure to never increase the maximum transfer size to prevent slab corruption should a malicious device report a smaller endpoint max packet size than expected. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/mct_u232.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index 18844b92bd08..ca1530da6e77 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -378,6 +378,7 @@ static int mct_u232_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct mct_u232_private *priv; + u16 pid; /* check first to simplify error handling */ if (!serial->port[1] || !serial->port[1]->interrupt_in_urb) { @@ -385,6 +386,16 @@ static int mct_u232_port_probe(struct usb_serial_port *port) return -ENODEV; } + /* + * Compensate for a hardware bug: although the Sitecom U232-P25 + * device reports a maximum output packet size of 32 bytes, + * it seems to be able to accept only 16 bytes (and that's what + * SniffUSB says too...) + */ + pid = le16_to_cpu(serial->dev->descriptor.idProduct); + if (pid == MCT_U232_SITECOM_PID) + port->bulk_out_size = min(16, port->bulk_out_size); + priv = kzalloc_obj(*priv); if (!priv) return -ENOMEM; @@ -410,7 +421,6 @@ static void mct_u232_port_remove(struct usb_serial_port *port) static int mct_u232_open(struct tty_struct *tty, struct usb_serial_port *port) { - struct usb_serial *serial = port->serial; struct mct_u232_private *priv = usb_get_serial_port_data(port); int retval = 0; unsigned int control_state; @@ -418,15 +428,6 @@ static int mct_u232_open(struct tty_struct *tty, struct usb_serial_port *port) unsigned char last_lcr; unsigned char last_msr; - /* Compensate for a hardware bug: although the Sitecom U232-P25 - * device reports a maximum output packet size of 32 bytes, - * it seems to be able to accept only 16 bytes (and that's what - * SniffUSB says too...) - */ - if (le16_to_cpu(serial->dev->descriptor.idProduct) - == MCT_U232_SITECOM_PID) - port->bulk_out_size = 16; - /* Do a defined restart: the normal serial device seems to * always turn on DTR and RTS here, so do the same. I'm not * sure if this is really necessary. But it should not harm -- cgit v1.2.3 From 245aba83e3c288e176ed037a1f6b618b09e92ed8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 20 May 2026 16:27:10 +0200 Subject: USB: serial: mct_u232: fix missing interrupt-in transfer sanity check Add the missing sanity check on the size of interrupt-in transfers to avoid parsing stale or uninitialised slab data (and leaking it to user space). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/mct_u232.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index ca1530da6e77..163161881d2d 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -544,6 +544,11 @@ static void mct_u232_read_int_callback(struct urb *urb) goto exit; } + if (urb->actual_length < 2) { + dev_warn_ratelimited(&port->dev, "short interrupt-in packet\n"); + goto exit; + } + /* * The interrupt-in pipe signals exceptional conditions (modem line * signal changes and errors). data[0] holds MSR, data[1] holds LSR. -- cgit v1.2.3 From 9af1b6e175c82daf4b423da339a722d8e67a735a Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Tue, 19 May 2026 13:52:47 +0530 Subject: drm/virtio: use uninterruptible resv lock for plane updates virtio_gpu_cursor_plane_update() and virtio_gpu_resource_flush() lock the framebuffer BO's dma_resv via virtio_gpu_array_lock_resv() and ignore its return value. The function can fail with -EINTR from dma_resv_lock_interruptible() (signal during lock wait) or with -ENOMEM from dma_resv_reserve_fences() (fence slot allocation), leaving the resv lock not held. The queue path then walks the object array and calls dma_resv_add_fence(), which requires the lock held; with lockdep enabled this trips dma_resv_assert_held(): WARNING: drivers/dma-buf/dma-resv.c:296 at dma_resv_add_fence+0x71e/0x840 Call Trace: virtio_gpu_array_add_fence virtio_gpu_queue_ctrl_sgs virtio_gpu_queue_fenced_ctrl_buffer virtio_gpu_cursor_plane_update drm_atomic_helper_commit_planes drm_atomic_helper_commit_tail commit_tail drm_atomic_helper_commit drm_atomic_commit drm_atomic_helper_update_plane __setplane_atomic drm_mode_cursor_universal drm_mode_cursor_common drm_mode_cursor_ioctl drm_ioctl __x64_sys_ioctl Beyond the WARN, mutating the dma_resv fence list without the lock races with concurrent readers/writers and can corrupt the list. Both call sites run inside the .atomic_update plane callback, which DRM atomic helpers do not allow to fail (by the time it runs, the commit has been signed off to userspace and there is no clean rollback path). Moving the lock acquisition to .prepare_fb was rejected because the broader lock scope deadlocks against other BO locking paths in the same atomic commit. Introduce virtio_gpu_lock_one_resv_uninterruptible() that uses dma_resv_lock() instead of dma_resv_lock_interruptible(). This eliminates the -EINTR failure mode -- the realistic syzbot trigger -- without extending the lock hold across the commit. The helper locks a single BO and rejects nents > 1 with -EINVAL; both fix sites lock exactly one BO. Use it from virtio_gpu_cursor_plane_update() and virtio_gpu_resource_flush(); check the return value to handle the remaining -ENOMEM case from dma_resv_reserve_fences() by freeing the objs and skipping the plane update for that frame. The framebuffer BOs touched here are not shared with other contexts and lock contention is expected to be brief, so the loss of signal-interruptibility is acceptable. Other callers of virtio_gpu_array_lock_resv() (the ioctl paths) continue to use the interruptible variant. The bug was reported by syzbot, triggered via fault injection (fail_nth) on the DRM_IOCTL_MODE_CURSOR path, which forces the -ENOMEM branch in dma_resv_reserve_fences(). Reported-by: syzbot+72bd3dd3a5d5f39a0271@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=72bd3dd3a5d5f39a0271 Fixes: 5cfd31c5b3a3 ("drm/virtio: fix virtio_gpu_cursor_plane_update().") Cc: stable@vger.kernel.org Signed-off-by: Deepanshu Kartikey Signed-off-by: Dmitry Osipenko Link: https://patch.msgid.link/20260519082247.34470-1-kartikey406@gmail.com --- drivers/gpu/drm/virtio/virtgpu_drv.h | 1 + drivers/gpu/drm/virtio/virtgpu_gem.c | 17 +++++++++++++++++ drivers/gpu/drm/virtio/virtgpu_plane.c | 10 ++++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index f17660a71a3e..2f3531950aa4 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -317,6 +317,7 @@ virtio_gpu_array_from_handles(struct drm_file *drm_file, u32 *handles, u32 nents void virtio_gpu_array_add_obj(struct virtio_gpu_object_array *objs, struct drm_gem_object *obj); int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs); +int virtio_gpu_lock_one_resv_uninterruptible(struct virtio_gpu_object_array *objs); void virtio_gpu_array_unlock_resv(struct virtio_gpu_object_array *objs); void virtio_gpu_array_add_fence(struct virtio_gpu_object_array *objs, struct dma_fence *fence); diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index f22dc5c21cd4..435d37d36034 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -238,6 +238,23 @@ int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs) return ret; } +int virtio_gpu_lock_one_resv_uninterruptible(struct virtio_gpu_object_array *objs) +{ + int ret; + + if (objs->nents != 1) + return -EINVAL; + + dma_resv_lock(objs->objs[0]->resv, NULL); + + ret = dma_resv_reserve_fences(objs->objs[0]->resv, 1); + if (ret) { + virtio_gpu_array_unlock_resv(objs); + return ret; + } + return 0; +} + void virtio_gpu_array_unlock_resv(struct virtio_gpu_object_array *objs) { if (objs->nents == 1) { diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index a126d1b25f46..652352424744 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -215,7 +215,10 @@ static void virtio_gpu_resource_flush(struct drm_plane *plane, if (!objs) return; virtio_gpu_array_add_obj(objs, vgfb->base.obj[0]); - virtio_gpu_array_lock_resv(objs); + if (virtio_gpu_lock_one_resv_uninterruptible(objs)) { + virtio_gpu_array_put_free(objs); + return; + } virtio_gpu_cmd_resource_flush(vgdev, bo->hw_res_handle, x, y, width, height, objs, vgplane_st->fence); @@ -459,7 +462,10 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane, if (!objs) return; virtio_gpu_array_add_obj(objs, vgfb->base.obj[0]); - virtio_gpu_array_lock_resv(objs); + if (virtio_gpu_lock_one_resv_uninterruptible(objs)) { + virtio_gpu_array_put_free(objs); + return; + } virtio_gpu_cmd_transfer_to_host_2d (vgdev, 0, plane->state->crtc_w, -- cgit v1.2.3 From 237557b8a81ab948e8332f7c0058e758f081c0a3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 20 May 2026 15:05:04 +0200 Subject: sysfs: don't remove existing directory on update failure When sysfs_update_group() is called for a named group and create_files() fails (e.g. -ENOMEM), internal_create_group() calls kernfs_remove(kn) on the group directory. In the update path, kn was obtained via kernfs_find_and_get() and refers to a directory that already existed before this call. Removing it silently destroys a sysfs group that the caller did not create. Only remove the directory if we created it ourselves. On update failure the directory remains as it is left empty by remove_files() inside create_files(), but can be repopulated by a retry. Cc: Rajat Jain Fixes: c855cf2759d2 ("sysfs: Fix internal_create_group() for named group updates") Cc: stable Assisted-by: gkh_clanker_t1000 Reviewed-by: Rafael J. Wysocki (Intel) Reviewed-by: Danilo Krummrich Link: https://patch.msgid.link/2026052003-uniquely-hastily-c093@gregkh Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/group.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 182e54e575ee..4e1e4f18a166 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -188,7 +188,7 @@ static int internal_create_group(struct kobject *kobj, int update, kernfs_get(kn); error = create_files(kn, kobj, uid, gid, grp, update); if (error) { - if (grp->name) + if (grp->name && !update) kernfs_remove(kn); } kernfs_put(kn); -- cgit v1.2.3 From 3d879647fb03dab6fe6e1dd9404a2dd324096218 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 May 2026 10:02:58 -0600 Subject: io_uring/timeout: splice timed out link in timeout handler A previous commit deferred this to the task_work part of it, so it could be protected by ->uring_lock. But that's actually not necessary here, and in fact the head clearing is not enough to make that safe. For those two reasons, just re-instate the local splicing. Fixes: 49ae66eb8c27 ("io_uring: defer linked-timeout chain splice out of hrtimer context") Signed-off-by: Jens Axboe --- io_uring/timeout.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 6353a4d979dc..c4dd26cf342d 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -417,8 +417,10 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) * done in io_req_task_link_timeout(), if needed. */ if (prev) { - if (!req_ref_inc_not_zero(prev)) + if (!req_ref_inc_not_zero(prev)) { + io_remove_next_linked(prev); prev = NULL; + } } list_del(&timeout->list); timeout->prev = prev; -- cgit v1.2.3 From 22572dbcd3486e6c4dced877125bbf50e4e24edf Mon Sep 17 00:00:00 2001 From: Cunlong Li Date: Wed, 20 May 2026 11:30:54 +0800 Subject: cgroup: rstat: relax NMI guard after switch to try_cmpxchg Commit 36df6e3dbd7e ("cgroup: make css_rstat_updated nmi safe") used this_cpu_cmpxchg() for the lockless insertion, and therefore required both ARCH_HAVE_NMI_SAFE_CMPXCHG and ARCH_HAS_NMI_SAFE_THIS_CPU_OPS in the NMI guard: on archs without the latter, this_cpu_cmpxchg() falls back to "local_irq_save() + plain cmpxchg", and local_irq_save() cannot mask NMIs. Commit 3309b63a2281 ("cgroup: rstat: use LOCK CMPXCHG in css_rstat_updated") later replaced this_cpu_cmpxchg() with plain try_cmpxchg() to fix cross-CPU lockless-list corruption, but left the NMI guard untouched. After that switch, css_rstat_updated() no longer performs any this_cpu_*() RMW operations and only relies on the arch having NMI-safe cmpxchg, so ARCH_HAS_NMI_SAFE_THIS_CPU_OPS is no longer required in the guard. Relax the guard accordingly so that archs which have HAVE_NMI and ARCH_HAVE_NMI_SAFE_CMPXCHG but not ARCH_HAS_NMI_SAFE_THIS_CPU_OPS (e.g. sparc, powerpc on PPC64/BOOK3S) can benefit from the existing CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC path. Without this, the css is never queued in NMI on those archs, and the atomics staged by account_{slab,kmem}_nmi_safe() are not drained by flush_nmi_stats(). Fixes: 3309b63a2281 ("cgroup: rstat: use LOCK CMPXCHG in css_rstat_updated") Signed-off-by: Cunlong Li Signed-off-by: Tejun Heo --- kernel/cgroup/rstat.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index ed60ba119c68..de816a43db9f 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -81,11 +81,10 @@ void __css_rstat_updated(struct cgroup_subsys_state *css, int cpu) lockdep_assert_preemption_disabled(); /* - * For archs withnot nmi safe cmpxchg or percpu ops support, ignore - * the requests from nmi context. + * The lockless insertion below relies on NMI-safe cmpxchg; + * bail out in NMI on archs that don't provide it. */ - if ((!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) || - !IS_ENABLED(CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS)) && in_nmi()) + if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && in_nmi()) return; rstatc = css_rstat_cpu(css, cpu); -- cgit v1.2.3 From 9fc75b71fdd38465c76c6f6a884cdd4ae3c72d90 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 19 May 2026 23:07:26 +0200 Subject: rbd: eliminate a race in lock_dwork draining on unmap Given how rbd_lock_add_request() and rbd_img_exclusive_lock() are written, lock_dwork may be (re)queued more than it's actually needed: for example in case a new I/O request comes in while we are in the middle of rbd_acquire_lock() on behalf of another I/O request. This is expected and with rbd_release_lock() preemptively canceling lock_dwork is benign under normal operation. A more problematic example is maybe_kick_acquire(): if (have_requests || delayed_work_pending(&rbd_dev->lock_dwork)) { dout("%s rbd_dev %p kicking lock_dwork\n", __func__, rbd_dev); mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); } It's not unrealistic for lock_dwork to get canceled right after delayed_work_pending() returns true and for mod_delayed_work() to requeue it right there anyway. This is a classic TOCTOU race. When it comes to unmapping the image, there is an implicit assumption of no self-initiated exclusive lock activity past the point of return from rbd_dev_image_unlock() which unlocks the lock if it happens to be held. This unlock is assumed to be final and lock_dwork (as well as all other exclusive lock tasks, really) isn't expected to get queued again. However, lock_dwork is canceled only in cancel_tasks_sync() (i.e. later in the unmap sequence) and on top of that the cancellation can get in effect nullified by maybe_kick_acquire(). This may result in rbd_acquire_lock() executing after rbd_dev_device_release() and rbd_dev_image_release() run and free and/or reset a bunch of things. One of the possible failure modes then is a violated rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); in rbd_dev_header_info() which is called via rbd_dev_refresh() from rbd_post_acquire_action(). Redo exclusive lock task draining to provide saner semantics and try to meet the assumptions around rbd_dev_image_unlock(). Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Reviewed-by: Viacheslav Dubeyko --- drivers/block/rbd.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4065336ebd1f..6c1e7347e6a7 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4565,24 +4565,12 @@ out: return ret; } -static void cancel_tasks_sync(struct rbd_device *rbd_dev) -{ - dout("%s rbd_dev %p\n", __func__, rbd_dev); - - cancel_work_sync(&rbd_dev->acquired_lock_work); - cancel_work_sync(&rbd_dev->released_lock_work); - cancel_delayed_work_sync(&rbd_dev->lock_dwork); - cancel_work_sync(&rbd_dev->unlock_work); -} - /* * header_rwsem must not be held to avoid a deadlock with * rbd_dev_refresh() when flushing notifies. */ static void rbd_unregister_watch(struct rbd_device *rbd_dev) { - cancel_tasks_sync(rbd_dev); - mutex_lock(&rbd_dev->watch_mutex); if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED) __rbd_unregister_watch(rbd_dev); @@ -6548,10 +6536,18 @@ out_err: static void rbd_dev_image_unlock(struct rbd_device *rbd_dev) { + dout("%s rbd_dev %p\n", __func__, rbd_dev); + + disable_delayed_work_sync(&rbd_dev->lock_dwork); + disable_work_sync(&rbd_dev->unlock_work); + down_write(&rbd_dev->lock_rwsem); if (__rbd_is_lock_owner(rbd_dev)) __rbd_release_lock(rbd_dev); up_write(&rbd_dev->lock_rwsem); + + flush_work(&rbd_dev->acquired_lock_work); + flush_work(&rbd_dev->released_lock_work); } /* -- cgit v1.2.3 From 576ec047d20b368b43c4d5db98c4f2e0f3c101ec Mon Sep 17 00:00:00 2001 From: David Carlier Date: Fri, 8 May 2026 20:57:47 +0100 Subject: tracing: Avoid NULL return from hist_field_name() on truncation hist_field_name() returns "" everywhere except the fully-qualified VAR_REF/EXPR case, where snprintf() truncation returns NULL early and bypasses the bottom NULL->"" guard. Callers don't expect NULL: strcat(expr, hist_field_name(field, 0)) at trace_events_hist.c:1758 and the strcmp() in the sort-key match loop at :4804 both deref it. system and event_name are bounded by MAX_EVENT_NAME_LEN, but the field name on a VAR_REF is kstrdup'd from a histogram variable name parsed out of the trigger string and has no length cap, so a long enough var name in a fully qualified reference can reach the truncation path. Keep the length check but leave field_name as "" on overflow. Link: https://patch.msgid.link/20260508195747.25492-1-devnexen@gmail.com Fixes: 5ec1d1e97de1 ("tracing: Rebuild full_name on each hist_field_name() call") Signed-off-by: David Carlier Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_hist.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0dbbf6cca9bc..eb2c2bc8bc3d 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1369,10 +1369,8 @@ static const char *hist_field_name(struct hist_field *field, len = snprintf(full_name, sizeof(full_name), fmt, field->system, field->event_name, field->name); - if (len >= sizeof(full_name)) - return NULL; - - field_name = full_name; + if (len < sizeof(full_name)) + field_name = full_name; } else field_name = field->name; } else if (field->flags & HIST_FIELD_FL_TIMESTAMP) -- cgit v1.2.3 From 59e932ded949fa6f0340bf7c6d7818f962fa4fd2 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 12 May 2026 22:15:39 +0200 Subject: Bluetooth: bnep: Fix UAF read of dev->name bnep_add_connection() needs to keep holding the bnep_session_sem while reading dev->name (just like bnep_get_connlist() does); otherwise the bnep_session() thread can concurrently free the net_device, which can for example be triggered by a concurrent bnep_del_connection(). (This UAF is fairly uninteresting from a security perspective; calling bnep_add_connection() requires passing a capable(CAP_NET_ADMIN) check. It also requires completely tearing down a netdev during a fairly tight race window.) Cc: stable@vger.kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jann Horn Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/bnep/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 853c8d7644b5..0de5df690bd0 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -645,8 +645,8 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) goto failed; } - up_write(&bnep_session_sem); strcpy(req->device, dev->name); + up_write(&bnep_session_sem); return 0; failed: -- cgit v1.2.3 From 23d528d817a485fe9800a66c9411bd9e3d8a6f63 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 14 May 2026 09:42:24 -0400 Subject: Bluetooth: hci_sync: Fix not setting mask for HCI_EVT_LE_ALL_REMOTE_FEATURES_COMPLETE This fixes not setting the bit for HCI_EVT_LE_ALL_REMOTE_FEATURES_COMPLETE when extended features bit is set otherwise the controller may not generate HCI_EVT_LE_ALL_REMOTE_FEATURES_COMPLETE causing hci_le_read_all_remote_features_sync to timeout waiting for it. Also remove dead code. Fixes: a106e50be74b ("Bluetooth: HCI: Add support for LL Extended Feature Set") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index fd3aacdea512..aff8562a8690 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -4438,6 +4438,9 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev) events[4] |= 0x02; /* LE BIG Info Advertising Report */ } + if (ll_ext_feature_capable(hdev)) + events[5] |= BIT(2); + if (le_cs_capable(hdev)) { /* Channel Sounding events */ events[5] |= 0x08; /* LE CS Read Remote Supported Cap Complete event */ @@ -7413,9 +7416,6 @@ static int hci_le_read_all_remote_features_sync(struct hci_dev *hdev, sizeof(cp), &cp, HCI_EVT_LE_ALL_REMOTE_FEATURES_COMPLETE, HCI_CMD_TIMEOUT, NULL); - - return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_ALL_REMOTE_FEATURES, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_le_read_remote_features_sync(struct hci_dev *hdev, void *data) -- cgit v1.2.3 From 88365d04fdc821dc4e9eb0cc00fdf6905430d172 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Fri, 15 May 2026 00:32:48 +0530 Subject: Bluetooth: btintel_pcie: Fix incorrect MAC access programming btintel_pcie_get_mac_access() and btintel_pcie_release_mac_access() were programming STOP_MAC_ACCESS_DIS and XTAL_CLK_REQ in addition to the MAC_ACCESS_REQ handshake. These bits are not part of the host MAC-access handshake on the supported parts; the driver was programming them incorrectly. Drop the writes so the register update contains only the bits the controller actually consumes. Fixes: b9465e6670a2 ("Bluetooth: btintel_pcie: Read hardware exception data") Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 20 ++++++-------------- drivers/bluetooth/btintel_pcie.h | 3 --- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index a3643e67b33f..37e050763633 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -582,12 +582,10 @@ static int btintel_pcie_get_mac_access(struct btintel_pcie_data *data) reg = btintel_pcie_rd_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG); - reg |= BTINTEL_PCIE_CSR_FUNC_CTRL_STOP_MAC_ACCESS_DIS; - reg |= BTINTEL_PCIE_CSR_FUNC_CTRL_XTAL_CLK_REQ; - if ((reg & BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_STS) == 0) + if (!(reg & BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ)) { reg |= BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ; - - btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG, reg); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG, reg); + } do { reg = btintel_pcie_rd_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG); @@ -607,16 +605,10 @@ static void btintel_pcie_release_mac_access(struct btintel_pcie_data *data) reg = btintel_pcie_rd_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG); - if (reg & BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ) + if (reg & BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ) { reg &= ~BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ; - - if (reg & BTINTEL_PCIE_CSR_FUNC_CTRL_STOP_MAC_ACCESS_DIS) - reg &= ~BTINTEL_PCIE_CSR_FUNC_CTRL_STOP_MAC_ACCESS_DIS; - - if (reg & BTINTEL_PCIE_CSR_FUNC_CTRL_XTAL_CLK_REQ) - reg &= ~BTINTEL_PCIE_CSR_FUNC_CTRL_XTAL_CLK_REQ; - - btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG, reg); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_FUNC_CTRL_REG, reg); + } } static void *btintel_pcie_copy_tlv(void *dest, enum btintel_pcie_tlv_type type, diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h index f922abd1e7d8..13efef499e4e 100644 --- a/drivers/bluetooth/btintel_pcie.h +++ b/drivers/bluetooth/btintel_pcie.h @@ -34,9 +34,6 @@ #define BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_STS (BIT(20)) #define BTINTEL_PCIE_CSR_FUNC_CTRL_MAC_ACCESS_REQ (BIT(21)) -/* Stop MAC Access disconnection request */ -#define BTINTEL_PCIE_CSR_FUNC_CTRL_STOP_MAC_ACCESS_DIS (BIT(22)) -#define BTINTEL_PCIE_CSR_FUNC_CTRL_XTAL_CLK_REQ (BIT(23)) #define BTINTEL_PCIE_CSR_FUNC_CTRL_BUS_MASTER_STS (BIT(28)) #define BTINTEL_PCIE_CSR_FUNC_CTRL_BUS_MASTER_DISCON (BIT(29)) -- cgit v1.2.3 From 84c24fb151fc1179355296d7ff29129ac7c42129 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Fri, 15 May 2026 07:25:25 +0100 Subject: Bluetooth: ISO: drop ISO_END frames received without prior ISO_START ISO data PDUs carry a packet-boundary flag indicating START, CONT, END or SINGLE. The ISO_CONT branch of iso_recv() guards against a missing ISO_START by checking conn->rx_len before touching conn->rx_skb, but ISO_END does not. If a peer sends an ISO_END as the first packet on a fresh ISO connection, conn->rx_skb is still NULL and conn->rx_len is zero, so skb_put(conn->rx_skb, ...) dereferences NULL and oopses. For BIS, where receivers sync to a broadcaster without pairing, any broadcaster on the air can trigger this. Mirror the ISO_CONT check at the top of ISO_END so a stray end fragment is logged and dropped instead of crashing the host. Fixes: ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Signed-off-by: David Carlier Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 7cb2864fe872..b971281f0a2b 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2593,6 +2593,11 @@ int iso_recv(struct hci_dev *hdev, u16 handle, struct sk_buff *skb, u16 flags) break; case ISO_END: + if (!conn->rx_len) { + BT_ERR("Unexpected end frame (len %d)", skb->len); + goto drop; + } + skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), skb->len); conn->rx_len -= skb->len; -- cgit v1.2.3 From dd1dda6b8d6e1f4376a5b3055a04f0ecbdb4d6bd Mon Sep 17 00:00:00 2001 From: Jiajia Liu Date: Mon, 18 May 2026 10:24:02 +0800 Subject: Bluetooth: btmtk: fix urb->setup_packet leak in error paths The setup_packet of control urb is not freed if usb_submit_urb fails or the submitted urb is killed. Add free in these two paths. Fixes: a1c49c434e150 ("Bluetooth: btusb: Add protocol support for MediaTek MT7668U USB devices") Signed-off-by: Jiajia Liu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index a29f72216c34..8ff66b276af0 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -537,6 +537,7 @@ static void btmtk_usb_wmt_recv(struct urb *urb) return; } else if (urb->status == -ENOENT) { /* Avoid suspend failed when usb_kill_urb */ + kfree(urb->setup_packet); return; } @@ -610,6 +611,7 @@ static int btmtk_usb_submit_wmt_recv_urb(struct hci_dev *hdev) if (err != -EPERM && err != -ENODEV) bt_dev_err(hdev, "urb %p submission failed (%d)", urb, -err); + kfree(dr); usb_unanchor_urb(urb); } -- cgit v1.2.3 From d3f7d17960ed50df3a6709c5158caff989c8c905 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Fri, 15 May 2026 10:38:19 -0400 Subject: Bluetooth: MGMT: validate Add Extended Advertising Data length MGMT_OP_ADD_EXT_ADV_DATA is registered as a variable-length command, with MGMT_ADD_EXT_ADV_DATA_SIZE as the fixed header size. The handler then uses cp->adv_data_len and cp->scan_rsp_len to validate and copy cp->data, but it never checks that those bytes are part of the mgmt command payload. A short command can therefore make add_ext_adv_data() pass an out-of-bounds pointer into tlv_data_is_valid(). If the bytes beyond the command buffer are addressable, they can also be copied into the advertising instance as scan response data, where the caller can read them back via MGMT_OP_GET_ADV_INSTANCE. The trigger requires CAP_NET_ADMIN in the initial user namespace; KASAN reports an 8-byte slab-out-of-bounds read. Reject commands whose length does not match the fixed header plus both advertising data lengths before parsing cp->data. Fixes: 12410572833a ("Bluetooth: Break add adv into two mgmt commands") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index b05bb380e5f8..de5bd6b637b2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9110,9 +9110,15 @@ static int add_ext_adv_data(struct sock *sk, struct hci_dev *hdev, void *data, struct adv_info *adv_instance; int err = 0; struct mgmt_pending_cmd *cmd; + u16 expected_len; BT_DBG("%s", hdev->name); + expected_len = struct_size(cp, data, cp->adv_data_len + cp->scan_rsp_len); + if (expected_len != data_len) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); adv_instance = hci_find_adv_instance(hdev, cp->instance); -- cgit v1.2.3 From c1bb9336ae6b54a5f6a353c4bd4ed9a4307e429b Mon Sep 17 00:00:00 2001 From: Mingyu Wang <25181214217@stu.xidian.edu.cn> Date: Mon, 18 May 2026 10:49:49 +0800 Subject: Bluetooth: hci_uart: fix UAFs and race conditions in close and init paths Vulnerabilities leading to Use-After-Free (UAF) and Null Pointer Dereference (NPD) conditions were observed in the lifecycle management of hci_uart. The primary issue arises because the workqueues (init_ready and write_work) are only flushed/cancelled if the HCI_UART_PROTO_READY flag is set during TTY close. If a hangup occurs before setup completes, hci_uart_tty_close() skips the teardown of these workqueues and proceeds to free the `hu` struct. When the scheduled work executes later, it blindly dereferences the freed `hu` struct. Furthermore, several data races and UAFs were identified in the teardown sequence: 1. Calling hci_uart_flush() from hci_uart_close() without effectively disabling write_work causes a race condition where both can concurrently double-free hu->tx_skb. This happens because protocol timers can concurrently invoke hci_uart_tx_wakeup() and requeue write_work. 2. Calling hci_free_dev(hdev) before hu->proto->close(hu) causes a UAF when vendor specific protocol close callbacks dereference hu->hdev. 3. In the initialization error paths, failing to take the proto_lock write lock before clearing PROTO_READY leads to races with active readers. Additionally, hci_uart_tty_receive() accesses hu->hdev outside the read lock, leading to UAFs if the initialization error path frees hdev concurrently. Fix these synchronization and lifecycle issues by: 1. Re-ordering hci_uart_tty_close() to clear HCI_UART_PROTO_READY first, followed immediately by a cancel_work_sync(&hu->write_work). Clearing the flag locks out concurrent protocol timers from successfully invoking hci_uart_tx_wakeup(), effectively rendering the cancellation permanent and preventing the tx_skb double-free. 2. Note: Clearing PROTO_READY early causes hci_uart_close() to skip hu->proto->flush(). This is perfectly safe in the tty_close path because hu->proto->close() executes shortly after, which intrinsically purges all protocol SKB queues and tears down the state. 3. Relocating hu->proto->close(hu) strictly prior to hci_free_dev(hdev) across all close and error paths to prevent vendor-level UAFs. 4. Moving the hdev->stat.byte_rx increment in hci_uart_tty_receive() inside the proto_lock read-side critical section to safely synchronize with device unregistration. 5. Adding cancel_work_sync(&hu->write_work) to hci_uart_close() to safely flush the workqueue before hci_uart_flush() is invoked via the HCI core. 6. Utilizing cancel_work_sync() instead of disable_work_sync() across all paths to prevent permanently breaking user-space retry capabilities. Fixes: 3b799254cf6f ("Bluetooth: hci_uart: Cancel init work before unregistering") Cc: stable@vger.kernel.org Signed-off-by: Mingyu Wang <25181214217@stu.xidian.edu.cn> Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_ldisc.c | 48 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 275ea865bc29..47f4902b40b4 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -194,7 +194,15 @@ void hci_uart_init_work(struct work_struct *work) err = hci_register_dev(hu->hdev); if (err < 0) { BT_ERR("Can't register HCI device"); + + percpu_down_write(&hu->proto_lock); clear_bit(HCI_UART_PROTO_READY, &hu->flags); + percpu_up_write(&hu->proto_lock); + + /* Safely cancel work after clearing flags */ + cancel_work_sync(&hu->write_work); + + /* Close protocol before freeing hdev */ hu->proto->close(hu); hdev = hu->hdev; hu->hdev = NULL; @@ -263,8 +271,12 @@ static int hci_uart_open(struct hci_dev *hdev) /* Close device */ static int hci_uart_close(struct hci_dev *hdev) { + struct hci_uart *hu = hci_get_drvdata(hdev); + BT_DBG("hdev %p", hdev); + cancel_work_sync(&hu->write_work); + hci_uart_flush(hdev); hdev->flush = NULL; return 0; @@ -531,6 +543,7 @@ static void hci_uart_tty_close(struct tty_struct *tty) { struct hci_uart *hu = tty->disc_data; struct hci_dev *hdev; + bool proto_ready; BT_DBG("tty %p", tty); @@ -540,24 +553,38 @@ static void hci_uart_tty_close(struct tty_struct *tty) if (!hu) return; - hdev = hu->hdev; - if (hdev) - hci_uart_close(hdev); + /* Wait for init_ready to finish to prevent registration races */ + cancel_work_sync(&hu->init_ready); - if (test_bit(HCI_UART_PROTO_READY, &hu->flags)) { + proto_ready = test_bit(HCI_UART_PROTO_READY, &hu->flags); + if (proto_ready) { percpu_down_write(&hu->proto_lock); clear_bit(HCI_UART_PROTO_READY, &hu->flags); percpu_up_write(&hu->proto_lock); + } - cancel_work_sync(&hu->init_ready); - cancel_work_sync(&hu->write_work); + /* + * Unconditionally cancel write_work AFTER clearing PROTO_READY. + * This ensures that concurrent protocol timers cannot requeue + * write_work via hci_uart_tx_wakeup(), permanently preventing + * double-free races and UAFs. + */ + cancel_work_sync(&hu->write_work); + + hdev = hu->hdev; + if (hdev) + hci_uart_close(hdev); /* proto->flush is safely skipped */ + if (proto_ready) { if (hdev) { if (test_bit(HCI_UART_REGISTERED, &hu->flags)) hci_unregister_dev(hdev); - hci_free_dev(hdev); } + /* Close protocol before freeing hdev (intrinsically purges queues) */ hu->proto->close(hu); + + if (hdev) + hci_free_dev(hdev); } clear_bit(HCI_UART_PROTO_SET, &hu->flags); @@ -625,11 +652,12 @@ static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, * tty caller */ hu->proto->recv(hu, data, count); - percpu_up_read(&hu->proto_lock); if (hu->hdev) hu->hdev->stat.byte_rx += count; + percpu_up_read(&hu->proto_lock); + tty_unthrottle(tty); } @@ -695,6 +723,10 @@ static int hci_uart_register_dev(struct hci_uart *hu) percpu_down_write(&hu->proto_lock); clear_bit(HCI_UART_PROTO_INIT, &hu->flags); percpu_up_write(&hu->proto_lock); + /* Cancel work after clearing flags */ + cancel_work_sync(&hu->write_work); + + /* Close protocol before freeing hdev */ hu->proto->close(hu); hu->hdev = NULL; hci_free_dev(hdev); -- cgit v1.2.3 From ab1513597c6cf17cd1ad2a21e3b045421b48e022 Mon Sep 17 00:00:00 2001 From: Safa Karakuş Date: Sat, 16 May 2026 21:15:04 +0300 Subject: Bluetooth: fix UAF in l2cap_sock_cleanup_listen() vs l2cap_conn_del() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bt_accept_dequeue() unlinks a not-yet-accepted child from the parent accept queue and release_sock()s it before returning, so the returned sk has no caller reference and is unlocked. l2cap_sock_cleanup_listen() walks these children on listening-socket close. A concurrent HCI disconnect drives hci_rx_work -> l2cap_conn_del() which runs l2cap_chan_del() + l2cap_sock_kill() and frees the child sk and its l2cap_chan; cleanup_listen() then uses both: BUG: KASAN: slab-use-after-free in l2cap_sock_kill l2cap_sock_kill / l2cap_sock_cleanup_listen / __x64_sys_close Freed by: l2cap_conn_del -> l2cap_sock_close_cb -> l2cap_sock_kill This is distinct from the two fixes already in this area: commit e83f5e24da741 ("Bluetooth: serialize accept_q access") serialises the accept_q list/poll and takes temporary refs inside bt_accept_dequeue(), and CVE-2025-39860 serialises the userspace close()/accept() race by calling cleanup_listen() under lock_sock() in l2cap_sock_release(). Neither covers l2cap_conn_del() running from hci_rx_work, so this UAF still reproduces on current bluetooth/master. Take the reference at the source: bt_accept_dequeue() does sock_hold() while sk is still locked, before release_sock(); callers sock_put(). cleanup_listen() pins the chan with l2cap_chan_hold_unless_zero() under a brief child sk lock (serialising vs l2cap_sock_teardown_cb()), drops it before l2cap_chan_lock(), and skips a duplicate l2cap_sock_kill() on SOCK_DEAD. conn->lock is not taken here: cleanup_listen() runs under the parent sk lock and that would invert conn->lock -> chan->lock -> sk_lock (lockdep). KASAN/SMP: an unprivileged listen/close vs HCI-disconnect race produced 12 use-after-free reports per run before this change; 0, and no lockdep report, over 1600+ raced iterations after it on bluetooth/master. Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode") Cc: stable@vger.kernel.org Reported-by: Siwei Zhang Reviewed-by: Siwei Zhang Signed-off-by: Safa Karakuş Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/af_bluetooth.c | 10 +++++++++ net/bluetooth/iso.c | 9 +++++++- net/bluetooth/l2cap_sock.c | 51 ++++++++++++++++++++++++++++++++++++++------ net/bluetooth/rfcomm/sock.c | 9 +++++++- net/bluetooth/sco.c | 9 +++++++- 5 files changed, 78 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 9d68dd86023c..1a6aa3f8d4d6 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -340,6 +340,16 @@ restart: if (newsock) sock_graft(sk, newsock); + /* Hand the caller a reference taken while sk is + * still locked. bt_accept_unlink() just dropped + * the accept-queue reference; without this hold a + * concurrent teardown (e.g. l2cap_conn_del() -> + * l2cap_sock_kill()) could free sk between + * release_sock() and the caller using it. Every + * caller drops this with sock_put() when done. + */ + sock_hold(sk); + release_sock(sk); if (next) sock_put(next); diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index b971281f0a2b..d7af617cda45 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -751,6 +751,8 @@ static void iso_sock_cleanup_listen(struct sock *parent) while ((sk = bt_accept_dequeue(parent, NULL))) { iso_sock_close(sk); iso_sock_kill(sk); + /* Drop the reference handed back by bt_accept_dequeue(). */ + sock_put(sk); } /* If listening socket has a hcon, properly disconnect it */ @@ -1356,8 +1358,13 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock, } ch = bt_accept_dequeue(sk, newsock); - if (ch) + if (ch) { + /* Drop the bridging ref from bt_accept_dequeue(); + * the grafted socket keeps ch alive from here. + */ + sock_put(ch); break; + } if (!timeo) { err = -EAGAIN; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index cf590a67d364..b34e7da8d906 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -349,8 +349,13 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, } nsk = bt_accept_dequeue(sk, newsock); - if (nsk) + if (nsk) { + /* Drop the bridging ref from bt_accept_dequeue(); + * the grafted socket keeps nsk alive from here. + */ + sock_put(nsk); break; + } if (!timeo) { err = -EAGAIN; @@ -1475,22 +1480,54 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) BT_DBG("parent %p state %s", parent, state_to_string(parent->sk_state)); - /* Close not yet accepted channels */ + /* Close not yet accepted channels. + * + * bt_accept_dequeue() now returns sk with an extra reference held + * (taken while sk was still locked) so a concurrent l2cap_conn_del() + * -> l2cap_sock_kill() cannot free sk under us. + * + * cleanup_listen() runs under the parent sk lock, so unlike + * l2cap_sock_shutdown() we must NOT take conn->lock here: that would + * establish sk_lock -> conn->lock and invert the established + * conn->lock -> chan->lock -> sk_lock order (lockdep deadlock). + * + * Instead, briefly take the child sk lock to fetch and pin its chan. + * l2cap_conn_del() reaches the chan free only via + * l2cap_chan_del() -> l2cap_sock_teardown_cb(), which itself takes + * the child sk lock; holding it across l2cap_chan_hold_unless_zero() + * therefore guarantees the chan cannot be freed while we read and + * pin it (hold_unless_zero() additionally skips a chan already past + * its last reference). We then drop the sk lock before taking + * chan->lock, so sk and chan locks are never held together. + */ while ((sk = bt_accept_dequeue(parent, NULL))) { - struct l2cap_chan *chan = l2cap_pi(sk)->chan; + struct l2cap_chan *chan; + + lock_sock_nested(sk, L2CAP_NESTING_NORMAL); + chan = l2cap_chan_hold_unless_zero(l2cap_pi(sk)->chan); + release_sock(sk); + if (!chan) { + /* l2cap_conn_del() already tearing this child down */ + sock_put(sk); + continue; + } BT_DBG("child chan %p state %s", chan, state_to_string(chan->state)); - l2cap_chan_hold(chan); l2cap_chan_lock(chan); - __clear_chan_timer(chan); l2cap_chan_close(chan, ECONNRESET); - l2cap_sock_kill(sk); - + /* l2cap_conn_del() may already have killed this socket + * (it sets SOCK_DEAD); skip the duplicate to avoid a + * double sock_put()/l2cap_chan_put(). + */ + if (!sock_flag(sk, SOCK_DEAD)) + l2cap_sock_kill(sk); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); + sock_put(sk); } } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index be6639cd6f59..bd7d959c6e9e 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -180,6 +180,8 @@ static void rfcomm_sock_cleanup_listen(struct sock *parent) while ((sk = bt_accept_dequeue(parent, NULL))) { rfcomm_sock_close(sk); rfcomm_sock_kill(sk); + /* Drop the reference handed back by bt_accept_dequeue(). */ + sock_put(sk); } parent->sk_state = BT_CLOSED; @@ -497,8 +499,13 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, } nsk = bt_accept_dequeue(sk, newsock); - if (nsk) + if (nsk) { + /* Drop the bridging ref from bt_accept_dequeue(); + * the grafted socket keeps nsk alive from here. + */ + sock_put(nsk); break; + } if (!timeo) { err = -EAGAIN; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index eba44525d41d..f1799c6a6f87 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -502,6 +502,8 @@ static void sco_sock_cleanup_listen(struct sock *parent) while ((sk = bt_accept_dequeue(parent, NULL))) { sco_sock_close(sk); sco_sock_kill(sk); + /* Drop the reference handed back by bt_accept_dequeue(). */ + sock_put(sk); } parent->sk_state = BT_CLOSED; @@ -765,8 +767,13 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, } ch = bt_accept_dequeue(sk, newsock); - if (ch) + if (ch) { + /* Drop the bridging ref from bt_accept_dequeue(); + * the grafted socket keeps ch alive from here. + */ + sock_put(ch); break; + } if (!timeo) { err = -EAGAIN; -- cgit v1.2.3 From 44126343d58c68adaa8343fbf1c07dd20078c35e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 20 May 2026 12:00:50 -0500 Subject: x86/mm: Disable broadcast TLB flush when PCID is disabled Booting with "nopcid" clears X86_FEATURE_PCID and keeps CR4.PCIDE from being set to one. On AMD CPUs that support INVLPGB, broadcast TLB flushing remains enabled. There are two checks that decide whether the global ASID code runs, mm_global_asid() and consider_global_asid(), that key off of the X86_FEATURE_INVLPGB feature. Once an mm becomes active on more than three CPUs, consider_global_asid() assigns it a global ASID, after which flush_tlb_mm_range() takes the broadcast_tlb_flush() path using a non-zero PCID. Issuing an INVLPGB with a non-zero PCID while CR4.PCIDE is not set results in a #GP: Oops: general protection fault, kernel NULL pointer dereference 0x1: 0000 [#1] SMP NOPTI CPU: 158 UID: 0 PID: 3119 Comm: snap Not tainted 7.1.0-rc3 #1 PREEMPT(full) Hardware name: ... RIP: 0010:broadcast_tlb_flush Code: ... 89 da 48 83 c8 07 <0f> 01 fe eb 08 cc cc cc ... Call Trace: flush_tlb_mm_range ptep_clear_flush wp_page_copy ? _raw_spin_unlock __handle_mm_fault handle_mm_fault do_user_addr_fault exc_page_fault asm_exc_page_fault All processors that support broadcast TLB invalidation also have PCID support, so it is only the "nopcid" scenario that is of concern. In this situation just disable the broadcast TLB support using the CPUID dependency support by making X86_FEATURE_INVLPGB dependent on X86_FEATURE_PCID. [ bp: Massage commit message. ] Fixes: 4afeb0ed1753 ("x86/mm: Enable broadcast TLB invalidation for multi-threaded processes") Suggested-by: Dave Hansen Assisted-by: Claude:claude-opus-4.7 Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Acked-by: Rik van Riel Cc: Link: https://patch.msgid.link/b915acfd63e8b2a094fdeb8dc608738072518764.1779296450.git.thomas.lendacky@amd.com --- arch/x86/kernel/cpu/cpuid-deps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 146f6f8b0650..99801e844b30 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -92,6 +92,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_FRED, X86_FEATURE_LKGS }, { X86_FEATURE_SPEC_CTRL_SSBD, X86_FEATURE_SPEC_CTRL }, { X86_FEATURE_LASS, X86_FEATURE_SMAP }, + { X86_FEATURE_INVLPGB, X86_FEATURE_PCID }, {} }; -- cgit v1.2.3 From 5f17ae0f595aeb560155ce98edbe44d3eacc7e40 Mon Sep 17 00:00:00 2001 From: Alice Mikityanska Date: Mon, 18 May 2026 09:22:49 +0300 Subject: udp: gso: Fix handling checksum in __udp_gso_segment The cited commit started using msslen for uh->len, but still uses newlen to adjust uh->check. Although the checksum is ignored in most cases due to the hardware offload, __udp_gso_segment attempts to maintain the correct one. Fix uh->check and adjust it by the right value. Additionally, after the fix, newlen becomes assigned and unused before the loop. The code can be simplified a bit if mss adjustment is dropped, so that newlen becomes equal to msslen before the loop, and msslen can be also dropped, saving a few lines of code. This brings us back to one variable, drops an unneeded arithmetic for mss, and fixes the UDP checksum. Fixes: b10b446ce7ad ("udp: gso: Use single MSS length in UDP header for GSO_PARTIAL") Signed-off-by: Alice Mikityanska Reviewed-by: Willem de Bruijn Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20260518062250.3019914-2-gal@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index a0813d425b71..2578aa7f9ff9 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -482,11 +482,11 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, struct sock *sk = gso_skb->sk; unsigned int sum_truesize = 0; struct sk_buff *segs, *seg; - __be16 newlen, msslen; struct udphdr *uh; unsigned int mss; bool copy_dtor; __sum16 check; + __be16 newlen; int ret = 0; mss = skb_shinfo(gso_skb)->gso_size; @@ -555,15 +555,6 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, return segs; } - msslen = htons(sizeof(*uh) + mss); - - /* GSO partial and frag_list segmentation only requires splitting - * the frame into an MSS multiple and possibly a remainder, both - * cases return a GSO skb. So update the mss now. - */ - if (skb_is_gso(segs)) - mss *= skb_shinfo(segs)->gso_segs; - seg = segs; uh = udp_hdr(seg); @@ -586,7 +577,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, if (!seg->next) break; - uh->len = msslen; + uh->len = newlen; uh->check = check; if (seg->ip_summed == CHECKSUM_PARTIAL) -- cgit v1.2.3 From 78effd896eee11ac9db9bcbb53e7bbcad96073d7 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 18 May 2026 09:22:50 +0300 Subject: udp: Fix UDP length on last GSO_PARTIAL segment Following the cited commit, __udp_gso_segment() writes single MSS length in the UDP header. The cited patch doesn't account for the fact that the last segment could be a GSO skb by itself. This could happen when the size of the packet is a multiple of MSS, hence the first segment is also the last one (there is no need for a remainder skb). When the post-loop segment is a GSO skb, assign the single MSS length in the UDP header. Fixes: b10b446ce7ad ("udp: gso: Use single MSS length in UDP header for GSO_PARTIAL") Reported-by: Matthew Schwartz Closes: https://lore.kernel.org/all/6c3fb15e-711d-4b8d-b152-e03d9b05293f@linux.dev/ Tested-by: Matthew Schwartz Reviewed-by: Dragos Tatulea Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20260518062250.3019914-3-gal@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 2578aa7f9ff9..29651b1a0bc7 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -590,9 +590,12 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, uh = udp_hdr(seg); } - /* last packet can be partial gso_size, account for that in checksum */ - newlen = htons(skb_tail_pointer(seg) - skb_transport_header(seg) + - seg->data_len); + /* Unless skb fits perfectly as GSO_PARTIAL, the trailing + * segment may not be full MSS, account for that in the checksum + */ + if (!skb_is_gso(seg)) + newlen = htons(skb_tail_pointer(seg) - + skb_transport_header(seg) + seg->data_len); check = csum16_add(csum16_sub(uh->check, uh->len), newlen); uh->len = newlen; -- cgit v1.2.3 From abe003b33223ff33552f291644bf35d9c2f992fb Mon Sep 17 00:00:00 2001 From: Prathamesh Deshpande Date: Sun, 10 May 2026 23:59:00 +0100 Subject: net/mlx5e: Fix eswitch mode block underflow on IPsec acquire SA mlx5e_xfrm_add_state() handles acquire-flow temporary SAs by allocating software state and skipping hardware offload setup. That path jumps to the common success label before taking the eswitch mode block. After tunnel-mode validation was moved earlier, the common success label unconditionally calls mlx5_eswitch_unblock_mode(). For acquire SAs, this decrements esw->offloads.num_block_mode without a matching increment. Return directly after installing the acquire SA offload handle, so only the paths that successfully called mlx5_eswitch_block_mode() call the matching unblock. Fixes: 22239eb258bc ("net/mlx5e: Prevent tunnel reformat when tunnel mode not allowed") Signed-off-by: Prathamesh Deshpande Reviewed-by: Simon Horman Reviewed-by: Tariq Toukan Link: https://patch.msgid.link/20260510225903.13184-1-prathameshdeshpande7@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index a52e12c3c95a..db260e3d1412 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -792,8 +792,10 @@ static int mlx5e_xfrm_add_state(struct net_device *dev, sa_entry->dev = dev; sa_entry->ipsec = ipsec; /* Check if this SA is originated from acquire flow temporary SA */ - if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) - goto out; + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) { + x->xso.offload_handle = (unsigned long)sa_entry; + return 0; + } err = mlx5e_xfrm_validate_state(priv->mdev, x, extack); if (err) @@ -870,7 +872,6 @@ static int mlx5e_xfrm_add_state(struct net_device *dev, xa_unlock_bh(&ipsec->sadb); } -out: x->xso.offload_handle = (unsigned long)sa_entry; if (allow_tunnel_mode) mlx5_eswitch_unblock_encap(priv->mdev); -- cgit v1.2.3 From a3442936dd0523277e20aaf86207c574e755c634 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 May 2026 15:13:24 -0700 Subject: net: shaper: annotate the data races As previously discussed we don't care about making the shaper state fully RCU-compliant because the hierarchy itself can't be dumped in one go over Netlink. Let's annotate the reads and writes to make that clear. The field-by-field assignments will also be useful for the next commit which adds explicit "valid" field (which we don't want to override with the current full struct assignment). Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260515221325.1685455-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/shaper/shaper.c | 53 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index b1c65110f04d..520cefdc3d90 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -138,35 +138,58 @@ handle_nest_cancel: return -EMSGSIZE; } +static void net_shaper_copy(struct net_shaper *dst, + const struct net_shaper *src) +{ + WRITE_ONCE(dst->parent.scope, READ_ONCE(src->parent.scope)); + WRITE_ONCE(dst->parent.id, READ_ONCE(src->parent.id)); + WRITE_ONCE(dst->handle.scope, READ_ONCE(src->handle.scope)); + WRITE_ONCE(dst->handle.id, READ_ONCE(src->handle.id)); + + WRITE_ONCE(dst->metric, READ_ONCE(src->metric)); + WRITE_ONCE(dst->bw_min, READ_ONCE(src->bw_min)); + WRITE_ONCE(dst->bw_max, READ_ONCE(src->bw_max)); + WRITE_ONCE(dst->burst, READ_ONCE(src->burst)); + WRITE_ONCE(dst->priority, READ_ONCE(src->priority)); + WRITE_ONCE(dst->weight, READ_ONCE(src->weight)); + + /* private fields are only used on the write path under the lock */ + data_race(dst->leaves = src->leaves); +} + static int net_shaper_fill_one(struct sk_buff *msg, const struct net_shaper_binding *binding, const struct net_shaper *shaper, const struct genl_info *info) { + struct net_shaper cur; void *hdr; hdr = genlmsg_iput(msg, info); if (!hdr) return -EMSGSIZE; + /* Make a copy to avoid data races */ + net_shaper_copy(&cur, shaper); + if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) || - net_shaper_fill_handle(msg, &shaper->parent, + net_shaper_fill_handle(msg, &cur.parent, NET_SHAPER_A_PARENT) || - net_shaper_fill_handle(msg, &shaper->handle, + net_shaper_fill_handle(msg, &cur.handle, NET_SHAPER_A_HANDLE) || - ((shaper->bw_min || shaper->bw_max || shaper->burst) && - nla_put_u32(msg, NET_SHAPER_A_METRIC, shaper->metric)) || - (shaper->bw_min && - nla_put_uint(msg, NET_SHAPER_A_BW_MIN, shaper->bw_min)) || - (shaper->bw_max && - nla_put_uint(msg, NET_SHAPER_A_BW_MAX, shaper->bw_max)) || - (shaper->burst && - nla_put_uint(msg, NET_SHAPER_A_BURST, shaper->burst)) || - (shaper->priority && - nla_put_u32(msg, NET_SHAPER_A_PRIORITY, shaper->priority)) || - (shaper->weight && - nla_put_u32(msg, NET_SHAPER_A_WEIGHT, shaper->weight))) + ((cur.bw_min || cur.bw_max || cur.burst) && + nla_put_u32(msg, NET_SHAPER_A_METRIC, cur.metric)) || + (cur.bw_min && + nla_put_uint(msg, NET_SHAPER_A_BW_MIN, cur.bw_min)) || + (cur.bw_max && + nla_put_uint(msg, NET_SHAPER_A_BW_MAX, cur.bw_max)) || + (cur.burst && + nla_put_uint(msg, NET_SHAPER_A_BURST, cur.burst)) || + (cur.priority && + nla_put_u32(msg, NET_SHAPER_A_PRIORITY, cur.priority)) || + (cur.weight && + nla_put_u32(msg, NET_SHAPER_A_WEIGHT, cur.weight))) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -424,7 +447,7 @@ static void net_shaper_commit(struct net_shaper_binding *binding, /* Successful update: drop the tentative mark * and update the hierarchy container. */ - *cur = shapers[i]; + net_shaper_copy(cur, &shapers[i]); smp_wmb(); __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_VALID); } -- cgit v1.2.3 From b8d7519352ba8c6df83259295d4a3bad093cae90 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 May 2026 15:13:25 -0700 Subject: net: shaper: rework the VALID marking (again) Recent commit changed the semantics from NOT_VALID to VALID. I didn't realize that the flags are not stored atomically with the entry in XArray. There's still a race of reader observing a VALID mark for a slot, getting interrupted, writer replacing the entry with a different one, reader continuing, fetching the entry which is now a different pointer than the pointer for which VALID was meant. The biggest consequence of this is that we may see a UAF since net_shaper_rollback() assumed that entries without VALID can be freed without observing RCU. Looks like the XArray marks are buying us nothing at this point. Let's convert the code to an explicit valid field. The smp_load_acquire() / smp_store_release() barriers are marginally cleaner. Reported-by: Sashiko Fixes: 93954b40f6a4 ("net-shapers: implement NL set and delete operations") Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260515221325.1685455-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/net_shaper.h | 1 + net/shaper/shaper.c | 45 ++++++++++++++++++--------------------------- 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/include/net/net_shaper.h b/include/net/net_shaper.h index 5c3f49b52fe9..3939b816b001 100644 --- a/include/net/net_shaper.h +++ b/include/net/net_shaper.h @@ -53,6 +53,7 @@ struct net_shaper { /* private: */ u32 leaves; /* accounted only for NODE scope */ + bool valid; struct rcu_head rcu; }; diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 520cefdc3d90..dea9270f3e57 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -306,31 +306,24 @@ static void net_shaper_default_parent(const struct net_shaper_handle *handle, parent->id = 0; } -/* MARK_0 is already in use due to XA_FLAGS_ALLOC. The VALID mark is set on - * an entry only after the device-side configuration has completed - * successfully (see net_shaper_commit()). Lookups and dumps must filter on - * this mark to avoid exposing tentative entries inserted by - * net_shaper_pre_insert() while the driver call is still in flight. - */ -#define NET_SHAPER_VALID XA_MARK_1 - static struct net_shaper * net_shaper_lookup(struct net_shaper_binding *binding, const struct net_shaper_handle *handle) { u32 index = net_shaper_handle_to_index(handle); struct net_shaper_hierarchy *hierarchy; + struct net_shaper *cur; hierarchy = net_shaper_hierarchy_rcu(binding); - if (!hierarchy || !xa_get_mark(&hierarchy->shapers, index, - NET_SHAPER_VALID)) + if (!hierarchy) return NULL; - /* Pairs with smp_wmb() in net_shaper_commit(): if the entry is - * valid, its contents must be visible too. - */ - smp_rmb(); - return xa_load(&hierarchy->shapers, index); + cur = xa_load(&hierarchy->shapers, index); + /* Check valid before reading fields */ + if (!cur || !smp_load_acquire(&cur->valid)) + return NULL; + + return cur; } /* Allocate on demand the per device shaper's hierarchy container. @@ -444,12 +437,10 @@ static void net_shaper_commit(struct net_shaper_binding *binding, if (WARN_ON_ONCE(!cur)) continue; - /* Successful update: drop the tentative mark - * and update the hierarchy container. - */ + /* Successful update: update the hierarchy container... */ net_shaper_copy(cur, &shapers[i]); - smp_wmb(); - __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_VALID); + /* ... publish to lockless readers. */ + smp_store_release(&cur->valid, true); } xa_unlock(&hierarchy->shapers); } @@ -466,10 +457,10 @@ static void net_shaper_rollback(struct net_shaper_binding *binding) xa_lock(&hierarchy->shapers); xa_for_each(&hierarchy->shapers, index, cur) { - if (xa_get_mark(&hierarchy->shapers, index, NET_SHAPER_VALID)) + if (cur->valid) continue; __xa_erase(&hierarchy->shapers, index); - kfree(cur); + kfree_rcu(cur, rcu); } xa_unlock(&hierarchy->shapers); } @@ -882,12 +873,12 @@ int net_shaper_nl_get_dumpit(struct sk_buff *skb, goto out_unlock; for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index, - U32_MAX, NET_SHAPER_VALID)); + U32_MAX, XA_PRESENT)); ctx->start_index++) { - /* Pairs with smp_wmb() in net_shaper_commit(): the entry - * is marked VALID, so its contents must be visible too. - */ - smp_rmb(); + /* Check valid before reading fields */ + if (!smp_load_acquire(&shaper->valid)) + continue; + ret = net_shaper_fill_one(skb, binding, shaper, info); if (ret) break; -- cgit v1.2.3 From 2b50aceafe6606ea52ed42aadd1b4d44a188aade Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 16 May 2026 00:05:13 +0100 Subject: crypto/krb5, rxrpc: Fix lack of pre-decrypt/pre-verify length checks Change the krb5 crypto library to provide facilities to precheck the length of the message about to be decrypted or verified. Fix AF_RXRPC to make use of this to validate DATA packets secured with RxGK. Fixes: 9d1d2b59341f ("rxrpc: rxgk: Implement the yfs-rxgk security class (GSSAPI)") Closes: https://sashiko.dev/#/patchset/20260511160753.607296-1-dhowells%40redhat.com Signed-off-by: David Howells cc: Herbert Xu cc: Simon Horman cc: Chuck Lever cc: linux-afs@lists.infradead.org Reviewed-by: Jeffrey Altman Tested-by: Marc Dionne Link: https://patch.msgid.link/20260515230516.2718212-2-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- Documentation/crypto/krb5.rst | 17 ++++++++++---- crypto/krb5/krb5_api.c | 54 ++++++++++++++++++++++++++++++++++++++----- include/crypto/krb5.h | 9 +++++--- include/trace/events/rxrpc.h | 1 + net/rxrpc/rxgk.c | 15 ++++++++++-- 5 files changed, 81 insertions(+), 15 deletions(-) diff --git a/Documentation/crypto/krb5.rst b/Documentation/crypto/krb5.rst index beffa0133446..f62e07ac6811 100644 --- a/Documentation/crypto/krb5.rst +++ b/Documentation/crypto/krb5.rst @@ -158,13 +158,22 @@ returned. When a message has been received, the location and size of the data with the message can be determined by calling:: - void crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5, - enum krb5_crypto_mode mode, - size_t *_offset, size_t *_len); + int crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5, + enum krb5_crypto_mode mode, + size_t *_offset, size_t *_len); The caller provides the offset and length of the message to the function, which then alters those values to indicate the region containing the data (plus any -padding). It is up to the caller to determine how much padding there is. +padding). It is up to the caller to determine how much padding there is. The +function returns an error if the length is too small or if the mode is +unsupported. An additional function:: + + int crypto_krb5_check_data_len(const struct krb5_enctype *krb5, + enum krb5_crypto_mode mode, + size_t len, size_t min_content); + +is provided to just do a basic check that the decrypted/verified message would +have a sufficient minimum payload. Preparation Functions --------------------- diff --git a/crypto/krb5/krb5_api.c b/crypto/krb5/krb5_api.c index 23026d4206c8..c7ea40f900a7 100644 --- a/crypto/krb5/krb5_api.c +++ b/crypto/krb5/krb5_api.c @@ -134,27 +134,69 @@ EXPORT_SYMBOL(crypto_krb5_how_much_data); * Find the offset and size of the data in a secure message so that this * information can be used in the metadata buffer which will get added to the * digest by crypto_krb5_verify_mic(). + * + * Return: 0 if successful, -EBADMSG if the message is too short or -EINVAL if + * the mode is unsupported. */ -void crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5, - enum krb5_crypto_mode mode, - size_t *_offset, size_t *_len) +int crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5, + enum krb5_crypto_mode mode, + size_t *_offset, size_t *_len) { switch (mode) { case KRB5_CHECKSUM_MODE: + if (*_len < krb5->cksum_len) + return -EBADMSG; *_offset += krb5->cksum_len; *_len -= krb5->cksum_len; - return; + return 0; case KRB5_ENCRYPT_MODE: + if (*_len < krb5->conf_len + krb5->cksum_len) + return -EBADMSG; *_offset += krb5->conf_len; *_len -= krb5->conf_len + krb5->cksum_len; - return; + return 0; default: WARN_ON_ONCE(1); - return; + return -EINVAL; } } EXPORT_SYMBOL(crypto_krb5_where_is_the_data); +/** + * crypto_krb5_check_data_len - Check a message is big enough + * @krb5: The encoding to use. + * @mode: Mode of operation. + * @len: The length of the secure blob. + * @min_content: Minimum length of the content inside the blob. + * + * Check that a message is large enough to hold whatever bits the encryption + * type wants to glue on (nonce, checksum) plus a minimum amount of content. + * + * Return: 0 if successful, -EBADMSG if the message is too short or -EINVAL if + * the mode is unsupported. + */ +int crypto_krb5_check_data_len(const struct krb5_enctype *krb5, + enum krb5_crypto_mode mode, + size_t len, size_t min_content) +{ + switch (mode) { + case KRB5_CHECKSUM_MODE: + if (len < krb5->cksum_len || + len - krb5->cksum_len < min_content) + return -EBADMSG; + return 0; + case KRB5_ENCRYPT_MODE: + if (len < krb5->conf_len + krb5->cksum_len || + len - (krb5->conf_len + krb5->cksum_len) < min_content) + return -EBADMSG; + return 0; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } +} +EXPORT_SYMBOL(crypto_krb5_check_data_len); + /* * Prepare the encryption with derived key data. */ diff --git a/include/crypto/krb5.h b/include/crypto/krb5.h index 71dd38f59be1..aac3ecf88467 100644 --- a/include/crypto/krb5.h +++ b/include/crypto/krb5.h @@ -121,9 +121,12 @@ size_t crypto_krb5_how_much_buffer(const struct krb5_enctype *krb5, size_t crypto_krb5_how_much_data(const struct krb5_enctype *krb5, enum krb5_crypto_mode mode, size_t *_buffer_size, size_t *_offset); -void crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5, - enum krb5_crypto_mode mode, - size_t *_offset, size_t *_len); +int crypto_krb5_where_is_the_data(const struct krb5_enctype *krb5, + enum krb5_crypto_mode mode, + size_t *_offset, size_t *_len); +int crypto_krb5_check_data_len(const struct krb5_enctype *krb5, + enum krb5_crypto_mode mode, + size_t len, size_t min_content); struct crypto_aead *crypto_krb5_prepare_encryption(const struct krb5_enctype *krb5, const struct krb5_buffer *TK, u32 usage, gfp_t gfp); diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 573f2df3a2c9..704a10de6670 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -71,6 +71,7 @@ EM(rxkad_abort_resp_unknown_tkt, "rxkad-resp-unknown-tkt") \ EM(rxkad_abort_resp_version, "rxkad-resp-version") \ /* RxGK security errors */ \ + EM(rxgk_abort_1_short_header, "rxgk1-short-hdr") \ EM(rxgk_abort_1_verify_mic_eproto, "rxgk1-vfy-mic-eproto") \ EM(rxgk_abort_2_decrypt_eproto, "rxgk2-dec-eproto") \ EM(rxgk_abort_2_short_data, "rxgk2-short-data") \ diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c index 0d5e654da918..26e723052a37 100644 --- a/net/rxrpc/rxgk.c +++ b/net/rxrpc/rxgk.c @@ -480,8 +480,12 @@ static int rxgk_verify_packet_integrity(struct rxrpc_call *call, _enter(""); - crypto_krb5_where_is_the_data(gk->krb5, KRB5_CHECKSUM_MODE, - &data_offset, &data_len); + if (crypto_krb5_where_is_the_data(gk->krb5, KRB5_CHECKSUM_MODE, + &data_offset, &data_len) < 0) { + ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, + rxgk_abort_1_short_header); + goto put_gk; + } hdr = kzalloc_obj(*hdr, GFP_NOFS); if (!hdr) @@ -529,6 +533,13 @@ static int rxgk_verify_packet_encrypted(struct rxrpc_call *call, _enter(""); + if (crypto_krb5_check_data_len(gk->krb5, KRB5_ENCRYPT_MODE, + len, sizeof(hdr)) < 0) { + ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, + rxgk_abort_2_short_header); + goto error; + } + ret = rxgk_decrypt_skb(gk->krb5, gk->rx_enc, skb, &offset, &len, &ac); if (ret < 0) { if (ret != -ENOMEM) -- cgit v1.2.3 From d2bc90cf6c75cb96d2ce549be6c35efa3099d25b Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 16 May 2026 00:05:14 +0100 Subject: rxrpc: Fix DATA decrypt vs splice() by copying data to buffer in recvmsg This improves the fix for CVE-2026-43500. Fix the pagecache corruption from in-place decryption of a DATA packet transmitted locally by splice() by getting rid of the packet sharing in the I/O thread and unconditionally extracting the packet content into a bounce buffer in which the buffer is decrypted. recvmsg() (or the kernel equivalent) then copies the data from the bounce buffer to the destination buffer. The sk_buff then remains unmodified. This has an additional advantage in that the packet is then arranged in the buffer with the correct alignment required for the crypto algorithms to process directly. The performance of the crypto does seem to be a little faster and, surprisingly, the unencrypted performance doesn't seem to change much - possibly due to removing complexity from the I/O thread. Yet another advantage is that the I/O thread doesn't have to copy packets which would slow down packet distribution, ACK generation, etc.. The buffer belongs to the call and is allocated initially at 2K, sufficiently large to hold a whole jumbo subpacket, but the buffer will be increased in size if needed. However, to take this work, MSG_PEEK may cause a later packet to be decrypted into the buffer, in which case the earlier one will need re-decrypting for a subsequent recvmsg(). Note that rx_pkt_offset may legitimately see 0 as a valid offset now, so switch to using USHRT_MAX to indicate an invalid offset. Note also that I would generally prefer to replace the buffers of the current sk_buff with a new kmalloc'd buffer of the right size, ditching the old data and frags as this makes the handling of MSG_PEEK easier and removes the re-decryption issue, but this looks like quite a complicated thing to achieve. skb_morph() looks half way to what I want, but I don't want to have to allocate a new sk_buff. Fixes: d0d5c0cd1e71 ("rxrpc: Use skb_unshare() rather than skb_cow_data()") Reported-by: Hyunwoo Kim Closes: https://lore.kernel.org/r/afKV2zGR6rrelPC7@v4bel/ Signed-off-by: David Howells cc: Simon Horman cc: Jiayuan Chen cc: linux-afs@lists.infradead.org Reviewed-by: Jeffrey Altman Tested-by: Marc Dionne Link: https://patch.msgid.link/20260515230516.2718212-3-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 7 ++-- net/rxrpc/call_event.c | 22 +------------ net/rxrpc/call_object.c | 2 ++ net/rxrpc/insecure.c | 3 -- net/rxrpc/recvmsg.c | 68 ++++++++++++++++++++++++++++++-------- net/rxrpc/rxgk.c | 51 ++++++++++++++--------------- net/rxrpc/rxgk_common.h | 82 ++++++++++++++++++++++++++++++++++++++++++++++ net/rxrpc/rxkad.c | 86 ++++++++++++++++++------------------------------- 8 files changed, 201 insertions(+), 120 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 27c2aa2dd023..783367eea798 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -213,8 +213,6 @@ struct rxrpc_skb_priv { struct { u16 offset; /* Offset of data */ u16 len; /* Length of data */ - u8 flags; -#define RXRPC_RX_VERIFIED 0x01 }; struct { rxrpc_seq_t first_ack; /* First packet in acks table */ @@ -774,6 +772,11 @@ struct rxrpc_call { struct sk_buff_head recvmsg_queue; /* Queue of packets ready for recvmsg() */ struct sk_buff_head rx_queue; /* Queue of packets for this call to receive */ struct sk_buff_head rx_oos_queue; /* Queue of out of sequence packets */ + void *rx_dec_buffer; /* Decryption buffer */ + unsigned short rx_dec_bsize; /* rx_dec_buffer size */ + unsigned short rx_dec_offset; /* Decrypted packet data offset */ + unsigned short rx_dec_len; /* Decrypted packet data len */ + rxrpc_seq_t rx_dec_seq; /* Packet in decryption buffer */ rxrpc_seq_t rx_highest_seq; /* Higest sequence number received */ rxrpc_seq_t rx_consumed; /* Highest packet consumed */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 2b19b252225e..fec59d9338b9 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -332,27 +332,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK; - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && - sp->hdr.securityIndex != 0 && - (skb_cloned(skb) || - skb_has_frag_list(skb) || - skb_has_shared_frag(skb))) { - /* Unshare the packet so that it can be - * modified by in-place decryption. - */ - struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); - - if (nskb) { - rxrpc_new_skb(nskb, rxrpc_skb_new_unshared); - rxrpc_input_call_packet(call, nskb); - rxrpc_free_skb(nskb, rxrpc_skb_put_call_rx); - } else { - /* OOM - Drop the packet. */ - rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem); - } - } else { - rxrpc_input_call_packet(call, skb); - } + rxrpc_input_call_packet(call, skb); rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); did_receive = true; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f035f486c139..fcb9d38bb521 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -152,6 +152,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, spin_lock_init(&call->notify_lock); refcount_set(&call->ref, 1); call->debug_id = debug_id; + call->rx_pkt_offset = USHRT_MAX; call->tx_total_len = -1; call->tx_jumbo_max = 1; call->next_rx_timo = 20 * HZ; @@ -553,6 +554,7 @@ static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call) rxrpc_purge_queue(&call->recvmsg_queue); rxrpc_purge_queue(&call->rx_queue); rxrpc_purge_queue(&call->rx_oos_queue); + kfree(call->rx_dec_buffer); } /* diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 0a260df45d25..7a26c6097d03 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -32,9 +32,6 @@ static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - sp->flags |= RXRPC_RX_VERIFIED; return 0; } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index e1f7513a46db..c940600117a4 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -147,15 +147,52 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) } /* - * Decrypt and verify a DATA packet. + * Decrypt and verify a DATA packet. The content of the packet is pulled out + * into a flat buffer rather than decrypting in place in the skbuff. This also + * has the advantage of aligning the buffer correctly for the crypto routines. + * + * We keep track of the sequence number of the packet currently decrypted into + * the buffer in ->rx_dec_seq. If MSG_PEEK is used and steps onto a new + * packet, subsequent recvmsg() calls will have to go back and re-decrypt the + * current packet. */ static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + int ret; - if (sp->flags & RXRPC_RX_VERIFIED) - return 0; - return call->security->verify_packet(call, skb); + if (sp->len > call->rx_dec_bsize) { + /* Make sure we can hold a 1412-byte jumbo subpacket and make + * sure that the buffer size is aligned to a crypto blocksize. + */ + size_t size = clamp(round_up(sp->len, 32), 2048, 65535); + void *buffer = krealloc(call->rx_dec_buffer, size, GFP_NOFS); + + if (!buffer) + return -ENOMEM; + call->rx_dec_buffer = buffer; + call->rx_dec_bsize = size; + } + + ret = -EFAULT; + if (skb_copy_bits(skb, sp->offset, call->rx_dec_buffer, sp->len) < 0) + goto err; + + call->rx_dec_offset = 0; + call->rx_dec_len = sp->len; + call->rx_dec_seq = sp->hdr.seq; + ret = call->security->verify_packet(call, skb); + if (ret < 0) + goto err; + return 0; + +err: + kfree(call->rx_dec_buffer); + call->rx_dec_buffer = NULL; + call->rx_dec_bsize = 0; + call->rx_dec_offset = 0; + call->rx_dec_len = 0; + return ret; } /* @@ -283,16 +320,21 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (msg) sock_recv_timestamp(msg, sock->sk, skb); - if (rx_pkt_offset == 0) { + if (call->rx_dec_seq != sp->hdr.seq || + !call->rx_dec_buffer) { ret2 = rxrpc_verify_data(call, skb); trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq, - sp->offset, sp->len, ret2); + call->rx_dec_offset, + call->rx_dec_len, ret2); if (ret2 < 0) { ret = ret2; goto out; } - rx_pkt_offset = sp->offset; - rx_pkt_len = sp->len; + } + + if (rx_pkt_offset == USHRT_MAX) { + rx_pkt_offset = call->rx_dec_offset; + rx_pkt_len = call->rx_dec_len; } else { trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq, rx_pkt_offset, rx_pkt_len, 0); @@ -304,10 +346,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (copy > remain) copy = remain; if (copy > 0) { - ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, - copy); - if (ret2 < 0) { - ret = ret2; + ret2 = copy_to_iter(call->rx_dec_buffer + rx_pkt_offset, + copy, iter); + if (ret2 != copy) { + ret = -EFAULT; goto out; } @@ -328,7 +370,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, /* The whole packet has been transferred. */ if (sp->hdr.flags & RXRPC_LAST_PACKET) ret = 1; - rx_pkt_offset = 0; + rx_pkt_offset = USHRT_MAX; rx_pkt_len = 0; skb = skb_peek_next(skb, &call->recvmsg_queue); diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c index 26e723052a37..f81703ee7ac3 100644 --- a/net/rxrpc/rxgk.c +++ b/net/rxrpc/rxgk.c @@ -473,8 +473,9 @@ static int rxgk_verify_packet_integrity(struct rxrpc_call *call, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxgk_header *hdr; struct krb5_buffer metadata; - unsigned int offset = sp->offset, len = sp->len; + unsigned int len = call->rx_dec_len; size_t data_offset = 0, data_len = len; + void *data = call->rx_dec_buffer, *p = data; u32 ac = 0; int ret = -ENOMEM; @@ -500,16 +501,15 @@ static int rxgk_verify_packet_integrity(struct rxrpc_call *call, metadata.len = sizeof(*hdr); metadata.data = hdr; - ret = rxgk_verify_mic_skb(gk->krb5, gk->rx_Kc, &metadata, - skb, &offset, &len, &ac); + ret = rxgk_verify_mic(gk->krb5, gk->rx_Kc, &metadata, &p, &len, &ac); kfree(hdr); if (ret < 0) { if (ret != -ENOMEM) rxrpc_abort_eproto(call, skb, ac, rxgk_abort_1_verify_mic_eproto); } else { - sp->offset = offset; - sp->len = len; + call->rx_dec_offset = p - data; + call->rx_dec_len = len; } put_gk: @@ -526,56 +526,53 @@ static int rxgk_verify_packet_encrypted(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxgk_header hdr; - unsigned int offset = sp->offset, len = sp->len; + struct rxgk_header *hdr; + unsigned int offset = 0, len = call->rx_dec_len; + void *data = call->rx_dec_buffer, *p = data; int ret; u32 ac = 0; _enter(""); if (crypto_krb5_check_data_len(gk->krb5, KRB5_ENCRYPT_MODE, - len, sizeof(hdr)) < 0) { + len, sizeof(*hdr)) < 0) { ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, rxgk_abort_2_short_header); goto error; } - ret = rxgk_decrypt_skb(gk->krb5, gk->rx_enc, skb, &offset, &len, &ac); + ret = rxgk_decrypt(gk->krb5, gk->rx_enc, &p, &len, &ac); if (ret < 0) { if (ret != -ENOMEM) rxrpc_abort_eproto(call, skb, ac, rxgk_abort_2_decrypt_eproto); goto error; } + offset = p - data; - if (len < sizeof(hdr)) { + if (len < sizeof(*hdr)) { ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, rxgk_abort_2_short_header); goto error; } /* Extract the header from the skb */ - ret = skb_copy_bits(skb, offset, &hdr, sizeof(hdr)); - if (ret < 0) { - ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, - rxgk_abort_2_short_encdata); - goto error; - } - offset += sizeof(hdr); - len -= sizeof(hdr); - - if (ntohl(hdr.epoch) != call->conn->proto.epoch || - ntohl(hdr.cid) != call->cid || - ntohl(hdr.call_number) != call->call_id || - ntohl(hdr.seq) != sp->hdr.seq || - ntohl(hdr.sec_index) != call->security_ix || - ntohl(hdr.data_len) > len) { + hdr = data + offset; + offset += sizeof(*hdr); + len -= sizeof(*hdr); + + if (ntohl(hdr->epoch) != call->conn->proto.epoch || + ntohl(hdr->cid) != call->cid || + ntohl(hdr->call_number) != call->call_id || + ntohl(hdr->seq) != sp->hdr.seq || + ntohl(hdr->sec_index) != call->security_ix || + ntohl(hdr->data_len) > len) { ret = rxrpc_abort_eproto(call, skb, RXGK_SEALEDINCON, rxgk_abort_2_short_data); goto error; } - sp->offset = offset; - sp->len = ntohl(hdr.data_len); + call->rx_dec_offset = offset; + call->rx_dec_len = ntohl(hdr->data_len); ret = 0; error: rxgk_put(gk); diff --git a/net/rxrpc/rxgk_common.h b/net/rxrpc/rxgk_common.h index 1e257d7ab8ec..112b5366ce11 100644 --- a/net/rxrpc/rxgk_common.h +++ b/net/rxrpc/rxgk_common.h @@ -105,6 +105,49 @@ int rxgk_decrypt_skb(const struct krb5_enctype *krb5, return ret; } +/* + * Apply decryption and checksumming functions a flat data buffer. The data + * point and length are updated to reflect the actual content of the encrypted + * region. + */ +static inline int rxgk_decrypt(const struct krb5_enctype *krb5, + struct crypto_aead *aead, + void **_data, unsigned int *_len, + int *_error_code) +{ + struct scatterlist sg[1]; + size_t offset = 0, len = *_len; + int ret; + + sg_init_one(sg, *_data, len); + + ret = crypto_krb5_decrypt(krb5, aead, sg, 1, &offset, &len); + switch (ret) { + case 0: + if (offset & 3) { + *_error_code = RXGK_INCONSISTENCY; + ret = -EPROTO; + break; + } + *_data += offset; + *_len = len; + break; + case -EBADMSG: /* Checksum mismatch. */ + case -EPROTO: + *_error_code = RXGK_SEALEDINCON; + break; + case -EMSGSIZE: + *_error_code = RXGK_PACKETSHORT; + break; + case -ENOPKG: /* Would prefer RXGK_BADETYPE, but not available for YFS. */ + default: + *_error_code = RXGK_INCONSISTENCY; + break; + } + + return ret; +} + /* * Check the MIC on a region of an skbuff. The offset and length are updated * to reflect the actual content of the secure region. @@ -148,3 +191,42 @@ int rxgk_verify_mic_skb(const struct krb5_enctype *krb5, return ret; } + +/* + * Check the MIC on a flat buffer. The data pointer and length are updated to + * reflect the actual content of the secure region. + */ +static inline +int rxgk_verify_mic(const struct krb5_enctype *krb5, + struct crypto_shash *shash, + const struct krb5_buffer *metadata, + void **_data, unsigned int *_len, + u32 *_error_code) +{ + struct scatterlist sg[1]; + size_t offset = 0, len = *_len; + int ret; + + sg_init_one(sg, *_data, len); + + ret = crypto_krb5_verify_mic(krb5, shash, metadata, sg, 1, &offset, &len); + switch (ret) { + case 0: + *_data += offset; + *_len = len; + break; + case -EBADMSG: /* Checksum mismatch */ + case -EPROTO: + *_error_code = RXGK_SEALEDINCON; + break; + case -EMSGSIZE: + *_error_code = RXGK_PACKETSHORT; + break; + case -ENOPKG: /* Would prefer RXGK_BADETYPE, but not available for YFS. */ + default: + *_error_code = RXGK_INCONSISTENCY; + break; + } + + return ret; +} diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index cba7935977f0..075936337836 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -430,27 +430,25 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_seq_t seq, struct skcipher_request *req) { - struct rxkad_level1_hdr sechdr; + struct rxkad_level1_hdr *sechdr; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv; - struct scatterlist sg[16]; - u32 data_size, buf; + struct scatterlist sg[1]; + void *data = call->rx_dec_buffer; + u32 len = sp->len, data_size, buf; u16 check; int ret; _enter(""); - if (sp->len < 8) + if (len < 8) return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, rxkad_abort_1_short_header); /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. */ - sg_init_table(sg, ARRAY_SIZE(sg)); - ret = skb_to_sgvec(skb, sg, sp->offset, 8); - if (unlikely(ret < 0)) - return ret; + sg_init_one(sg, data, len); /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); @@ -464,13 +462,11 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, return ret; /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) - return rxrpc_abort_eproto(call, skb, RXKADDATALEN, - rxkad_abort_1_short_encdata); - sp->offset += sizeof(sechdr); - sp->len -= sizeof(sechdr); + sechdr = data; + call->rx_dec_offset = sizeof(*sechdr); + len -= sizeof(*sechdr); - buf = ntohl(sechdr.data_size); + buf = ntohl(sechdr->data_size); data_size = buf & 0xffff; check = buf >> 16; @@ -479,10 +475,10 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, if (check != 0) return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, rxkad_abort_1_short_check); - if (data_size > sp->len) + if (data_size > len) return rxrpc_abort_eproto(call, skb, RXKADDATALEN, rxkad_abort_1_short_data); - sp->len = data_size; + call->rx_dec_len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; @@ -496,43 +492,28 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, struct skcipher_request *req) { const struct rxrpc_key_token *token; - struct rxkad_level2_hdr sechdr; + struct rxkad_level2_hdr *sechdr; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv; - struct scatterlist _sg[4], *sg; - u32 data_size, buf; + struct scatterlist sg[1]; + void *data = call->rx_dec_buffer; + u32 len = sp->len, data_size, buf; u16 check; - int nsg, ret; + int ret; - _enter(",{%d}", sp->len); + _enter(",{%d}", len); - if (sp->len < 8) + if (len < 8) return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, rxkad_abort_2_short_header); /* Don't let the crypto algo see a misaligned length. */ - sp->len = round_down(sp->len, 8); + len = round_down(len, 8); - /* Decrypt the skbuff in-place. TODO: We really want to decrypt - * directly into the target buffer. + /* Decrypt in place in the call's decryption buffer. TODO: We really + * want to decrypt directly into the target buffer. */ - sg = _sg; - nsg = skb_shinfo(skb)->nr_frags + 1; - if (nsg <= 4) { - nsg = 4; - } else { - sg = kmalloc_objs(*sg, nsg, GFP_NOIO); - if (!sg) - return -ENOMEM; - } - - sg_init_table(sg, nsg); - ret = skb_to_sgvec(skb, sg, sp->offset, sp->len); - if (unlikely(ret < 0)) { - if (sg != _sg) - kfree(sg); - return ret; - } + sg_init_one(sg, data, len); /* decrypt from the session key */ token = call->conn->key->payload.data[0]; @@ -540,11 +521,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg, sg, sp->len, iv.x); + skcipher_request_set_crypt(req, sg, sg, len, iv.x); ret = crypto_skcipher_decrypt(req); skcipher_request_zero(req); - if (sg != _sg) - kfree(sg); if (ret < 0) { if (ret == -ENOMEM) return ret; @@ -553,13 +532,11 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, } /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) - return rxrpc_abort_eproto(call, skb, RXKADDATALEN, - rxkad_abort_2_short_len); - sp->offset += sizeof(sechdr); - sp->len -= sizeof(sechdr); + sechdr = data; + call->rx_dec_offset = sizeof(*sechdr); + len -= sizeof(*sechdr); - buf = ntohl(sechdr.data_size); + buf = ntohl(sechdr->data_size); data_size = buf & 0xffff; check = buf >> 16; @@ -569,17 +546,18 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, rxkad_abort_2_short_check); - if (data_size > sp->len) + if (data_size > len) return rxrpc_abort_eproto(call, skb, RXKADDATALEN, rxkad_abort_2_short_data); - sp->len = data_size; + call->rx_dec_len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; } /* - * Verify the security on a received packet and the subpackets therein. + * Verify the security on a received (sub)packet. If the packet needs + * modifying (e.g. decrypting), it must be copied. */ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) { -- cgit v1.2.3 From 8bfab4b6ffc2fe92da86300728fc8c3c7ebffb56 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 16 May 2026 00:05:15 +0100 Subject: rxrpc: Fix RESPONSE packet verification to extract skb to a linear buffer This improves the fix for CVE-2026-43500. Fix the verification of RESPONSE packets to avoid the problem of overwriting a RESPONSE packet sent via splice to a local address by extracting the contents of the UDP packet into a kmalloc'd linear buffer rather than decrypting the data in place in the sk_buff (which may corrupt the original buffer). Fixes: 24481a7f5733 ("rxrpc: Fix conn-level packet handling to unshare RESPONSE packets") Reported-by: Hyunwoo Kim Closes: https://lore.kernel.org/r/afKV2zGR6rrelPC7@v4bel/ Signed-off-by: David Howells cc: Simon Horman cc: Jiayuan Chen cc: linux-afs@lists.infradead.org cc: stable@kernel.org Reviewed-by: Jeffrey Altman Tested-by: Marc Dionne Link: https://patch.msgid.link/20260515230516.2718212-4-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 7 ++-- net/rxrpc/conn_event.c | 32 +++++++---------- net/rxrpc/insecure.c | 5 +-- net/rxrpc/rxgk.c | 96 ++++++++++++++++--------------------------------- net/rxrpc/rxgk_app.c | 46 ++++++++++-------------- net/rxrpc/rxgk_common.h | 92 ++--------------------------------------------- net/rxrpc/rxkad.c | 29 ++++++--------- 7 files changed, 82 insertions(+), 225 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 783367eea798..98f2165159d7 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -307,15 +307,16 @@ struct rxrpc_security { struct sk_buff *challenge); /* verify a response */ - int (*verify_response)(struct rxrpc_connection *, - struct sk_buff *); + int (*verify_response)(struct rxrpc_connection *conn, + struct sk_buff *response_skb, + void *response, unsigned int len); /* clear connection security */ void (*clear)(struct rxrpc_connection *); /* Default ticket -> key decoder */ int (*default_decode_ticket)(struct rxrpc_connection *conn, struct sk_buff *skb, - unsigned int ticket_offset, unsigned int ticket_len, + void *ticket, unsigned int ticket_len, struct key **_key); }; diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 442414d90ba1..c96ca615b787 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -243,28 +243,22 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call) static int rxrpc_verify_response(struct rxrpc_connection *conn, struct sk_buff *skb) { + unsigned int len = skb->len - sizeof(struct rxrpc_wire_header); + void *buffer; int ret; - if (skb_cloned(skb) || skb_has_frag_list(skb) || - skb_has_shared_frag(skb)) { - /* Copy the packet if shared so that we can do in-place - * decryption. - */ - struct sk_buff *nskb = skb_copy(skb, GFP_NOFS); - - if (nskb) { - rxrpc_new_skb(nskb, rxrpc_skb_new_unshared); - ret = conn->security->verify_response(conn, nskb); - rxrpc_free_skb(nskb, rxrpc_skb_put_response_copy); - } else { - /* OOM - Drop the packet. */ - rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem); - ret = -ENOMEM; - } - } else { - ret = conn->security->verify_response(conn, skb); - } + buffer = kmalloc(len, GFP_NOFS); + if (!buffer) + return -ENOMEM; + + ret = skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), buffer, len); + if (ret < 0) + goto out; + + ret = conn->security->verify_response(conn, skb, buffer, len); +out: + kfree(buffer); return ret; } diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 7a26c6097d03..0b39046bdc61 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -54,9 +54,10 @@ static int none_sendmsg_respond_to_challenge(struct sk_buff *challenge, } static int none_verify_response(struct rxrpc_connection *conn, - struct sk_buff *skb) + struct sk_buff *response_skb, + void *response, unsigned int len) { - return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, + return rxrpc_abort_conn(conn, response_skb, RX_PROTOCOL_ERROR, -EPROTO, rxrpc_eproto_rxnull_response); } diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c index f81703ee7ac3..a1ee102abae1 100644 --- a/net/rxrpc/rxgk.c +++ b/net/rxrpc/rxgk.c @@ -1084,11 +1084,12 @@ static int rxgk_sendmsg_respond_to_challenge(struct sk_buff *challenge, * unsigned int call_numbers<>; * }; */ -static int rxgk_do_verify_authenticator(struct rxrpc_connection *conn, - const struct krb5_enctype *krb5, - struct sk_buff *skb, - __be32 *p, __be32 *end) +static int rxgk_verify_authenticator(struct rxrpc_connection *conn, + const struct krb5_enctype *krb5, + struct sk_buff *skb, + void *auth, unsigned int auth_len) { + __be32 *p = auth, *end = auth + auth_len; u32 app_len, call_count, level, epoch, cid, i; _enter(""); @@ -1151,37 +1152,6 @@ static int rxgk_do_verify_authenticator(struct rxrpc_connection *conn, return 0; } -/* - * Extract the authenticator and verify it. - */ -static int rxgk_verify_authenticator(struct rxrpc_connection *conn, - const struct krb5_enctype *krb5, - struct sk_buff *skb, - unsigned int auth_offset, unsigned int auth_len) -{ - void *auth; - __be32 *p; - int ret; - - auth = kmalloc(auth_len, GFP_NOFS); - if (!auth) - return -ENOMEM; - - ret = skb_copy_bits(skb, auth_offset, auth, auth_len); - if (ret < 0) { - ret = rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO, - rxgk_abort_resp_short_auth); - goto error; - } - - p = auth; - ret = rxgk_do_verify_authenticator(conn, krb5, skb, p, - p + auth_len / sizeof(*p)); -error: - kfree(auth); - return ret; -} - /* * Verify a response. * @@ -1192,49 +1162,45 @@ error: * }; */ static int rxgk_verify_response(struct rxrpc_connection *conn, - struct sk_buff *skb) + struct sk_buff *skb, + void *buffer, unsigned int len) { const struct krb5_enctype *krb5; struct rxrpc_key_token *token; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxgk_response rhdr; + struct rxgk_response *rhdr; struct rxgk_context *gk; struct key *key = NULL; - unsigned int offset = sizeof(struct rxrpc_wire_header); - unsigned int len = skb->len - sizeof(struct rxrpc_wire_header); - unsigned int token_offset, token_len; - unsigned int auth_offset, auth_len; + unsigned int resp_token_len, auth_len; + void *resp_token, *auth; __be32 xauth_len; int ret, ec; _enter("{%d}", conn->debug_id); /* Parse the RXGK_Response object */ - if (sizeof(rhdr) + sizeof(__be32) > len) - goto short_packet; - - if (skb_copy_bits(skb, offset, &rhdr, sizeof(rhdr)) < 0) + if (len < sizeof(*rhdr) + sizeof(__be32)) goto short_packet; - offset += sizeof(rhdr); - len -= sizeof(rhdr); - - token_offset = offset; - token_len = ntohl(rhdr.token_len); - if (token_len > len || - xdr_round_up(token_len) + sizeof(__be32) > len) + rhdr = buffer; + buffer += sizeof(*rhdr); + len -= sizeof(*rhdr); + + resp_token = buffer; + resp_token_len = ntohl(rhdr->token_len); + if (resp_token_len > len || + xdr_round_up(resp_token_len) + sizeof(__be32) > len) goto short_packet; - trace_rxrpc_rx_response(conn, sp->hdr.serial, 0, sp->hdr.cksum, token_len); + trace_rxrpc_rx_response(conn, sp->hdr.serial, 0, sp->hdr.cksum, resp_token_len); - offset += xdr_round_up(token_len); - len -= xdr_round_up(token_len); + buffer += xdr_round_up(resp_token_len); + len -= xdr_round_up(resp_token_len); - if (skb_copy_bits(skb, offset, &xauth_len, sizeof(xauth_len)) < 0) - goto short_packet; - offset += sizeof(xauth_len); + xauth_len = *(__be32 *)buffer; + buffer += sizeof(xauth_len); len -= sizeof(xauth_len); - auth_offset = offset; + auth = buffer; auth_len = ntohl(xauth_len); if (auth_len > len) goto short_packet; @@ -1249,7 +1215,7 @@ static int rxgk_verify_response(struct rxrpc_connection *conn, * to the app to deal with - which might mean a round trip to * userspace. */ - ret = rxgk_extract_token(conn, skb, token_offset, token_len, &key); + ret = rxgk_extract_token(conn, skb, resp_token, resp_token_len, &key); if (ret < 0) goto out; @@ -1263,7 +1229,7 @@ static int rxgk_verify_response(struct rxrpc_connection *conn, */ token = key->payload.data[0]; conn->security_level = token->rxgk->level; - conn->rxgk.start_time = __be64_to_cpu(rhdr.start_time); + conn->rxgk.start_time = __be64_to_cpu(rhdr->start_time); gk = rxgk_generate_transport_key(conn, token->rxgk, sp->hdr.cksum, GFP_NOFS); if (IS_ERR(gk)) { @@ -1273,18 +1239,18 @@ static int rxgk_verify_response(struct rxrpc_connection *conn, krb5 = gk->krb5; - trace_rxrpc_rx_response(conn, sp->hdr.serial, krb5->etype, sp->hdr.cksum, token_len); + trace_rxrpc_rx_response(conn, sp->hdr.serial, krb5->etype, sp->hdr.cksum, + resp_token_len); /* Decrypt, parse and verify the authenticator. */ - ret = rxgk_decrypt_skb(krb5, gk->resp_enc, skb, - &auth_offset, &auth_len, &ec); + ret = rxgk_decrypt(krb5, gk->resp_enc, &auth, &auth_len, &ec); if (ret < 0) { rxrpc_abort_conn(conn, skb, RXGK_SEALEDINCON, ret, rxgk_abort_resp_auth_dec); goto out_gk; } - ret = rxgk_verify_authenticator(conn, krb5, skb, auth_offset, auth_len); + ret = rxgk_verify_authenticator(conn, krb5, skb, auth, auth_len); if (ret < 0) goto out_gk; diff --git a/net/rxrpc/rxgk_app.c b/net/rxrpc/rxgk_app.c index 0ef2a29eb695..200a30064fae 100644 --- a/net/rxrpc/rxgk_app.c +++ b/net/rxrpc/rxgk_app.c @@ -40,7 +40,7 @@ * }; */ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, - unsigned int ticket_offset, unsigned int ticket_len, + void *buffer, unsigned int ticket_len, struct key **_key) { struct rxrpc_key_token *token; @@ -49,7 +49,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, size_t pre_ticket_len, payload_len; unsigned int klen, enctype; void *payload, *ticket; - __be32 *t, *p, *q, tmp[2]; + __be32 *t, *p, *q, *tmp; int ret; _enter(""); @@ -59,10 +59,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, rxgk_abort_resp_short_yfs_tkt); /* Get the session key length */ - ret = skb_copy_bits(skb, ticket_offset, tmp, sizeof(tmp)); - if (ret < 0) - return rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO, - rxgk_abort_resp_short_yfs_klen); + tmp = buffer; enctype = ntohl(tmp[0]); klen = ntohl(tmp[1]); @@ -84,12 +81,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, * it. */ ticket = payload + pre_ticket_len; - ret = skb_copy_bits(skb, ticket_offset, ticket, ticket_len); - if (ret < 0) { - ret = rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO, - rxgk_abort_resp_short_yfs_tkt); - goto error; - } + memcpy(ticket, buffer, ticket_len); /* Fill out the form header. */ p = payload; @@ -131,7 +123,7 @@ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, goto error; } - /* Ticket read in with skb_copy_bits above */ + /* Ticket appended above. */ q += xdr_round_up(ticket_len) / 4; if (WARN_ON((unsigned long)q - (unsigned long)payload != payload_len)) { ret = -EIO; @@ -182,14 +174,15 @@ error: * [tools.ietf.org/html/draft-wilkinson-afs3-rxgk-afs-08 sec 6.1] */ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb, - unsigned int token_offset, unsigned int token_len, + void *token, unsigned int token_len, struct key **_key) { const struct krb5_enctype *krb5; const struct krb5_buffer *server_secret; struct crypto_aead *token_enc = NULL; struct key *server_key; - unsigned int ticket_offset, ticket_len; + unsigned int ticket_len; + void *ticket; u32 kvno, enctype; int ret, ec = 0; @@ -197,24 +190,23 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb, __be32 kvno; __be32 enctype; __be32 token_len; - } container; + } *container; - if (token_len < sizeof(container)) + if (token_len < sizeof(*container)) goto short_packet; /* Decode the RXGK_TokenContainer object. This tells us which server * key we should be using. We can then fetch the key, get the secret * and set up the crypto to extract the token. */ - if (skb_copy_bits(skb, token_offset, &container, sizeof(container)) < 0) - goto short_packet; + container = token; + token += sizeof(*container); - kvno = ntohl(container.kvno); - enctype = ntohl(container.enctype); - ticket_len = ntohl(container.token_len); - ticket_offset = token_offset + sizeof(container); + kvno = ntohl(container->kvno); + enctype = ntohl(container->enctype); + ticket_len = ntohl(container->token_len); - if (ticket_len > xdr_round_down(token_len - sizeof(container))) + if (ticket_len > xdr_round_down(token_len - sizeof(*container))) goto short_packet; _debug("KVNO %u", kvno); @@ -237,8 +229,8 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb, * gain access to K0, from which we can derive the transport key and * thence decode the authenticator. */ - ret = rxgk_decrypt_skb(krb5, token_enc, skb, - &ticket_offset, &ticket_len, &ec); + ticket = token; + ret = rxgk_decrypt(krb5, token_enc, &ticket, &ticket_len, &ec); crypto_free_aead(token_enc); token_enc = NULL; if (ret < 0) { @@ -248,7 +240,7 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb, return ret; } - ret = conn->security->default_decode_ticket(conn, skb, ticket_offset, + ret = conn->security->default_decode_ticket(conn, skb, ticket, ticket_len, _key); if (ret < 0) goto cant_get_token; diff --git a/net/rxrpc/rxgk_common.h b/net/rxrpc/rxgk_common.h index 112b5366ce11..3deed5863f5a 100644 --- a/net/rxrpc/rxgk_common.h +++ b/net/rxrpc/rxgk_common.h @@ -41,10 +41,10 @@ struct rxgk_context { * rxgk_app.c */ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, - unsigned int ticket_offset, unsigned int ticket_len, + void *ticket, unsigned int ticket_len, struct key **_key); int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb, - unsigned int token_offset, unsigned int token_len, + void *token, unsigned int token_len, struct key **_key); /* @@ -61,50 +61,6 @@ int rxgk_set_up_token_cipher(const struct krb5_buffer *server_key, const struct krb5_enctype **_krb5, gfp_t gfp); -/* - * Apply decryption and checksumming functions to part of an skbuff. The - * offset and length are updated to reflect the actual content of the encrypted - * region. - */ -static inline -int rxgk_decrypt_skb(const struct krb5_enctype *krb5, - struct crypto_aead *aead, - struct sk_buff *skb, - unsigned int *_offset, unsigned int *_len, - int *_error_code) -{ - struct scatterlist sg[16]; - size_t offset = 0, len = *_len; - int nr_sg, ret; - - sg_init_table(sg, ARRAY_SIZE(sg)); - nr_sg = skb_to_sgvec(skb, sg, *_offset, len); - if (unlikely(nr_sg < 0)) - return nr_sg; - - ret = crypto_krb5_decrypt(krb5, aead, sg, nr_sg, - &offset, &len); - switch (ret) { - case 0: - *_offset += offset; - *_len = len; - break; - case -EBADMSG: /* Checksum mismatch. */ - case -EPROTO: - *_error_code = RXGK_SEALEDINCON; - break; - case -EMSGSIZE: - *_error_code = RXGK_PACKETSHORT; - break; - case -ENOPKG: /* Would prefer RXGK_BADETYPE, but not available for YFS. */ - default: - *_error_code = RXGK_INCONSISTENCY; - break; - } - - return ret; -} - /* * Apply decryption and checksumming functions a flat data buffer. The data * point and length are updated to reflect the actual content of the encrypted @@ -148,50 +104,6 @@ static inline int rxgk_decrypt(const struct krb5_enctype *krb5, return ret; } -/* - * Check the MIC on a region of an skbuff. The offset and length are updated - * to reflect the actual content of the secure region. - */ -static inline -int rxgk_verify_mic_skb(const struct krb5_enctype *krb5, - struct crypto_shash *shash, - const struct krb5_buffer *metadata, - struct sk_buff *skb, - unsigned int *_offset, unsigned int *_len, - u32 *_error_code) -{ - struct scatterlist sg[16]; - size_t offset = 0, len = *_len; - int nr_sg, ret; - - sg_init_table(sg, ARRAY_SIZE(sg)); - nr_sg = skb_to_sgvec(skb, sg, *_offset, len); - if (unlikely(nr_sg < 0)) - return nr_sg; - - ret = crypto_krb5_verify_mic(krb5, shash, metadata, sg, nr_sg, - &offset, &len); - switch (ret) { - case 0: - *_offset += offset; - *_len = len; - break; - case -EBADMSG: /* Checksum mismatch */ - case -EPROTO: - *_error_code = RXGK_SEALEDINCON; - break; - case -EMSGSIZE: - *_error_code = RXGK_PACKETSHORT; - break; - case -ENOPKG: /* Would prefer RXGK_BADETYPE, but not available for YFS. */ - default: - *_error_code = RXGK_INCONSISTENCY; - break; - } - - return ret; -} - /* * Check the MIC on a flat buffer. The data pointer and length are updated to * reflect the actual content of the secure region. diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 075936337836..6fbd883401ac 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -963,7 +963,6 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, *_expiry = 0; ASSERT(server_key->payload.data[0] != NULL); - ASSERTCMP((unsigned long) ticket & 7UL, ==, 0); memcpy(&iv, &server_key->payload.data[2], sizeof(iv)); @@ -1112,14 +1111,15 @@ unlock: * verify a response */ static int rxkad_verify_response(struct rxrpc_connection *conn, - struct sk_buff *skb) + struct sk_buff *skb, + void *buffer, unsigned int len) { struct rxkad_response *response; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; struct key *server_key; time64_t expiry; - void *ticket = NULL; + void *ticket; u32 version, kvno, ticket_len, level; __be32 csum; int ret, i; @@ -1142,13 +1142,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, } } - ret = -ENOMEM; - response = kzalloc_obj(struct rxkad_response, GFP_NOFS); - if (!response) - goto error; - - if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - response, sizeof(*response)) < 0) { + response = buffer; + if (len < sizeof(*response)) { ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, rxkad_abort_resp_short); goto error; @@ -1160,6 +1155,9 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len); + buffer += sizeof(*response); + len -= sizeof(*response); + if (version != RXKAD_VERSION) { ret = rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, rxkad_abort_resp_version); @@ -1179,13 +1177,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, } /* extract the kerberos ticket and decrypt and decode it */ - ret = -ENOMEM; - ticket = kmalloc(ticket_len, GFP_NOFS); - if (!ticket) - goto error; - - if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), - ticket, ticket_len) < 0) { + ticket = buffer; + if (ticket_len > len) { ret = rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, rxkad_abort_resp_short_tkt); goto error; @@ -1265,8 +1258,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno); error: - kfree(ticket); - kfree(response); key_put(server_key); _leave(" = %d", ret); return ret; -- cgit v1.2.3 From e7c70bf97e90d974cd575e4c90f8f9b07d056da3 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Sat, 16 May 2026 14:26:16 -0700 Subject: net: ag71xx: check error for platform_get_irq Complete error handling for a failed platform_get_irq() call Fixes: d51b6ce441d3 ("net: ethernet: add ag71xx driver") Signed-off-by: Rosen Penev Reviewed-by: Oleksij Rempel Link: https://patch.msgid.link/20260516212616.11758-1-rosenp@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/atheros/ag71xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index a5ab99474179..4e4794c4dfdc 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1856,6 +1856,9 @@ static int ag71xx_probe(struct platform_device *pdev) ag71xx_int_disable(ag, AG71XX_INT_POLL); ndev->irq = platform_get_irq(pdev, 0); + if (ndev->irq < 0) + return ndev->irq; + err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt, 0x0, dev_name(&pdev->dev), ndev); if (err) { -- cgit v1.2.3 From ddf8029623a1af20e984c040e89ff918158397ab Mon Sep 17 00:00:00 2001 From: Xingwang Xiang Date: Sun, 17 May 2026 23:56:26 +0900 Subject: bpf, skmsg: fix verdict sk_data_ready racing with ktls rx sk_psock_strp_data_ready() already checks tls_sw_has_ctx_rx() and defers to psock->saved_data_ready when a TLS RX context is present, avoiding a conflict with the TLS strparser's ownership of the receive queue (commit e91de6afa81c, "bpf: Fix running sk_skb program types with ktls"). sk_psock_verdict_data_ready() has no equivalent guard. When a socket is inserted into a sockmap (BPF_SK_SKB_VERDICT) before TLS RX is configured, tls_sw_strparser_arm() saves sk_psock_verdict_data_ready as rx_ctx->saved_data_ready. On data arrival: tls_data_ready -> tls_strp_data_ready -> tls_rx_msg_ready -> saved_data_ready() = sk_psock_verdict_data_ready() -> tcp_read_skb() drains sk_receive_queue via __skb_unlink() without calling tcp_eat_skb(), so copied_seq is not advanced. tls_strp_msg_load() then finds tcp_inq() >= full_len (stale), calls tcp_recv_skb() on the now-empty queue, hits WARN_ON_ONCE(!first), and returns with rx_ctx->strp.anchor.frag_list pointing at a psock-owned (potentially freed) skb. tls_decrypt_sg() subsequently walks that frag_list: use-after-free. Apply the same fix as sk_psock_strp_data_ready(): if a TLS RX context is present, call psock->saved_data_ready (sock_def_readable) to wake recv() waiters and return immediately, leaving the receive queue untouched. TLS retains sole ownership of the queue and decrypts the record normally through tls_sw_recvmsg(). Fixes: ef5659280eb1 ("bpf, sockmap: Allow skipping sk_skb parser program") Signed-off-by: Xingwang Xiang Link: https://patch.msgid.link/20260517145630.20521-2-v3rdant.xiang@gmail.com Signed-off-by: Jakub Kicinski --- net/core/skmsg.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 6187a83bd741..e1850caf1a71 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1268,12 +1268,19 @@ out: static void sk_psock_verdict_data_ready(struct sock *sk) { const struct proto_ops *ops = NULL; + struct sk_psock *psock; struct socket *sock; int copied; trace_sk_data_ready(sk); rcu_read_lock(); + psock = sk_psock(sk); + if (psock && tls_sw_has_ctx_rx(sk)) { + psock->saved_data_ready(sk); + rcu_read_unlock(); + return; + } sock = READ_ONCE(sk->sk_socket); if (likely(sock)) ops = READ_ONCE(sock->ops); @@ -1283,8 +1290,6 @@ static void sk_psock_verdict_data_ready(struct sock *sk) copied = ops->read_skb(sk, sk_psock_verdict_recv); if (copied >= 0) { - struct sk_psock *psock; - rcu_read_lock(); psock = sk_psock(sk); if (psock) -- cgit v1.2.3 From 33644bd38aec24fe043e78ce5dca38e7156f8328 Mon Sep 17 00:00:00 2001 From: Xingwang Xiang Date: Sun, 17 May 2026 23:56:27 +0900 Subject: selftests/bpf: add regression test for ktls+sockmap verdict UAF Test the scenario where a socket is inserted into a sockmap with a BPF_SK_SKB_VERDICT program before TLS RX is configured. Previously sk_psock_verdict_data_ready() would call tcp_read_skb() and drain the receive queue without advancing copied_seq, causing tls_decrypt_sg() to walk a dangling frag_list pointer (use-after-free). The test drives the full vulnerable sequence and verifies that after the fix recv() returns the correct decrypted data. Signed-off-by: Xingwang Xiang Link: https://patch.msgid.link/20260517145630.20521-3-v3rdant.xiang@gmail.com Signed-off-by: Jakub Kicinski --- .../selftests/bpf/prog_tests/sockmap_ktls.c | 103 +++++++++++++++++++++ .../selftests/bpf/progs/test_sockmap_ktls.c | 21 +++++ 2 files changed, 124 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index b87e7f39e15a..6ed8e149e3d5 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -417,6 +417,107 @@ static void run_tests(int family, enum bpf_map_type map_type) close(map); } +/* + * Regression test for the KTLS + sockmap (verdict) reverse-order UAF. + * + * Vulnerable sequence: + * 1. Insert receiver socket into sockmap with BPF_SK_SKB_VERDICT program. + * sk->sk_data_ready becomes sk_psock_verdict_data_ready. + * 2. Configure TLS RX: tls_sw_strparser_arm() saves + * sk_psock_verdict_data_ready as rx_ctx->saved_data_ready. + * + * When data arrives, tls_rx_msg_ready() calls saved_data_ready() = + * sk_psock_verdict_data_ready(), which calls tcp_read_skb() and drains + * sk_receive_queue via __skb_unlink() without advancing copied_seq. + * tls_strp_msg_load() then finds the queue empty while tcp_inq() is still + * non-zero, hits WARN_ON_ONCE(!first), and leaves a dangling frag_list + * pointer that tls_decrypt_sg() walks — a use-after-free. + * + * The fix adds a tls_sw_has_ctx_rx() check to sk_psock_verdict_data_ready(), + * mirroring what sk_psock_strp_data_ready() already does: when a TLS RX + * context is present, defer to psock->saved_data_ready (sock_def_readable) + * instead of calling tcp_read_skb(), so TLS retains sole ownership of the + * receive queue. Data is then decrypted and returned correctly by + * tls_sw_recvmsg(). + */ +static void test_sockmap_ktls_verdict_with_tls_rx(int family, int sotype) +{ + struct tls12_crypto_info_aes_gcm_128 crypto_info = {}; + char send_buf[] = "hello ktls sockmap reverse order"; + char recv_buf[sizeof(send_buf)] = {}; + struct test_sockmap_ktls *skel; + int c = -1, p = -1, zero = 0; + int prog_fd, map_fd; + ssize_t n; + int err; + + skel = test_sockmap_ktls__open_and_load(); + if (!ASSERT_TRUE(skel, "open_and_load")) + return; + + err = create_pair(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_pair")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.prog_skb_verdict_pass); + map_fd = bpf_map__fd(skel->maps.sock_map_verdict); + + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_SKB_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach sk_skb verdict")) + goto out; + + /* Step 1: configure TLS TX on sender (no sockmap involvement) */ + err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP) client")) + goto out; + + crypto_info.info.version = TLS_1_2_VERSION; + crypto_info.info.cipher_type = TLS_CIPHER_AES_GCM_128; + memset(crypto_info.key, 0x01, sizeof(crypto_info.key)); + memset(crypto_info.salt, 0x02, sizeof(crypto_info.salt)); + + err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_info, sizeof(crypto_info)); + if (!ASSERT_OK(err, "setsockopt(TLS_TX)")) + goto out; + + /* Step 2: insert receiver into sockmap BEFORE TLS RX */ + err = bpf_map_update_elem(map_fd, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto out; + + /* Step 3: configure TLS RX AFTER sockmap insertion */ + err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP) server")) + goto out; + + err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_info, sizeof(crypto_info)); + if (!ASSERT_OK(err, "setsockopt(TLS_RX)")) + goto out; + + /* + * A buggy kernel hits WARN_ON_ONCE in tls_strp_load_anchor_with_queue + * and may UAF in tls_decrypt_sg here. With the fix, + * sk_psock_verdict_data_ready defers to sock_def_readable and TLS + * decrypts the record normally. + */ + n = send(c, send_buf, sizeof(send_buf), 0); + if (!ASSERT_EQ(n, (ssize_t)sizeof(send_buf), "send")) + goto out; + + n = recv_timeout(p, recv_buf, sizeof(recv_buf), 0, 5); + if (!ASSERT_EQ(n, (ssize_t)sizeof(send_buf), "recv")) + goto out; + + ASSERT_OK(memcmp(send_buf, recv_buf, sizeof(send_buf)), "data integrity"); + +out: + if (c != -1) + close(c); + if (p != -1) + close(p); + test_sockmap_ktls__destroy(skel); +} + static void run_ktls_test(int family, int sotype) { if (test__start_subtest("tls simple offload")) @@ -429,6 +530,8 @@ static void run_ktls_test(int family, int sotype) test_sockmap_ktls_tx_no_buf(family, sotype, true); if (test__start_subtest("tls tx with pop")) test_sockmap_ktls_tx_pop(family, sotype); + if (test__start_subtest("tls verdict with tls rx")) + test_sockmap_ktls_verdict_with_tls_rx(family, sotype); } void test_sockmap_ktls(void) diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c index 83df4919c224..facafeaf4620 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c @@ -17,6 +17,13 @@ struct { __type(value, int); } sock_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 2); + __type(key, int); + __type(value, int); +} sock_map_verdict SEC(".maps"); + SEC("sk_msg") int prog_sk_policy(struct sk_msg_md *msg) { @@ -38,3 +45,17 @@ int prog_sk_policy_redir(struct sk_msg_md *msg) bpf_msg_apply_bytes(msg, apply_bytes); return bpf_msg_redirect_map(msg, &sock_map, two, 0); } + +/* + * Verdict program for the reverse-order TLS/sockmap regression test. + * Returns SK_PASS so tcp_read_skb() drains the receive queue via + * sk_psock_verdict_recv() without calling tcp_eat_skb(), which is + * the precondition for the KTLS strparser frag_list UAF. + */ +SEC("sk_skb/verdict") +int prog_skb_verdict_pass(struct __sk_buff *skb) +{ + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From fd948c3f96b18ff9ba7d3e8eae13d196593e1aaf Mon Sep 17 00:00:00 2001 From: Carlos López Date: Tue, 12 May 2026 12:00:42 +0200 Subject: virt: sev-guest: Explicitly leak pages in unknown state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When set_memory_{encrypted,decrypted}() fail, the user cannot know at which point the function failed, meaning that the pages are left in an unknown state from the point of view of the caller. Since the pages may be left in an unencrypted state, they are not suitable for general use, and cannot be returned safely to the buddy allocator. Avoid the issue by never freeing the pages, and then do the proper accounting by calling snp_leak_pages(). Fixes: 3e385c0d6ce8 ("virt: sev-guest: Move SNP Guest Request data pages handling under snp_cmd_mutex") Signed-off-by: Carlos López Signed-off-by: Borislav Petkov (AMD) Cc: stable@kernel.org --- drivers/virt/coco/sev-guest/sev-guest.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 910a1de0d5a7..d186ae55cf63 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -176,6 +176,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques struct snp_guest_req req = {}; int ret, npages = 0, resp_len; sockptr_t certs_address; + u64 pfn; if (sockptr_is_null(io->req_data) || sockptr_is_null(io->resp_data)) return -EINVAL; @@ -215,10 +216,11 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques if (!req.certs_data) return -ENOMEM; + pfn = PHYS_PFN(virt_to_phys(req.certs_data)); ret = set_memory_decrypted((unsigned long)req.certs_data, npages); if (ret) { pr_err("failed to mark page shared, ret=%d\n", ret); - free_pages_exact(req.certs_data, npages << PAGE_SHIFT); + snp_leak_pages(pfn, npages); return -EFAULT; } @@ -272,10 +274,12 @@ e_free: kfree(report_resp); e_free_data: if (npages) { - if (set_memory_encrypted((unsigned long)req.certs_data, npages)) + if (set_memory_encrypted((unsigned long)req.certs_data, npages)) { WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); - else + snp_leak_pages(pfn, npages); + } else { free_pages_exact(req.certs_data, npages << PAGE_SHIFT); + } } return ret; } -- cgit v1.2.3 From 4eb82ba543421e9e38cc14e4e82058b78850df50 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 19 May 2026 21:35:30 +0100 Subject: net: devmem: reject dma-buf bind with non-page-aligned size or SG length net_devmem_bind_dmabuf() trusts dmabuf->size and sg_dma_len() to be PAGE_SIZE multiples without checking: - tx_vec is sized dmabuf->size / PAGE_SIZE, and net_devmem_get_niov_at() only bounds-checks virt_addr < dmabuf->size before indexing tx_vec[virt_addr / PAGE_SIZE]. With size = N*PAGE_SIZE + r (1 <= r < PAGE_SIZE), sendmsg() at iov_base = N*PAGE_SIZE passes the bound check and reads tx_vec[N] -- one past. - owner->area.num_niovs = len / PAGE_SIZE while gen_pool_add_owner() covers the full byte len, so a non-page-multiple non-final sg desyncs num_niovs from the gen_pool region for every later sg, on both RX and TX. dma-buf does not require page-aligned sizes, so the bind path has to enforce what its own indexing assumes. Reject both with -EINVAL. The size check is TX-only (only tx_vec is sized off dmabuf->size); the SG-length check covers both directions. Fixes: bd61848900bf ("net: devmem: Implement TX path") Cc: stable@vger.kernel.org Signed-off-by: David Carlier Reviewed-by: Bobby Eshleman Acked-by: Stanislav Fomichev Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20260519203530.66310-1-devnexen@gmail.com Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/core/devmem.c b/net/core/devmem.c index 468344739db2..4f71de44c0fb 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -241,6 +241,11 @@ net_devmem_bind_dmabuf(struct net_device *dev, } if (direction == DMA_TO_DEVICE) { + if (!IS_ALIGNED(dmabuf->size, PAGE_SIZE)) { + err = -EINVAL; + NL_SET_ERR_MSG(extack, "TX dma-buf size must be a multiple of PAGE_SIZE"); + goto err_unmap; + } binding->tx_vec = kvmalloc_objs(struct net_iov *, dmabuf->size / PAGE_SIZE); if (!binding->tx_vec) { @@ -267,6 +272,12 @@ net_devmem_bind_dmabuf(struct net_device *dev, size_t len = sg_dma_len(sg); struct net_iov *niov; + if (!IS_ALIGNED(len, PAGE_SIZE)) { + err = -EINVAL; + NL_SET_ERR_MSG(extack, "dma-buf SG length must be PAGE_SIZE aligned"); + goto err_free_chunks; + } + owner = kzalloc_node(sizeof(*owner), GFP_KERNEL, dev_to_node(&dev->dev)); if (!owner) { -- cgit v1.2.3 From 7eb72c1e3984150c45f77aa4299f7c2598a68e9b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 May 2026 20:08:36 +0000 Subject: ipv4: icmp: reject broadcast/multicast routes syzbot was able to trigger ip_rt_bug() in a loop, using an IPv4 packet with a crafted IPOPT_SSRR option: options: ipv4_options { options: array[ipv4_option] { union ipv4_option { ssrr: ipv4_option_route[IPOPT_SSRR] { type: const = 0x89 (1 bytes) length: len = 0x7 (1 bytes) pointer: int8 = 0xa2 (1 bytes) data: array[ipv4_addr] { union ipv4_addr { broadcast: const = 0xffffffff (4 bytes) } } } } Change __icmp_send() to not send ICMP to broadcast/multicast destinations. Fixes: c378a9c019cf ("ipv4: Give backtrace in ip_rt_bug().") Reported-by: syzbot+c13a57c2639c2c0d03a6@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6a0cc169.170a0220.1f6c2d.0004.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://patch.msgid.link/20260519200836.4141061-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 7eeff658b467..23e921d313b3 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -961,6 +961,9 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, if (IS_ERR(rt)) goto out_unlock; + if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + goto ende; + /* peer icmp_ratelimit */ if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit)) goto ende; -- cgit v1.2.3 From e4bdef4d320b2fe73b8ebfc0cc0507fa9dc4a3b7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 May 2026 19:32:48 +0000 Subject: ipv4: use WARN_ON_ONCE() in ip_rt_bug() It turns out ip_rt_bug() can be called more than expected. syzbot will still panic (because of panic_on_warn=1), but non debug kernels will no longer die while repeating stack traces on the console. Fixes: c378a9c019cf ("ipv4: Give backtrace in ip_rt_bug().") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Jiayuan Chen Link: https://patch.msgid.link/20260519193248.4018872-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index bc1296f0ea69..3d62d45d84bd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1272,7 +1272,7 @@ static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb) __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, skb->dev ? skb->dev->name : "?"); kfree_skb(skb); - WARN_ON(1); + WARN_ON_ONCE(1); return 0; } -- cgit v1.2.3 From fa997ddef508b1b37b2fe4d2dad7c4b70958335e Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 19 May 2026 15:22:05 +0200 Subject: dpll: zl3073x: fix memory leak on pin registration failure If zl3073x_dpll_pin_register() fails, the allocated pin is not yet added to zldpll->pins list. The error path calls zl3073x_dpll_pins_unregister() which only iterates pins on the list, so the current pin is leaked. Free the pin before jumping to the error label. Additionally move the pin->dpll_pin = NULL assignment in zl3073x_dpll_pin_register() from err_register to the common err_pin_get path. When dpll_pin_get() fails, pin->dpll_pin holds an ERR_PTR value. Without this fix the subsequent zl3073x_dpll_pin_free() would trigger a spurious WARN because it checks pin->dpll_pin for non-NULL. Fixes: 75a71ecc2412 ("dpll: zl3073x: Register DPLL devices and pins") Reviewed-by: Petr Oros Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260519132205.161847-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/dpll.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index c95e93ef3ab0..64b4e9e3e8fe 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -1394,8 +1394,8 @@ zl3073x_dpll_pin_register(struct zl3073x_dpll_pin *pin, u32 index) err_register: dpll_pin_put(pin->dpll_pin, &pin->tracker); - pin->dpll_pin = NULL; err_pin_get: + pin->dpll_pin = NULL; fwnode_handle_put(pin->fwnode); pin->fwnode = NULL; zl3073x_pin_props_put(props); @@ -1563,8 +1563,10 @@ zl3073x_dpll_pins_register(struct zl3073x_dpll *zldpll) } rc = zl3073x_dpll_pin_register(pin, index); - if (rc) + if (rc) { + zl3073x_dpll_pin_free(pin); goto error; + } list_add(&pin->list, &zldpll->pins); } -- cgit v1.2.3 From 99e22ddf4edb63dc8382bc028af928056d3450cf Mon Sep 17 00:00:00 2001 From: Minh Nguyen Date: Tue, 19 May 2026 17:23:10 +0700 Subject: vsock/vmci: fix UAF when peer resets connection during handshake vmci_transport_recv_connecting_server() returned err = 0 for a peer RST in its default switch arm: err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL; That made vmci_transport_recv_listen() skip vsock_remove_pending(), leaving the pending socket on the listener's pending_links with sk_state = TCP_CLOSE while destroy: still dropped the explicit reference taken before schedule_delayed_work(). One second later vsock_pending_work() observed is_pending=true and performed full cleanup: vsock_remove_pending() then the two trailing sock_put(sk) calls -- the first reached refcount 0 and __sk_freed the socket, and the second wrote into the freed object: BUG: KASAN: slab-use-after-free in refcount_warn_saturate Write of size 4 at addr ffff88800b1cac80 by task kworker Workqueue: events vsock_pending_work Treat peer RST like any other unexpected packet type (err = -EINVAL). All destroy: arms now return err < 0, so vmci_transport_recv_listen() removes pending from pending_links synchronously and vsock_pending_work() takes the is_pending=false / !rejected branch, dropping only its own work reference. This also closes the multi-packet race Sashiko reported on v2: pending is removed from the list before any subsequent packet can find it. The pre-existing sk_acceptq_removed() gap on the err < 0 path of vmci_transport_recv_listen() that Sashiko also noted is not introduced or changed by this patch. Tested on lts-6.12.79 with KASAN: 52/100 unpatched -> 0/100 patched. Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Cc: stable@vger.kernel.org Signed-off-by: Minh Nguyen Acked-by: Bryan Tan Link: https://patch.msgid.link/20260519102310.237181-1-minhnguyen.080505@gmail.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/vmci_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 4296ca1183f1..d2579380f51e 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1164,7 +1164,7 @@ vmci_transport_recv_connecting_server(struct sock *listener, /* Close and cleanup the connection. */ vmci_transport_send_reset(pending, pkt); skerr = EPROTO; - err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL; + err = -EINVAL; goto destroy; } -- cgit v1.2.3 From 1bbf0ced1d9db73ac7893c2187f3459288603e0d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 May 2026 08:46:11 +0000 Subject: tcp: fix stale per-CPU tcp_tw_isn leak enabling ISN prediction Blamed commit moved the TIME_WAIT-derived ISN from the skb control block to a per-CPU variable, assuming the value would always be consumed by tcp_conn_request() for the same packet that wrote it. That assumption is violated by multiple drop paths between the producer (__this_cpu_write(tcp_tw_isn, isn) in tcp_v{4,6}_rcv()) and the consumer (tcp_conn_request()): - min_ttl / min_hopcount check - xfrm policy check - tcp_inbound_hash() MD5/AO mismatch - tcp_filter() eBPF/SO_ATTACH_FILTER drop - th->syn && th->fin discard in tcp_rcv_state_process() TCP_LISTEN - psp_sk_rx_policy_check() in tcp_v{4,6}_do_rcv() - tcp_checksum_complete() in tcp_v{4,6}_do_rcv() - tcp_v{4,6}_cookie_check() returning NULL When a packet is dropped on any of these paths, tcp_tw_isn is left set. The next SYN processed on the same CPU then consumes the non zero value in tcp_conn_request(), receiving a potentially predictable ISN. This patch moves back tcp_tw_isn to skb->cb[], getting rid of the per-cpu variable. Note that tcp_v{4,6}_fill_cb() do not set it. Very litle impact on overall code size/complexity: $ scripts/bloat-o-meter -t vmlinux.old vmlinux.new add/remove: 0/0 grow/shrink: 2/1 up/down: 8/-15 (-7) Function old new delta tcp_v6_rcv 3038 3042 +4 tcp_v4_rcv 3035 3039 +4 tcp_conn_request 2938 2923 -15 Total: Before=24436060, After=24436053, chg -0.00% Fixes: 41eecbd712b7 ("tcp: replace TCP_SKB_CB(skb)->tcp_tw_isn with a per-cpu field") Reported-by: Chris Mason Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260519084611.2485277-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 7 ++++--- net/ipv4/tcp.c | 3 --- net/ipv4/tcp_input.c | 15 ++++++--------- net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 3 ++- 5 files changed, 14 insertions(+), 17 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index ecbadcb3a744..98848db62894 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -65,8 +65,6 @@ static inline void tcp_orphan_count_dec(void) this_cpu_dec(tcp_orphan_count); } -DECLARE_PER_CPU(u32, tcp_tw_isn); - void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) @@ -1102,10 +1100,13 @@ struct tcp_skb_cb { __u32 seq; /* Starting sequence number */ __u32 end_seq; /* SEQ + FIN + SYN + datalen */ union { - /* Note : + /* Notes : + * tcp_tw_isn is used in input path only + * (isn chosen by tcp_timewait_state_process()) * tcp_gso_segs/size are used in write queue only, * cf tcp_skb_pcount()/tcp_skb_mss() */ + u32 tcp_tw_isn; struct { u16 tcp_gso_segs; u16 tcp_gso_size; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 432fa28e47d4..389a7cc17110 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -299,9 +299,6 @@ enum { DEFINE_PER_CPU(unsigned int, tcp_orphan_count); EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count); -DEFINE_PER_CPU(u32, tcp_tw_isn); -EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn); - long sysctl_tcp_mem[3] __read_mostly; DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d5c9e65d9760..de9f68a9c0cf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7589,6 +7589,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct sock *sk, struct sk_buff *skb) { struct tcp_fastopen_cookie foc = { .len = -1 }; + u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; struct tcp_options_received tmp_opt; const struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); @@ -7599,20 +7600,16 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct dst_entry *dst; struct flowi fl; u8 syncookies; - u32 isn; #ifdef CONFIG_TCP_AO const struct tcp_ao_hdr *aoh; #endif - isn = __this_cpu_read(tcp_tw_isn); - if (isn) { - /* TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. - */ - __this_cpu_write(tcp_tw_isn, 0); - } else { + /* If isn is non-zero, this SYN originally matched a TIME_WAIT socket. + * TW sockets are converted to open requests without limitations, + * we skip the queue limits and syncookie checks in the block below. + */ + if (!isn) { syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c0526cc03980..fdc81150ff6c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2198,6 +2198,7 @@ lookup: } } + isn = 0; process: if (static_branch_unlikely(&ip4_min_ttl)) { /* min_ttl can be changed concurrently from do_ip_setsockopt() */ @@ -2227,6 +2228,7 @@ process: th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); + TCP_SKB_CB(skb)->tcp_tw_isn = isn; skb->dev = NULL; @@ -2313,7 +2315,6 @@ do_time_wait: sk = sk2; tcp_v4_restore_cb(skb); refcounted = false; - __this_cpu_write(tcp_tw_isn, isn); goto process; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d13d49bfef19..36d75fb50a70 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1839,6 +1839,7 @@ lookup: } } + isn = 0; process: if (static_branch_unlikely(&ip6_min_hopcount)) { /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ @@ -1868,6 +1869,7 @@ process: th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); + TCP_SKB_CB(skb)->tcp_tw_isn = isn; skb->dev = NULL; @@ -1956,7 +1958,6 @@ do_time_wait: sk = sk2; tcp_v6_restore_cb(skb); refcounted = false; - __this_cpu_write(tcp_tw_isn, isn); goto process; } -- cgit v1.2.3 From f42d01aadcedd7bbf4f9a466cabe25c1781dedad Mon Sep 17 00:00:00 2001 From: Hongtao Lee Date: Wed, 20 May 2026 11:01:26 +0800 Subject: tools/bootconfig: Fix buf leaks in apply_xbc If data calloc failed, free the buf before return. Link: https://lore.kernel.org/all/20260520030126.147782-1-lihongtao@kylinos.cn/ Fixes: 950313ebf79c ("tools: bootconfig: Add bootconfig command") Signed-off-by: Hongtao Lee Signed-off-by: Masami Hiramatsu (Google) --- tools/bootconfig/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 643f707b8f1d..ddabde20585f 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -390,8 +390,10 @@ static int apply_xbc(const char *path, const char *xbc_path) /* Backup the bootconfig data */ data = calloc(size + BOOTCONFIG_ALIGN + BOOTCONFIG_FOOTER_SIZE, 1); - if (!data) + if (!data) { + free(buf); return -ENOMEM; + } memcpy(data, buf, size); /* Check the data format */ -- cgit v1.2.3 From 7d2b37d3e42d19071b62f4ddbee6e16e905efbf1 Mon Sep 17 00:00:00 2001 From: Jan Volckaert Date: Sun, 17 May 2026 17:32:37 +0200 Subject: USB: serial: option: add MeiG SRM813Q Add support for the Qualcomm Technology Snapdragon X35-based MeiG SRM813Q module. The module can be put in different modes via AT commands to enable/disable GPS functionality: MODEM - PPP mode(2dee:4d63): AT+SER=1,1 If#= 0: RMNET If#= 1: DIAG/ADB If#= 2: MODEM If#= 3: AT P: Vendor=2dee ProdID=4d63 Rev=05.15 S: Manufacturer=MEIG S: Product=LTE-A Module S: SerialNumber=1bd51f0e C: #Ifs= 4 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms NMEA mode(2dee:4d64): AT+SER=51,1 If#= 0: RMNET If#= 1: DIAG/ADB If#= 2: NMEA If#= 3: AT P: Vendor=2dee ProdID=4d64 Rev=05.15 S: Manufacturer=MEIG S: Product=LTE-A Module S: SerialNumber=1bd51f0e C: #Ifs= 4 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms Signed-off-by: Jan Volckaert Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 42e4cecd28ac..92c2feda3e3d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2450,6 +2450,12 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d38, 0xff, 0xff, 0x30) }, /* MeiG Smart SRM825WN (Diag) */ { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d38, 0xff, 0xff, 0x40) }, /* MeiG Smart SRM825WN (AT) */ { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d38, 0xff, 0xff, 0x60) }, /* MeiG Smart SRM825WN (NMEA) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d63, 0xff, 0xff, 0x30) }, /* MeiG SRM813Q (Diag) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d63, 0xff, 0xff, 0x40) }, /* MeiG SRM813Q (AT) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d64, 0xff, 0xff, 0x30) }, /* MeiG SRM813Q (Diag) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d64, 0xff, 0xff, 0x40) }, /* MeiG SRM813Q (AT) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d64, 0xff, 0xff, 0x60) }, /* MeiG SRM813Q (NMEA) */ + { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ -- cgit v1.2.3 From 689f2facc689c8add11d7ff69fbbad17d65ee596 Mon Sep 17 00:00:00 2001 From: Wanquan Zhong Date: Wed, 20 May 2026 19:32:45 +0800 Subject: USB: serial: option: add missing RSVD(5) flag for Rolling RW135R-GL The RW135R-GL entry added in commit 01e8d0f74222 ("USB: serial: option: add support for Rolling Wireless RW135R-GL") was missing the .driver_info = RSVD(5) flag used by other Rolling Wireless MBIM laptop modules (e.g. RW135-GL and RW350-GL). Without this flag, the option driver incorrectly binds to the reserved ADB interface (If#5) in multi-interface USB modes, causing AT/MBIM communication failures after mode switching. This matches the handling of other Rolling Wireless MBIM devices. - VID:PID 33f8:1003, RW135R-GL for laptop debug M.2 cards (with MBIM interface for Linux/Chrome OS) 0x1003: mbim, diag, AT, pipe Here are the outputs of usb-devices: T: Bus=03 Lev=01 Prnt=01 Port=04 Cnt=02 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=33f8 ProdID=1003 Rev= 5.15 S: Manufacturer=Rolling Wireless S.a.r.l. S: Product=Rolling RW135R-GL Module S: SerialNumber=12345678 C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms - VID:PID 33f8:1003, RW135R-GL for laptop debug M.2 cards (with MBIM interface for Linux/Chrome OS) 0x1003: mbim, diag, AT, ADB, pipe Here are the outputs of usb-devices: T: Bus=03 Lev=01 Prnt=01 Port=04 Cnt=02 Dev#= 7 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=33f8 ProdID=1003 Rev= 5.15 S: Manufacturer=Rolling Wireless S.a.r.l. S: Product=Rolling RW135R-GL Module S: SerialNumber=12345678 C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms - VID:PID 33f8:1003, RW135R-GL for laptop debug M.2 cards (with MBIM interface for Linux/Chrome OS) 0x1003: mbim, pipe Here are the outputs of usb-devices: T: Bus=03 Lev=01 Prnt=01 Port=04 Cnt=02 Dev#= 9 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=33f8 ProdID=1003 Rev= 5.15 S: Manufacturer=Rolling Wireless S.a.r.l. S: Product=Rolling RW135R-GL Module S: SerialNumber=12345678 C:* #Ifs= 3 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Fixes: 01e8d0f74222 ("USB: serial: option: add support for Rolling Wireless RW135R-GL") Signed-off-by: Wanquan Zhong Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 92c2feda3e3d..48ae0188f2e9 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2476,7 +2476,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0302, 0xff) }, /* Rolling RW101R-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0802, 0xff), /* Rolling RW350-GL (laptop MBIM) */ .driver_info = RSVD(5) }, - { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x1003, 0xff) }, /* Rolling RW135R-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x1003, 0xff), /* Rolling RW135R-GL (laptop MBIM) */ + .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for Global */ { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0xff, 0x40) }, -- cgit v1.2.3 From be460cedb67ab803c1bebceac19b1d44acb85d30 Mon Sep 17 00:00:00 2001 From: Stepan Ionichev Date: Wed, 20 May 2026 16:05:04 +0500 Subject: gpio: pca953x: propagate regulator_enable() error from resume pca953x_resume() returns 0 when regulator_enable() fails, dropping the real error code and masking the failure as a successful resume. The caller then proceeds as if the chip is powered, while the regulator is in fact disabled. Return ret so PM core sees the actual failure. Signed-off-by: Stepan Ionichev Link: https://patch.msgid.link/20260520110504.13969-1-sozdayvek@gmail.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 52e96cc5f67b..b9c905a0ffa9 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -1411,7 +1411,7 @@ static int pca953x_resume(struct device *dev) ret = regulator_enable(chip->regulator); if (ret) { dev_err(dev, "Failed to enable regulator: %d\n", ret); - return 0; + return ret; } } -- cgit v1.2.3 From 8a3bee801d420be8a7a0bae4a26547b353b8fe22 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 22 Apr 2026 15:46:37 +0100 Subject: comedi: comedi_test: Fix limiting of convert_arg in waveform_ai_cmdtest() The function checks and possibly modifies the description of an asynchronous command to be run on the analog input subdevice of a comedi device attached to the "comedi_test" driver, returning 0 if no modifications were required, or a positive value that indicates which step of the checking process it failed on. Step 4 fixes up various argument values for various trigger sources. There are two bugs in the fixing up of the `convert_arg` value to keep the `scan_begin_arg` value within the range of `unsigned int` when `scan_begin_src` and `convert_src` both have the value `TRIG_TIMER`, which indicates that the corresponding `_arg` values hold a time period in nanoseconds. The code also uses `scan_end_arg` which hold the number of "conversions" within each "scan". The goal is to end up with the scan period being less than or equal to the convert period multiplied by the number of conversions per scan. It intends to do that by clamping the `convert_arg` value to a maximum value of `UINT_MAX / scan_end_arg` rounded down to a multiple of 1000 (`NSEC_PER_USEC`). (The rounding from nanoseconds to microseconds is because the driver is modelling a device that uses a 1 MHz clock for timing. This is partly because that is a more typical timing base for real hardware devices driven by comedi, and partly because the driver used to use `struct timeval` internally.) The first bug is that the code checks if `scan_begin_arg == TRIG_TIMER` when it should be checking if `scan_begin_src == TRIG_TIMER`. The bugged check will always fail because if `scan_begin_src == TRIG_TIMER`, then `scan_begin_arg` will be at least 1000 (`NSEC_PER_USEC`), otherwise `scan_begin_src == TRIG_FOLLOW` and `scan_begin_arg` will be 0. (N.B `TRIG_TIMER` is defined as `0x10`.) The second bug is that is rounding the maximum value down to a multiple of 1000000000 (`NSEC_PER_SEC`) instead of 1000 (`NSEC_PER_USEC`), however this bug is not reached due to the first bug. This patch fixes both bugs. Fixes: 783ddaebd397 ("staging: comedi: comedi_test: support scan_begin_src == TRIG_FOLLOW") Fixes: 5afdcad2f818 ("staging: comedi: comedi_test: limit maximum convert_arg") Cc: stable Signed-off-by: Ian Abbott Link: https://patch.msgid.link/20260422144637.27692-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/comedi_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/comedi/drivers/comedi_test.c b/drivers/comedi/drivers/comedi_test.c index 01aafce20ef8..4050f66193e5 100644 --- a/drivers/comedi/drivers/comedi_test.c +++ b/drivers/comedi/drivers/comedi_test.c @@ -324,10 +324,10 @@ static int waveform_ai_cmdtest(struct comedi_device *dev, arg = min(arg, rounddown(UINT_MAX, (unsigned int)NSEC_PER_USEC)); arg = NSEC_PER_USEC * DIV_ROUND_CLOSEST(arg, NSEC_PER_USEC); - if (cmd->scan_begin_arg == TRIG_TIMER) { + if (cmd->scan_begin_src == TRIG_TIMER) { /* limit convert_arg to keep scan_begin_arg in range */ limit = UINT_MAX / cmd->scan_end_arg; - limit = rounddown(limit, (unsigned int)NSEC_PER_SEC); + limit = rounddown(limit, (unsigned int)NSEC_PER_USEC); arg = min(arg, limit); } err |= comedi_check_trigger_arg_is(&cmd->convert_arg, arg); -- cgit v1.2.3 From 542f5248cb481073203e0dadab5bcbd28aeae308 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 22 Apr 2026 17:21:19 +0100 Subject: comedi: comedi_test: fix check for valid scan_begin_src in waveform_ai_cmdtest() Commit 783ddaebd397 ("staging: comedi: comedi_test: support scan_begin_src == TRIG_FOLLOW") neglected to add a test that `scan_begin_src` has only one bit set. The allowed values are `TRIG_FOLLOW` and `TRIG_TIMER`, but the code incorrectly also allows `TRIG_FOLLOW | TRIG_TIMER`. Add a call to `comedi_check_trigger_is_unique()` to check that only one trigger source bit is set. Fixes: 783ddaebd397 ("staging: comedi: comedi_test: support scan_begin_src == TRIG_FOLLOW") Cc: stable Signed-off-by: Ian Abbott Link: https://patch.msgid.link/20260422162138.36003-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/comedi_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/comedi/drivers/comedi_test.c b/drivers/comedi/drivers/comedi_test.c index 4050f66193e5..1f430ffc7bd9 100644 --- a/drivers/comedi/drivers/comedi_test.c +++ b/drivers/comedi/drivers/comedi_test.c @@ -274,6 +274,7 @@ static int waveform_ai_cmdtest(struct comedi_device *dev, /* Step 2a : make sure trigger sources are unique */ err |= comedi_check_trigger_is_unique(cmd->convert_src); + err |= comedi_check_trigger_is_unique(cmd->scan_begin_src); err |= comedi_check_trigger_is_unique(cmd->stop_src); /* Step 2b : and mutually compatible */ -- cgit v1.2.3 From 48f6a5356a33dd78e7144ae1faef95ffc990aae0 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Sat, 16 May 2026 07:28:53 +0900 Subject: net: skbuff: propagate shared-frag marker through frag-transfer helpers Two frag-transfer helpers (__pskb_copy_fclone() and skb_shift()) fail to propagate the SKBFL_SHARED_FRAG bit in skb_shinfo()->flags when moving frags from source to destination. __pskb_copy_fclone() defers the rest of the shinfo metadata to skb_copy_header() after copying frag descriptors, but that helper only carries over gso_{size,segs, type} and never touches skb_shinfo()->flags; skb_shift() moves frag descriptors directly and leaves flags untouched. As a result, the destination skb keeps a reference to the same externally-owned or page-cache-backed pages while reporting skb_has_shared_frag() as false. The mismatch is harmful in any in-place writer that uses skb_has_shared_frag() to decide whether shared pages must be detoured through skb_cow_data(). ESP input is one such writer (esp4.c, esp6.c), and a single nft 'dup to ' rule -- or any other nf_dup_ipv4() / xt_TEE caller -- is enough to land a pskb_copy()'d skb in esp_input() with the marker stripped, letting an unprivileged user write into the page cache of a root-owned read-only file via authencesn-ESN stray writes. Set SKBFL_SHARED_FRAG on the destination whenever frag descriptors were actually moved from the source. skb_copy() and skb_copy_expand() share skb_copy_header() too but linearize all paged data into freshly allocated head storage and emerge with nr_frags == 0, so skb_has_shared_frag() returns false on its own; they need no change. The same omission exists in skb_gro_receive() and skb_gro_receive_list(). The former moves the incoming skb's frag descriptors into the accumulator's last sub-skb via two paths (a direct frag-move loop and the head_frag + memcpy path); the latter chains the incoming skb whole onto p's frag_list. Downstream skb_segment() reads only skb_shinfo(p)->flags, and skb_segment_list() reuses each sub-skb's shinfo as the nskb -- both p and lp must carry the marker. The same omission also exists in tcp_clone_payload(), which builds an MTU probe skb by moving frag descriptors from skbs on sk_write_queue into a freshly allocated nskb. The helper falls into the same family and warrants the same fix for consistency; no TCP TX-side in-place writer is currently known to reach a user page through this gap, but a future consumer depending on the marker would regress silently. The same omission exists in skb_segment(): the per-iteration flag merge takes only head_skb's flag, and the inner switch that rebinds frag_skb to list_skb on head_skb-frags exhaustion does not fold the new frag_skb's flag into nskb. Fold frag_skb's flag at both sites so segments drawing frags from frag_list members carry the marker. Fixes: cef401de7be8 ("net: fix possible wrong checksum generation") Fixes: f4c50a4034e6 ("xfrm: esp: avoid in-place decrypt on shared skb frags") Suggested-by: Sabrina Dubroca Suggested-by: Sultan Alsawaf Suggested-by: Ben Hutchings Suggested-by: Lin Ma Suggested-by: Jingguo Tan Suggested-by: Aaron Esau Cc: stable@vger.kernel.org Signed-off-by: Hyunwoo Kim Tested-by: Rajat Gupta Link: https://patch.msgid.link/ageeJfJHwgzmKXbh@v4bel Signed-off-by: Paolo Abeni --- net/core/gro.c | 4 ++++ net/core/skbuff.c | 9 ++++++++- net/ipv4/tcp_output.c | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/net/core/gro.c b/net/core/gro.c index 31d21de5b15a..9f8960789b2c 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -213,10 +213,12 @@ done: p->data_len += len; p->truesize += delta_truesize; p->len += len; + skb_shinfo(p)->flags |= skbinfo->flags & SKBFL_SHARED_FRAG; if (lp != p) { lp->data_len += len; lp->truesize += delta_truesize; lp->len += len; + skb_shinfo(lp)->flags |= skbinfo->flags & SKBFL_SHARED_FRAG; } NAPI_GRO_CB(skb)->same_flow = 1; return 0; @@ -244,6 +246,8 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) p->truesize += skb->truesize; p->len += skb->len; + skb_shinfo(p)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG; + NAPI_GRO_CB(skb)->same_flow = 1; return 0; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9c4e8d331d6d..44ac121cfccb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2248,6 +2248,7 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, skb_frag_ref(skb, i); } skb_shinfo(n)->nr_frags = i; + skb_shinfo(n)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG; } if (skb_has_frag_list(skb)) { @@ -4349,6 +4350,8 @@ onlymerged: tgt->ip_summed = CHECKSUM_PARTIAL; skb->ip_summed = CHECKSUM_PARTIAL; + skb_shinfo(tgt)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG; + skb_len_add(skb, -shiftlen); skb_len_add(tgt, shiftlen); @@ -4959,7 +4962,8 @@ normal: skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags & + skb_shinfo(nskb)->flags |= (skb_shinfo(head_skb)->flags | + skb_shinfo(frag_skb)->flags) & SKBFL_SHARED_FRAG; if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) @@ -4976,6 +4980,9 @@ normal: nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; frag_skb = list_skb; + + skb_shinfo(nskb)->flags |= skb_shinfo(frag_skb)->flags & SKBFL_SHARED_FRAG; + if (!skb_headlen(list_skb)) { BUG_ON(!nfrags); } else { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f9d8755705f7..6e4bb411dc04 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2626,6 +2626,7 @@ static int tcp_clone_payload(struct sock *sk, struct sk_buff *to, todo = min_t(int, skb_frag_size(fragfrom), probe_size - len); len += todo; + skb_shinfo(to)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG; if (lastfrag && skb_frag_page(fragfrom) == skb_frag_page(lastfrag) && skb_frag_off(fragfrom) == skb_frag_off(lastfrag) + -- cgit v1.2.3 From 96031b31a4b3b6ec836b9fe7be8f6e6ebcfe8d67 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 21 May 2026 00:04:22 +0200 Subject: irqchip/exynos-combiner: Switch to raw_spinlock The exynos-combiner driver uses a regular spinlock to protect access to the combiner interrupt status register in combiner_handle_cascade_irq(), which is invoked in hard interrupt context as a chained interrupt handler. When PREEMPT_RT is enabled on ARM, regular spinlock is converted to a sleeping lock (mutex-based), which must not be used in atomic context such as hard interrupt handlers. Switch the irq_controller_lock to raw_spinlock, which remains a true non-sleeping spinlock even under PREEMPT_RT. Fixes: a900e5d99718 ("ARM: exynos: move exynos4210-combiner to drivers/irqchip") Signed-off-by: Marek Szyprowski Signed-off-by: Thomas Gleixner --- drivers/irqchip/exynos-combiner.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/exynos-combiner.c b/drivers/irqchip/exynos-combiner.c index 11d105457798..03cafcc5c835 100644 --- a/drivers/irqchip/exynos-combiner.c +++ b/drivers/irqchip/exynos-combiner.c @@ -24,7 +24,7 @@ #define IRQ_IN_COMBINER 8 -static DEFINE_SPINLOCK(irq_controller_lock); +static DEFINE_RAW_SPINLOCK(irq_controller_lock); struct combiner_chip_data { unsigned int hwirq_offset; @@ -72,9 +72,9 @@ static void combiner_handle_cascade_irq(struct irq_desc *desc) chained_irq_enter(chip, desc); - spin_lock(&irq_controller_lock); + raw_spin_lock(&irq_controller_lock); status = readl_relaxed(chip_data->base + COMBINER_INT_STATUS); - spin_unlock(&irq_controller_lock); + raw_spin_unlock(&irq_controller_lock); status &= chip_data->irq_mask; if (status == 0) -- cgit v1.2.3 From c36069c6f46c52458bb86fa8eb4803f1e0b70fb0 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Mon, 18 May 2026 10:20:23 +0800 Subject: dt-bindings: ethernet: eswin: add optional TXD and RXD delay register offsets Document two optional cells in eswin,hsp-sp-csr for the TXD and RXD delay control register offsets. These registers are used by the driver to clear any residual delay configuration left by the bootloader, ensuring that MAC-side RGMII delay settings are applied solely according to the kernel configuration. Add a reference to the EIC7700X SoC Technical Reference Manual for background information about the HSP CSR block. Fixes: 888bd0eca93c ("dt-bindings: ethernet: eswin: Document for EIC7700 SoC") Signed-off-by: Zhi Li Acked-by: Conor Dooley Link: https://patch.msgid.link/20260518022023.427-1-lizhi2@eswincomputing.com Signed-off-by: Paolo Abeni --- .../devicetree/bindings/net/eswin,eic7700-eth.yaml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml index 91e8cd1db67b..b66ae6300faf 100644 --- a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml +++ b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml @@ -73,6 +73,15 @@ properties: HSP CSR is to control and get status of different high-speed peripherals (such as Ethernet, USB, SATA, etc.) via register, which can tune board-level's parameters of PHY, etc. + + Additional background information about the High-Speed Subsystem + and the HSP CSR block is available in Chapter 10 ("High-Speed Interface") + of the EIC7700X SoC Technical Reference Manual, Part 4 + (EIC7700X_SoC_Technical_Reference_Manual_Part4.pdf). The manual is + publicly available at + https://github.com/eswincomputing/EIC7700X-SoC-Technical-Reference-Manual/releases + + This reference is provided for background information only. $ref: /schemas/types.yaml#/definitions/phandle-array items: - items: @@ -82,6 +91,8 @@ properties: - description: Offset of AXI clock controller Low-Power request register - description: Offset of register controlling TX/RX clock delay + - description: Optional offset of register controlling TXD delay + - description: Optional offset of register controlling RXD delay required: - compatible @@ -116,7 +127,7 @@ examples: reset-names = "stmmaceth"; rx-internal-delay-ps = <200>; tx-internal-delay-ps = <200>; - eswin,hsp-sp-csr = <&hsp_sp_csr 0x100 0x108 0x118>; + eswin,hsp-sp-csr = <&hsp_sp_csr 0x100 0x108 0x118 0x114 0x11c>; snps,axi-config = <&stmmac_axi_setup>; snps,aal; snps,fixed-burst; -- cgit v1.2.3 From 23386defe949c0db4f746bed7098fc5e06746083 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Mon, 18 May 2026 10:20:55 +0800 Subject: net: stmmac: eswin: fix HSP CSR init ordering after clock enable Fix the initialization ordering of the HSP CSR configuration in the EIC7700 DWMAC glue driver. The HSP CSR registers control MAC-side RGMII delay behavior and must only be accessed after the corresponding clocks are enabled. The previous implementation could trigger register access before clock enablement, leading to undefined behavior depending on boot state. Move the HSP CSR configuration into the post-clock-enable initialization path to ensure all register accesses occur under valid clock domains. This change ensures deterministic initialization and prevents clock-dependent register access failures during probe or resume. Fixes: ea77dbbdbc4e ("net: stmmac: add Eswin EIC7700 glue driver") Signed-off-by: Zhi Li Link: https://patch.msgid.link/20260518022055.444-1-lizhi2@eswincomputing.com Signed-off-by: Paolo Abeni --- .../net/ethernet/stmicro/stmmac/dwmac-eic7700.c | 73 ++++++++++++---------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c index bcb8e000e720..63001c4acdb7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c @@ -42,6 +42,11 @@ static const char * const eic7700_clk_names[] = { struct eic7700_qos_priv { struct plat_stmmacenet_data *plat_dat; + struct regmap *eic7700_hsp_regmap; + u32 eth_axi_lp_ctrl_offset; + u32 eth_phy_ctrl_offset; + u32 eth_clk_offset; + u32 eth_clk_dly_param; }; static int eic7700_clks_config(void *priv, bool enabled) @@ -61,8 +66,28 @@ static int eic7700_clks_config(void *priv, bool enabled) static int eic7700_dwmac_init(struct device *dev, void *priv) { struct eic7700_qos_priv *dwc = priv; + int ret; + + ret = eic7700_clks_config(dwc, true); + if (ret) + return ret; + + ret = regmap_set_bits(dwc->eic7700_hsp_regmap, + dwc->eth_phy_ctrl_offset, + EIC7700_ETH_TX_CLK_SEL | + EIC7700_ETH_PHY_INTF_SELI); + if (ret) { + eic7700_clks_config(dwc, false); + return ret; + } + + regmap_write(dwc->eic7700_hsp_regmap, dwc->eth_axi_lp_ctrl_offset, + EIC7700_ETH_CSYSREQ_VAL); - return eic7700_clks_config(dwc, true); + regmap_write(dwc->eic7700_hsp_regmap, dwc->eth_clk_offset, + dwc->eth_clk_dly_param); + + return 0; } static void eic7700_dwmac_exit(struct device *dev, void *priv) @@ -93,12 +118,6 @@ static int eic7700_dwmac_probe(struct platform_device *pdev) struct plat_stmmacenet_data *plat_dat; struct stmmac_resources stmmac_res; struct eic7700_qos_priv *dwc_priv; - struct regmap *eic7700_hsp_regmap; - u32 eth_axi_lp_ctrl_offset; - u32 eth_phy_ctrl_offset; - u32 eth_phy_ctrl_regset; - u32 eth_rxd_dly_offset; - u32 eth_dly_param = 0; u32 delay_ps; int i, ret; @@ -121,8 +140,9 @@ static int eic7700_dwmac_probe(struct platform_device *pdev) "rx-internal-delay-ps", &delay_ps)) { u32 val = min(delay_ps / 100, EIC7700_MAX_DELAY_UNIT); - eth_dly_param &= ~EIC7700_ETH_RX_ADJ_DELAY; - eth_dly_param |= FIELD_PREP(EIC7700_ETH_RX_ADJ_DELAY, val); + dwc_priv->eth_clk_dly_param &= ~EIC7700_ETH_RX_ADJ_DELAY; + dwc_priv->eth_clk_dly_param |= + FIELD_PREP(EIC7700_ETH_RX_ADJ_DELAY, val); } else { return dev_err_probe(&pdev->dev, -EINVAL, "missing required property rx-internal-delay-ps\n"); @@ -133,53 +153,42 @@ static int eic7700_dwmac_probe(struct platform_device *pdev) "tx-internal-delay-ps", &delay_ps)) { u32 val = min(delay_ps / 100, EIC7700_MAX_DELAY_UNIT); - eth_dly_param &= ~EIC7700_ETH_TX_ADJ_DELAY; - eth_dly_param |= FIELD_PREP(EIC7700_ETH_TX_ADJ_DELAY, val); + dwc_priv->eth_clk_dly_param &= ~EIC7700_ETH_TX_ADJ_DELAY; + dwc_priv->eth_clk_dly_param |= + FIELD_PREP(EIC7700_ETH_TX_ADJ_DELAY, val); } else { return dev_err_probe(&pdev->dev, -EINVAL, "missing required property tx-internal-delay-ps\n"); } - eic7700_hsp_regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, - "eswin,hsp-sp-csr"); - if (IS_ERR(eic7700_hsp_regmap)) + dwc_priv->eic7700_hsp_regmap = + syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "eswin,hsp-sp-csr"); + if (IS_ERR(dwc_priv->eic7700_hsp_regmap)) return dev_err_probe(&pdev->dev, - PTR_ERR(eic7700_hsp_regmap), + PTR_ERR(dwc_priv->eic7700_hsp_regmap), "Failed to get hsp-sp-csr regmap\n"); ret = of_property_read_u32_index(pdev->dev.of_node, "eswin,hsp-sp-csr", - 1, ð_phy_ctrl_offset); + 1, &dwc_priv->eth_phy_ctrl_offset); if (ret) return dev_err_probe(&pdev->dev, ret, "can't get eth_phy_ctrl_offset\n"); - regmap_read(eic7700_hsp_regmap, eth_phy_ctrl_offset, - ð_phy_ctrl_regset); - eth_phy_ctrl_regset |= - (EIC7700_ETH_TX_CLK_SEL | EIC7700_ETH_PHY_INTF_SELI); - regmap_write(eic7700_hsp_regmap, eth_phy_ctrl_offset, - eth_phy_ctrl_regset); - ret = of_property_read_u32_index(pdev->dev.of_node, "eswin,hsp-sp-csr", - 2, ð_axi_lp_ctrl_offset); + 2, &dwc_priv->eth_axi_lp_ctrl_offset); if (ret) return dev_err_probe(&pdev->dev, ret, "can't get eth_axi_lp_ctrl_offset\n"); - regmap_write(eic7700_hsp_regmap, eth_axi_lp_ctrl_offset, - EIC7700_ETH_CSYSREQ_VAL); - ret = of_property_read_u32_index(pdev->dev.of_node, "eswin,hsp-sp-csr", - 3, ð_rxd_dly_offset); + 3, &dwc_priv->eth_clk_offset); if (ret) return dev_err_probe(&pdev->dev, ret, - "can't get eth_rxd_dly_offset\n"); - - regmap_write(eic7700_hsp_regmap, eth_rxd_dly_offset, - eth_dly_param); + "can't get eth_clk_offset\n"); plat_dat->num_clks = ARRAY_SIZE(eic7700_clk_names); plat_dat->clks = devm_kcalloc(&pdev->dev, -- cgit v1.2.3 From 6872fb088edc1a3c36792b301f8e4a1c35dd7c35 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Mon, 18 May 2026 10:21:37 +0800 Subject: net: stmmac: eswin: clear TXD and RXD delay registers during initialization Clear the TXD and RXD delay control registers during EIC7700 DWMAC initialization. These registers may retain values programmed by the bootloader. If left unchanged, residual delays can alter the effective RGMII timing seen by the MAC and override the configuration described by the device tree. This may violate the expected RGMII timing model and can cause link instability or prevent the Ethernet controller from operating correctly. Explicitly clearing these registers ensures that the MAC delay settings are determined solely by the kernel configuration. The corresponding register offsets are optional, and the registers are only cleared when the offsets are provided in the device tree. Fixes: ea77dbbdbc4e ("net: stmmac: add Eswin EIC7700 glue driver") Signed-off-by: Zhi Li Link: https://patch.msgid.link/20260518022137.464-1-lizhi2@eswincomputing.com Signed-off-by: Paolo Abeni --- .../net/ethernet/stmicro/stmmac/dwmac-eic7700.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c index 63001c4acdb7..541b279f08a1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c @@ -46,7 +46,11 @@ struct eic7700_qos_priv { u32 eth_axi_lp_ctrl_offset; u32 eth_phy_ctrl_offset; u32 eth_clk_offset; + u32 eth_txd_offset; + u32 eth_rxd_offset; u32 eth_clk_dly_param; + bool has_txd_offset; + bool has_rxd_offset; }; static int eic7700_clks_config(void *priv, bool enabled) @@ -84,6 +88,12 @@ static int eic7700_dwmac_init(struct device *dev, void *priv) regmap_write(dwc->eic7700_hsp_regmap, dwc->eth_axi_lp_ctrl_offset, EIC7700_ETH_CSYSREQ_VAL); + if (dwc->has_txd_offset) + regmap_write(dwc->eic7700_hsp_regmap, dwc->eth_txd_offset, 0); + + if (dwc->has_rxd_offset) + regmap_write(dwc->eic7700_hsp_regmap, dwc->eth_rxd_offset, 0); + regmap_write(dwc->eic7700_hsp_regmap, dwc->eth_clk_offset, dwc->eth_clk_dly_param); @@ -190,6 +200,18 @@ static int eic7700_dwmac_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, ret, "can't get eth_clk_offset\n"); + ret = of_property_read_u32_index(pdev->dev.of_node, + "eswin,hsp-sp-csr", + 4, &dwc_priv->eth_txd_offset); + if (!ret) + dwc_priv->has_txd_offset = true; + + ret = of_property_read_u32_index(pdev->dev.of_node, + "eswin,hsp-sp-csr", + 5, &dwc_priv->eth_rxd_offset); + if (!ret) + dwc_priv->has_rxd_offset = true; + plat_dat->num_clks = ARRAY_SIZE(eic7700_clk_names); plat_dat->clks = devm_kcalloc(&pdev->dev, plat_dat->num_clks, -- cgit v1.2.3 From 6ffcef9bc1fc2ad8110777decd6d026e3cb468ce Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Mon, 18 May 2026 10:21:52 +0800 Subject: net: stmmac: eswin: correct RGMII delay granularity to 20 ps The EIC7700 MAC implements programmable RGMII delay adjustment with a granularity of 20 ps per hardware step. The driver previously converted rx-internal-delay-ps and tx-internal-delay-ps values using a 100 ps step size, resulting in incorrect delay programming. Update the conversion to use the correct 20 ps granularity so the programmed delay matches the values described in the device tree. Fixes: ea77dbbdbc4e ("net: stmmac: add Eswin EIC7700 glue driver") Signed-off-by: Zhi Li Link: https://patch.msgid.link/20260518022156.484-1-lizhi2@eswincomputing.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c index 541b279f08a1..ef60cab24533 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c @@ -28,8 +28,8 @@ /* * TX/RX Clock Delay Bit Masks: - * - TX Delay: bits [14:8] — TX_CLK delay (unit: 0.1ns per bit) - * - RX Delay: bits [30:24] — RX_CLK delay (unit: 0.1ns per bit) + * - TX Delay: bits [14:8] — TX_CLK delay (unit: 0.02ns per bit) + * - RX Delay: bits [30:24] — RX_CLK delay (unit: 0.02ns per bit) */ #define EIC7700_ETH_TX_ADJ_DELAY GENMASK(14, 8) #define EIC7700_ETH_RX_ADJ_DELAY GENMASK(30, 24) @@ -148,7 +148,7 @@ static int eic7700_dwmac_probe(struct platform_device *pdev) /* Read rx-internal-delay-ps and update rx_clk delay */ if (!of_property_read_u32(pdev->dev.of_node, "rx-internal-delay-ps", &delay_ps)) { - u32 val = min(delay_ps / 100, EIC7700_MAX_DELAY_UNIT); + u32 val = min(delay_ps / 20, EIC7700_MAX_DELAY_UNIT); dwc_priv->eth_clk_dly_param &= ~EIC7700_ETH_RX_ADJ_DELAY; dwc_priv->eth_clk_dly_param |= @@ -161,7 +161,7 @@ static int eic7700_dwmac_probe(struct platform_device *pdev) /* Read tx-internal-delay-ps and update tx_clk delay */ if (!of_property_read_u32(pdev->dev.of_node, "tx-internal-delay-ps", &delay_ps)) { - u32 val = min(delay_ps / 100, EIC7700_MAX_DELAY_UNIT); + u32 val = min(delay_ps / 20, EIC7700_MAX_DELAY_UNIT); dwc_priv->eth_clk_dly_param &= ~EIC7700_ETH_TX_ADJ_DELAY; dwc_priv->eth_clk_dly_param |= -- cgit v1.2.3 From c2e152f7ce3208b9333d212d41a87637ec1dd170 Mon Sep 17 00:00:00 2001 From: Zhi Li Date: Mon, 18 May 2026 10:22:13 +0800 Subject: net: stmmac: eswin: validate RGMII delay values Validate rx-internal-delay-ps and tx-internal-delay-ps against the hardware capabilities of the EIC7700 MAC. The programmable RGMII delay supports 20 ps steps and a maximum value of 2540 ps. The driver previously accepted arbitrary values and silently truncated unsupported settings when converting them to hardware units. As a result, invalid device tree values could lead to unexpected delay programming and incorrect RGMII timing. Reject delay values that are not multiples of 20 ps or exceed the supported hardware range. Fixes: ea77dbbdbc4e ("net: stmmac: add Eswin EIC7700 glue driver") Signed-off-by: Zhi Li Link: https://patch.msgid.link/20260518022214.507-1-lizhi2@eswincomputing.com Signed-off-by: Paolo Abeni --- .../net/ethernet/stmicro/stmmac/dwmac-eic7700.c | 29 +++++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c index ef60cab24533..4ac979d874d6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-eic7700.c @@ -34,7 +34,10 @@ #define EIC7700_ETH_TX_ADJ_DELAY GENMASK(14, 8) #define EIC7700_ETH_RX_ADJ_DELAY GENMASK(30, 24) -#define EIC7700_MAX_DELAY_UNIT 0x7F +#define EIC7700_MAX_DELAY_STEPS 0x7F +#define EIC7700_DELAY_STEP_PS 20 +#define EIC7700_MAX_DELAY_PS \ + (EIC7700_MAX_DELAY_STEPS * EIC7700_DELAY_STEP_PS) static const char * const eic7700_clk_names[] = { "tx", "axi", "cfg", @@ -128,7 +131,7 @@ static int eic7700_dwmac_probe(struct platform_device *pdev) struct plat_stmmacenet_data *plat_dat; struct stmmac_resources stmmac_res; struct eic7700_qos_priv *dwc_priv; - u32 delay_ps; + u32 delay_ps, val; int i, ret; ret = stmmac_get_platform_resources(pdev, &stmmac_res); @@ -148,7 +151,16 @@ static int eic7700_dwmac_probe(struct platform_device *pdev) /* Read rx-internal-delay-ps and update rx_clk delay */ if (!of_property_read_u32(pdev->dev.of_node, "rx-internal-delay-ps", &delay_ps)) { - u32 val = min(delay_ps / 20, EIC7700_MAX_DELAY_UNIT); + if (delay_ps % EIC7700_DELAY_STEP_PS) + return dev_err_probe(&pdev->dev, -EINVAL, + "rx delay must be multiple of %dps\n", + EIC7700_DELAY_STEP_PS); + + if (delay_ps > EIC7700_MAX_DELAY_PS) + return dev_err_probe(&pdev->dev, -EINVAL, + "rx delay out of range\n"); + + val = delay_ps / EIC7700_DELAY_STEP_PS; dwc_priv->eth_clk_dly_param &= ~EIC7700_ETH_RX_ADJ_DELAY; dwc_priv->eth_clk_dly_param |= @@ -161,7 +173,16 @@ static int eic7700_dwmac_probe(struct platform_device *pdev) /* Read tx-internal-delay-ps and update tx_clk delay */ if (!of_property_read_u32(pdev->dev.of_node, "tx-internal-delay-ps", &delay_ps)) { - u32 val = min(delay_ps / 20, EIC7700_MAX_DELAY_UNIT); + if (delay_ps % EIC7700_DELAY_STEP_PS) + return dev_err_probe(&pdev->dev, -EINVAL, + "tx delay must be multiple of %dps\n", + EIC7700_DELAY_STEP_PS); + + if (delay_ps > EIC7700_MAX_DELAY_PS) + return dev_err_probe(&pdev->dev, -EINVAL, + "tx delay out of range\n"); + + val = delay_ps / EIC7700_DELAY_STEP_PS; dwc_priv->eth_clk_dly_param &= ~EIC7700_ETH_TX_ADJ_DELAY; dwc_priv->eth_clk_dly_param |= -- cgit v1.2.3 From 3e6ccd790ed69bedd3d9626d01dd35cf9821c121 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 21 May 2026 10:42:16 +0200 Subject: gpio: cdev: check if uAPI v2 config attributes are correctly zeroed We check the padding of other uAPI v2 structures but not that of line config attributes. For used attributes: check if their padding is zeroed, for unused: check if the entire structure is zeroed. Fixes: 3c0d9c635ae2 ("gpiolib: cdev: support GPIO_V2_GET_LINE_IOCTL and GPIO_V2_LINE_GET_VALUES_IOCTL") Reviewed-by: Kent Gibson Link: https://patch.msgid.link/20260521-gpio-cdev-attr-padding-check-v3-1-ec3bcbe2e358@oss.qualcomm.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-cdev.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index f36b7c06996d..82f27db0b230 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -1184,6 +1184,7 @@ static int gpio_v2_line_flags_validate(u64 flags) static int gpio_v2_line_config_validate(struct gpio_v2_line_config *lc, unsigned int num_lines) { + size_t unused_attrs; unsigned int i; u64 flags; int ret; @@ -1191,9 +1192,21 @@ static int gpio_v2_line_config_validate(struct gpio_v2_line_config *lc, if (lc->num_attrs > GPIO_V2_LINE_NUM_ATTRS_MAX) return -EINVAL; + unused_attrs = GPIO_V2_LINE_NUM_ATTRS_MAX - lc->num_attrs; + if (!mem_is_zero(lc->padding, sizeof(lc->padding))) return -EINVAL; + for (i = 0; i < lc->num_attrs; i++) { + if (lc->attrs[i].attr.padding != 0) + return -EINVAL; + } + + if (unused_attrs) { + if (!mem_is_zero(&lc->attrs[lc->num_attrs], unused_attrs * sizeof(*lc->attrs))) + return -EINVAL; + } + for (i = 0; i < num_lines; i++) { flags = gpio_v2_line_config_flags(lc, i); ret = gpio_v2_line_flags_validate(flags); -- cgit v1.2.3 From 30c073cab97afb31901f94de9605177b6b84367e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 20 May 2026 10:49:11 +0200 Subject: gpio: aggregator: fix a potential use-after-free On error we free aggr->lookups->dev_id before removing the entry from the lookup table. If a concurrent thread calls gpiod_find() before we remove the entry, it could iterate over the list and call gpiod_match_lookup_table() which unconditionally dereferences dev_id when calling strcmp(). Reverse the order of cleanup. Fixes: 86f162e73d2d ("gpio: aggregator: introduce basic configfs interface") Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20260520084911.27938-1-bartosz.golaszewski@oss.qualcomm.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aggregator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index 5915209e1e21..b53230065f50 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -979,8 +979,8 @@ static int gpio_aggregator_activate(struct gpio_aggregator *aggr) err_unregister_pdev: platform_device_unregister(pdev); err_remove_lookup_table: - kfree(aggr->lookups->dev_id); gpiod_remove_lookup_table(aggr->lookups); + kfree(aggr->lookups->dev_id); err_remove_swnode: fwnode_remove_software_node(swnode); err_remove_lookups: -- cgit v1.2.3 From 61fef83f239ecace1cce716135762a2d9b7b1fc6 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 20 May 2026 14:16:31 +0200 Subject: gpio: aggregator: remove the software node when deactivating the aggregator The dynamic software node we create for the aggregator platform device when using configfs is leaked when the device is deactivated. Destroy it as the last step in the tear-down path. Fixes: 86f162e73d2d ("gpio: aggregator: introduce basic configfs interface") Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdVZ=XUvJTGdDAjnkxgtw7Uvnn61iOy3XN_5XNZM2anctw@mail.gmail.com/ Link: https://patch.msgid.link/20260520121631.33976-1-bartosz.golaszewski@oss.qualcomm.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aggregator.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index b53230065f50..a9ad809708fb 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -991,11 +991,15 @@ err_remove_lookups: static void gpio_aggregator_deactivate(struct gpio_aggregator *aggr) { + struct fwnode_handle *swnode; + + swnode = dev_fwnode(&aggr->pdev->dev); platform_device_unregister(aggr->pdev); aggr->pdev = NULL; gpiod_remove_lookup_table(aggr->lookups); kfree(aggr->lookups->dev_id); kfree(aggr->lookups); + fwnode_remove_software_node(swnode); } static void gpio_aggregator_lockup_configfs(struct gpio_aggregator *aggr, -- cgit v1.2.3 From 2b98b990404700297af4ef1ca00041a7cefddaa0 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 20 May 2026 17:00:01 +0300 Subject: MAINTAINERS: ASoC: Intel/SOF: Remove Ranjani Sridharan as maintainer Ranjani no longer works on Intel/SOF audio drivers and her email address now bounce due to her departure from Intel. Unfortunately, she was not able to send the removal mail by herself. Thanks for the years of work and dedication, Ranjani! Signed-off-by: Peter Ujfalusi Reviewed-by: Bard Liao Reviewed-by: Guennadi Liakhovetski Reviewed-by: Liam Girdwood Reviewed-by: Kai Vehmanen Reviewed-by: Jyri Sarha Link: https://patch.msgid.link/20260520140001.1375-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c9495b60f31d..6e1ab293f948 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12782,7 +12782,6 @@ M: Cezary Rojewski M: Liam Girdwood M: Peter Ujfalusi M: Bard Liao -M: Ranjani Sridharan M: Kai Vehmanen R: Pierre-Louis Bossart L: linux-sound@vger.kernel.org @@ -25057,7 +25056,6 @@ SOUND - SOUND OPEN FIRMWARE (SOF) DRIVERS M: Liam Girdwood M: Peter Ujfalusi M: Bard Liao -M: Ranjani Sridharan M: Daniel Baluta R: Kai Vehmanen R: Pierre-Louis Bossart -- cgit v1.2.3 From a4f0b001782b21663d10df983b4b208195bec66c Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 18 May 2026 11:06:55 +0200 Subject: vsock/virtio: reset connection on receiving queue overflow When there is no more space to queue an incoming packet, the packet is silently dropped. This causes data loss without any notification to either peer, since there is no retransmission. Under normal circumstances, this should never happen. However, it could happen if the other peer doesn't respect the credit, or if the skb overhead, which we recently began to take into account with commit 059b7dbd20a6 ("vsock/virtio: fix potential unbounded skb queue"), is too high. Fix this by resetting the connection and setting the local socket error to ENOBUFS when virtio_transport_recv_enqueue() can no longer queue a packet, so both peers are explicitly notified of the failure rather than silently losing data. Fixes: ae6fcfbf5f03 ("vsock/virtio: discard packets if credit is not respected") Cc: stable@vger.kernel.org Signed-off-by: Stefano Garzarella Link: https://patch.msgid.link/20260518090656.134588-2-sgarzare@redhat.com Signed-off-by: Paolo Abeni --- net/vmw_vsock/virtio_transport_common.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 1e3409d28164..5028ff534888 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1335,7 +1335,7 @@ destroy: return err; } -static void +static bool virtio_transport_recv_enqueue(struct vsock_sock *vsk, struct sk_buff *skb) { @@ -1350,10 +1350,8 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, spin_lock_bh(&vvs->rx_lock); can_enqueue = virtio_transport_inc_rx_pkt(vvs, len); - if (!can_enqueue) { - free_pkt = true; + if (!can_enqueue) goto out; - } if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) vvs->msg_count++; @@ -1393,6 +1391,8 @@ out: spin_unlock_bh(&vvs->rx_lock); if (free_pkt) kfree_skb(skb); + + return can_enqueue; } static int @@ -1405,7 +1405,17 @@ virtio_transport_recv_connected(struct sock *sk, switch (le16_to_cpu(hdr->op)) { case VIRTIO_VSOCK_OP_RW: - virtio_transport_recv_enqueue(vsk, skb); + if (!virtio_transport_recv_enqueue(vsk, skb)) { + /* There is no more space to queue the packet, so let's + * close the connection; otherwise, we'll lose data. + */ + (void)virtio_transport_reset(vsk, skb); + virtio_transport_do_close(vsk, true); + sk->sk_err = ENOBUFS; + sk_error_report(sk); + vsock_remove_sock(vsk); + break; + } vsock_data_ready(sk); return err; case VIRTIO_VSOCK_OP_CREDIT_REQUEST: -- cgit v1.2.3 From c6087c5aaad6d1b8be1a1a641e0a422218ade911 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 18 May 2026 11:06:56 +0200 Subject: vsock/virtio: fix skb overhead accounting to preserve full buf_alloc After commit 059b7dbd20a6 ("vsock/virtio: fix potential unbounded skb queue"), virtio_transport_inc_rx_pkt() subtracts per-skb overhead from buf_alloc when checking whether a new packet fits. This reduces the effective receive buffer below what the user configured via SO_VM_SOCKETS_BUFFER_SIZE, causing legitimate data packets to be silently dropped and applications that rely on the full buffer size to deadlock. Also, the reduced space is not communicated to the remote peer, so its credit calculation accounts more credit than the receiver will actually accept, causing data loss (there is no retransmission). With this approach we currently have failures in tools/testing/vsock/vsock_test.c. Test 18 sometimes fails, while test 22 always fails in this way: 18 - SOCK_STREAM MSG_ZEROCOPY...hash mismatch 22 - SOCK_STREAM virtio credit update + SO_RCVLOWAT...send failed: Resource temporarily unavailable Fix by allowing at most `buf_alloc * 2` as the total budget for payload plus skb overhead in virtio_transport_inc_rx_pkt(), similar to how SO_RCVBUF is doubled to reserve space for sk_buff metadata. This preserves the full buf_alloc for payload under normal operation, while still bounding the skb queue growth. With this patch, all tests in tools/testing/vsock/vsock_test.c are now passing again. Fixes: 059b7dbd20a6 ("vsock/virtio: fix potential unbounded skb queue") Cc: stable@vger.kernel.org Signed-off-by: Stefano Garzarella Link: https://patch.msgid.link/20260518090656.134588-3-sgarzare@redhat.com Signed-off-by: Paolo Abeni --- net/vmw_vsock/virtio_transport_common.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 5028ff534888..df3b418e0392 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -419,7 +419,14 @@ static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, { u64 skb_overhead = (skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0); - if (skb_overhead + vvs->buf_used + len > vvs->buf_alloc) + /* Allow at most buf_alloc * 2 total budget (payload + overhead), + * similar to how SO_RCVBUF is doubled to reserve space for sk_buff + * metadata. Check payload against buf_alloc to be sure the other + * peer is respecting the credit, and sk_buff overhead to bound + * queue growth. + */ + if ((u64)vvs->buf_used + len > vvs->buf_alloc || + skb_overhead > vvs->buf_alloc) return false; vvs->rx_bytes += len; -- cgit v1.2.3 From c69439a891ccb37ede5d68539636337c6bd92fab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 May 2026 08:02:05 +0200 Subject: xfs: fix a buffer lookup against removal race When a buffer is freed either by LRU eviction or because it is unset, the lockref is marked as dead instantly, which prevents the buffer from being used after finding it in the buffer hash in xfs_buf_lookup and xfs_buf_find_insert. But the latter will then not add the new buffer to the hash because it already found an existing buffer. Fix this using in two places: Remove the buffer from the hash before marking the lockref dead so that that no buffer with a dead lockref can be found in the hash, but if we find one in xfs_buf_find_insert due to store reordering, handle this case correctly instead of returning an unhashed buffer. Fixes: 67fe4303972e ("xfs: don't keep a reference for buffers on the LRU") Reported-by: Andrey Albershteyn Reported-by: Carlos Maiolino Signed-off-by: Christoph Hellwig Reviewed-by: Andrey Albershteyn Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 580d40a5ee57..0cea458f1353 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -472,6 +472,7 @@ xfs_buf_find_insert( /* The new buffer keeps the perag reference until it is freed. */ new_bp->b_pag = pag; +retry: rcu_read_lock(); bp = rhashtable_lookup_get_insert_fast(&btp->bt_hash, &new_bp->b_rhash_head, xfs_buf_hash_params); @@ -480,8 +481,16 @@ xfs_buf_find_insert( error = PTR_ERR(bp); goto out_free_buf; } - if (bp && lockref_get_not_dead(&bp->b_lockref)) { - /* found an existing buffer */ + if (bp) { + /* + * If there is an existing buffer with a dead lockref, retry + * until the new buffer is added, or a usable buffer is found. + */ + if (!lockref_get_not_dead(&bp->b_lockref)) { + rcu_read_unlock(); + cpu_relax(); + goto retry; + } rcu_read_unlock(); error = xfs_buf_find_lock(bp, flags); if (error) @@ -820,15 +829,20 @@ xfs_buf_destroy( ASSERT(__lockref_is_dead(&bp->b_lockref)); ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); + if (bp->b_pag) + xfs_perag_put(bp->b_pag); + xfs_buf_free(bp); +} + +static inline void +xfs_buf_kill( + struct xfs_buf *bp) +{ + lockref_mark_dead(&bp->b_lockref); if (!xfs_buf_is_uncached(bp)) { rhashtable_remove_fast(&bp->b_target->bt_hash, &bp->b_rhash_head, xfs_buf_hash_params); - - if (bp->b_pag) - xfs_perag_put(bp->b_pag); } - - xfs_buf_free(bp); } /* @@ -851,7 +865,7 @@ xfs_buf_rele( return; kill: - lockref_mark_dead(&bp->b_lockref); + xfs_buf_kill(bp); list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru); spin_unlock(&bp->b_lockref.lock); @@ -1433,7 +1447,7 @@ xfs_buftarg_drain_rele( return LRU_SKIP; } - lockref_mark_dead(&bp->b_lockref); + xfs_buf_kill(bp); list_lru_isolate_move(lru, item, dispose); spin_unlock(&bp->b_lockref.lock); return LRU_REMOVED; @@ -1545,7 +1559,7 @@ xfs_buftarg_isolate( return LRU_ROTATE; } - lockref_mark_dead(&bp->b_lockref); + xfs_buf_kill(bp); list_lru_isolate_move(lru, item, dispose); spin_unlock(&bp->b_lockref.lock); return LRU_REMOVED; -- cgit v1.2.3 From a254b6d13b0edd6272926674d2afc46d46e496b7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 20 May 2026 22:08:01 -0400 Subject: ring-buffer: Fix reporting of missed events in iterator When tracing is active while reading the trace file, if the iterator reading the buffer detects that the writer has passed the iterator head, it will reset and set a "missed events" flag. This flag is passed to the output processing to show the user that events were missed: CPU:4 [LOST EVENTS] The problem is that the flag is reset after it is checked in ring_buffer_iter_dropped(). But the "trace" file iterates over all the CPU ring buffers and it will check if they are dropped when figuring out which buffer to print next. This prematurely clears the missed_events flag if the CPU buffer with the missed events is not the one that is printed next. On the iteration where the CPU buffer with the missed events is printed, the check if it had missed events would return false and the output does not show that events were missed. Do not reset the missed_events flag when checking if there were missed events, but instead clear it when moving the iterator head to the next event. Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260520220801.4fd09d13@fedora Fixes: c9b7a4a72ff64 ("ring-buffer/tracing: Have iterator acknowledge dropped events") Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 5326924615a4..fcd93d49851e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -5407,6 +5407,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) iter->head_page = cpu_buffer->reader_page; iter->head = cpu_buffer->reader_page->read; iter->next_event = iter->head; + iter->missed_events = 0; iter->cache_reader_page = iter->head_page; iter->cache_read = cpu_buffer->read; @@ -6086,10 +6087,7 @@ ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, */ bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter) { - bool ret = iter->missed_events != 0; - - iter->missed_events = 0; - return ret; + return iter->missed_events != 0; } EXPORT_SYMBOL_GPL(ring_buffer_iter_dropped); @@ -6251,7 +6249,7 @@ void ring_buffer_iter_advance(struct ring_buffer_iter *iter) unsigned long flags; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - + iter->missed_events = 0; rb_advance_iter(iter); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); -- cgit v1.2.3 From a494d3c8d5392bcdff83c2a593df0c160ff9f322 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Thu, 30 Apr 2026 12:28:16 +0900 Subject: ring-buffer: Flush and stop persistent ring buffer on panic On real hardware, panic and machine reboot may not flush hardware cache to memory. This means the persistent ring buffer, which relies on a coherent state of memory, may not have its events written to the buffer and they may be lost. Moreover, there may be inconsistency with the counters which are used for validation of the integrity of the persistent ring buffer which may cause all data to be discarded. To avoid this issue, stop recording of the ring buffer on panic and flush the cache of the ring buffer's memory. Fixes: e645535a954a ("tracing: Add option to use memmapped memory for trace boot instance") Cc: stable@vger.kernel.org Cc: Will Deacon Cc: Mathieu Desnoyers Cc: Ian Rogers Link: https://patch.msgid.link/177751969602.2136606.12031934362587643488.stgit@mhiramat.tok.corp.google.com Signed-off-by: Masami Hiramatsu (Google) Acked-by: Catalin Marinas Acked-by: Geert Uytterhoeven Signed-off-by: Steven Rostedt --- arch/alpha/include/asm/Kbuild | 1 + arch/arc/include/asm/Kbuild | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm64/include/asm/ring_buffer.h | 10 ++++++++++ arch/csky/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/loongarch/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/nios2/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/riscv/include/asm/Kbuild | 1 + arch/s390/include/asm/Kbuild | 1 + arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 1 + arch/x86/include/asm/Kbuild | 1 + arch/xtensa/include/asm/Kbuild | 1 + include/asm-generic/ring_buffer.h | 13 +++++++++++++ kernel/trace/ring_buffer.c | 22 ++++++++++++++++++++++ 23 files changed, 65 insertions(+) create mode 100644 arch/arm64/include/asm/ring_buffer.h create mode 100644 include/asm-generic/ring_buffer.h diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 483965c5a4de..b154b4e3dfa8 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -5,4 +5,5 @@ generic-y += agp.h generic-y += asm-offsets.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += ring_buffer.h generic-y += text-patching.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 4c69522e0328..483caacc6988 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -5,5 +5,6 @@ generic-y += extable.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += parport.h +generic-y += ring_buffer.h generic-y += user.h generic-y += text-patching.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 03657ff8fbe3..decad5f2c826 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += early_ioremap.h generic-y += extable.h generic-y += flat.h generic-y += parport.h +generic-y += ring_buffer.h generated-y += mach-types.h generated-y += unistd-nr.h diff --git a/arch/arm64/include/asm/ring_buffer.h b/arch/arm64/include/asm/ring_buffer.h new file mode 100644 index 000000000000..62316c406888 --- /dev/null +++ b/arch/arm64/include/asm/ring_buffer.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_ARM64_RING_BUFFER_H +#define _ASM_ARM64_RING_BUFFER_H + +#include + +/* Flush D-cache on persistent ring buffer */ +#define arch_ring_buffer_flush_range(start, end) dcache_clean_pop(start, end) + +#endif /* _ASM_ARM64_RING_BUFFER_H */ diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild index 3a5c7f6e5aac..7dca0c6cdc84 100644 --- a/arch/csky/include/asm/Kbuild +++ b/arch/csky/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += qrwlock.h generic-y += qrwlock_types.h generic-y += qspinlock.h generic-y += parport.h +generic-y += ring_buffer.h generic-y += user.h generic-y += vmlinux.lds.h generic-y += text-patching.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 1efa1e993d4b..0f887d4238ed 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -5,4 +5,5 @@ generic-y += extable.h generic-y += iomap.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += ring_buffer.h generic-y += text-patching.h diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 9034b583a88a..7e92957baf6a 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -10,5 +10,6 @@ generic-y += qrwlock.h generic-y += user.h generic-y += ioctl.h generic-y += mmzone.h +generic-y += ring_buffer.h generic-y += statfs.h generic-y += text-patching.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index b282e0dd8dc1..62543bf305ff 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -3,5 +3,6 @@ generated-y += syscall_table.h generic-y += extable.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += ring_buffer.h generic-y += spinlock.h generic-y += text-patching.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 7178f990e8b3..0030309b47ad 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += extable.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += parport.h +generic-y += ring_buffer.h generic-y += syscalls.h generic-y += tlb.h generic-y += user.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 684569b2ecd6..9771c3d85074 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -12,5 +12,6 @@ generic-y += mcs_spinlock.h generic-y += parport.h generic-y += qrwlock.h generic-y += qspinlock.h +generic-y += ring_buffer.h generic-y += user.h generic-y += text-patching.h diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index 28004301c236..0a2530964413 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += cmpxchg.h generic-y += extable.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += ring_buffer.h generic-y += spinlock.h generic-y += user.h generic-y += text-patching.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index cef49d60d74c..8aa34621702d 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -8,4 +8,5 @@ generic-y += spinlock_types.h generic-y += spinlock.h generic-y += qrwlock_types.h generic-y += qrwlock.h +generic-y += ring_buffer.h generic-y += user.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 4fb596d94c89..d48d158f7241 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -4,4 +4,5 @@ generated-y += syscall_table_64.h generic-y += agp.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += ring_buffer.h generic-y += user.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 2e23533b67e3..805b5aeebb6f 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -5,4 +5,5 @@ generated-y += syscall_table_spu.h generic-y += agp.h generic-y += mcs_spinlock.h generic-y += qrwlock.h +generic-y += ring_buffer.h generic-y += early_ioremap.h diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index bd5fc9403295..7721b63642f4 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -14,5 +14,6 @@ generic-y += ticket_spinlock.h generic-y += qrwlock.h generic-y += qrwlock_types.h generic-y += qspinlock.h +generic-y += ring_buffer.h generic-y += user.h generic-y += vmlinux.lds.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 80bad7de7a04..0c1fc47c3ba0 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -7,3 +7,4 @@ generated-y += unistd_nr.h generic-y += asm-offsets.h generic-y += mcs_spinlock.h generic-y += mmzone.h +generic-y += ring_buffer.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 4d3f10ed8275..f0403d3ee8ab 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -3,4 +3,5 @@ generated-y += syscall_table.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += parport.h +generic-y += ring_buffer.h generic-y += text-patching.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 17ee8a273aa6..49c6bb326b75 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -4,4 +4,5 @@ generated-y += syscall_table_64.h generic-y += agp.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += ring_buffer.h generic-y += text-patching.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 1b9b82bbe322..2a1629ba8140 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -17,6 +17,7 @@ generic-y += module.lds.h generic-y += parport.h generic-y += percpu.h generic-y += preempt.h +generic-y += ring_buffer.h generic-y += runtime-const.h generic-y += softirq_stack.h generic-y += switch_to.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4566000e15c4..078fd2c0d69d 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -14,3 +14,4 @@ generic-y += early_ioremap.h generic-y += fprobe.h generic-y += mcs_spinlock.h generic-y += mmzone.h +generic-y += ring_buffer.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 13fe45dea296..e57af619263a 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -6,5 +6,6 @@ generic-y += mcs_spinlock.h generic-y += parport.h generic-y += qrwlock.h generic-y += qspinlock.h +generic-y += ring_buffer.h generic-y += user.h generic-y += text-patching.h diff --git a/include/asm-generic/ring_buffer.h b/include/asm-generic/ring_buffer.h new file mode 100644 index 000000000000..201d2aee1005 --- /dev/null +++ b/include/asm-generic/ring_buffer.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Generic arch dependent ring_buffer macros. + */ +#ifndef __ASM_GENERIC_RING_BUFFER_H__ +#define __ASM_GENERIC_RING_BUFFER_H__ + +#include + +/* Flush cache on ring buffer range if needed. Do nothing by default. */ +#define arch_ring_buffer_flush_range(start, end) do { } while (0) + +#endif /* __ASM_GENERIC_RING_BUFFER_H__ */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index fcd93d49851e..7b07d2004cc6 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ #include #include +#include #include #include #include @@ -559,6 +561,7 @@ struct trace_buffer { unsigned long range_addr_start; unsigned long range_addr_end; + struct notifier_block flush_nb; struct ring_buffer_meta *meta; @@ -2521,6 +2524,16 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) kfree(cpu_buffer); } +/* Stop recording on a persistent buffer and flush cache if needed. */ +static int rb_flush_buffer_cb(struct notifier_block *nb, unsigned long event, void *data) +{ + struct trace_buffer *buffer = container_of(nb, struct trace_buffer, flush_nb); + + ring_buffer_record_off(buffer); + arch_ring_buffer_flush_range(buffer->range_addr_start, buffer->range_addr_end); + return NOTIFY_DONE; +} + static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags, int order, unsigned long start, unsigned long end, @@ -2651,6 +2664,12 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags, mutex_init(&buffer->mutex); + /* Persistent ring buffer needs to flush cache before reboot. */ + if (start && end) { + buffer->flush_nb.notifier_call = rb_flush_buffer_cb; + atomic_notifier_chain_register(&panic_notifier_list, &buffer->flush_nb); + } + return_ptr(buffer); fail_free_buffers: @@ -2749,6 +2768,9 @@ ring_buffer_free(struct trace_buffer *buffer) { int cpu; + if (buffer->range_addr_start && buffer->range_addr_end) + atomic_notifier_chain_unregister(&panic_notifier_list, &buffer->flush_nb); + cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); irq_work_sync(&buffer->irq_work.work); -- cgit v1.2.3 From c2d2856cf6c9efccdf5e0d2564162ec616ce58cf Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 12 May 2026 14:54:20 +0100 Subject: tracing: Fix nr_subbufs initialization in simple_ring_buffer_init_mm() nr_subbufs in the ring buffer metadata is always initialized to zero because it is assigned from cpu_buffer->nr_pages before the page initialization loop has run. While nr_subbufs is not currently read by the kernel, it should reflect the actual buffer geometry in the meta page for correctness. Move the assignment after the page loop so that cpu_buffer->nr_pages holds the final count. Link: https://patch.msgid.link/20260512135420.99194-1-devnexen@gmail.com Fixes: 34e5b958bdad ("tracing: Introduce simple_ring_buffer") Reviewed-by: Vincent Donnefort Assisted-by: Claude:claude-opus-4-7 Signed-off-by: David Carlier Signed-off-by: Steven Rostedt --- kernel/trace/simple_ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/simple_ring_buffer.c b/kernel/trace/simple_ring_buffer.c index 02af2297ae5a..f731f14d0ff7 100644 --- a/kernel/trace/simple_ring_buffer.c +++ b/kernel/trace/simple_ring_buffer.c @@ -395,7 +395,6 @@ int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer, memset(cpu_buffer->meta, 0, sizeof(*cpu_buffer->meta)); cpu_buffer->meta->meta_page_size = PAGE_SIZE; - cpu_buffer->meta->nr_subbufs = cpu_buffer->nr_pages; /* The reader page is not part of the ring initially */ page = load_page(desc->page_va[0]); @@ -437,6 +436,7 @@ int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer, return ret; } + cpu_buffer->meta->nr_subbufs = cpu_buffer->nr_pages; /* Close the ring */ bpage->link.next = &cpu_buffer->tail_page->link; cpu_buffer->tail_page->link.prev = &bpage->link; -- cgit v1.2.3 From a0a2f42a37f90b29d8c43374dd9c8bd2f3e7bdcc Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Tue, 12 May 2026 15:16:14 +0100 Subject: tracing: Fix unload_page for simple_ring_buffer init rollback The unload_page callback expects the return value of load_page() as its argument: ret = load_page(va); unload(ret). Fix the rollback code in simple_ring_buffer_init_mm() where the descriptor's VA is used instead of the loaded page address. Link: https://patch.msgid.link/20260512141614.1759430-1-vdonnefort@google.com Fixes: 635923081c79 ("tracing: load/unload page callbacks for simple_ring_buffer") Signed-off-by: Vincent Donnefort Signed-off-by: Steven Rostedt --- kernel/trace/simple_ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/simple_ring_buffer.c b/kernel/trace/simple_ring_buffer.c index f731f14d0ff7..f4642f5adda3 100644 --- a/kernel/trace/simple_ring_buffer.c +++ b/kernel/trace/simple_ring_buffer.c @@ -430,7 +430,7 @@ int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer, if (ret) { for (i--; i >= 0; i--) - unload_page((void *)desc->page_va[i]); + unload_page(bpages[i].page); unload_page(cpu_buffer->meta); return ret; -- cgit v1.2.3 From 057caace5214da3b457bbd295e1a2ad34d3685ea Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 20 May 2026 20:01:55 +0200 Subject: tracing: Create output file from cmd_check_undefined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the output file is currently never created, the check will run every time, even if the inputs have not changed. Create an empty output file which allows make to skip the execution when it is not necessary. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Vincent Donnefort Cc: Marc Zyngier Cc: Arnd Bergmann Link: https://patch.msgid.link/20260520-tracing-ringbuffer-check-v1-1-d979cfab1338@weissschuh.net Fixes: 1211907ac0b5 ("tracing: Generate undef symbols allowlist for simple_ring_buffer") Fixes: 58b4bd18390e ("tracing: Adjust cmd_check_undefined to show unexpected undefined symbols") Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Thomas Weißschuh Signed-off-by: Steven Rostedt --- kernel/trace/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 9b0834134cae..8d3d96e847d8 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -154,7 +154,8 @@ quiet_cmd_check_undefined = NM $< echo "Unexpected symbols in $<:" >&2; \ echo "$$undefsyms" >&2; \ false; \ - fi + fi; \ + touch $@ $(obj)/%.o.checked: $(obj)/%.o $(obj)/undefsyms_base.o FORCE $(call if_changed,check_undefined) -- cgit v1.2.3 From e70ae40d6660c5428c790c329318c570b4d038ab Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 18 May 2026 11:53:17 +0200 Subject: gpio: sim: lock device when calling device_is_bound() The kerneldoc for device_is_bound() says it must be called with the device lock taken. Add missing synchronization to this driver. Fixes: 7fb3287946f9 ("gpio: sim: stop using dev-sync-probe") Link: https://patch.msgid.link/20260518-gpio-dev-lock-v1-1-cc4736f3ff0b@oss.qualcomm.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-sim.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index e19701c2ed67..0da2c5a45843 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -901,7 +901,7 @@ static int gpio_sim_device_activate(struct gpio_sim_device *dev) struct platform_device *pdev; struct fwnode_handle *swnode; struct gpio_sim_bank *bank; - int ret; + int ret = 0; lockdep_assert_held(&dev->lock); @@ -945,9 +945,12 @@ static int gpio_sim_device_activate(struct gpio_sim_device *dev) } wait_for_device_probe(); - if (!device_is_bound(&pdev->dev)) { - ret = -ENXIO; - goto err_unregister_pdev; + + scoped_guard(device, &pdev->dev) { + if (!device_is_bound(&pdev->dev)) { + ret = -ENXIO; + goto err_unregister_pdev; + } } dev->pdev = pdev; -- cgit v1.2.3 From 598a2b3e2e0e6aa2e9f7843c96c45b5ea11e0411 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 18 May 2026 11:53:18 +0200 Subject: gpio: aggregator: lock device when calling device_is_bound() The kerneldoc for device_is_bound() says it must be called with the device lock taken. Add missing synchronization to this driver. Fixes: 3a27f40b4570 ("gpio: aggregator: stop using dev-sync-probe") Link: https://patch.msgid.link/20260518-gpio-dev-lock-v1-2-cc4736f3ff0b@oss.qualcomm.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aggregator.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index a9ad809708fb..bc6699a821ee 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -968,9 +968,12 @@ static int gpio_aggregator_activate(struct gpio_aggregator *aggr) } wait_for_device_probe(); - if (!device_is_bound(&pdev->dev)) { - ret = -ENXIO; - goto err_unregister_pdev; + + scoped_guard(device, &pdev->dev) { + if (!device_is_bound(&pdev->dev)) { + ret = -ENXIO; + goto err_unregister_pdev; + } } aggr->pdev = pdev; -- cgit v1.2.3 From a4fa45c1d980bc2b9837f469119af24a9304a1fc Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 18 May 2026 11:53:19 +0200 Subject: gpio: virtuser: lock device when calling device_is_bound() The kerneldoc for device_is_bound() says it must be called with the device lock taken. Add missing synchronization to this driver. Fixes: c3e2a8aef28c ("gpio: virtuser: stop using dev-sync-probe") Link: https://patch.msgid.link/20260518-gpio-dev-lock-v1-3-cc4736f3ff0b@oss.qualcomm.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-virtuser.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-virtuser.c b/drivers/gpio/gpio-virtuser.c index fe0eac920ced..128520d340d4 100644 --- a/drivers/gpio/gpio-virtuser.c +++ b/drivers/gpio/gpio-virtuser.c @@ -1477,9 +1477,12 @@ gpio_virtuser_device_activate(struct gpio_virtuser_device *dev) } wait_for_device_probe(); - if (!device_is_bound(&pdev->dev)) { - ret = -ENXIO; - goto err_unregister_pdev; + + scoped_guard(device, &pdev->dev) { + if (!device_is_bound(&pdev->dev)) { + ret = -ENXIO; + goto err_unregister_pdev; + } } dev->pdev = pdev; -- cgit v1.2.3 From 03d8273542146f228c0019f08b57545fdee79704 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Thu, 21 May 2026 20:58:36 +0800 Subject: efi/loongarch: Randomize kernel preferred address for KASLR Introduce efi_get_kimg_kaslr_address() helper to compute the preferred kernel image load address dynamically when CONFIG_RANDOMIZE_BASE is enabled. The function derives a random offset by using the EFI-provided randomness combined with the timer tick value, and constrains it within CONFIG_RANDOMIZE_BASE_MAX_OFFSET. Update EFI_KIMG_PREFERRED_ADDRESS to call this helper so that the EFI stub can select a randomized load address when KASLR is active, while preserving the original base address behavior when KASLR is disabled or "nokaslr" is specified. Note: LoongArch can't KASLR for hibernation, so set efi_nokaslr to true if "resume=" is explicitly specified in cmdline. Acked-by: Ard Biesheuvel Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/efi.h | 4 +++- drivers/firmware/efi/libstub/efi-stub-helper.c | 4 ++++ drivers/firmware/efi/libstub/loongarch.c | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h index eddc8e79b3fa..1ad764b18c3e 100644 --- a/arch/loongarch/include/asm/efi.h +++ b/arch/loongarch/include/asm/efi.h @@ -30,6 +30,8 @@ static inline unsigned long efi_get_kimg_min_align(void) return SZ_2M; } -#define EFI_KIMG_PREFERRED_ADDRESS PHYSADDR(VMLINUX_LOAD_ADDRESS) +unsigned long efi_get_kimg_kaslr_address(void); + +#define EFI_KIMG_PREFERRED_ADDRESS efi_get_kimg_kaslr_address() #endif /* _ASM_LOONGARCH_EFI_H */ diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 7aa2f9ad2935..f27f2e1f0019 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -79,6 +79,10 @@ efi_status_t efi_parse_options(char const *cmdline) efi_noinitrd = true; } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) { efi_no5lvl = true; + } else if (IS_ENABLED(CONFIG_LOONGARCH) && + IS_ENABLED(CONFIG_HIBERNATION) && + !strcmp(param, "resume") && val) { + efi_nokaslr = true; /* LoongArch can't KASLR for hibernation */ } else if (IS_ENABLED(CONFIG_ARCH_HAS_MEM_ENCRYPT) && !strcmp(param, "mem_encrypt") && val) { if (parse_option_str(val, "on")) diff --git a/drivers/firmware/efi/libstub/loongarch.c b/drivers/firmware/efi/libstub/loongarch.c index f7938d5c196a..2b0c87dc9908 100644 --- a/drivers/firmware/efi/libstub/loongarch.c +++ b/drivers/firmware/efi/libstub/loongarch.c @@ -23,6 +23,22 @@ void efi_cache_sync_image(unsigned long image_base, unsigned long alloc_size) asm volatile ("ibar 0" ::: "memory"); } +unsigned long efi_get_kimg_kaslr_address(void) +{ + unsigned int random_offset = 0; + +#ifdef CONFIG_RANDOMIZE_BASE + if (!efi_nokaslr) { + efi_get_random_bytes(sizeof(random_offset), (u8 *)&random_offset); + random_offset ^= (random_get_entropy() << 16); + random_offset &= (CONFIG_RANDOMIZE_BASE_MAX_OFFSET - 1); + random_offset = ALIGN(random_offset + SZ_64K, SZ_64K); + } +#endif + + return PHYSADDR(VMLINUX_LOAD_ADDRESS) + random_offset; +} + struct exit_boot_struct { efi_memory_desc_t *runtime_map; int runtime_entry_count; -- cgit v1.2.3 From 08ade00fbb088b8f5a1af706ee970c26cf842bf0 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Thu, 21 May 2026 20:58:36 +0800 Subject: LoongArch: Skip relocation-time KASLR if already applied When the kernel is relocated during early boot (efistub or kexec_file), a randomized load address may has already been selected and applied. In this case, performing KASLR again in relocate.c is unnecessary. Note: strictly-defined KASLR means the kernel's final runtime address has a random offset from the kernel's load address, which is implemented in relocate.c; broadly-defined KALSR means the kernel's final runtime address has a random offset from the kernel's link address (a.k.a. VMLINUX_LOAD_ADDRESS), which also include the efistlub implementation, kexec_file implementation and QEMU direct kernel boot. kaslr_disabled() return true only means strictly-defined KASLR is disabled. Acked-by: Ard Biesheuvel Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/kernel/relocate.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index 16f6a9b39659..0a045964fad5 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -134,11 +134,23 @@ early_param("nokaslr", nokaslr); #define KASLR_DISABLED_MESSAGE "KASLR is disabled by %s in %s cmdline.\n" +/* + * Note: strictly-defined KASLR means the kernel's final runtime address + * has a random offset from the kernel's load address, which is implemented + * in relocate.c; broadly-defined KALSR means the kernel's final runtime + * address has a random offset from the kernel's link address (a.k.a. + * VMLINUX_LOAD_ADDRESS), which also include the efistlub implementation, + * kexec_file implementation and QEMU direct kernel boot. kaslr_disabled() + * return true only means strictly-defined KASLR is disabled. + */ static inline __init bool kaslr_disabled(void) { char *str; const char *builtin_cmdline = CONFIG_CMDLINE; + if (kaslr_offset()) + return true; /* KASLR is performed during early boot. */ + str = strstr(builtin_cmdline, "nokaslr"); if (str == builtin_cmdline || (str > builtin_cmdline && *(str - 1) == ' ')) { pr_info(KASLR_DISABLED_MESSAGE, "\'nokaslr\'", "built-in"); -- cgit v1.2.3 From 5b710aa89343c5217889b7788e279565b36e0e5e Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Thu, 21 May 2026 20:58:36 +0800 Subject: LoongArch: Avoid initrd overlap during kernel relocation Validate the relocation address against the initrd region specified via "initrd=" or "initrdmem=" on the command line. Reject relocation targets that overlap the initrd to prevent memory corruption during early boot. Acked-by: Ard Biesheuvel Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/kernel/relocate.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index 0a045964fad5..4b61a9632a98 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -222,14 +222,52 @@ static inline void __init *determine_relocation_address(void) return RELOCATED_KASLR(destination); } +static unsigned long __init determine_initrd_address(unsigned long *size) +{ + unsigned long start = 0; + unsigned long key_length; + char *p, *endp, *key = "initrd="; + + key_length = strlen(key); + p = strstr(boot_command_line, key); + + if (!p) { + key = "initrdmem="; + key_length = strlen(key); + p = strstr(boot_command_line, key); + } + + if (p == boot_command_line || (p > boot_command_line && *(p - 1) == ' ')) { + p += key_length; + start = memparse(p, &endp); + if (*endp == ',') + *size = memparse(endp + 1, NULL); + } + + return start; +} + static inline int __init relocation_addr_valid(void *location_new) { + unsigned long kernel_start, kernel_size; + unsigned long initrd_start, initrd_size = 0; + if ((unsigned long)location_new & 0x00000ffff) return 0; /* Inappropriately aligned new location */ if ((unsigned long)location_new < (unsigned long)_end) return 0; /* New location overlaps original kernel */ + initrd_start = determine_initrd_address(&initrd_size); + if (initrd_start && initrd_size) { + kernel_start = PHYSADDR(location_new); + kernel_size = (unsigned long)_end - (unsigned long)_text; + + if (kernel_start < (initrd_start + initrd_size) && + initrd_start < (kernel_start + kernel_size)) + return 0; /* initrd/initramfs overlaps kernel */ + } + return 1; } #endif -- cgit v1.2.3 From 0ccc9d47cf020994097ff51827cebd04aa2b0bf4 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 21 May 2026 20:58:40 +0800 Subject: LoongArch: Remove unused code to avoid build warning After commit feee6b2989165631b1 ("mm/memory_hotplug: shrink zones when offlining memory"), __remove_pages() doesn't need the "zone" parameter so the "page" variable is also unused. Remove the unused code to avoid such build warning: arch/loongarch/mm/init.c: In function 'arch_remove_memory': arch/loongarch/mm/init.c:134:22: warning: variable 'page' set but not used [-Wunused-but-set-variable=] 134 | struct page *page = pfn_to_page(start_pfn); Cc: Reviewed-by: Guo Ren Signed-off-by: Huacai Chen --- arch/loongarch/mm/init.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 3f9ab54114c5..031b39eb081c 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -123,11 +123,7 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct page *page = pfn_to_page(start_pfn); - /* With altmap the first mapped page is offset from @start */ - if (altmap) - page += vmem_altmap_offset(altmap); __remove_pages(start_pfn, nr_pages, altmap); } #endif -- cgit v1.2.3 From 18e7bd9f2446664053f8c34b72abd4606d22d858 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 21 May 2026 13:30:57 +0100 Subject: ASoC: cs35l56: Fix flushing of IRQ work in cs35l56_sdw_remove() Use flush_work() instead of cancel_work_sync() to terminate pending IRQ work in cs35l56_sdw_remove(). And flush_work() again after masking the interrupts to flush any queueing that was racing with the masking. This is the same sequence as cs35l56_sdw_system_suspend(). cs35l56_sdw_interrupt() takes the pm_runtime to prevent the bus powering- down before the interrupt status can be read and handled. The work releases this pm_runtime. So cancelling it, instead of flushing, could leave an unbalanced pm_runtime. Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://patch.msgid.link/20260521123057.988732-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-sdw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c index d344217de7aa..88e0aac540d6 100644 --- a/sound/soc/codecs/cs35l56-sdw.c +++ b/sound/soc/codecs/cs35l56-sdw.c @@ -585,10 +585,11 @@ static void cs35l56_sdw_remove(struct sdw_slave *peripheral) /* Disable SoundWire interrupts */ cs35l56->sdw_irq_no_unmask = true; - cancel_work_sync(&cs35l56->sdw_irq_work); + flush_work(&cs35l56->sdw_irq_work); sdw_write_no_pm(peripheral, CS35L56_SDW_GEN_INT_MASK_1, 0); sdw_read_no_pm(peripheral, CS35L56_SDW_GEN_INT_STAT_1); sdw_write_no_pm(peripheral, CS35L56_SDW_GEN_INT_STAT_1, 0xFF); + flush_work(&cs35l56->sdw_irq_work); cs35l56_remove(cs35l56); } -- cgit v1.2.3 From 2e78b21864dd6e21b76160753ea632b5e758fdbd Mon Sep 17 00:00:00 2001 From: Myeonghun Pak Date: Fri, 24 Apr 2026 22:21:31 +0900 Subject: HID: u2fzero: free allocated URB on probe errors u2fzero_fill_in_urb() allocates dev->urb with usb_alloc_urb(), but u2fzero_probe() ignored its return value and only freed the URB from u2fzero_remove(). If LED or hwrng registration fails after the URB allocation, probe returns an error and the driver core does not call .remove(), leaking the URB. A failed URB setup was also allowed to continue probing with an unusable device. Check the URB setup result and add the missing probe-error unwind so the URB is freed before returning from later errors. Signed-off-by: Myeonghun Pak Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-u2fzero.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/hid/hid-u2fzero.c b/drivers/hid/hid-u2fzero.c index 744a91e6e78c..82404b6e2d25 100644 --- a/drivers/hid/hid-u2fzero.c +++ b/drivers/hid/hid-u2fzero.c @@ -341,29 +341,33 @@ static int u2fzero_probe(struct hid_device *hdev, if (ret) return ret; - u2fzero_fill_in_urb(dev); + ret = u2fzero_fill_in_urb(dev); + if (ret) + goto err_hid_hw_stop; dev->present = true; minor = ((struct hidraw *) hdev->hidraw)->minor; ret = u2fzero_init_led(dev, minor); - if (ret) { - hid_hw_stop(hdev); - return ret; - } + if (ret) + goto err_free_urb; hid_info(hdev, "%s LED initialised\n", hw_configs[dev->hw_revision].name); ret = u2fzero_init_hwrng(dev, minor); - if (ret) { - hid_hw_stop(hdev); - return ret; - } + if (ret) + goto err_free_urb; hid_info(hdev, "%s RNG initialised\n", hw_configs[dev->hw_revision].name); return 0; + +err_free_urb: + usb_free_urb(dev->urb); +err_hid_hw_stop: + hid_hw_stop(hdev); + return ret; } static void u2fzero_remove(struct hid_device *hdev) -- cgit v1.2.3 From 6c4e001cb0567b81340270d8fc3d53e39435ae70 Mon Sep 17 00:00:00 2001 From: Kean Ren Date: Thu, 21 May 2026 11:52:27 +0800 Subject: hwmon: (lenovo-ec-sensors): Convert to devm_request_region() Replace manual request_region()/release_region() with devm_request_region(). This lets the device-managed framework handle I/O region lifetime automatically and fixes: - A double release_region() when probe fails after acquiring the I/O region: the probe error path releases it, and then lenovo_ec_init() releases it again on the same error path. - A release-after-use window in lenovo_ec_exit() where release_region() was called before platform_device_unregister(), leaving the hwmon device active with a released I/O region. - Missing release_region() in lenovo_ec_probe() if devm_hwmon_device_register_with_info() fails. Remove all four manual release_region() calls that are now handled automatically and replace request_region with devm_request_region, use dev_err replace pr_err. Also remove the now-unnecessary braces around the single-statement if body. Fixes: 70118f85e6538 ("hwmon: Add EC Chip driver for Lenovo ThinkStation motherboards") Reviewed-by: Mark Pearson Signed-off-by: Kean Ren Link: https://lore.kernel.org/r/20260521035228.533317-2-rh_king@163.com Signed-off-by: Guenter Roeck --- drivers/hwmon/lenovo-ec-sensors.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/hwmon/lenovo-ec-sensors.c b/drivers/hwmon/lenovo-ec-sensors.c index 8681bbf6665b..a16cc5e4053a 100644 --- a/drivers/hwmon/lenovo-ec-sensors.c +++ b/drivers/hwmon/lenovo-ec-sensors.c @@ -519,8 +519,8 @@ static int lenovo_ec_probe(struct platform_device *pdev) if (!ec_data) return -ENOMEM; - if (!request_region(IO_REGION_START, IO_REGION_LENGTH, "LNV-WKS")) { - pr_err(":request fail\n"); + if (!devm_request_region(dev, IO_REGION_START, IO_REGION_LENGTH, "LNV-WKS")) { + dev_err(dev, "Failed to request I/O region\n"); return -EIO; } @@ -540,10 +540,8 @@ static int lenovo_ec_probe(struct platform_device *pdev) if ((inb_p(MCHP_EMI0_EC_DATA_BYTE0) != 'M') && (inb_p(MCHP_EMI0_EC_DATA_BYTE1) != 'C') && (inb_p(MCHP_EMI0_EC_DATA_BYTE2) != 'H') && - (inb_p(MCHP_EMI0_EC_DATA_BYTE3) != 'P')) { - release_region(IO_REGION_START, IO_REGION_LENGTH); + (inb_p(MCHP_EMI0_EC_DATA_BYTE3) != 'P')) return -ENODEV; - } dmi_id = dmi_first_match(thinkstation_dmi_table); @@ -577,7 +575,6 @@ static int lenovo_ec_probe(struct platform_device *pdev) lenovo_ec_chip_info.info = lenovo_ec_hwmon_info_p8; break; default: - release_region(IO_REGION_START, IO_REGION_LENGTH); return -ENODEV; } @@ -606,10 +603,8 @@ static int __init lenovo_ec_init(void) platform_create_bundle(&lenovo_ec_sensors_platform_driver, lenovo_ec_probe, NULL, 0, NULL, 0); - if (IS_ERR(lenovo_ec_sensors_platform_device)) { - release_region(IO_REGION_START, IO_REGION_LENGTH); + if (IS_ERR(lenovo_ec_sensors_platform_device)) return PTR_ERR(lenovo_ec_sensors_platform_device); - } return 0; } @@ -617,7 +612,6 @@ module_init(lenovo_ec_init); static void __exit lenovo_ec_exit(void) { - release_region(IO_REGION_START, IO_REGION_LENGTH); platform_device_unregister(lenovo_ec_sensors_platform_device); platform_driver_unregister(&lenovo_ec_sensors_platform_driver); } -- cgit v1.2.3 From e6056b1f5a38bbe57ccbbba2609efecbd87ae08c Mon Sep 17 00:00:00 2001 From: Kean Ren Date: Thu, 21 May 2026 11:52:28 +0800 Subject: hwmon: (lenovo-ec-sensors): Fix EC "MCHP" signature validation logic The EC signature check uses && instead of || between the four byte comparisons. With &&, the condition is true only when ALL four bytes fail to match simultaneously, meaning the driver accepts a device as a valid Microchip EC if ANY single byte of the 4-byte "MCHP" signature happens to match. Due to short-circuit evaluation, if the first byte reads back as 'M' (0x4D, a very common register value), the remaining three comparisons are skipped entirely and the device is accepted. Change && to || so the check rejects devices that do not fully match the expected EC signature, as originally intended. Fixes: 70118f85e6538 ("hwmon: Add EC Chip driver for Lenovo ThinkStation motherboards") Reviewed-by: Mark Pearson Signed-off-by: Kean Ren Link: https://lore.kernel.org/r/20260521035228.533317-3-rh_king@163.com Signed-off-by: Guenter Roeck --- drivers/hwmon/lenovo-ec-sensors.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/lenovo-ec-sensors.c b/drivers/hwmon/lenovo-ec-sensors.c index a16cc5e4053a..24a182abf9a3 100644 --- a/drivers/hwmon/lenovo-ec-sensors.c +++ b/drivers/hwmon/lenovo-ec-sensors.c @@ -537,9 +537,9 @@ static int lenovo_ec_probe(struct platform_device *pdev) outw_p(MCHP_SING_IDX, MCHP_EMI0_EC_ADDRESS); mutex_unlock(&ec_data->mec_mutex); - if ((inb_p(MCHP_EMI0_EC_DATA_BYTE0) != 'M') && - (inb_p(MCHP_EMI0_EC_DATA_BYTE1) != 'C') && - (inb_p(MCHP_EMI0_EC_DATA_BYTE2) != 'H') && + if ((inb_p(MCHP_EMI0_EC_DATA_BYTE0) != 'M') || + (inb_p(MCHP_EMI0_EC_DATA_BYTE1) != 'C') || + (inb_p(MCHP_EMI0_EC_DATA_BYTE2) != 'H') || (inb_p(MCHP_EMI0_EC_DATA_BYTE3) != 'P')) return -ENODEV; -- cgit v1.2.3 From b86095e3d7dcf2bf80c747349a35912a87a85098 Mon Sep 17 00:00:00 2001 From: Abdurrahman Hussain Date: Fri, 15 May 2026 15:11:47 -0700 Subject: hwmon: (pmbus/adm1266) seed timestamp from the real-time clock adm1266_set_rtc() seeds the chip's SET_RTC register from ktime_get_seconds(), which returns CLOCK_MONOTONIC -- i.e. seconds since the host last booted, not seconds since the Unix epoch. The chip stamps that value into every blackbox record it captures. Userspace reading those timestamps back expects wall-clock seconds: that's what the SET_RTC frame layout documents (datasheet Rev. D, Table 84) and what every other consumer of "seconds since epoch" assumes. Seeding from CLOCK_MONOTONIC gives blackbox records a timestamp that is only meaningful within a single boot of the host and silently resets to small values on every reboot. Switch to ktime_get_real_seconds() so the seed matches what the register is documented to hold. Fixes: 15609d189302 ("hwmon: (pmbus/adm1266) read blackbox") Cc: stable@vger.kernel.org Signed-off-by: Abdurrahman Hussain Link: https://lore.kernel.org/r/20260515-adm1266-fixes-v1-1-1c1ea1349cfe@nexthop.ai Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index d90f8f80be8e..a86666c73a5e 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -432,7 +432,7 @@ static int adm1266_set_rtc(struct adm1266_data *data) char write_buf[6]; int i; - kt = ktime_get_seconds(); + kt = ktime_get_real_seconds(); memset(write_buf, 0, sizeof(write_buf)); -- cgit v1.2.3 From eee213daa1e1b402eb631bcd1b8c5aa340a6b081 Mon Sep 17 00:00:00 2001 From: Abdurrahman Hussain Date: Fri, 15 May 2026 15:11:48 -0700 Subject: hwmon: (pmbus/adm1266) widen blackbox-info buffer to I2C_SMBUS_BLOCK_MAX adm1266_nvmem_read_blackbox() declares a 5-byte stack buffer and passes it to i2c_smbus_read_block_data() to retrieve the 4-byte BLACKBOX_INFO response. i2c_smbus_read_block_data() does not honour caller buffer sizes -- it memcpy()s data.block[0] bytes from the SMBus transaction (where data.block[0] is the length byte returned by the slave device, up to I2C_SMBUS_BLOCK_MAX = 32): memcpy(values, &data.block[1], data.block[0]); If the device returns any block length above 5, the call overflows the caller's 5-byte stack buffer before the post-call if (ret != 4) return -EIO; check has a chance to reject the response. Widen the local buffer to I2C_SMBUS_BLOCK_MAX so the helper has room for any well-formed SMBus block response, matching the convention used by the other i2c_smbus_read_block_data() callers in this driver. Fixes: 15609d189302 ("hwmon: (pmbus/adm1266) read blackbox") Cc: stable@vger.kernel.org Signed-off-by: Abdurrahman Hussain Link: https://lore.kernel.org/r/20260515-adm1266-fixes-v1-2-1c1ea1349cfe@nexthop.ai Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index a86666c73a5e..94691dec1359 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -349,7 +349,7 @@ static int adm1266_nvmem_read_blackbox(struct adm1266_data *data, u8 *read_buff) { int record_count; char index; - u8 buf[5]; + u8 buf[I2C_SMBUS_BLOCK_MAX]; int ret; ret = i2c_smbus_read_block_data(data->client, ADM1266_BLACKBOX_INFO, buf); -- cgit v1.2.3 From 4afca954622d672ea65ed961bed01cf91caa034e Mon Sep 17 00:00:00 2001 From: Abdurrahman Hussain Date: Fri, 15 May 2026 15:11:49 -0700 Subject: hwmon: (pmbus/adm1266) reject implausible blackbox record_count adm1266_nvmem_read_blackbox() loops over a record_count that comes straight from byte 3 of the BLACKBOX_INFO response. The destination buffer is data->dev_mem, sized for the nvmem cell's declared 2048 bytes (ADM1266_BLACKBOX_MAX_RECORDS * ADM1266_BLACKBOX_SIZE = 32 * 64). A device that reports a record_count greater than 32 -- whether due to firmware bugs, bus corruption, or a non-responsive slave returning 0xff -- would walk read_buff past the end of the dev_mem allocation on the trailing iterations. Cap record_count at ADM1266_BLACKBOX_MAX_RECORDS (introduced here) before entering the loop and return -EIO on any larger value, so a malformed BLACKBOX_INFO response cannot drive the loop out of bounds. Fixes: 15609d189302 ("hwmon: (pmbus/adm1266) read blackbox") Cc: stable@vger.kernel.org Signed-off-by: Abdurrahman Hussain Link: https://lore.kernel.org/r/20260515-adm1266-fixes-v1-3-1c1ea1349cfe@nexthop.ai Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index 94691dec1359..43d9e7407795 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -46,6 +46,7 @@ #define ADM1266_BLACKBOX_OFFSET 0 #define ADM1266_BLACKBOX_SIZE 64 +#define ADM1266_BLACKBOX_MAX_RECORDS 32 #define ADM1266_PMBUS_BLOCK_MAX 255 @@ -360,6 +361,8 @@ static int adm1266_nvmem_read_blackbox(struct adm1266_data *data, u8 *read_buff) return -EIO; record_count = buf[3]; + if (record_count > ADM1266_BLACKBOX_MAX_RECORDS) + return -EIO; for (index = 0; index < record_count; index++) { ret = adm1266_pmbus_block_xfer(data, ADM1266_READ_BLACKBOX, 1, &index, read_buff); -- cgit v1.2.3 From 487566cb1ccdf3756fdd7bf8d875e612ff3169bb Mon Sep 17 00:00:00 2001 From: Abdurrahman Hussain Date: Fri, 15 May 2026 15:11:50 -0700 Subject: hwmon: (pmbus/adm1266) include PEC byte in pmbus_block_xfer read buffer adm1266_pmbus_block_xfer() sets up the read transaction with .buf = data->read_buf, .len = ADM1266_PMBUS_BLOCK_MAX + 2, but read_buf in struct adm1266_data is declared as u8 read_buf[ADM1266_PMBUS_BLOCK_MAX + 1]; For a max-length block response (length byte = 255 + up to 1 PEC byte), the i2c controller is told to write 257 bytes into a 256-byte buffer, putting one byte past the end of read_buf. The same response also makes the subsequent PEC compare if (crc != msgs[1].buf[msgs[1].buf[0] + 1]) read a byte beyond the array. Bump the read_buf declaration to ADM1266_PMBUS_BLOCK_MAX + 2 so the buffer can hold the length byte, up to 255 payload bytes, and the PEC byte the i2c_msg length already accounts for. Fixes: 407dc802a9c0 ("hwmon: (pmbus/adm1266) Add Block process call") Cc: stable@vger.kernel.org Signed-off-by: Abdurrahman Hussain Link: https://lore.kernel.org/r/20260515-adm1266-fixes-v1-4-1c1ea1349cfe@nexthop.ai Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index 43d9e7407795..5c68e3177f64 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -61,7 +61,7 @@ struct adm1266_data { u8 *dev_mem; struct mutex buf_mutex; u8 write_buf[ADM1266_PMBUS_BLOCK_MAX + 1] ____cacheline_aligned; - u8 read_buf[ADM1266_PMBUS_BLOCK_MAX + 1] ____cacheline_aligned; + u8 read_buf[ADM1266_PMBUS_BLOCK_MAX + 2] ____cacheline_aligned; }; static const struct nvmem_cell_info adm1266_nvmem_cells[] = { -- cgit v1.2.3 From 4d25342543c01310fc4e0cba7cb17c775e2421e2 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 14 May 2026 20:32:10 +0000 Subject: drm/xe/oa: Fix exec_queue leak on width check in stream open In xe_oa_stream_open_ioctl(), when param.exec_q->width > 1 the function returns -EOPNOTSUPP directly, skipping the existing err_exec_q cleanup path. The exec_queue reference obtained by xe_exec_queue_lookup() is leaked. The exec queue holds a reference on the xe_file, which is only dropped during queue teardown. The leaked lookup ref is not on the file's exec_queue xarray, so file close cannot release it. This keeps both the exec queue and the file private state pinned indefinitely. Jump to err_exec_q instead of returning directly so the reference is released. Fixes: f0ed39830e60 ("xe/oa: Fix query mode of operation for OAR/OAC") Assisted-by: Claude:claude-opus-4.6 Reviewed-by: Ashutosh Dixit Link: https://patch.msgid.link/20260514203210.593488-1-shuicheng.lin@intel.com Signed-off-by: Shuicheng Lin (cherry picked from commit 339fa0be9e4a5d69fa47e91f4a36574224fb478f) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_oa.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 6337e671c97a..d908f4e03906 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -2032,8 +2032,10 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) return -ENOENT; - if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) - return -EOPNOTSUPP; + if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) { + ret = -EOPNOTSUPP; + goto err_exec_q; + } } /* -- cgit v1.2.3 From b0ddda571d15528e6caee7090beff4a66dfdb1a2 Mon Sep 17 00:00:00 2001 From: Abdurrahman Hussain Date: Tue, 12 May 2026 11:56:28 -0700 Subject: hwmon: (pmbus/adm1266) include adapter number in GPIO line label Platforms that fit more than one ADM1266 on different I2C buses at the same 7-bit slave address (a common shelf-management pattern, e.g. one device per power domain) end up with duplicate GPIO line labels because the existing format only includes the slave address. Including the adapter number disambiguates them. The adapter number is formatted as decimal to match the i2c-N convention used elsewhere in Linux (sysfs paths, dev nodes); the slave address keeps its conventional hexadecimal form. The label is purely informational (visible via gpioinfo and the gpiochip /sys/class/gpio name); no DT or ABI consumer parses it. Fixes: d98dfad35c38c ("hwmon: (pmbus/adm1266) Add support for GPIOs") Signed-off-by: Abdurrahman Hussain Link: https://lore.kernel.org/r/20260512-adm1266-v3-5-a81a479b0bb0@nexthop.ai Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index 5c68e3177f64..9820dc9e1f2e 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -291,8 +291,9 @@ static int adm1266_config_gpio(struct adm1266_data *data) int i; for (i = 0; i < ARRAY_SIZE(data->gpio_names); i++) { - gpio_name = devm_kasprintf(&data->client->dev, GFP_KERNEL, "adm1266-%x-%s", - data->client->addr, adm1266_names[i]); + gpio_name = devm_kasprintf(&data->client->dev, GFP_KERNEL, "adm1266-%d-%x-%s", + data->client->adapter->nr, data->client->addr, + adm1266_names[i]); if (!gpio_name) return -ENOMEM; -- cgit v1.2.3 From 43cae21424ff8e33894a0f86c6b80b840c049fd7 Mon Sep 17 00:00:00 2001 From: Abdurrahman Hussain Date: Fri, 15 May 2026 15:11:51 -0700 Subject: hwmon: (pmbus/adm1266) bounce blackbox records through a protocol-sized buffer adm1266_pmbus_block_xfer() copies the device-supplied block payload into the caller-provided buffer using the device-supplied length: memcpy(data_r, &msgs[1].buf[1], msgs[1].buf[0]); The helper does not know how large data_r is and trusts the device to return at most one record's worth of bytes. adm1266_nvmem_read_blackbox() violates that contract: it advances read_buff inside data->dev_mem in ADM1266_BLACKBOX_SIZE (64-byte) strides while the helper is willing to write up to ADM1266_PMBUS_BLOCK_MAX (255) bytes. A device that returns more than 64 bytes on the trailing record (read_buff offset 1984 in the 2048-byte dev_mem allocation) overflows dev_mem by up to 191 bytes before the post-call if (ret != ADM1266_BLACKBOX_SIZE) return -EIO; can reject the response. Contain the fix in the caller without changing the helper signature: read each record into a 255-byte local bounce buffer that matches the helper's maximum output, validate the returned length, and only then copy exactly ADM1266_BLACKBOX_SIZE bytes into the dev_mem slot. Fixes: 407dc802a9c0 ("hwmon: (pmbus/adm1266) Add Block process call") Cc: stable@vger.kernel.org Signed-off-by: Abdurrahman Hussain Link: https://lore.kernel.org/r/20260515-adm1266-fixes-v1-5-1c1ea1349cfe@nexthop.ai Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index 9820dc9e1f2e..1fcd86ee96fe 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -349,6 +349,7 @@ static void adm1266_init_debugfs(struct adm1266_data *data) static int adm1266_nvmem_read_blackbox(struct adm1266_data *data, u8 *read_buff) { + u8 record[ADM1266_PMBUS_BLOCK_MAX]; int record_count; char index; u8 buf[I2C_SMBUS_BLOCK_MAX]; @@ -366,13 +367,14 @@ static int adm1266_nvmem_read_blackbox(struct adm1266_data *data, u8 *read_buff) return -EIO; for (index = 0; index < record_count; index++) { - ret = adm1266_pmbus_block_xfer(data, ADM1266_READ_BLACKBOX, 1, &index, read_buff); + ret = adm1266_pmbus_block_xfer(data, ADM1266_READ_BLACKBOX, 1, &index, record); if (ret < 0) return ret; if (ret != ADM1266_BLACKBOX_SIZE) return -EIO; + memcpy(read_buff, record, ADM1266_BLACKBOX_SIZE); read_buff += ADM1266_BLACKBOX_SIZE; } -- cgit v1.2.3 From d7834d92251baade796812876e95555e2066fa9f Mon Sep 17 00:00:00 2001 From: Abdurrahman Hussain Date: Mon, 18 May 2026 17:52:25 -0700 Subject: hwmon: (pmbus/adm1266) cap PDIO scan in get_multiple at ADM1266_PDIO_NR adm1266_gpio_get_multiple() iterates the PDIO portion of the caller-supplied mask using for_each_set_bit_from(gpio_nr, mask, ADM1266_GPIO_NR + ADM1266_PDIO_STATUS) { ... } where ADM1266_PDIO_STATUS is the PMBus command code (0xE9, i.e. 233), not the number of PDIO pins. The intended upper bound is ADM1266_GPIO_NR + ADM1266_PDIO_NR = 25. gpiolib hands in a mask sized for gc.ngpio (= 25 bits on this chip), so the iteration walks find_next_bit() up to 242, reading up to 217 extra bits (a handful of unsigned-long words: four on 64-bit, seven on 32-bit) of whatever lives past the end of the mask in the caller's stack. Any incidental set bit in that range then drives a set_bit(gpio_nr, bits) call that writes past the end of the caller-supplied bits array too -- both out-of-bounds. Substitute ADM1266_PDIO_NR for the constant so the scan stops at the last real PDIO bit. Fixes: d98dfad35c38 ("hwmon: (pmbus/adm1266) Add support for GPIOs") Cc: stable@vger.kernel.org Signed-off-by: Abdurrahman Hussain Reviewed-by: Bartosz Golaszewski Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20260518-adm1266-gpio-fixes-v3-1-e425e4f88139@nexthop.ai Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index 1fcd86ee96fe..4b2a8a765a4e 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -212,7 +212,7 @@ static int adm1266_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask status = read_buf[0] + (read_buf[1] << 8); *bits = 0; - for_each_set_bit_from(gpio_nr, mask, ADM1266_GPIO_NR + ADM1266_PDIO_STATUS) { + for_each_set_bit_from(gpio_nr, mask, ADM1266_GPIO_NR + ADM1266_PDIO_NR) { if (test_bit(gpio_nr - ADM1266_GPIO_NR, &status)) set_bit(gpio_nr, bits); } -- cgit v1.2.3 From 3327a12aee9e10ffa903e28b8445dfd1af5307c0 Mon Sep 17 00:00:00 2001 From: Abdurrahman Hussain Date: Mon, 18 May 2026 17:52:26 -0700 Subject: hwmon: (pmbus/adm1266) don't clobber GPIO bits before PDIO read in get_multiple adm1266_gpio_get_multiple() zeroes *bits before the GPIO_STATUS loop and then a second time before the PDIO_STATUS loop: *bits = 0; for_each_set_bit(gpio_nr, mask, ADM1266_GPIO_NR) { ... set_bit(gpio_nr, bits); } ret = i2c_smbus_read_block_data(data->client, ADM1266_PDIO_STATUS, ...); ... *bits = 0; for_each_set_bit_from(gpio_nr, mask, ADM1266_GPIO_NR + ADM1266_PDIO_NR) { ... set_bit(gpio_nr, bits); } The second *bits = 0 throws away every GPIO bit the first loop just populated, so callers asking for any combination of GPIO and PDIO pins always see the GPIO portion of the returned bits as zero. Drop the redundant second assignment so both halves of the result survive. Fixes: d98dfad35c38 ("hwmon: (pmbus/adm1266) Add support for GPIOs") Cc: stable@vger.kernel.org Signed-off-by: Abdurrahman Hussain Reviewed-by: Bartosz Golaszewski Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20260518-adm1266-gpio-fixes-v3-2-e425e4f88139@nexthop.ai Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index 4b2a8a765a4e..3bf512df30cd 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -211,7 +211,6 @@ static int adm1266_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask status = read_buf[0] + (read_buf[1] << 8); - *bits = 0; for_each_set_bit_from(gpio_nr, mask, ADM1266_GPIO_NR + ADM1266_PDIO_NR) { if (test_bit(gpio_nr - ADM1266_GPIO_NR, &status)) set_bit(gpio_nr, bits); -- cgit v1.2.3 From a7232f68c43ca62f545049b7f5fbfc75137b843b Mon Sep 17 00:00:00 2001 From: Abdurrahman Hussain Date: Mon, 18 May 2026 17:52:27 -0700 Subject: hwmon: (pmbus/adm1266) reject short block-read responses in the GPIO accessors adm1266_gpio_get() and adm1266_gpio_get_multiple() both compose the pin-status word as pins_status = read_buf[0] + (read_buf[1] << 8); right after i2c_smbus_read_block_data(), guarding only against an error return. A well-behaved device returns 2 bytes for GPIO_STATUS/PDIO_STATUS, but the helper happily reports a 0- or 1-byte response too. If the device returns 0 bytes, both read_buf slots are uninitialized stack memory; if it returns 1 byte, read_buf[1] is. The composed value then flows through set_bit() into the caller's *bits in adm1266_gpio_get_multiple(), or into the return value of adm1266_gpio_get(), and ends up in userspace via gpiolib (sysfs and the char-dev ioctls). That leaks a few bits of kernel stack per request on any device whose firmware glitch, bus error, or hostile slave produces a short block-read response. Add the missing length check to both call sites and surface a short response as -EIO. Fixes: d98dfad35c38 ("hwmon: (pmbus/adm1266) Add support for GPIOs") Cc: stable@vger.kernel.org Signed-off-by: Abdurrahman Hussain Reviewed-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20260518-adm1266-gpio-fixes-v3-3-e425e4f88139@nexthop.ai Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index 3bf512df30cd..6f904a5a8ea6 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -176,6 +176,8 @@ static int adm1266_gpio_get(struct gpio_chip *chip, unsigned int offset) ret = i2c_smbus_read_block_data(data->client, pmbus_cmd, read_buf); if (ret < 0) return ret; + if (ret < 2) + return -EIO; pins_status = read_buf[0] + (read_buf[1] << 8); if (offset < ADM1266_GPIO_NR) @@ -196,6 +198,8 @@ static int adm1266_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask ret = i2c_smbus_read_block_data(data->client, ADM1266_GPIO_STATUS, read_buf); if (ret < 0) return ret; + if (ret < 2) + return -EIO; status = read_buf[0] + (read_buf[1] << 8); @@ -208,6 +212,8 @@ static int adm1266_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask ret = i2c_smbus_read_block_data(data->client, ADM1266_PDIO_STATUS, read_buf); if (ret < 0) return ret; + if (ret < 2) + return -EIO; status = read_buf[0] + (read_buf[1] << 8); -- cgit v1.2.3 From 491403b9b76cf66abd81301c5901aa4a4549f1e8 Mon Sep 17 00:00:00 2001 From: Abdurrahman Hussain Date: Mon, 18 May 2026 17:52:28 -0700 Subject: hwmon: (pmbus/adm1266) register the gpio_chip after pmbus_do_probe() adm1266_probe() calls adm1266_config_gpio() -- which goes on to devm_gpiochip_add_data() and exposes the gpio_chip callbacks to gpiolib -- before pmbus_do_probe() has initialised the per-client PMBus state (notably the pmbus_lock mutex the core hands out via pmbus_get_data()). That ordering is already a latent hazard: any GPIO access that lands between adm1266_config_gpio() and the end of pmbus_do_probe() (for example a sysfs read from a user space agent that opens the gpiochip the instant gpiolib advertises it) races pmbus_do_probe()'s own device accesses with no serialisation. Move adm1266_config_gpio() down past pmbus_do_probe() so the chip isn't reachable from userspace until the PMBus state it depends on is fully initialised. Fixes: d98dfad35c38 ("hwmon: (pmbus/adm1266) Add support for GPIOs") Cc: stable@vger.kernel.org Signed-off-by: Abdurrahman Hussain Reviewed-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20260518-adm1266-gpio-fixes-v3-4-e425e4f88139@nexthop.ai Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index 6f904a5a8ea6..c2ac5d228b7d 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -473,10 +473,6 @@ static int adm1266_probe(struct i2c_client *client) crc8_populate_msb(pmbus_crc_table, 0x7); mutex_init(&data->buf_mutex); - ret = adm1266_config_gpio(data); - if (ret < 0) - return ret; - ret = adm1266_set_rtc(data); if (ret < 0) return ret; @@ -489,6 +485,10 @@ static int adm1266_probe(struct i2c_client *client) if (ret) return ret; + ret = adm1266_config_gpio(data); + if (ret < 0) + return ret; + adm1266_init_debugfs(data); return 0; -- cgit v1.2.3 From 6af713af91d5c34ec049eb3cc2c5b3f5eba953b8 Mon Sep 17 00:00:00 2001 From: Abdurrahman Hussain Date: Mon, 18 May 2026 17:52:29 -0700 Subject: hwmon: (pmbus/adm1266) register the nvmem device after pmbus_do_probe() adm1266_probe() calls adm1266_config_nvmem() -- which goes on to devm_nvmem_register() and exposes adm1266_nvmem_read() to userspace -- before pmbus_do_probe() has initialised the per-client PMBus state. Same latent hazard as the gpio_chip one fixed in the previous patch: once the nvmem device is registered, gpiolib's nvmem char-dev / sysfs interface is reachable, and any concurrent read triggers adm1266_nvmem_read() -> adm1266_nvmem_read_blackbox(), which issues PMBus traffic that races pmbus_do_probe()'s own device accesses with no serialisation. Move adm1266_config_nvmem() down past pmbus_do_probe() so the nvmem device isn't reachable from userspace until the PMBus state the nvmem accessors depend on is fully initialised. Fixes: 15609d189302 ("hwmon: (pmbus/adm1266) read blackbox") Cc: stable@vger.kernel.org Signed-off-by: Abdurrahman Hussain Link: https://lore.kernel.org/r/20260518-adm1266-gpio-fixes-v3-5-e425e4f88139@nexthop.ai Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index c2ac5d228b7d..3e8f2619cb9b 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -477,14 +477,14 @@ static int adm1266_probe(struct i2c_client *client) if (ret < 0) return ret; - ret = adm1266_config_nvmem(data); - if (ret < 0) - return ret; - ret = pmbus_do_probe(client, &data->info); if (ret) return ret; + ret = adm1266_config_nvmem(data); + if (ret < 0) + return ret; + ret = adm1266_config_gpio(data); if (ret < 0) return ret; -- cgit v1.2.3 From bab8c6fb5af8df7e753d196c1262cb78e92ca872 Mon Sep 17 00:00:00 2001 From: Abdurrahman Hussain Date: Mon, 18 May 2026 17:52:30 -0700 Subject: hwmon: (pmbus/adm1266) serialize GPIO PMBus accesses with pmbus_lock adm1266_gpio_get(), adm1266_gpio_get_multiple(), and adm1266_gpio_dbg_show() all issue PMBus reads against the device but none of them take pmbus_lock. The pmbus_core framework holds pmbus_lock around its own multi-transaction sequences (notably the "set PAGE, then read paged register" pattern used by hwmon attributes), so an unlocked GPIO accessor can land between a PAGE write and the subsequent paged read in another thread and corrupt either side's view of the device state machine. Take pmbus_lock at the top of each of the three accessors via the scope-based guard(). The lock is uncontended in the common case and adds only a single mutex round-trip per call. Fixes: d98dfad35c38 ("hwmon: (pmbus/adm1266) Add support for GPIOs") Cc: stable@vger.kernel.org Signed-off-by: Abdurrahman Hussain Reviewed-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20260518-adm1266-gpio-fixes-v3-6-e425e4f88139@nexthop.ai Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index 3e8f2619cb9b..0eef58dd69a6 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -173,6 +173,8 @@ static int adm1266_gpio_get(struct gpio_chip *chip, unsigned int offset) else pmbus_cmd = ADM1266_PDIO_STATUS; + guard(pmbus_lock)(data->client); + ret = i2c_smbus_read_block_data(data->client, pmbus_cmd, read_buf); if (ret < 0) return ret; @@ -195,6 +197,8 @@ static int adm1266_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask unsigned int gpio_nr; int ret; + guard(pmbus_lock)(data->client); + ret = i2c_smbus_read_block_data(data->client, ADM1266_GPIO_STATUS, read_buf); if (ret < 0) return ret; @@ -236,6 +240,8 @@ static void adm1266_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) int ret; int i; + guard(pmbus_lock)(data->client); + for (i = 0; i < ADM1266_GPIO_NR; i++) { write_cmd = adm1266_gpio_mapping[i][1]; ret = adm1266_pmbus_block_xfer(data, ADM1266_GPIO_CONFIG, 1, &write_cmd, read_buf); -- cgit v1.2.3 From 9f1dd8f9491eb840cbea7ffdf4cad031e25f8ae0 Mon Sep 17 00:00:00 2001 From: Abdurrahman Hussain Date: Mon, 18 May 2026 17:52:31 -0700 Subject: hwmon: (pmbus/adm1266) serialize NVMEM blackbox read with pmbus_lock adm1266_nvmem_read() is the reg_read callback the NVMEM core invokes when userspace reads /sys/bus/nvmem/devices/.../nvmem on this chip. On the first byte of every read it does a memset of data->dev_mem, walks the device blackbox through adm1266_nvmem_read_blackbox() (which issues a chain of PMBus block transactions), and then memcpys the refreshed buffer out to userspace. None of that runs under pmbus_lock today. Two consequences: - The PMBus traffic the refresh issues is not serialised against pmbus_core's own multi-step PAGE+register sequences. A paged hwmon attribute read from another thread can land between a PAGE write and the paged read in either direction and corrupt one side's view of the device state machine. - The NVMEM core does not serialise concurrent reg_read calls, so two userspace readers racing at offset 0 can interleave the memset of data->dev_mem with another reader's adm1266_nvmem_read_blackbox() refill or memcpy out, returning torn data to userspace. Take pmbus_lock at the top of adm1266_nvmem_read() via the scope-based guard(). Patch 5 of this series moves adm1266_config_nvmem() past pmbus_do_probe() so the lock is guaranteed to be live before the callback is reachable from userspace. Fixes: 15609d189302 ("hwmon: (pmbus/adm1266) read blackbox") Cc: stable@vger.kernel.org Signed-off-by: Abdurrahman Hussain Link: https://lore.kernel.org/r/20260518-adm1266-gpio-fixes-v3-7-e425e4f88139@nexthop.ai Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index 0eef58dd69a6..5ddca5701032 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -400,6 +400,8 @@ static int adm1266_nvmem_read(void *priv, unsigned int offset, void *val, size_t if (offset + bytes > data->nvmem_config.size) return -EINVAL; + guard(pmbus_lock)(data->client); + if (offset == 0) { memset(data->dev_mem, 0, data->nvmem_config.size); -- cgit v1.2.3 From 4e4af55aaca7f6d7673d5f9889ad0529db86a048 Mon Sep 17 00:00:00 2001 From: Abdurrahman Hussain Date: Mon, 18 May 2026 17:52:32 -0700 Subject: hwmon: (pmbus/adm1266) serialize sequencer_state debugfs read with pmbus_lock adm1266_state_read() backs the sequencer_state debugfs entry and issues an i2c_smbus_read_word_data(client, ADM1266_READ_STATE) against the device without taking pmbus_lock. pmbus_core holds pmbus_lock around its own multi-transaction sequences (notably the "set PAGE, then read paged register" pattern used by hwmon attributes), so an unlocked debugfs reader can land between a PAGE write and the subsequent paged read in another thread. READ_STATE itself is not paged, so it cannot corrupt PAGE in flight, but the same defensive serialisation that applies to the GPIO accessors applies here: any direct device access from outside pmbus_core should be ordered with respect to pmbus_core's own. Take pmbus_lock at the top of adm1266_state_read() via the scope-based guard(). Fixes: ed1ff457e187 ("hwmon: (pmbus/adm1266) add debugfs for states") Cc: stable@vger.kernel.org Signed-off-by: Abdurrahman Hussain Link: https://lore.kernel.org/r/20260518-adm1266-gpio-fixes-v3-8-e425e4f88139@nexthop.ai Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index 5ddca5701032..6f6ad7b20e9a 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -335,6 +335,7 @@ static int adm1266_state_read(struct seq_file *s, void *pdata) struct i2c_client *client = to_i2c_client(dev); int ret; + guard(pmbus_lock)(client); ret = i2c_smbus_read_word_data(client, ADM1266_READ_STATE); if (ret < 0) return ret; -- cgit v1.2.3 From 67a52d3ebb5a0ae0c0e23ffa99470d9463179c9f Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 21 May 2026 13:25:09 +0100 Subject: ASoC: cs-amp-lib: Fix wrong sizeof() in _cs_amp_set_efi_calibration_data() When calculating data->count replace the incorrect sizeof(data) with use of struct_offset(). The faulty sizeof(data) was incorrectly calculating the size of the pointer instead of the size of the struct pointed to. As it happens, both values are 8 on a 64-bit CPU. In the unlikely event of using this code on a 32-bit CPU the number of available bytes would be calculated 4 larger than is actually available. Instead of changing to sizeof(*data) it has been replaced by struct_offset() because it has better chance of detecting these sorts of typos. Also the offset of the data[] array is actually what we want to know here anyway. Signed-off-by: Richard Fitzgerald Fixes: 2b62e66626f0 ("ASoC: cs-amp-lib: Add function to write calibration to UEFI") Link: https://patch.msgid.link/20260521122511.987322-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs-amp-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs-amp-lib.c b/sound/soc/codecs/cs-amp-lib.c index b34b1f5f121f..881c6f2264f3 100644 --- a/sound/soc/codecs/cs-amp-lib.c +++ b/sound/soc/codecs/cs-amp-lib.c @@ -500,7 +500,7 @@ static int _cs_amp_set_efi_calibration_data(struct device *dev, int amp_index, i * must be set. */ if (data->count == 0) - data->count = (data->size - sizeof(data)) / sizeof(data->data[0]); + data->count = (data->size - struct_offset(data, data)) / sizeof(data->data[0]); if (amp_index < 0) { /* Is there already a slot for this target? */ -- cgit v1.2.3 From ba28a07a9a0b53a538c809e04e517e1ce1f1bee3 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 21 May 2026 13:25:10 +0100 Subject: ASoC: cs-amp-lib: Fix missing dput() after debugfs_lookup() Rewrite cs_amp_create_debugfs() so that dput() will be called on a valid dentry returned from debugfs_lookup(). The pointer returned from debugfs_lookup() must be released by dput(). The pointer returned from debugfs_create_dir() does not need to be passed to dput(). Signed-off-by: Richard Fitzgerald Fixes: cdd27fa3298a ("ASoC: cs-amp-lib: Add helpers for factory calibration") Link: https://patch.msgid.link/20260521122511.987322-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs-amp-lib.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/cs-amp-lib.c b/sound/soc/codecs/cs-amp-lib.c index 881c6f2264f3..e97a125ebbb3 100644 --- a/sound/soc/codecs/cs-amp-lib.c +++ b/sound/soc/codecs/cs-amp-lib.c @@ -833,11 +833,18 @@ EXPORT_SYMBOL_NS_GPL(cs_amp_devm_get_vendor_specific_variant_id, "SND_SOC_CS_AMP */ struct dentry *cs_amp_create_debugfs(struct device *dev) { - struct dentry *dir; + struct dentry *dir, *created; + /* debugfs_lookup() can return NULL or ERR_PTR on error */ dir = debugfs_lookup("cirrus_logic", NULL); - if (!dir) - dir = debugfs_create_dir("cirrus_logic", NULL); + if (!IS_ERR_OR_NULL(dir)) { + created = debugfs_create_dir(dev_name(dev), dir); + dput(dir); + + return created; + } + + dir = debugfs_create_dir("cirrus_logic", NULL); return debugfs_create_dir(dev_name(dev), dir); } -- cgit v1.2.3 From a685252686851633b795bc30facac8a39344ae09 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 21 May 2026 13:25:11 +0100 Subject: ASoC: cs-amp-lib: Fix typo in error message: write -> read Fix the error message in cs_amp_read_cal_coeff() to say "Failed to read". It was incorrectly "Failed to write", probably a copy-paste error. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20260521122511.987322-4-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs-amp-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs-amp-lib.c b/sound/soc/codecs/cs-amp-lib.c index e97a125ebbb3..fb5b950e584c 100644 --- a/sound/soc/codecs/cs-amp-lib.c +++ b/sound/soc/codecs/cs-amp-lib.c @@ -118,7 +118,7 @@ static int cs_amp_read_cal_coeff(struct cs_dsp *dsp, } if (ret < 0) { - dev_err(dsp->dev, "Failed to write to '%s': %d\n", ctl_name, ret); + dev_err(dsp->dev, "Failed to read '%s': %d\n", ctl_name, ret); return ret; } -- cgit v1.2.3 From dd2147375a8fe7c5bc3f1f1b1d3a9567c26faefa Mon Sep 17 00:00:00 2001 From: Liu Kai Date: Thu, 7 May 2026 16:32:04 +0800 Subject: HID: remove duplicate hid_warn_ratelimited definition The hid_warn_ratelimited macro is defined twice in include/linux/hid.h: - first one added by commit 4051ead99888 ("HID: rate-limit hid_warn to prevent log flooding") - second one added by commit 1d64624243af ("HID: core: Add printk_ratelimited variants to hid_warn() etc")). The second definition is correctly grouped with other ratelimited macros. Remove the duplicate definition. Fixes: 1d64624243af ("HID: core: Add printk_ratelimited variants to hid_warn() etc") Signed-off-by: Liu Kai [bentiss: edited commit message] Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/hid.h b/include/linux/hid.h index bfb9859f391e..47dc0bc89fa4 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1316,8 +1316,6 @@ void hid_quirks_exit(__u16 bus); dev_notice(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_warn(hid, fmt, ...) \ dev_warn(&(hid)->dev, fmt, ##__VA_ARGS__) -#define hid_warn_ratelimited(hid, fmt, ...) \ - dev_warn_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_info(hid, fmt, ...) \ dev_info(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_dbg(hid, fmt, ...) \ -- cgit v1.2.3 From c68337442f03953237a94577beb468ab2662a851 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Tue, 19 May 2026 17:18:05 +0800 Subject: cifs: Fix busy dentry used after unmounting Since commit 340cea84f691c ("cifs: open files should not hold ref on superblock"), cifs file only holds the dentry ref_cnt, the cifs file close work(cfile->deferred) could be executed after unmounting, which will trigger a warning in generic_shutdown_super: BUG: Dentry 00000000a14a6845{i=c,n=file} still in use (1) [unmount of cifs cifs] The detailed processs is: process A process B kworker fd = open(PATH) vfs_open file->__f_path = *path // dentry->d_lockref.count = 1 cifs_open cifs_new_fileinfo cfile->dentry = dget(dentry) // dentry->d_lockref.count = 2 close(fd) __fput cifs_close queue_delayed_work(deferredclose_wq, cfile->deferred) dput(dentry) // dentry->d_lockref.count = 1 smb2_deferred_work_close _cifsFileInfo_put list_del(&cifs_file->flist) umount cleanup_mnt deactivate_super cifs_kill_sb cifs_close_all_deferred_files_sb cifs_close_all_deferred_files // cannot find cfile, skip _cifsFileInfo_put kill_anon_super generic_shutdown_super shrink_dcache_for_umount umount_check WARN ! // dentry->d_lockref.count = 1 cifsFileInfo_put_final dput(cifs_file->dentry) // dentry->d_lockref.count = 0 Fix it by flushing 'deferredclose_wq' before calling kill_anon_super. Fetch a reproducer in https://bugzilla.kernel.org/show_bug.cgi?id=221548. Fixes: 340cea84f691c ("cifs: open files should not hold ref on superblock") Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Signed-off-by: Zhihao Cheng Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 9f76b0347fa9..06ebad58be0e 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -306,6 +306,8 @@ static void cifs_kill_sb(struct super_block *sb) /* Wait for all pending oplock breaks to complete */ flush_workqueue(cifsoplockd_wq); + /* Wait for all opened files to release */ + flush_workqueue(deferredclose_wq); /* finally release root dentry */ dput(cifs_sb->root); -- cgit v1.2.3 From fbc15231181669b51c0103fd74c9c2ac2756ab56 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Wed, 20 May 2026 20:03:58 +0200 Subject: smb: smbdirect: divide, not multiply, milliseconds by 1000 Unless smbdirect_connection_legacy_debug_proc_show() wants to debug-log keep_alive_interval as microseconds, a magnitude higher precision than available by the way, keepalive_interval_msec should not be multiplied by 1000. Fixes: cc55f65dd352 ("smb: client: make use of common smbdirect_socket_parameters") Reviewed-by: Stefan Metzmacher Signed-off-by: Alexander A. Klimov Signed-off-by: Steve French --- fs/smb/smbdirect/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/smbdirect/debug.c b/fs/smb/smbdirect/debug.c index 05ba7c8d165e..3445843445bf 100644 --- a/fs/smb/smbdirect/debug.c +++ b/fs/smb/smbdirect/debug.c @@ -40,7 +40,7 @@ void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, seq_puts(m, "\n"); seq_printf(m, "Conn keep_alive_interval: %u ", - sp->keepalive_interval_msec * 1000); + sp->keepalive_interval_msec / 1000); seq_printf(m, "max_readwrite_size: %u rdma_readwrite_threshold: %u", sp->max_read_write_size, rdma_readwrite_threshold); -- cgit v1.2.3 From dbc81608e3a653dea6cf403f20cae35468b8ab9c Mon Sep 17 00:00:00 2001 From: Zijing Yin Date: Tue, 19 May 2026 10:26:33 -0700 Subject: phonet/pep: disable BH around forwarded sk_receive_skb() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The networking receive path is usually run from softirq context, but protocols that take the socket lock may have packets stored in the backlog and processed later from process context. In that case release_sock() -> __release_sock() drops the slock with spin_unlock_bh() and then calls sk->sk_backlog_rcv() with bottom halves enabled. Typical sk_backlog_rcv handlers process the socket whose backlog is being drained, so the BH state at entry is irrelevant for the slocks they touch. pep_do_rcv() is different: when the inbound skb targets an existing PEP pipe, it forwards the skb to a different *child* socket via sk_receive_skb(). That helper takes the child slock with bh_lock_sock_nested(), which is just spin_lock_nested() and assumes BH is already off. The same child slock therefore ends up acquired with BH on (process path) and with BH off (softirq path): process context softirq context --------------- --------------- release_sock(listener) __netif_receive_skb() __release_sock() phonet_rcv() spin_unlock_bh() __sk_receive_skb(listener) [BH now ENABLED] [BH already disabled] sk_backlog_rcv: sk_backlog_rcv: pep_do_rcv() pep_do_rcv() sk_receive_skb(child) sk_receive_skb(child) bh_lock_sock_nested(child) bh_lock_sock_nested(child) => SOFTIRQ-ON-W => IN-SOFTIRQ-W Lockdep flags this as inconsistent lock state, and it can become a real self-deadlock if a softirq on the same CPU tries to receive to the same child socket while its slock is held in the BH-enabled path: WARNING: inconsistent lock state inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. (slock-AF_PHONET/1){+.?.}-{3:3}, at: __sk_receive_skb+0x1cf/0x900 __sk_receive_skb net/core/sock.c:563 sk_receive_skb include/net/sock.h:2022 [inline] pep_do_rcv net/phonet/pep.c:675 sk_backlog_rcv include/net/sock.h:1190 __release_sock net/core/sock.c:3216 release_sock net/core/sock.c:3815 pep_sock_accept net/phonet/pep.c:879 Wrap the forwarded sk_receive_skb() in local_bh_disable() / local_bh_enable() so the child slock is always acquired with BH off. local_bh_disable() nests safely on the softirq path. Discovered via in-house syzkaller fuzzing; the same root cause also on the linux-6.1.y syzbot dashboard as extid 44f0626dd6284f02663c. Reproduced under KASAN + LOCKDEP + PROVE_LOCKING, reproducer: https://pastebin.com/A3t8xzCR Fixes: 9641458d3ec4 ("Phonet: Pipe End Point for Phonet Pipes protocol") Link: https://syzkaller.appspot.com/bug?extid=44f0626dd6284f02663c Cc: stable@vger.kernel.org Signed-off-by: Zijing Yin Acked-by: Rémi Denis-Courmont Reported-by: syzbot+9f4a135646b66c509935@syzkaller.appspotmail.com Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260519172635.86304-1-yzjaurora@gmail.com Signed-off-by: Jakub Kicinski --- net/phonet/pep.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 4dbf0914df7d..706927139393 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -671,8 +671,23 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) /* Look for an existing pipe handle */ sknode = pep_find_pipe(&pn->hlist, &dst, pipe_handle); - if (sknode) - return sk_receive_skb(sknode, skb, 1); + if (sknode) { + int rc; + + /* pep_do_rcv() runs from two contexts: from softirq via + * phonet_rcv() -> __sk_receive_skb() with BH disabled, + * and from process context via + * release_sock() -> __release_sock(), which drops + * the listener slock with spin_unlock_bh() before draining + * the backlog. The child pipe slock is taken below via + * bh_lock_sock_nested(), which does not itself disable BH, so + * disable BH here to keep both acquire contexts consistent. + */ + local_bh_disable(); + rc = sk_receive_skb(sknode, skb, 1); + local_bh_enable(); + return rc; + } switch (hdr->message_id) { case PNS_PEP_CONNECT_REQ: -- cgit v1.2.3 From 92cc6708f4a2ce15433b8355f363d446429ba88c Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 20 May 2026 11:34:43 +1000 Subject: selftests: rds: config: disable modules The run.sh script explicitly checks that CONFIG_MODULES is disabled. By default, this config option is enabled. Explicitly disable it to be able to run the RDS tests. Note that writing '# CONFIG_(...) is not set' is usually recommended to disable an option in the .config, but it looks like selftests usually set 'CONFIG_(...)=n', which looks clearer. Fixes: 0f5d68004780 ("selftests: rds: add tools/testing/selftests/net/rds/config") Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Allison Henderson Link: https://patch.msgid.link/20260520-net-rds-config-modules-v1-1-2100df02fe9a@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rds/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/rds/config b/tools/testing/selftests/net/rds/config index 97db7ecb892a..3d62d0c750a8 100644 --- a/tools/testing/selftests/net/rds/config +++ b/tools/testing/selftests/net/rds/config @@ -1,3 +1,4 @@ +CONFIG_MODULES=n CONFIG_NET_NS=y CONFIG_NET_SCH_NETEM=y CONFIG_RDS=y -- cgit v1.2.3 From c1a0ecbf32c4b397353204e2ec94c5bb9f3300ed Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Tue, 19 May 2026 17:25:29 +0530 Subject: usb: dwc3: xilinx: fix error handling in zynqmp init error paths Fix error handling and resource cleanup i.e remove invalid phy_exit() after failed phy_init(), route failures through proper cleanup paths and return 0 explicitly on success. Fixes: 84770f028fab ("usb: dwc3: Add driver for Xilinx platforms") Cc: stable@vger.kernel.org Acked-by: Thinh Nguyen Signed-off-by: Radhey Shyam Pandey Link: https://patch.msgid.link/20260519115529.2980421-1-radhey.shyam.pandey@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-xilinx.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index f41b0da5e89d..9b9525592a85 100644 --- a/drivers/usb/dwc3/dwc3-xilinx.c +++ b/drivers/usb/dwc3/dwc3-xilinx.c @@ -184,15 +184,13 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) } ret = phy_init(priv_data->usb3_phy); - if (ret < 0) { - phy_exit(priv_data->usb3_phy); + if (ret < 0) goto err; - } ret = reset_control_deassert(apbrst); if (ret < 0) { dev_err(dev, "Failed to release APB reset\n"); - goto err; + goto err_phy_exit; } if (priv_data->usb3_phy) { @@ -208,26 +206,24 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) ret = reset_control_deassert(crst); if (ret < 0) { dev_err(dev, "Failed to release core reset\n"); - goto err; + goto err_phy_exit; } ret = reset_control_deassert(hibrst); if (ret < 0) { dev_err(dev, "Failed to release hibernation reset\n"); - goto err; + goto err_phy_exit; } ret = phy_power_on(priv_data->usb3_phy); - if (ret < 0) { - phy_exit(priv_data->usb3_phy); - goto err; - } + if (ret < 0) + goto err_phy_exit; /* ulpi reset via gpio-modepin or gpio-framework driver */ reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(reset_gpio)) { - return dev_err_probe(dev, PTR_ERR(reset_gpio), - "Failed to request reset GPIO\n"); + ret = PTR_ERR(reset_gpio); + goto err_phy_power_off; } if (reset_gpio) { @@ -237,6 +233,13 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) } dwc3_xlnx_set_coherency(priv_data, XLNX_USB_TRAFFIC_ROUTE_CONFIG); + + return 0; + +err_phy_power_off: + phy_power_off(priv_data->usb3_phy); +err_phy_exit: + phy_exit(priv_data->usb3_phy); err: return ret; } -- cgit v1.2.3 From ca927fc45e4906bdab42426da044fba4d3584f34 Mon Sep 17 00:00:00 2001 From: Felix Gu Date: Tue, 28 Apr 2026 21:18:21 +0800 Subject: usb: typec: fusb302: Fix resource leak when devm_drm_dp_hpd_bridge_add() fails If devm_drm_dp_hpd_bridge_add() fails during fusb302_probe(), the original code returned directly without cleaning up the resources. Move bridge registration before the IRQ is requested and route bridge registration failures through the existing TCPM unregister and fwnode cleanup path. Fixes: 5d79c525405d ("usb: typec: fusb302: add DRM DP HPD bridge support") Signed-off-by: Felix Gu Reviewed-by: Heikki Krogerus Reviewed-by: Sebastian Reichel Link: https://patch.msgid.link/20260428-fusb-v2-1-aa3b5942cabb@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/fusb302.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c index 889c4c29c1b8..9ab1277b7ed1 100644 --- a/drivers/usb/typec/tcpm/fusb302.c +++ b/drivers/usb/typec/tcpm/fusb302.c @@ -1751,19 +1751,22 @@ static int fusb302_probe(struct i2c_client *client) bridge_dev = devm_drm_dp_hpd_bridge_alloc(chip->dev, to_of_node(chip->tcpc_dev.fwnode)); if (IS_ERR(bridge_dev)) { - ret = PTR_ERR(bridge_dev); - dev_err_probe(chip->dev, ret, "failed to alloc bridge\n"); - goto destroy_workqueue; + ret = dev_err_probe(chip->dev, PTR_ERR(bridge_dev), + "failed to alloc bridge\n"); + goto fwnode_put; } chip->tcpm_port = tcpm_register_port(&client->dev, &chip->tcpc_dev); if (IS_ERR(chip->tcpm_port)) { - fwnode_handle_put(chip->tcpc_dev.fwnode); ret = dev_err_probe(dev, PTR_ERR(chip->tcpm_port), "cannot register tcpm port\n"); - goto destroy_workqueue; + goto fwnode_put; } + ret = devm_drm_dp_hpd_bridge_add(chip->dev, bridge_dev); + if (ret) + goto tcpm_unregister_port; + ret = request_threaded_irq(chip->gpio_int_n_irq, NULL, fusb302_irq_intn, IRQF_ONESHOT | IRQF_TRIGGER_LOW, "fsc_interrupt_int_n", chip); @@ -1774,14 +1777,11 @@ static int fusb302_probe(struct i2c_client *client) enable_irq_wake(chip->gpio_int_n_irq); i2c_set_clientdata(client, chip); - ret = devm_drm_dp_hpd_bridge_add(chip->dev, bridge_dev); - if (ret) - return ret; - - return ret; + return 0; tcpm_unregister_port: tcpm_unregister_port(chip->tcpm_port); +fwnode_put: fwnode_handle_put(chip->tcpc_dev.fwnode); destroy_workqueue: fusb302_debugfs_exit(chip); -- cgit v1.2.3 From 1341db322417266fb5845df81d28305b83a37324 Mon Sep 17 00:00:00 2001 From: Yuho Choi Date: Tue, 19 May 2026 23:03:28 -0400 Subject: ipv6: route: Unregister netdevice notifier on BPF init failure ip6_route_init() registers ip6_route_dev_notifier before registering the IPv6 route BPF iterator target. If bpf_iter_register() fails after the notifier has been registered, the error path currently jumps to out_register_late_subsys and unwinds the RTNL handlers and pernet route state without removing the notifier from the netdevice notifier chain. This leaves ip6_route_dev_notify() callable after the IPv6 route state it uses has been torn down. Add a separate unwind label for the BPF iterator failure path and unregister the netdevice notifier before continuing with the existing cleanup. Fixes: 138d0be35b14 ("net: bpf: Add netlink and ipv6_route bpf_iter targets") Signed-off-by: Yuho Choi Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260520030329.1061183-1-dbgh9129@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e3d355d1fbd6..b106e5fef9cb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6933,7 +6933,7 @@ int __init ip6_route_init(void) #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) ret = bpf_iter_register(); if (ret) - goto out_register_late_subsys; + goto out_register_notifier; #endif for_each_possible_cpu(cpu) { @@ -6946,6 +6946,10 @@ int __init ip6_route_init(void) out: return ret; +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) +out_register_notifier: + unregister_netdevice_notifier(&ip6_route_dev_notifier); +#endif out_register_late_subsys: rtnl_unregister_all(PF_INET6); unregister_pernet_subsys(&ip6_route_net_late_ops); -- cgit v1.2.3 From dfc077043351a81887d1e4c9ac244e9243f3cbf2 Mon Sep 17 00:00:00 2001 From: Nimrod Oren Date: Wed, 20 May 2026 18:39:28 +0300 Subject: selftests: net: Fix checksums in xdp_native Data adjustment cases failed with "Data exchange failed" when using IPv4 because the program did not update the IP and UDP checksums in the IPv4 branch. The issue was masked when both IPv4 and IPv6 were configured, since the test harness prefers IPv6. While here, generalize csum_fold_helper() to fold twice so it works for any 32-bit input. Fixes: 0b65cfcef9c5 ("selftests: drv-net: Test tail-adjustment support") Reviewed-by: Carolina Jubran Reviewed-by: Dragos Tatulea Signed-off-by: Nimrod Oren Link: https://patch.msgid.link/20260520153928.3371765-1-noren@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/xdp_native.bpf.c | 55 +++++++++++++----------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c index 64f05229ab24..ded3f896e622 100644 --- a/tools/testing/selftests/net/lib/xdp_native.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -268,6 +268,17 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) return XDP_PASS; } +static __always_inline __u16 csum_fold_helper(__u32 csum) +{ + csum = (csum & 0xffff) + (csum >> 16); + return ~((csum & 0xffff) + (csum >> 16)); +} + +static __always_inline __u16 csum_fold_udp_helper(__u32 csum) +{ + return csum_fold_helper(csum) ? : 0xffff; +} + static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) { void *data_end = (void *)(long)ctx->data_end; @@ -281,21 +292,22 @@ static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) if (eth->h_proto == bpf_htons(ETH_P_IP)) { struct iphdr *iph = data + sizeof(*eth); - __u16 total_len; if (iph + 1 > (struct iphdr *)data_end) return NULL; - iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset); - udph = (void *)eth + sizeof(*iph) + sizeof(*eth); if (!udph || udph + 1 > (struct udphdr *)data_end) return NULL; - len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + len = iph->tot_len; + len_new = bpf_htons(bpf_ntohs(len) + offset); + iph->tot_len = len_new; + iph->check = csum_fold_helper( + bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), ~((__u32)iph->check))); } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { struct ipv6hdr *ipv6h = data + sizeof(*eth); - __u16 payload_len; if (ipv6h + 1 > (struct ipv6hdr *)data_end) return NULL; @@ -304,33 +316,27 @@ static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) if (!udph || udph + 1 > (struct udphdr *)data_end) return NULL; - *udp_csum = ~((__u32)udph->check); - len = ipv6h->payload_len; len_new = bpf_htons(bpf_ntohs(len) + offset); ipv6h->payload_len = len_new; - - *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, - sizeof(len_new), *udp_csum); - - len = udph->len; - len_new = bpf_htons(bpf_ntohs(udph->len) + offset); - *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, - sizeof(len_new), *udp_csum); } else { return NULL; } + len = udph->len; + len_new = bpf_htons(bpf_ntohs(len) + offset); + + *udp_csum = ~((__u32)udph->check); + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + udph->len = len_new; return udph; } -static __u16 csum_fold_helper(__u32 csum) -{ - return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff; -} - static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, unsigned long hdr_len) { @@ -359,7 +365,7 @@ static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, return -1; udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); - udph->check = (__u16)csum_fold_helper(udp_csum); + udph->check = (__u16)csum_fold_udp_helper(udp_csum); if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0) return -1; @@ -403,7 +409,7 @@ static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset) return -1; udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum); - udph->check = (__u16)csum_fold_helper(udp_csum); + udph->check = (__u16)csum_fold_udp_helper(udp_csum); buff_len = bpf_xdp_get_buff_len(ctx); @@ -484,8 +490,7 @@ static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len, return -1; udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); - - udph->check = (__u16)csum_fold_helper(udp_csum); + udph->check = (__u16)csum_fold_udp_helper(udp_csum); if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0) return -1; @@ -542,7 +547,7 @@ static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len, return -1; udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum); - udph->check = (__u16)csum_fold_helper(udp_csum); + udph->check = (__u16)csum_fold_udp_helper(udp_csum); if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) return -1; -- cgit v1.2.3 From 099258bde1c94d8c8d0988b543436192f9d7438b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 20 May 2026 17:41:51 -0700 Subject: MAINTAINERS: add missing entry for Bluetooth include files We X-out net/bluetooth/ from "NETWORKING [GENERAL]" so that only the dedicated list is CCed on patches, and networking gets them once already processed by Luiz. We missed include/net/bluetooth. Link: https://patch.msgid.link/20260521004151.625049-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 6aa3fe2ee1bb..e225c0d42775 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18632,6 +18632,7 @@ F: tools/testing/selftests/net/ X: Documentation/networking/mac80211-injection.rst X: Documentation/networking/mac80211_hwsim/ X: Documentation/networking/regulatory.rst +X: include/net/bluetooth/ X: include/net/cfg80211.h X: include/net/ieee80211_radiotap.h X: include/net/iw_handler.h -- cgit v1.2.3 From 85fac50b58ca0e96dc8bfa649705cb901400877f Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Thu, 21 May 2026 15:49:29 +0200 Subject: MAINTAINERS: Update address for Michael Grzeschik Since I am moving from Pengutronix update my email address for the ARCNET subsystems to point to my kernel.org address. Also update .mailmap. Signed-off-by: Michael Grzeschik Acked-by: Jakub Kicinski Acked-by: Markus Schneider-Pargmann Link: https://patch.msgid.link/20260521-maintainer-v1-1-29b5e106682d@kernel.org Signed-off-by: Jakub Kicinski --- .mailmap | 2 ++ MAINTAINERS | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index eec4a740f7ca..9eaba3570cff 100644 --- a/.mailmap +++ b/.mailmap @@ -584,6 +584,8 @@ Mayuresh Janorkar Md Sadre Alam Miaoqing Pan Michael Buesch +Michal Grzeschik +Michal Grzeschik Michael Riesch Michal Simek Michel Dänzer diff --git a/MAINTAINERS b/MAINTAINERS index e225c0d42775..455a9bf56b65 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2064,7 +2064,7 @@ F: Documentation/devicetree/bindings/display/snps,arcpgu.txt F: drivers/gpu/drm/tiny/arcpgu.c ARCNET NETWORK LAYER -M: Michael Grzeschik +M: Michael Grzeschik L: netdev@vger.kernel.org S: Maintained F: drivers/net/arcnet/ -- cgit v1.2.3 From 85686c72966c5ee637893f124ddb31a1cace7bee Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 19 May 2026 18:03:44 -0700 Subject: nvme-pci: fix dma_vecs leak on p2p memory We don't unmap P2P memory, so we don't need to track it. The dma_vec allocation was getting leaked on the completion. Fixes: b8b7570a7ec87 ("nvme-pci: fix dma unmapping when using PRPs and not using the IOVA mapping") Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 139a10cd687f..f423f1718439 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -966,7 +966,8 @@ static bool nvme_pci_prp_save_mapping(struct request *req, { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - if (dma_use_iova(&iod->dma_state) || !dma_need_unmap(dma_dev)) + if (dma_use_iova(&iod->dma_state) || !dma_need_unmap(dma_dev) || + (iod->flags & IOD_DATA_P2P)) return true; if (!iod->nr_dma_vecs) { -- cgit v1.2.3 From 1bf86336e4b6cf40873fda47a7fe191446864937 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 19 May 2026 13:01:57 -0700 Subject: nvme-pci: fix dma mapping leak on data setup error We're leaking the initial DMA mapping during iteration if we fail to allocate the tracking descriptor for both PRP and SGL. Unmap the iterator directly; we can't use the existing unmap helper because it depends on the tracking descriptor being successfully allocated, so a new one for an in-use iterator is provided. The mappings were also leaking when the driver detects an invalid bio_vec when mapping PRPs, so fix that too. Fixes: b8b7570a7ec87 ("nvme-pci: fix dma unmapping when using PRPs and not using the IOVA mapping") Fixes: 7ce3c1dd78fca ("nvme-pci: convert the data mapping to blk_rq_dma_map") Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f423f1718439..b5f846200678 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -997,6 +997,23 @@ static bool nvme_pci_prp_iter_next(struct request *req, struct device *dma_dev, return nvme_pci_prp_save_mapping(req, dma_dev, iter); } +static void nvme_unmap_iter(struct request *req, struct blk_dma_iter *iter, + struct dma_iova_state *state) +{ + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + struct device *dev = nvmeq->dev->dev; + + if (!blk_rq_dma_unmap(req, dev, state, iter->len, iter->p2pdma.map)) { + unsigned int attrs = 0; + + if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) + attrs |= DMA_ATTR_MMIO; + + dma_unmap_phys(dev, iter->addr, iter->len, rq_dma_dir(req), + attrs); + } +} + static blk_status_t nvme_pci_setup_data_prp(struct request *req, struct blk_dma_iter *iter) { @@ -1007,8 +1024,10 @@ static blk_status_t nvme_pci_setup_data_prp(struct request *req, unsigned int prp_len, i; __le64 *prp_list; - if (!nvme_pci_prp_save_mapping(req, nvmeq->dev->dev, iter)) + if (!nvme_pci_prp_save_mapping(req, nvmeq->dev->dev, iter)) { + nvme_unmap_iter(req, iter, &iod->dma_state); return iter->status; + } /* * PRP1 always points to the start of the DMA transfers. @@ -1113,6 +1132,7 @@ bad_sgl: dev_err_once(nvmeq->dev->dev, "Incorrectly formed request for payload:%d nents:%d\n", blk_rq_payload_bytes(req), blk_rq_nr_phys_segments(req)); + nvme_unmap_data(req); return BLK_STS_IOERR; } @@ -1156,8 +1176,11 @@ static blk_status_t nvme_pci_setup_data_sgl(struct request *req, sg_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC, &sgl_dma); - if (!sg_list) + if (!sg_list) { + nvme_unmap_iter(req, iter, &iod->dma_state); return BLK_STS_RESOURCE; + } + iod->descriptors[iod->nr_descriptors++] = sg_list; do { @@ -1314,8 +1337,10 @@ static blk_status_t nvme_pci_setup_meta_iter(struct request *req) sg_list = dma_pool_alloc(nvmeq->descriptor_pools.small, GFP_ATOMIC, &sgl_dma); - if (!sg_list) + if (!sg_list) { + nvme_unmap_iter(req, &iter, &iod->meta_dma_state); return BLK_STS_RESOURCE; + } iod->meta_descriptor = sg_list; iod->meta_dma = sgl_dma; -- cgit v1.2.3 From 07466fc91c55532edcfb5c6a7ccd2ea52728d6bd Mon Sep 17 00:00:00 2001 From: hlleng Date: Tue, 12 May 2026 09:57:37 +0800 Subject: HID: quirks: Add ALWAYS_POLL quirk for SIGMACHIP USB mouse The SIGMACHIP USB mouse with VID/PID 1c4f:0034 can disconnect and re-enumerate repeatedly after it has been enumerated if its interrupt endpoint is not continuously polled. This was observed with the device reporting itself as "SIGMACHIP Usb Mouse". Keeping the input event device open avoids the disconnects. Add HID_QUIRK_ALWAYS_POLL for this device so the HID core keeps polling it even when there is no userspace input consumer. Cc: stable@vger.kernel.org Signed-off-by: hlleng Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 4657d96fb083..426ff78c1c03 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1284,6 +1284,7 @@ #define USB_VENDOR_ID_SIGMA_MICRO 0x1c4f #define USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD 0x0002 +#define USB_DEVICE_ID_SIGMA_MICRO_USB_MOUSE 0x0034 #define USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD2 0x0059 #define USB_VENDOR_ID_SIGMATEL 0x066F diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 512049963978..57d8efdd9b89 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -187,6 +187,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SENNHEISER, USB_DEVICE_ID_SENNHEISER_BTD500USB), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_SIGMA_MICRO, USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD), HID_QUIRK_NO_INIT_REPORTS }, + { HID_USB_DEVICE(USB_VENDOR_ID_SIGMA_MICRO, USB_DEVICE_ID_SIGMA_MICRO_USB_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_SIGMATEL, USB_DEVICE_ID_SIGMATEL_STMP3780), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS1030_TOUCH), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS817_TOUCH), HID_QUIRK_NOGET }, -- cgit v1.2.3 From e6970cda63fd4b4546aeed9d0e2f53a7c95cd09c Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 13 May 2026 16:53:09 +0800 Subject: usb: cdns3: plat: fix leaked usb2_phy initialization on usb3_phy acquisition failure Move usb2_phy initialization after usb3_phy acquisition. Fixes: f738957277ba ("usb: cdns3: Split core.c into cdns3-plat and core.c file") Cc: stable Reported-by: sashiko-bot Closes: https://lore.kernel.org/linux-devicetree/agKaEePSFknhDBg2@nchen-desktop/T/#m21e1d9c1574eb127ce03c0c2a1a49002ce435b52 Signed-off-by: Peter Chen Link: https://patch.msgid.link/20260513085310.2217547-2-peter.chen@cixtech.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-plat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/cdns3/cdns3-plat.c b/drivers/usb/cdns3/cdns3-plat.c index 735df88774e4..d2e8d1e9007b 100644 --- a/drivers/usb/cdns3/cdns3-plat.c +++ b/drivers/usb/cdns3/cdns3-plat.c @@ -126,15 +126,15 @@ static int cdns3_plat_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(cdns->usb2_phy), "Failed to get cdn3,usb2-phy\n"); - ret = phy_init(cdns->usb2_phy); - if (ret) - return ret; - cdns->usb3_phy = devm_phy_optional_get(dev, "cdns3,usb3-phy"); if (IS_ERR(cdns->usb3_phy)) return dev_err_probe(dev, PTR_ERR(cdns->usb3_phy), "Failed to get cdn3,usb3-phy\n"); + ret = phy_init(cdns->usb2_phy); + if (ret) + return ret; + ret = phy_init(cdns->usb3_phy); if (ret) goto err_phy3_init; -- cgit v1.2.3 From ae6f3b82324e4f39ad8443c9020787e6fc889637 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 13 May 2026 16:53:10 +0800 Subject: usb: cdns3: plat: fix unbalanced pm_runtime_forbid() call permanently leaks the runtime PM usage counter across bind/unbind cycles Call pm_runtime_allow(dev) conditionally at cdns3_plat_remove. Fixes: f738957277ba ("usb: cdns3: Split core.c into cdns3-plat and core.c file") Cc: stable Reported-by: sashiko-bot Closes: https://lore.kernel.org/linux-devicetree/agKaEePSFknhDBg2@nchen-desktop/T/#m21e1d9c1574eb127ce03c0c2a1a49002ce435b52 Signed-off-by: Peter Chen Link: https://patch.msgid.link/20260513085310.2217547-3-peter.chen@cixtech.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-plat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/cdns3/cdns3-plat.c b/drivers/usb/cdns3/cdns3-plat.c index d2e8d1e9007b..94e9706a1806 100644 --- a/drivers/usb/cdns3/cdns3-plat.c +++ b/drivers/usb/cdns3/cdns3-plat.c @@ -186,6 +186,9 @@ static void cdns3_plat_remove(struct platform_device *pdev) struct device *dev = cdns->dev; pm_runtime_get_sync(dev); + if (!(cdns->pdata && (cdns->pdata->quirks & CDNS3_DEFAULT_PM_RUNTIME_ALLOW))) + pm_runtime_allow(dev); + pm_runtime_disable(dev); pm_runtime_put_noidle(dev); cdns_remove(cdns); -- cgit v1.2.3 From c8778ff817a7047d6848fefba99dcb27b1bf01fe Mon Sep 17 00:00:00 2001 From: Yongchao Wu Date: Thu, 14 May 2026 00:00:12 +0800 Subject: usb: cdns3: gadget: fix request skipping after clearing halt According to the cdns3 datasheet, the EPRST (Endpoint Reset) command causes the DMA engine to reposition its internal pointer to the next Transfer Descriptor (TD) if it was already processing one. This issue is consistently observed during the ADB identification process on macOS hosts, where the host issues a Clear_Halt. Although commit 4bf2dd65135a ("usb: cdns3: gadget: toggle cycle bit before reset endpoint") attempted to avoid DMA advance by toggling the cycle bit, trace logs show that on certain hosts like macOS, the DMA pointer (EP_TRADDR) still shifts after EPRST: cdns3_ctrl_req: Clear Endpoint Feature(Halt ep1out) cdns3_doorbell_epx: ep1out, ep_trbaddr f9c04030 <-- Should be f9c04000 cdns3_gadget_giveback: ep1out: req: ... length: 16384/16384 As shown above, the DMA pointer jumped to the next TD, causing the controller to skip the initial TRBs of the request. This leads to data misalignment and ADB protocol hangs on macOS. Fix this by manually restoring the EP_TRADDR register to the starting physical address of the current request after the EPRST operation is complete. Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Cc: stable Cc: Peter Chen Signed-off-by: Yongchao Wu Acked-by: Peter Chen Link: https://patch.msgid.link/20260513160012.2547894-1-yongchao.wu@autochips.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index 8382231af357..1db8db1b7cc3 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -2817,9 +2817,19 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) priv_ep->flags &= ~(EP_STALLED | EP_STALL_PENDING); if (request) { - if (trb) + if (trb) { *trb = trb_tmp; + /* + * Per datasheet, EPRST causes DMA to reposition to the next TD. + * Manually reset EP_TRADDR to the current TRB to prevent + * the hardware from skipping the interrupted request. + */ + writel(EP_TRADDR_TRADDR(priv_ep->trb_pool_dma + + priv_req->start_trb * TRB_SIZE), + &priv_dev->regs->ep_traddr); + } + cdns3_rearm_transfer(priv_ep, 1); } -- cgit v1.2.3 From c06e6cd488194e37ed4dc29d1488d1ffb760de60 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Wed, 29 Apr 2026 18:33:32 +0200 Subject: usb: typec: tcpm: improve handling of DISCOVER_MODES failures UGREEN USB-C Multifunction Adapter Model CM512 (AKA "Revodok 107") exposes two SVIDs: 0xff01 (DP Alt Mode) and 0x1d5c. The DISCOVER_MODES step succeeds for 0xff01 and gets a NAK for 0x1d5c. Currently this results in DP Alt Mode not being registered either, since the modes are only registered once all of them have been discovered. The NAK results in the processing being stopped and thus no Alt modes being registered. Improve the situation by handling the NAK gracefully and continue processing the other modes. Before this change, the TCPM log ends like this: (more log entries before this) [ 5.028287] AMS DISCOVER_SVIDS finished [ 5.028291] cc:=4 [ 5.040040] SVID 1: 0xff01 [ 5.040054] SVID 2: 0x1d5c [ 5.040082] AMS DISCOVER_MODES start [ 5.040096] PD TX, header: 0x1b6f [ 5.050946] PD TX complete, status: 0 [ 5.059609] PD RX, header: 0x264f [1] [ 5.059626] Rx VDM cmd 0xff018043 type 1 cmd 3 len 2 [ 5.059640] AMS DISCOVER_MODES finished [ 5.059644] cc:=4 [ 5.069994] Alternate mode 0: SVID 0xff01, VDO 1: 0x000c0045 [ 5.070029] AMS DISCOVER_MODES start [ 5.070043] PD TX, header: 0x1d6f [ 5.081139] PD TX complete, status: 0 [ 5.087498] PD RX, header: 0x184f [1] [ 5.087515] Rx VDM cmd 0x1d5c8083 type 2 cmd 3 len 1 [ 5.087529] AMS DISCOVER_MODES finished [ 5.087534] cc:=4 (no further log entries after this point) After this patch the TCPM log looks exactly the same, but then continues like this: [ 5.100222] Skip SVID 0x1d5c (failed to discover mode) [ 5.101699] AMS DFP_TO_UFP_ENTER_MODE start (log goes on as the system initializes DP AltMode) Cc: stable Fixes: 41d9d75344d9 ("usb: typec: tcpm: add discover svids and discover modes support for sop'") Reviewed-by: Heikki Krogerus Signed-off-by: Sebastian Reichel Reviewed-by: RD Babiera Reviewed-by: Badhri Jagan Sridharan Link: https://patch.msgid.link/20260429-tcpm-discover-modes-nak-fix-v4-1-75945d0ed30f@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 97 +++++++++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 36 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index ed5f745a8231..7ef746a90a17 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -2149,6 +2149,55 @@ static bool tcpm_cable_vdm_supported(struct tcpm_port *port) tcpm_can_communicate_sop_prime(port); } +static int tcpm_handle_discover_mode(struct tcpm_port *port, u32 *response, + enum tcpm_transmit_type rx_sop_type, + enum tcpm_transmit_type *response_tx_sop_type) +{ + struct typec_port *typec = port->typec_port; + struct pd_mode_data *modep; + + if (rx_sop_type == TCPC_TX_SOP) { + modep = &port->mode_data; + modep->svid_index++; + + if (modep->svid_index < modep->nsvids) { + u16 svid = modep->svids[modep->svid_index]; + *response_tx_sop_type = TCPC_TX_SOP; + response[0] = VDO(svid, 1, + typec_get_negotiated_svdm_version(typec), + CMD_DISCOVER_MODES); + return 1; + } + + if (tcpm_cable_vdm_supported(port)) { + *response_tx_sop_type = TCPC_TX_SOP_PRIME; + response[0] = VDO(USB_SID_PD, 1, + typec_get_cable_svdm_version(typec), + CMD_DISCOVER_SVID); + return 1; + } + + tcpm_register_partner_altmodes(port); + } else if (rx_sop_type == TCPC_TX_SOP_PRIME) { + modep = &port->mode_data_prime; + modep->svid_index++; + + if (modep->svid_index < modep->nsvids) { + u16 svid = modep->svids[modep->svid_index]; + *response_tx_sop_type = TCPC_TX_SOP_PRIME; + response[0] = VDO(svid, 1, + typec_get_cable_svdm_version(typec), + CMD_DISCOVER_MODES); + return 1; + } + + tcpm_register_plug_altmodes(port); + tcpm_register_partner_altmodes(port); + } + + return 0; +} + static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev, const u32 *p, int cnt, u32 *response, enum adev_actions *adev_action, @@ -2406,41 +2455,11 @@ static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev, } break; case CMD_DISCOVER_MODES: - if (rx_sop_type == TCPC_TX_SOP) { - /* 6.4.4.3.3 */ - svdm_consume_modes(port, p, cnt, rx_sop_type); - modep->svid_index++; - if (modep->svid_index < modep->nsvids) { - u16 svid = modep->svids[modep->svid_index]; - *response_tx_sop_type = TCPC_TX_SOP; - response[0] = VDO(svid, 1, svdm_version, - CMD_DISCOVER_MODES); - rlen = 1; - } else if (tcpm_cable_vdm_supported(port)) { - *response_tx_sop_type = TCPC_TX_SOP_PRIME; - response[0] = VDO(USB_SID_PD, 1, - typec_get_cable_svdm_version(typec), - CMD_DISCOVER_SVID); - rlen = 1; - } else { - tcpm_register_partner_altmodes(port); - } - } else if (rx_sop_type == TCPC_TX_SOP_PRIME) { - /* 6.4.4.3.3 */ - svdm_consume_modes(port, p, cnt, rx_sop_type); - modep_prime->svid_index++; - if (modep_prime->svid_index < modep_prime->nsvids) { - u16 svid = modep_prime->svids[modep_prime->svid_index]; - *response_tx_sop_type = TCPC_TX_SOP_PRIME; - response[0] = VDO(svid, 1, - typec_get_cable_svdm_version(typec), - CMD_DISCOVER_MODES); - rlen = 1; - } else { - tcpm_register_plug_altmodes(port); - tcpm_register_partner_altmodes(port); - } - } + /* 6.4.4.3.3 */ + svdm_consume_modes(port, p, cnt, rx_sop_type); + rlen = tcpm_handle_discover_mode(port, response, + rx_sop_type, + response_tx_sop_type); break; case CMD_ENTER_MODE: *response_tx_sop_type = rx_sop_type; @@ -2483,9 +2502,15 @@ static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev, switch (cmd) { case CMD_DISCOVER_IDENT: case CMD_DISCOVER_SVID: - case CMD_DISCOVER_MODES: case VDO_CMD_VENDOR(0) ... VDO_CMD_VENDOR(15): break; + case CMD_DISCOVER_MODES: + tcpm_log(port, "Skip SVID 0x%04x (failed to discover mode)", + PD_VDO_SVID_SVID0(p[0])); + rlen = tcpm_handle_discover_mode(port, response, + rx_sop_type, + response_tx_sop_type); + break; case CMD_ENTER_MODE: /* Back to USB Operation */ *adev_action = ADEV_NOTIFY_USB_AND_QUEUE_VDM; -- cgit v1.2.3 From b80e7d34c7ea6a564525119d6138fbb577a23dba Mon Sep 17 00:00:00 2001 From: Myrrh Periwinkle Date: Tue, 19 May 2026 18:41:39 +0700 Subject: usb: typec: ucsi: Check if power role change actually happened before handling The CrOS EC may send a connector status change event with the power direction changed flag set even if the power direction hasn't actually changed after initiating a SET_PDR command internally [1]. In practice this happens on every system suspend due to other changes performed by the EC [2][3][4], causing suspend to fail. Fix this by checking if the power role change actually happened before handling it. [1]: https://source.chromium.org/chromiumos/chromiumos/codesearch/+/main:src/platform/ec/zephyr/subsys/pd_controller/pdc_power_mgmt.c;l=1689;drc=2d5a1cffce4e5ac8a39442cb3b764d2d5e1cf794 [2]: https://source.chromium.org/chromiumos/chromiumos/codesearch/+/main:src/platform/ec/zephyr/subsys/pd_controller/pdc_power_mgmt.c;l=3923;drc=2d5a1cffce4e5ac8a39442cb3b764d2d5e1cf794 [3]: https://source.chromium.org/chromiumos/chromiumos/codesearch/+/main:src/platform/ec/zephyr/subsys/pd_controller/pdc_power_mgmt.c;l=5094;drc=2d5a1cffce4e5ac8a39442cb3b764d2d5e1cf794 [4]: https://source.chromium.org/chromiumos/chromiumos/codesearch/+/main:src/platform/ec/zephyr/subsys/pd_controller/pdc_power_mgmt.c;l=2229;drc=2d5a1cffce4e5ac8a39442cb3b764d2d5e1cf794 Cc: stable Fixes: 7616f006db07 ("usb: typec: ucsi: Update power_supply on power role change") Signed-off-by: Myrrh Periwinkle Reported-and-tested-by: Sergey Senozhatsky Reviewed-by: Heikki Krogerus Link: https://patch.msgid.link/20260519-ucsi-fix-2-v1-1-6f1239535187@qtmlabs.xyz Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 539dc706798d..53c101bd48e2 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1277,7 +1277,7 @@ static void ucsi_handle_connector_change(struct work_struct *work) work); struct ucsi *ucsi = con->ucsi; u8 curr_scale, volt_scale; - enum typec_role role; + enum typec_role role, prev_role; u16 change; int ret; u32 val; @@ -1288,6 +1288,8 @@ static void ucsi_handle_connector_change(struct work_struct *work) dev_err_once(ucsi->dev, "%s entered without EVENT_PENDING\n", __func__); + prev_role = UCSI_CONSTAT(con, PWR_DIR); + ret = ucsi_get_connector_status(con, true); if (ret) { dev_err(ucsi->dev, "%s: GET_CONNECTOR_STATUS failed (%d)\n", @@ -1304,7 +1306,7 @@ static void ucsi_handle_connector_change(struct work_struct *work) change = UCSI_CONSTAT(con, CHANGE); role = UCSI_CONSTAT(con, PWR_DIR); - if (change & UCSI_CONSTAT_POWER_DIR_CHANGE) { + if ((change & UCSI_CONSTAT_POWER_DIR_CHANGE) && role != prev_role) { typec_set_pwr_role(con->port, role); ucsi_port_psy_changed(con); -- cgit v1.2.3 From d98d413ca65d0790a8f3695d0a5845538958ab84 Mon Sep 17 00:00:00 2001 From: Myrrh Periwinkle Date: Tue, 19 May 2026 18:41:40 +0700 Subject: usb: typec: ucsi: Don't update power_supply on power role change if not connected We only need to update the power_supply on power role change if the port is connected, because otherwise the online status should be the same for both cases. Cc: stable Fixes: 7616f006db07 ("usb: typec: ucsi: Update power_supply on power role change") Signed-off-by: Myrrh Periwinkle Reported-and-tested-by: Sergey Senozhatsky Link: https://patch.msgid.link/20260519-ucsi-fix-2-v1-2-6f1239535187@qtmlabs.xyz Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 53c101bd48e2..61cb24ed820f 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1308,7 +1308,12 @@ static void ucsi_handle_connector_change(struct work_struct *work) if ((change & UCSI_CONSTAT_POWER_DIR_CHANGE) && role != prev_role) { typec_set_pwr_role(con->port, role); - ucsi_port_psy_changed(con); + + /* Some power_supply properties vary depending on the power direction when + * connected + */ + if (UCSI_CONSTAT(con, CONNECTED)) + ucsi_port_psy_changed(con); /* Complete pending power role swap */ if (!completion_done(&con->complete)) -- cgit v1.2.3 From 373452ac0649846431ca0f88574a2fa6382d2045 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Wed, 20 May 2026 23:08:30 +0100 Subject: KVM: arm64: Fix CONFIG_PKVM_DISABLE_STAGE2_ON_PANIC A typo in the config guard in __hyp_do_panic broke the stage-2 disabling and made backtraces for pKVM quite unreliable. Fix that typo. Fixes: 9019e82c7e46 ("KVM: arm64: Add PKVM_DISABLE_STAGE2_ON_PANIC") Signed-off-by: Vincent Donnefort Link: https://patch.msgid.link/20260520220830.273289-1-vdonnefort@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/host.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index f337770ec459..9393fe3ea6a1 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -120,7 +120,7 @@ SYM_FUNC_START(__hyp_do_panic) mov x29, x0 -#ifdef PKVM_DISABLE_STAGE2_ON_PANIC +#ifdef CONFIG_PKVM_DISABLE_STAGE2_ON_PANIC /* Ensure host stage-2 is disabled */ mrs x0, hcr_el2 bic x0, x0, #HCR_VM -- cgit v1.2.3 From c5d93b2c40355e999715262a824965aac025a427 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Tue, 19 May 2026 11:57:39 +0530 Subject: net: wwan: iosm: fix potential memory leaks in ipc_imem_init() The memory allocated in ipc_protocol_init() is not freed on the error paths that follow in ipc_imem_init(). Fix that by calling the corresponding release function ipc_protocol_deinit() in the error path. Fixes: 3670970dd8c6 ("net: iosm: shared memory IPC interface") Cc: stable@vger.kernel.org Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20260519062815.55545-1-nihaal@cse.iitm.ac.in Signed-off-by: Jakub Kicinski --- drivers/net/wwan/iosm/iosm_ipc_imem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c index 1b7bc7d63a2e..4405c8531888 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c @@ -1425,6 +1425,8 @@ imem_config_fail: protocol_init_fail: cancel_work_sync(&ipc_imem->run_state_worker); ipc_task_deinit(ipc_imem->ipc_task); + if (ipc_imem->ipc_protocol) + ipc_protocol_deinit(ipc_imem->ipc_protocol); ipc_task_init_fail: kfree(ipc_imem->ipc_task); ipc_task_fail: -- cgit v1.2.3 From c367b9082194d01cb38bdefac6e887ebf1ab017d Mon Sep 17 00:00:00 2001 From: Zhang Cen Date: Tue, 19 May 2026 18:46:47 +0800 Subject: netpoll: normalize skb->dev to the netpoll device __netpoll_send_skb() always transmits through np->dev and queues busy packets on np->dev->npinfo->txq, but it leaves skb->dev unchanged. Stacked callers such as DSA and macvlan can reach netpoll with skb->dev still naming the upper device while np->dev is the lower device that owns the netpoll state. If the skb has to be deferred, queue_process() later dequeues it from the lower device's txq but retries it through skb->dev. That can re-enter the upper ndo_start_xmit path on an already transformed skb, and if the upper device disappears before the lower txq drains the workqueue can dereference a stale skb->dev pointer. The buggy scenario involves two paths, with each column showing the order within that path: path A label: netpoll enqueue path path B label: upper-device teardown 1. Stacked xmit calls netpoll 1. Teardown unregisters the upper with lower np->dev and upper net_device while lower npinfo skb->dev. stays alive. 2. __netpoll_send_skb() uses 2. netdev_release() runs for the np->dev->npinfo as the txq upper net_device. owner. 3. Busy transmit queues the skb 3. The lower txq still owns the on that lower txq with upper deferred skb. skb->dev. 4. queue_process() drains the 4. queue_process() dereferences lower txq and reads skb->dev. that stale upper skb->dev. Normalize skb->dev to np->dev after loading np->dev from the netpoll instance, before either the direct transmit path or the fallback enqueue. This keeps the queued skb in the same device and txq domain as the netpoll state that owns it. KASAN report as below: KASAN slab-use-after-free in queue_process+0x7c/0x480 Workqueue: events queue_process The buggy address belongs to the object at ffff88810906c000 which belongs to the cache kmalloc-4k of size 4096 The buggy address is located 168 bytes inside of freed 4096-byte region [ffff88810906c000, ffff88810906d000) Read of size 8 Call trace: dump_stack_lvl+0x73/0xb0 (?:?) print_report+0xd1/0x620 (?:?) srso_alias_return_thunk+0x5/0xfbef5 (?:?) __virt_addr_valid+0x215/0x420 (?:?) kasan_complete_mode_report_info+0x64/0x200 (?:?) kasan_report+0xf7/0x130 (?:?) queue_process+0x7c/0x480 (net/core/netpoll.c:88) kasan_check_range+0x10c/0x1c0 (?:?) __kasan_check_read+0x15/0x20 (?:?) process_one_work+0x8b7/0x1af0 (kernel/workqueue.c:3200) assign_work+0x170/0x3f0 (?:?) worker_thread+0x574/0xf10 (?:?) _raw_spin_unlock_irqrestore+0x4b/0x60 (?:?) trace_hardirqs_on+0x2a/0x180 (?:?) kthread+0x2fc/0x3f0 (?:?) ret_from_fork+0x58b/0x830 (?:?) __switch_to+0x58e/0xe90 (?:?) __switch_to_asm+0x39/0x70 (?:?) ret_from_fork_asm+0x1a/0x30 (?:?) Freed by task stack: kasan_save_stack+0x3d/0x60 (?:?) kasan_save_track+0x18/0x40 (?:?) kasan_save_free_info+0x3f/0x60 (?:?) __kasan_slab_free+0x48/0x70 (?:?) kfree+0x20e/0x4e0 (?:?) kvfree+0x31/0x40 (?:?) netdev_release+0x71/0x90 (net/core/net-sysfs.c:2227) device_release+0xd2/0x250 (?:?) kobject_put+0x181/0x4c0 (lib/kobject.c:730) netdev_run_todo+0x700/0x1000 (net/core/dev.c:11666) rtnl_dellink+0x396/0xc00 (net/core/rtnetlink.c:3558) rtnetlink_rcv_msg+0x740/0xc20 (net/core/rtnetlink.c:6897) netlink_rcv_skb+0x147/0x3a0 (?:?) rtnetlink_rcv+0x19/0x20 (net/core/rtnetlink.c:7021) netlink_unicast+0x4d1/0x830 (net/netlink/af_netlink.c:1327) netlink_sendmsg+0x840/0xe10 (net/netlink/af_netlink.c:1812) ____sys_sendmsg+0x8a7/0xb50 (?:?) ___sys_sendmsg+0x104/0x190 (?:?) __sys_sendmsg+0x135/0x1d0 (?:?) __x64_sys_sendmsg+0x7b/0xc0 (?:?) x64_sys_call+0x205c/0x2130 (?:?) do_syscall_64+0x115/0x6a0 (arch/x86/entry/syscall_64.c:87) entry_SYSCALL_64_after_hwframe+0x77/0x7f (?:?) Fixes: 5de4a473bda4 ("netpoll queue cleanup") Signed-off-by: Zhang Cen Link: https://patch.msgid.link/20260519104647.3517990-1-rollkingzzc@gmail.com Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 84faace50ac2..3f4a17fa5713 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -319,6 +319,8 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) lockdep_assert_irqs_disabled(); dev = np->dev; + /* npinfo->txq belongs to np->dev, so retries must stay bound to it. */ + skb->dev = dev; rcu_read_lock(); npinfo = rcu_dereference_bh(dev->npinfo); -- cgit v1.2.3 From c7ee0b73c8c4dfb7eafa49aaef5247890862a948 Mon Sep 17 00:00:00 2001 From: Kean Date: Thu, 14 May 2026 20:58:38 +0800 Subject: HID: lenovo: Fix buffer over-read and unaligned access in X12 Tab raw_event handler In lenovo_raw_event(), the X12 Tab keyboard handler reads a 4-byte little-endian value from the raw HID report buffer but: 1. The size guard is size >= 3, while the access reads 4 bytes. A malformed 3-byte report with ID 0x03 would over-read the buffer by one byte. 2. Casting u8 *data directly to __le32 * can trigger unaligned access faults on architectures like ARM, MIPS, and SPARC, because HID input buffers carry no alignment guarantee. (e.g. uhid payloads start at offset 6 in struct uhid_event, giving only 2-byte alignment.) Fix both by tightening the size check to >= 4 and replacing the open-coded cast + le32_to_cpu() with get_unaligned_le32(), which handles the LE-to-CPU conversion safely regardless of alignment. Link: https://sashiko.dev/#/message/20260512044911.99B6DC2BCB0%40smtp.kernel.org Assisted-by: CLAUDE:claude-4-sonnet Signed-off-by: Kean Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-lenovo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index a6b73e03c16b..c11957ae8b77 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "hid-ids.h" @@ -793,8 +794,8 @@ static int lenovo_raw_event(struct hid_device *hdev, */ if (unlikely((hdev->product == USB_DEVICE_ID_LENOVO_X12_TAB || hdev->product == USB_DEVICE_ID_LENOVO_X12_TAB2) - && size >= 3 && report->id == 0x03)) - return lenovo_raw_event_TP_X12_tab(hdev, le32_to_cpu(*(__le32 *)data)); + && size >= 4 && report->id == 0x03)) + return lenovo_raw_event_TP_X12_tab(hdev, get_unaligned_le32(data)); return 0; } -- cgit v1.2.3 From 2ee7e632405b022319f42c01635eb6fbbd86414a Mon Sep 17 00:00:00 2001 From: Louis Clinckx Date: Fri, 15 May 2026 14:57:39 +0000 Subject: HID: lenovo-go: reject non-USB transports in probe These drivers only match HID_USB_DEVICE() entries and assume the underlying bus is USB. Make that explicit at probe by rejecting any non-USB hdev, following the pattern used by other HID drivers. Signed-off-by: Louis Clinckx Reviewed-by: Derek J. Clark Tested-by: Derek J. Clark Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-lenovo-go-s.c | 3 +++ drivers/hid/hid-lenovo-go.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-lenovo-go-s.c b/drivers/hid/hid-lenovo-go-s.c index ff1782a75191..0444d84498bd 100644 --- a/drivers/hid/hid-lenovo-go-s.c +++ b/drivers/hid/hid-lenovo-go-s.c @@ -1461,6 +1461,9 @@ static int hid_gos_probe(struct hid_device *hdev, { int ret, ep; + if (!hid_is_usb(hdev)) + return -EINVAL; + ret = hid_parse(hdev); if (ret) { hid_err(hdev, "Parse failed\n"); diff --git a/drivers/hid/hid-lenovo-go.c b/drivers/hid/hid-lenovo-go.c index d4d26c783356..3fa1fe83f7e5 100644 --- a/drivers/hid/hid-lenovo-go.c +++ b/drivers/hid/hid-lenovo-go.c @@ -2419,6 +2419,9 @@ static int hid_go_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret, ep; + if (!hid_is_usb(hdev)) + return -EINVAL; + hdev->quirks |= HID_QUIRK_INPUT_PER_APP | HID_QUIRK_MULTI_INPUT; ret = hid_parse(hdev); -- cgit v1.2.3 From da7f96a68c39de9eb1c351a261e7fbf716375c91 Mon Sep 17 00:00:00 2001 From: Louis Clinckx Date: Fri, 15 May 2026 14:57:40 +0000 Subject: HID: lenovo-go: drop dead NULL check on to_usb_interface() to_usb_interface() is a container_of_const() macro: it performs pointer arithmetic and never returns NULL. The if (!intf) and if (intf) tests in get_endpoint_address() can never fire. Remove them in both drivers. No functional change. Suggested-by: Derek J. Clark Signed-off-by: Louis Clinckx Reviewed-by: Derek J. Clark Tested-by: Derek J. Clark Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-lenovo-go-s.c | 8 +++----- drivers/hid/hid-lenovo-go.c | 3 --- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/hid/hid-lenovo-go-s.c b/drivers/hid/hid-lenovo-go-s.c index 0444d84498bd..a72f7f748cb5 100644 --- a/drivers/hid/hid-lenovo-go-s.c +++ b/drivers/hid/hid-lenovo-go-s.c @@ -382,11 +382,9 @@ static int get_endpoint_address(struct hid_device *hdev) struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_host_endpoint *ep; - if (intf) { - ep = intf->cur_altsetting->endpoint; - if (ep) - return ep->desc.bEndpointAddress; - } + ep = intf->cur_altsetting->endpoint; + if (ep) + return ep->desc.bEndpointAddress; return -ENODEV; } diff --git a/drivers/hid/hid-lenovo-go.c b/drivers/hid/hid-lenovo-go.c index 3fa1fe83f7e5..e0c9d5ec9451 100644 --- a/drivers/hid/hid-lenovo-go.c +++ b/drivers/hid/hid-lenovo-go.c @@ -641,9 +641,6 @@ static int get_endpoint_address(struct hid_device *hdev) struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_host_endpoint *ep; - if (!intf) - return -ENODEV; - ep = intf->cur_altsetting->endpoint; if (!ep) return -ENODEV; -- cgit v1.2.3 From 9eddc819f00b5b74bb4ac91396f80bd35f5f3561 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 20 May 2026 10:00:36 +0530 Subject: octeontx2-af: npc: Fix allmulticast skip logic for LBK and SDP VFs When installing the allmulticast NPC rule, rvu_npc_install_allmulti_entry() should skip LBK and SDP VFs (only CGX PF/VF may add the entry). The code combined is_lbk_vf() and is_sdp_vf() with logical AND, which is never true for a single pcifunc, so the intended early return never ran. Use logical OR instead. Cc: Geetha sowjanya Fixes: ae703539f49d2 ("octeontx2-af: Cleanup loopback device checks") Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260520043036.1523798-1-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 3c814d157ab9..607d0cf1a778 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -990,7 +990,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u16 vf_func; /* Only CGX PF/VF can add allmulticast entry */ - if (is_lbk_vf(rvu, pcifunc) && is_sdp_vf(rvu, pcifunc)) + if (is_lbk_vf(rvu, pcifunc) || is_sdp_vf(rvu, pcifunc)) return; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); -- cgit v1.2.3 From b809d0409991b75a6cff846a5ac27c3062953f84 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Tue, 19 May 2026 22:15:53 -0700 Subject: net: mana: validate rx_req_idx to prevent out-of-bounds array access In mana_hwc_rx_event_handler(), rx_req_idx is derived from sge->address in DMA-coherent memory. In Confidential VMs (SEV-SNP/TDX), this memory is shared unencrypted and HW can modify WQE contents at any time. No bounds check exists on rx_req_idx, which can lead to an out-of-bounds access into reqs[]. Add bounds check on rx_req_idx in mana_hwc_rx_event_handler() before using it to index the reqs[] array. Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") Signed-off-by: Aditya Garg Reviewed-by: Haiyang Zhang Link: https://patch.msgid.link/20260520051553.857120-1-gargaditya@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/hw_channel.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index fd8b324d7fb6..e3c24d50dad0 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -265,6 +265,12 @@ static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id, rq_base_addr = hwc_rxq->msg_buf->mem_info.dma_handle; rx_req_idx = (sge->address - rq_base_addr) / hwc->max_req_msg_size; + if (rx_req_idx >= hwc_rxq->msg_buf->num_reqs) { + dev_err(hwc->dev, "HWC RX: wrong rx_req_idx=%llu, num_reqs=%u\n", + rx_req_idx, hwc_rxq->msg_buf->num_reqs); + return; + } + rx_req = &hwc_rxq->msg_buf->reqs[rx_req_idx]; resp = (struct gdma_resp_hdr *)rx_req->buf_va; -- cgit v1.2.3 From 2bccfb8476ca5f3548afbd623dc7a6980d4e77de Mon Sep 17 00:00:00 2001 From: Dawei Feng Date: Wed, 20 May 2026 15:03:23 +0800 Subject: qed: fix double free in qed_cxt_tables_alloc() If one of the later PF or VF CID bitmap allocations fails, qed_cid_map_alloc() jumps to cid_map_fail and frees the previously allocated CID bitmaps before returning an error. qed_cxt_tables_alloc() then calls qed_cxt_mngr_free(), which invokes qed_cid_map_free() again. Fix this by setting each CID bitmap pointer to NULL after bitmap_free() to avoid double free. The bug was first flagged by an experimental analysis tool we are developing for kernel memory-management bugs while analyzing v6.13-rc1. The tool is still under development and is not yet publicly available. Manual inspection confirms that the bug is still present in v7.1-rc3. Runtime reproduction was not attempted because exercising the failing allocation path requires device-specific setup. Fixes: fe56b9e6a8d9 ("qed: Add module with basic common support") Cc: stable@vger.kernel.org Signed-off-by: Zilin Guan Signed-off-by: Dawei Feng Link: https://patch.msgid.link/20260520070323.2762379-1-dawei.feng@seu.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 9861daa82d9e..b70262e70baf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -1036,11 +1036,13 @@ static void qed_cid_map_free(struct qed_hwfn *p_hwfn) for (type = 0; type < MAX_CONN_TYPES; type++) { bitmap_free(p_mngr->acquired[type].cid_map); + p_mngr->acquired[type].cid_map = NULL; p_mngr->acquired[type].max_count = 0; p_mngr->acquired[type].start_cid = 0; for (vf = 0; vf < MAX_NUM_VFS; vf++) { bitmap_free(p_mngr->acquired_vf[type][vf].cid_map); + p_mngr->acquired_vf[type][vf].cid_map = NULL; p_mngr->acquired_vf[type][vf].max_count = 0; p_mngr->acquired_vf[type][vf].start_cid = 0; } -- cgit v1.2.3 From bddc09212c24934643bd44fc794748d2bbb3b6cd Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Wed, 20 May 2026 00:57:38 -0700 Subject: tap: fix stack info leak in tap_ioctl() SIOCGIFHWADDR In the SIOCGIFHWADDR path, tap_ioctl() copies 16 bytes of an uninitialised on-stack struct sockaddr_storage to userspace via ifr_hwaddr, but netif_get_mac_address() only writes sa_family and dev->addr_len (6 for Ethernet) bytes, leaving sa_data[6..13] uninitialised. Those 8 trailing bytes leak kernel stack contents; SIOCGIFHWADDR on a macvtap chardev returns kernel .text and direct-map pointers, defeating KASLR. Initialise ss at declaration. Fixes: 3b23a32a6321 ("net: fix dev_ifsioc_locked() race condition") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260520075736.3415676-3-bestswngs@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/tap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index b8240737dc51..a590e07ce0a9 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -919,11 +919,11 @@ static long tap_ioctl(struct file *file, unsigned int cmd, struct tap_queue *q = file->private_data; struct tap_dev *tap; void __user *argp = (void __user *)arg; + struct sockaddr_storage ss = {}; struct ifreq __user *ifr = argp; unsigned int __user *up = argp; unsigned short u; int __user *sp = argp; - struct sockaddr_storage ss; int s; int ret; -- cgit v1.2.3 From e46e6bc97fb1f339730ff1ba74267fbf48e7a422 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Wed, 20 May 2026 14:42:42 +0200 Subject: ipv6: ioam: refresh hdr pointer before ioam6_event() Reported by Sashiko: In ipv6_hop_ioam(), the hdr pointer is initialized to point into the skb's linear data buffer. Later, the code calls skb_ensure_writable(), which might reallocate the buffer: if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) goto drop; /* Trace pointer may have changed */ trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) + optoff + sizeof(*hdr)); ioam6_fill_trace_data(skb, ns, trace, true); ioam6_event(IOAM6_EVENT_TRACE, dev_net(skb->dev), GFP_ATOMIC, (void *)trace, hdr->opt_len - 2); If the skb is cloned or lacks sufficient linear headroom, skb_ensure_writable() will invoke pskb_expand_head(), which reallocates the skb's data buffer and frees the old one, invalidating pointers to it. While the code recalculates the trace pointer immediately after the call to skb_ensure_writable(), it fails to recalculate the hdr pointer. This patch fixes the above by recalculating the hdr pointer before passing hdr->opt_len to ioam6_event(), so that we avoid any UaF. Fixes: f655c78d6225 ("net: exthdrs: ioam6: send trace event") Cc: stable@vger.kernel.org Signed-off-by: Justin Iurman Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260520124242.32320-1-justin.iurman@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/exthdrs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 47c5502a34a2..cf90f933ca1a 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -966,9 +966,9 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) goto drop; - /* Trace pointer may have changed */ - trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) - + optoff + sizeof(*hdr)); + /* Trace and hdr pointers may have changed */ + hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff); + trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr)); ioam6_fill_trace_data(skb, ns, trace, true); -- cgit v1.2.3 From 985d4a55e64e43bd86eeb896b81ceba453301989 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 20 May 2026 15:12:02 +0200 Subject: net: airoha: Disable GDM2 forwarding before configuring GDM2 loopback Hw design requires to disable GDM2 forwarding before configuring GDM2 loopback in airoha_set_gdm2_loopback routine. Fixes: 9cd451d414f6e ("net: airoha: Add loopback support for GDM2") Tested-by: Madhur Agrawal Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260520-airoha-disable-gdm2-fwd-v1-1-1eeea5dffc2f@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index d0c0c0ec8a80..cecd66251dba 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1793,11 +1793,8 @@ static int airoha_set_gdm2_loopback(struct airoha_gdm_port *port) u32 val, pse_port, chan; int i, src_port; - /* Forward the traffic to the proper GDM port */ - pse_port = port->id == AIROHA_GDM3_IDX ? FE_PSE_PORT_GDM3 - : FE_PSE_PORT_GDM4; airoha_set_gdm_port_fwd_cfg(eth, REG_GDM_FWD_CFG(AIROHA_GDM2_IDX), - pse_port); + FE_PSE_PORT_DROP); airoha_fe_clear(eth, REG_GDM_FWD_CFG(AIROHA_GDM2_IDX), GDM_STRIP_CRC_MASK); @@ -1815,6 +1812,11 @@ static int airoha_set_gdm2_loopback(struct airoha_gdm_port *port) GDM_SHORT_LEN_MASK | GDM_LONG_LEN_MASK, FIELD_PREP(GDM_SHORT_LEN_MASK, 60) | FIELD_PREP(GDM_LONG_LEN_MASK, AIROHA_MAX_MTU)); + /* Forward the traffic to the proper GDM port */ + pse_port = port->id == AIROHA_GDM3_IDX ? FE_PSE_PORT_GDM3 + : FE_PSE_PORT_GDM4; + airoha_set_gdm_port_fwd_cfg(eth, REG_GDM_FWD_CFG(AIROHA_GDM2_IDX), + pse_port); /* Disable VIP and IFC for GDM2 */ airoha_fe_clear(eth, REG_FE_VIP_PORT_EN, BIT(AIROHA_GDM2_IDX)); -- cgit v1.2.3 From 3d4432d34c1992701289cbe12df9fd024f315998 Mon Sep 17 00:00:00 2001 From: "Nikhil P. Rao" Date: Wed, 20 May 2026 20:58:42 +0000 Subject: pds_core: ensure null-termination for firmware version strings The driver passes fw_version directly to devlink_info_version_stored_put() without ensuring null-termination. While current firmware null-terminates these strings, the driver should not rely on this behavior. Add explicit null-termination to prevent potential issues if firmware behavior changes. Fixes: 45d76f492938 ("pds_core: set up device and adminq") Signed-off-by: Nikhil P. Rao Link: https://patch.msgid.link/20260520205842.1486718-1-nikhil.rao@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/devlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index b576be626a29..3f0e56b951bf 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -122,12 +122,14 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, listlen = min(fw_list.num_fw_slots, ARRAY_SIZE(fw_list.fw_names)); for (i = 0; i < listlen; i++) { + char *fw_ver = fw_list.fw_names[i].fw_version; + if (i < ARRAY_SIZE(fw_slotnames)) strscpy(buf, fw_slotnames[i], sizeof(buf)); else snprintf(buf, sizeof(buf), "fw.slot_%d", i); - err = devlink_info_version_stored_put(req, buf, - fw_list.fw_names[i].fw_version); + fw_ver[sizeof(fw_list.fw_names[i].fw_version) - 1] = '\0'; + err = devlink_info_version_stored_put(req, buf, fw_ver); if (err) return err; } -- cgit v1.2.3 From 4db79a322db8c97f7b73b8a347395ef4d685eb40 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 20 May 2026 22:44:42 +0200 Subject: net: gro: don't merge zcopy skbs skb_gro_receive() can currently copy frags between the source and GRO skb, without checking the zerocopy status, and in particular the SKBFL_MANAGED_FRAG_REFS flag. When SKBFL_MANAGED_FRAG_REFS is set, the skb doesn't hold a reference on the pages in shinfo->frags. Appending those frags to another skb's frags without fixing up the page refcount can lead to UAF. When either the last skb in the GRO chain (the one we would append frags to) or the source skb is zerocopy, don't merge the skbs. Fixes: 753f1ca4e1e5 ("net: introduce managed frags infrastructure") Reported-by: Huzaifa Sidhpurwala Signed-off-by: Sabrina Dubroca Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/c3b7f906bbfcbdfd7b4fa9d6c18a438870df85be.1779307748.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/core/gro.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/gro.c b/net/core/gro.c index 9f8960789b2c..a84753983467 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -109,6 +109,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) if (p->pp_recycle != skb->pp_recycle) return -ETOOMANYREFS; + if (skb_zcopy(p) || skb_zcopy(skb)) + return -ETOOMANYREFS; + if (unlikely(p->len + len >= netif_get_gro_max_size(p->dev, p) || NAPI_GRO_CB(skb)->flush)) return -E2BIG; -- cgit v1.2.3 From 3287e81292f49dca2f253113c458e8f3d4ea091b Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 20 May 2026 19:22:37 +0200 Subject: tools: ynl: support listening on all nsids A new method ntf_listen_all_nsid() to enable listening on events from all namespaces. Useful for testing cross-namespace functionality. recv() replaced with recvmsg() to be able to receive NSID through the ancillary data. Signed-off-by: Ilya Maximets Link: https://patch.msgid.link/20260520172317.175168-4-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/lib/ynl.py | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index f63c6f828735..010aac0c6c67 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -42,6 +42,7 @@ class Netlink: SOL_NETLINK = 270 NETLINK_ADD_MEMBERSHIP = 1 + NETLINK_LISTEN_ALL_NSID = 8 NETLINK_CAP_ACK = 10 NETLINK_EXT_ACK = 11 NETLINK_GET_STRICT_CHK = 12 @@ -680,6 +681,7 @@ class YnlFamily(SpecFamily): Notification API: ynl.ntf_subscribe(mcast_name) -- join a multicast group + ynl.ntf_listen_all_nsid() -- listen on all netns ynl.check_ntf() -- drain pending notifications ynl.poll_ntf(duration=None) -- yield notifications @@ -748,6 +750,23 @@ class YnlFamily(SpecFamily): self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_ADD_MEMBERSHIP, mcast_id) + def ntf_listen_all_nsid(self): + """Enable NETLINK_LISTEN_ALL_NSID to receive notifications from all + namespaces that have an nsid mapped in the current one.""" + self.sock.setsockopt(Netlink.SOL_NETLINK, + Netlink.NETLINK_LISTEN_ALL_NSID, 1) + + @staticmethod + def _decode_nsid(ancdata): + for cmsg_level, cmsg_type, cmsg_data in ancdata: + if (cmsg_level == Netlink.SOL_NETLINK and + cmsg_type == Netlink.NETLINK_LISTEN_ALL_NSID): + nsid = struct.unpack('i', cmsg_data)[0] + if nsid >= 0: + return nsid + return None + return None + def set_recv_dbg(self, enabled): self._recv_dbg = enabled @@ -1235,7 +1254,7 @@ class YnlFamily(SpecFamily): f" when parsing '{attr_spec['name']}'") return raw - def handle_ntf(self, decoded): + def handle_ntf(self, decoded, nsid=None): msg = {} if self.include_raw: msg['raw'] = decoded @@ -1246,15 +1265,22 @@ class YnlFamily(SpecFamily): msg['name'] = op['name'] msg['msg'] = attrs + if nsid is not None: + msg['nsid'] = nsid self.async_msg_queue.put(msg) + def _recvmsg(self, flags=0): + reply, ancdata, _, _ = self.sock.recvmsg(self._recv_size, 4096, flags) + return reply, ancdata + def check_ntf(self): while True: try: - reply = self.sock.recv(self._recv_size, socket.MSG_DONTWAIT) + reply, ancdata = self._recvmsg(socket.MSG_DONTWAIT) except BlockingIOError: return + nsid = self._decode_nsid(ancdata) nms = NlMsgs(reply) self._recv_dbg_print(reply, nms) for nl_msg in nms: @@ -1271,7 +1297,7 @@ class YnlFamily(SpecFamily): print("Unexpected msg id while checking for ntf", decoded) continue - self.handle_ntf(decoded) + self.handle_ntf(decoded, nsid) def poll_ntf(self, duration=None): start_time = time.time() @@ -1335,7 +1361,8 @@ class YnlFamily(SpecFamily): rsp = [] op_rsp = [] while not done: - reply = self.sock.recv(self._recv_size) + reply, ancdata = self._recvmsg() + nsid = self._decode_nsid(ancdata) nms = NlMsgs(reply) self._recv_dbg_print(reply, nms) for nl_msg in nms: @@ -1374,7 +1401,7 @@ class YnlFamily(SpecFamily): # Check if this is a reply to our request if nl_msg.nl_seq not in reqs_by_seq or decoded.cmd() != op.rsp_value: if decoded.cmd() in self.async_msg_ids: - self.handle_ntf(decoded) + self.handle_ntf(decoded, nsid) continue print('Unexpected message: ' + repr(decoded)) continue -- cgit v1.2.3 From 28db0338db61ec75d146192463907351907a0dbc Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 20 May 2026 12:18:49 +0100 Subject: Revert "drivers: net: 3com: 3c509: Remove this driver" This reverts commit 91f3a27ae9f66d81a5906461762c37c8a2bcab06. Contrary to the assumption stated with the original commit description this driver is in use and I'm going to maintain it for the foreseeable future. Signed-off-by: Maciej W. Rozycki Link: https://patch.msgid.link/alpine.DEB.2.21.2605201204260.1450@angie.orcam.me.uk Signed-off-by: Jakub Kicinski --- Documentation/.renames.txt | 1 + .../device_drivers/ethernet/3com/3c509.rst | 249 ++++ .../networking/device_drivers/ethernet/index.rst | 1 + arch/powerpc/configs/ppc6xx_defconfig | 1 + drivers/net/ethernet/3com/3c509.c | 1448 ++++++++++++++++++++ drivers/net/ethernet/3com/Kconfig | 14 + drivers/net/ethernet/3com/Makefile | 1 + 7 files changed, 1715 insertions(+) create mode 100644 Documentation/networking/device_drivers/ethernet/3com/3c509.rst create mode 100644 drivers/net/ethernet/3com/3c509.c diff --git a/Documentation/.renames.txt b/Documentation/.renames.txt index 43d44753ab93..aa7e5aa4a81b 100644 --- a/Documentation/.renames.txt +++ b/Documentation/.renames.txt @@ -786,6 +786,7 @@ networking/altera_tse networking/device_drivers/ethernet/altera/altera_tse networking/bpf_flow_dissector bpf/prog_flow_dissector networking/cxacru networking/device_drivers/atm/cxacru networking/defza networking/device_drivers/fddi/defza +networking/device_drivers/3com/3c509 networking/device_drivers/ethernet/3com/3c509 networking/device_drivers/3com/vortex networking/device_drivers/ethernet/3com/vortex networking/device_drivers/amazon/ena networking/device_drivers/ethernet/amazon/ena networking/device_drivers/aquantia/atlantic networking/device_drivers/ethernet/aquantia/atlantic diff --git a/Documentation/networking/device_drivers/ethernet/3com/3c509.rst b/Documentation/networking/device_drivers/ethernet/3com/3c509.rst new file mode 100644 index 000000000000..47f706bacdd9 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/3com/3c509.rst @@ -0,0 +1,249 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================================================= +Linux and the 3Com EtherLink III Series Ethercards (driver v1.18c and higher) +============================================================================= + +This file contains the instructions and caveats for v1.18c and higher versions +of the 3c509 driver. You should not use the driver without reading this file. + +release 1.0 + +28 February 2002 + +Current maintainer (corrections to): + David Ruggiero + +Introduction +============ + +The following are notes and information on using the 3Com EtherLink III series +ethercards in Linux. These cards are commonly known by the most widely-used +card's 3Com model number, 3c509. They are all 10mb/s ISA-bus cards and shouldn't +be (but sometimes are) confused with the similarly-numbered PCI-bus "3c905" +(aka "Vortex" or "Boomerang") series. Kernel support for the 3c509 family is +provided by the module 3c509.c, which has code to support all of the following +models: + + - 3c509 (original ISA card) + - 3c509B (later revision of the ISA card; supports full-duplex) + - 3c589 (PCMCIA) + - 3c589B (later revision of the 3c589; supports full-duplex) + - 3c579 (EISA) + +Large portions of this documentation were heavily borrowed from the guide +written the original author of the 3c509 driver, Donald Becker. The master +copy of that document, which contains notes on older versions of the driver, +currently resides on Scyld web server: http://www.scyld.com/. + + +Special Driver Features +======================= + +Overriding card settings + +The driver allows boot- or load-time overriding of the card's detected IOADDR, +IRQ, and transceiver settings, although this capability shouldn't generally be +needed except to enable full-duplex mode (see below). An example of the syntax +for LILO parameters for doing this:: + + ether=10,0x310,3,0x3c509,eth0 + +This configures the first found 3c509 card for IRQ 10, base I/O 0x310, and +transceiver type 3 (10base2). The flag "0x3c509" must be set to avoid conflicts +with other card types when overriding the I/O address. When the driver is +loaded as a module, only the IRQ may be overridden. For example, +setting two cards to IRQ10 and IRQ11 is done by using the irq module +option:: + + options 3c509 irq=10,11 + + +Full-duplex mode +================ + +The v1.18c driver added support for the 3c509B's full-duplex capabilities. +In order to enable and successfully use full-duplex mode, three conditions +must be met: + +(a) You must have a Etherlink III card model whose hardware supports full- +duplex operations. Currently, the only members of the 3c509 family that are +positively known to support full-duplex are the 3c509B (ISA bus) and 3c589B +(PCMCIA) cards. Cards without the "B" model designation do *not* support +full-duplex mode; these include the original 3c509 (no "B"), the original +3c589, the 3c529 (MCA bus), and the 3c579 (EISA bus). + +(b) You must be using your card's 10baseT transceiver (i.e., the RJ-45 +connector), not its AUI (thick-net) or 10base2 (thin-net/coax) interfaces. +AUI and 10base2 network cabling is physically incapable of full-duplex +operation. + +(c) Most importantly, your 3c509B must be connected to a link partner that is +itself full-duplex capable. This is almost certainly one of two things: a full- +duplex-capable Ethernet switch (*not* a hub), or a full-duplex-capable NIC on +another system that's connected directly to the 3c509B via a crossover cable. + +Full-duplex mode can be enabled using 'ethtool'. + +.. warning:: + + Extremely important caution concerning full-duplex mode + + Understand that the 3c509B's hardware's full-duplex support is much more + limited than that provide by more modern network interface cards. Although + at the physical layer of the network it fully supports full-duplex operation, + the card was designed before the current Ethernet auto-negotiation (N-way) + spec was written. This means that the 3c509B family ***cannot and will not + auto-negotiate a full-duplex connection with its link partner under any + circumstances, no matter how it is initialized***. If the full-duplex mode + of the 3c509B is enabled, its link partner will very likely need to be + independently _forced_ into full-duplex mode as well; otherwise various nasty + failures will occur - at the very least, you'll see massive numbers of packet + collisions. This is one of very rare circumstances where disabling auto- + negotiation and forcing the duplex mode of a network interface card or switch + would ever be necessary or desirable. + + +Available Transceiver Types +=========================== + +For versions of the driver v1.18c and above, the available transceiver types are: + +== ========================================================================= +0 transceiver type from EEPROM config (normally 10baseT); force half-duplex +1 AUI (thick-net / DB15 connector) +2 (undefined) +3 10base2 (thin-net == coax / BNC connector) +4 10baseT (RJ-45 connector); force half-duplex mode +8 transceiver type and duplex mode taken from card's EEPROM config settings +12 10baseT (RJ-45 connector); force full-duplex mode +== ========================================================================= + +Prior to driver version 1.18c, only transceiver codes 0-4 were supported. Note +that the new transceiver codes 8 and 12 are the *only* ones that will enable +full-duplex mode, no matter what the card's detected EEPROM settings might be. +This insured that merely upgrading the driver from an earlier version would +never automatically enable full-duplex mode in an existing installation; +it must always be explicitly enabled via one of these code in order to be +activated. + +The transceiver type can be changed using 'ethtool'. + + +Interpretation of error messages and common problems +---------------------------------------------------- + +Error Messages +^^^^^^^^^^^^^^ + +eth0: Infinite loop in interrupt, status 2011. +These are "mostly harmless" message indicating that the driver had too much +work during that interrupt cycle. With a status of 0x2011 you are receiving +packets faster than they can be removed from the card. This should be rare +or impossible in normal operation. Possible causes of this error report are: + + - a "green" mode enabled that slows the processor down when there is no + keyboard activity. + + - some other device or device driver hogging the bus or disabling interrupts. + Check /proc/interrupts for excessive interrupt counts. The timer tick + interrupt should always be incrementing faster than the others. + +No received packets +^^^^^^^^^^^^^^^^^^^ + +If a 3c509, 3c562 or 3c589 can successfully transmit packets, but never +receives packets (as reported by /proc/net/dev or 'ifconfig') you likely +have an interrupt line problem. Check /proc/interrupts to verify that the +card is actually generating interrupts. If the interrupt count is not +increasing you likely have a physical conflict with two devices trying to +use the same ISA IRQ line. The common conflict is with a sound card on IRQ10 +or IRQ5, and the easiest solution is to move the 3c509 to a different +interrupt line. If the device is receiving packets but 'ping' doesn't work, +you have a routing problem. + +Tx Carrier Errors Reported in /proc/net/dev +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +If an EtherLink III appears to transmit packets, but the "Tx carrier errors" +field in /proc/net/dev increments as quickly as the Tx packet count, you +likely have an unterminated network or the incorrect media transceiver selected. + +3c509B card is not detected on machines with an ISA PnP BIOS. +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +While the updated driver works with most PnP BIOS programs, it does not work +with all. This can be fixed by disabling PnP support using the 3Com-supplied +setup program. + +3c509 card is not detected on overclocked machines +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Increase the delay time in id_read_eeprom() from the current value, 500, +to an absurdly high value, such as 5000. + + +Decoding Status and Error Messages +---------------------------------- + + +The bits in the main status register are: + +===== ====================================== +value description +===== ====================================== +0x01 Interrupt latch +0x02 Tx overrun, or Rx underrun +0x04 Tx complete +0x08 Tx FIFO room available +0x10 A complete Rx packet has arrived +0x20 A Rx packet has started to arrive +0x40 The driver has requested an interrupt +0x80 Statistics counter nearly full +===== ====================================== + +The bits in the transmit (Tx) status word are: + +===== ============================================ +value description +===== ============================================ +0x02 Out-of-window collision. +0x04 Status stack overflow (normally impossible). +0x08 16 collisions. +0x10 Tx underrun (not enough PCI bus bandwidth). +0x20 Tx jabber. +0x40 Tx interrupt requested. +0x80 Status is valid (this should always be set). +===== ============================================ + + +When a transmit error occurs the driver produces a status message such as:: + + eth0: Transmit error, Tx status register 82 + +The two values typically seen here are: + +0x82 +^^^^ + +Out of window collision. This typically occurs when some other Ethernet +host is incorrectly set to full duplex on a half duplex network. + +0x88 +^^^^ + +16 collisions. This typically occurs when the network is exceptionally busy +or when another host doesn't correctly back off after a collision. If this +error is mixed with 0x82 errors it is the result of a host incorrectly set +to full duplex (see above). + +Both of these errors are the result of network problems that should be +corrected. They do not represent driver malfunction. + + +Revision history (this file) +============================ + +28Feb02 v1.0 DR New; major portions based on Becker original 3c509 docs + diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 64621c21fd78..1d25be493ae9 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -10,6 +10,7 @@ Contents: .. toctree:: :maxdepth: 2 + 3com/3c509 3com/vortex amazon/ena altera/altera_tse diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index ccabc6e17168..eda1fec7ffd9 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -393,6 +393,7 @@ CONFIG_NETCONSOLE=m CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m +CONFIG_EL3=m CONFIG_VORTEX=m CONFIG_TYPHOON=m CONFIG_ADAPTEC_STARFIRE=m diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c new file mode 100644 index 000000000000..fb68339e1511 --- /dev/null +++ b/drivers/net/ethernet/3com/3c509.c @@ -0,0 +1,1448 @@ +/* 3c509.c: A 3c509 EtherLink3 ethernet driver for linux. */ +/* + Written 1993-2000 by Donald Becker. + + Copyright 1994-2000 by Donald Becker. + Copyright 1993 United States Government as represented by the + Director, National Security Agency. This software may be used and + distributed according to the terms of the GNU General Public License, + incorporated herein by reference. + + This driver is for the 3Com EtherLinkIII series. + + The author may be reached as becker@scyld.com, or C/O + Scyld Computing Corporation + 410 Severn Ave., Suite 210 + Annapolis MD 21403 + + Known limitations: + Because of the way 3c509 ISA detection works it's difficult to predict + a priori which of several ISA-mode cards will be detected first. + + This driver does not use predictive interrupt mode, resulting in higher + packet latency but lower overhead. If interrupts are disabled for an + unusually long time it could also result in missed packets, but in + practice this rarely happens. + + + FIXES: + Alan Cox: Removed the 'Unexpected interrupt' bug. + Michael Meskes: Upgraded to Donald Becker's version 1.07. + Alan Cox: Increased the eeprom delay. Regardless of + what the docs say some people definitely + get problems with lower (but in card spec) + delays + v1.10 4/21/97 Fixed module code so that multiple cards may be detected, + other cleanups. -djb + Andrea Arcangeli: Upgraded to Donald Becker's version 1.12. + Rick Payne: Fixed SMP race condition + v1.13 9/8/97 Made 'max_interrupt_work' an insmod-settable variable -djb + v1.14 10/15/97 Avoided waiting..discard message for fast machines -djb + v1.15 1/31/98 Faster recovery for Tx errors. -djb + v1.16 2/3/98 Different ID port handling to avoid sound cards. -djb + v1.18 12Mar2001 Andrew Morton + - Avoid bogus detect of 3c590's (Andrzej Krzysztofowicz) + - Reviewed against 1.18 from scyld.com + v1.18a 17Nov2001 Jeff Garzik + - ethtool support + v1.18b 1Mar2002 Zwane Mwaikambo + - Power Management support + v1.18c 1Mar2002 David Ruggiero + - Full duplex support + v1.19 16Oct2002 Zwane Mwaikambo + - Additional ethtool features + v1.19a 28Oct2002 Davud Ruggiero + - Increase *read_eeprom udelay to workaround oops with 2 cards. + v1.19b 08Nov2002 Marc Zyngier + - Introduce driver model for EISA cards. + v1.20 04Feb2008 Ondrej Zary + - convert to isa_driver and pnp_driver and some cleanups +*/ + +#define DRV_NAME "3c509" + +/* A few values that may be tweaked. */ + +/* Time in jiffies before concluding the transmitter is hung. */ +#define TX_TIMEOUT (400*HZ/1000) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for udelay() */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifdef EL3_DEBUG +static int el3_debug = EL3_DEBUG; +#else +static int el3_debug = 2; +#endif + +/* Used to do a global count of all the cards in the system. Must be + * a global variable so that the eisa probe routines can increment + * it */ +static int el3_cards = 0; +#define EL3_MAX_CARDS 8 + +/* To minimize the size of the driver source I only define operating + constants if they are used several times. You'll need the manual + anyway if you want to understand driver details. */ +/* Offsets from base I/O address. */ +#define EL3_DATA 0x00 +#define EL3_CMD 0x0e +#define EL3_STATUS 0x0e +#define EEPROM_READ 0x80 + +#define EL3_IO_EXTENT 16 + +#define EL3WINDOW(win_num) outw(SelectWindow + (win_num), ioaddr + EL3_CMD) + + +/* The top five bits written to EL3_CMD are a command, the lower + 11 bits are the parameter, if applicable. */ +enum c509cmd { + TotalReset = 0<<11, SelectWindow = 1<<11, StartCoax = 2<<11, + RxDisable = 3<<11, RxEnable = 4<<11, RxReset = 5<<11, RxDiscard = 8<<11, + TxEnable = 9<<11, TxDisable = 10<<11, TxReset = 11<<11, + FakeIntr = 12<<11, AckIntr = 13<<11, SetIntrEnb = 14<<11, + SetStatusEnb = 15<<11, SetRxFilter = 16<<11, SetRxThreshold = 17<<11, + SetTxThreshold = 18<<11, SetTxStart = 19<<11, StatsEnable = 21<<11, + StatsDisable = 22<<11, StopCoax = 23<<11, PowerUp = 27<<11, + PowerDown = 28<<11, PowerAuto = 29<<11}; + +enum c509status { + IntLatch = 0x0001, AdapterFailure = 0x0002, TxComplete = 0x0004, + TxAvailable = 0x0008, RxComplete = 0x0010, RxEarly = 0x0020, + IntReq = 0x0040, StatsFull = 0x0080, CmdBusy = 0x1000, }; + +/* The SetRxFilter command accepts the following classes: */ +enum RxFilter { + RxStation = 1, RxMulticast = 2, RxBroadcast = 4, RxProm = 8 }; + +/* Register window 1 offsets, the window used in normal operation. */ +#define TX_FIFO 0x00 +#define RX_FIFO 0x00 +#define RX_STATUS 0x08 +#define TX_STATUS 0x0B +#define TX_FREE 0x0C /* Remaining free bytes in Tx buffer. */ + +#define WN0_CONF_CTRL 0x04 /* Window 0: Configuration control register */ +#define WN0_ADDR_CONF 0x06 /* Window 0: Address configuration register */ +#define WN0_IRQ 0x08 /* Window 0: Set IRQ line in bits 12-15. */ +#define WN4_MEDIA 0x0A /* Window 4: Various transcvr/media bits. */ +#define MEDIA_TP 0x00C0 /* Enable link beat and jabber for 10baseT. */ +#define WN4_NETDIAG 0x06 /* Window 4: Net diagnostic */ +#define FD_ENABLE 0x8000 /* Enable full-duplex ("external loopback") */ + +/* + * Must be a power of two (we use a binary and in the + * circular queue) + */ +#define SKB_QUEUE_SIZE 64 + +enum el3_cardtype { EL3_ISA, EL3_PNP, EL3_EISA }; + +struct el3_private { + spinlock_t lock; + /* skb send-queue */ + int head, size; + struct sk_buff *queue[SKB_QUEUE_SIZE]; + enum el3_cardtype type; +}; +static int id_port; +static int current_tag; +static struct net_device *el3_devs[EL3_MAX_CARDS]; + +/* Parameters that may be passed into the module. */ +static int debug = -1; +static int irq[] = {-1, -1, -1, -1, -1, -1, -1, -1}; +/* Maximum events (Rx packets, etc.) to handle at each interrupt. */ +static int max_interrupt_work = 10; +#ifdef CONFIG_PNP +static int nopnp; +#endif + +static int el3_common_init(struct net_device *dev); +static void el3_common_remove(struct net_device *dev); +static ushort id_read_eeprom(int index); +static ushort read_eeprom(int ioaddr, int index); +static int el3_open(struct net_device *dev); +static netdev_tx_t el3_start_xmit(struct sk_buff *skb, struct net_device *dev); +static irqreturn_t el3_interrupt(int irq, void *dev_id); +static void update_stats(struct net_device *dev); +static struct net_device_stats *el3_get_stats(struct net_device *dev); +static int el3_rx(struct net_device *dev); +static int el3_close(struct net_device *dev); +static void set_multicast_list(struct net_device *dev); +static void el3_tx_timeout (struct net_device *dev, unsigned int txqueue); +static void el3_down(struct net_device *dev); +static void el3_up(struct net_device *dev); +static const struct ethtool_ops ethtool_ops; +#ifdef CONFIG_PM +static int el3_suspend(struct device *, pm_message_t); +static int el3_resume(struct device *); +#else +#define el3_suspend NULL +#define el3_resume NULL +#endif + + +/* generic device remove for all device types */ +static int el3_device_remove (struct device *device); +#ifdef CONFIG_NET_POLL_CONTROLLER +static void el3_poll_controller(struct net_device *dev); +#endif + +/* Return 0 on success, 1 on error, 2 when found already detected PnP card */ +static int el3_isa_id_sequence(__be16 *phys_addr) +{ + short lrs_state = 0xff; + int i; + + /* ISA boards are detected by sending the ID sequence to the + ID_PORT. We find cards past the first by setting the 'current_tag' + on cards as they are found. Cards with their tag set will not + respond to subsequent ID sequences. */ + + outb(0x00, id_port); + outb(0x00, id_port); + for (i = 0; i < 255; i++) { + outb(lrs_state, id_port); + lrs_state <<= 1; + lrs_state = lrs_state & 0x100 ? lrs_state ^ 0xcf : lrs_state; + } + /* For the first probe, clear all board's tag registers. */ + if (current_tag == 0) + outb(0xd0, id_port); + else /* Otherwise kill off already-found boards. */ + outb(0xd8, id_port); + if (id_read_eeprom(7) != 0x6d50) + return 1; + /* Read in EEPROM data, which does contention-select. + Only the lowest address board will stay "on-line". + 3Com got the byte order backwards. */ + for (i = 0; i < 3; i++) + phys_addr[i] = htons(id_read_eeprom(i)); +#ifdef CONFIG_PNP + if (!nopnp) { + /* The ISA PnP 3c509 cards respond to the ID sequence too. + This check is needed in order not to register them twice. */ + for (i = 0; i < el3_cards; i++) { + struct el3_private *lp = netdev_priv(el3_devs[i]); + if (lp->type == EL3_PNP && + ether_addr_equal((u8 *)phys_addr, el3_devs[i]->dev_addr)) { + if (el3_debug > 3) + pr_debug("3c509 with address %02x %02x %02x %02x %02x %02x was found by ISAPnP\n", + phys_addr[0] & 0xff, phys_addr[0] >> 8, + phys_addr[1] & 0xff, phys_addr[1] >> 8, + phys_addr[2] & 0xff, phys_addr[2] >> 8); + /* Set the adaptor tag so that the next card can be found. */ + outb(0xd0 + ++current_tag, id_port); + return 2; + } + } + } +#endif /* CONFIG_PNP */ + return 0; + +} + +static void el3_dev_fill(struct net_device *dev, __be16 *phys_addr, int ioaddr, + int irq, int if_port, enum el3_cardtype type) +{ + struct el3_private *lp = netdev_priv(dev); + + eth_hw_addr_set(dev, (u8 *)phys_addr); + dev->base_addr = ioaddr; + dev->irq = irq; + dev->if_port = if_port; + lp->type = type; +} + +static int el3_isa_match(struct device *pdev, unsigned int ndev) +{ + struct net_device *dev; + int ioaddr, isa_irq, if_port, err; + unsigned int iobase; + __be16 phys_addr[3]; + + while ((err = el3_isa_id_sequence(phys_addr)) == 2) + ; /* Skip to next card when PnP card found */ + if (err == 1) + return 0; + + iobase = id_read_eeprom(8); + if_port = iobase >> 14; + ioaddr = 0x200 + ((iobase & 0x1f) << 4); + if (irq[el3_cards] > 1 && irq[el3_cards] < 16) + isa_irq = irq[el3_cards]; + else + isa_irq = id_read_eeprom(9) >> 12; + + dev = alloc_etherdev(sizeof(struct el3_private)); + if (!dev) + return -ENOMEM; + + SET_NETDEV_DEV(dev, pdev); + + if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509-isa")) { + free_netdev(dev); + return 0; + } + + /* Set the adaptor tag so that the next card can be found. */ + outb(0xd0 + ++current_tag, id_port); + + /* Activate the adaptor at the EEPROM location. */ + outb((ioaddr >> 4) | 0xe0, id_port); + + EL3WINDOW(0); + if (inw(ioaddr) != 0x6d50) { + free_netdev(dev); + return 0; + } + + /* Free the interrupt so that some other card can use it. */ + outw(0x0f00, ioaddr + WN0_IRQ); + + el3_dev_fill(dev, phys_addr, ioaddr, isa_irq, if_port, EL3_ISA); + dev_set_drvdata(pdev, dev); + if (el3_common_init(dev)) { + free_netdev(dev); + return 0; + } + + el3_devs[el3_cards++] = dev; + return 1; +} + +static void el3_isa_remove(struct device *pdev, + unsigned int ndev) +{ + el3_device_remove(pdev); + dev_set_drvdata(pdev, NULL); +} + +#ifdef CONFIG_PM +static int el3_isa_suspend(struct device *dev, unsigned int n, + pm_message_t state) +{ + current_tag = 0; + return el3_suspend(dev, state); +} + +static int el3_isa_resume(struct device *dev, unsigned int n) +{ + struct net_device *ndev = dev_get_drvdata(dev); + int ioaddr = ndev->base_addr, err; + __be16 phys_addr[3]; + + while ((err = el3_isa_id_sequence(phys_addr)) == 2) + ; /* Skip to next card when PnP card found */ + if (err == 1) + return 0; + /* Set the adaptor tag so that the next card can be found. */ + outb(0xd0 + ++current_tag, id_port); + /* Enable the card */ + outb((ioaddr >> 4) | 0xe0, id_port); + EL3WINDOW(0); + if (inw(ioaddr) != 0x6d50) + return 1; + /* Free the interrupt so that some other card can use it. */ + outw(0x0f00, ioaddr + WN0_IRQ); + return el3_resume(dev); +} +#endif + +static struct isa_driver el3_isa_driver = { + .match = el3_isa_match, + .remove = el3_isa_remove, +#ifdef CONFIG_PM + .suspend = el3_isa_suspend, + .resume = el3_isa_resume, +#endif + .driver = { + .name = "3c509" + }, +}; +static int isa_registered; + +#ifdef CONFIG_PNP +static const struct pnp_device_id el3_pnp_ids[] = { + { .id = "TCM5090" }, /* 3Com Etherlink III (TP) */ + { .id = "TCM5091" }, /* 3Com Etherlink III */ + { .id = "TCM5094" }, /* 3Com Etherlink III (combo) */ + { .id = "TCM5095" }, /* 3Com Etherlink III (TPO) */ + { .id = "TCM5098" }, /* 3Com Etherlink III (TPC) */ + { .id = "PNP80f7" }, /* 3Com Etherlink III compatible */ + { .id = "PNP80f8" }, /* 3Com Etherlink III compatible */ + { .id = "" } +}; +MODULE_DEVICE_TABLE(pnp, el3_pnp_ids); + +static int el3_pnp_probe(struct pnp_dev *pdev, const struct pnp_device_id *id) +{ + short i; + int ioaddr, irq, if_port; + __be16 phys_addr[3]; + struct net_device *dev = NULL; + int err; + + ioaddr = pnp_port_start(pdev, 0); + if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509-pnp")) + return -EBUSY; + irq = pnp_irq(pdev, 0); + EL3WINDOW(0); + for (i = 0; i < 3; i++) + phys_addr[i] = htons(read_eeprom(ioaddr, i)); + if_port = read_eeprom(ioaddr, 8) >> 14; + dev = alloc_etherdev(sizeof(struct el3_private)); + if (!dev) { + release_region(ioaddr, EL3_IO_EXTENT); + return -ENOMEM; + } + SET_NETDEV_DEV(dev, &pdev->dev); + + el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_PNP); + pnp_set_drvdata(pdev, dev); + err = el3_common_init(dev); + + if (err) { + pnp_set_drvdata(pdev, NULL); + free_netdev(dev); + return err; + } + + el3_devs[el3_cards++] = dev; + return 0; +} + +static void el3_pnp_remove(struct pnp_dev *pdev) +{ + el3_common_remove(pnp_get_drvdata(pdev)); + pnp_set_drvdata(pdev, NULL); +} + +#ifdef CONFIG_PM +static int el3_pnp_suspend(struct pnp_dev *pdev, pm_message_t state) +{ + return el3_suspend(&pdev->dev, state); +} + +static int el3_pnp_resume(struct pnp_dev *pdev) +{ + return el3_resume(&pdev->dev); +} +#endif + +static struct pnp_driver el3_pnp_driver = { + .name = "3c509", + .id_table = el3_pnp_ids, + .probe = el3_pnp_probe, + .remove = el3_pnp_remove, +#ifdef CONFIG_PM + .suspend = el3_pnp_suspend, + .resume = el3_pnp_resume, +#endif +}; +static int pnp_registered; +#endif /* CONFIG_PNP */ + +#ifdef CONFIG_EISA +static const struct eisa_device_id el3_eisa_ids[] = { + { "TCM5090" }, + { "TCM5091" }, + { "TCM5092" }, + { "TCM5093" }, + { "TCM5094" }, + { "TCM5095" }, + { "TCM5098" }, + { "" } +}; +MODULE_DEVICE_TABLE(eisa, el3_eisa_ids); + +static int el3_eisa_probe (struct device *device); + +static struct eisa_driver el3_eisa_driver = { + .id_table = el3_eisa_ids, + .driver = { + .name = "3c579", + .probe = el3_eisa_probe, + .remove = el3_device_remove, + .suspend = el3_suspend, + .resume = el3_resume, + } +}; +static int eisa_registered; +#endif + +static const struct net_device_ops netdev_ops = { + .ndo_open = el3_open, + .ndo_stop = el3_close, + .ndo_start_xmit = el3_start_xmit, + .ndo_get_stats = el3_get_stats, + .ndo_set_rx_mode = set_multicast_list, + .ndo_tx_timeout = el3_tx_timeout, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = el3_poll_controller, +#endif +}; + +static int el3_common_init(struct net_device *dev) +{ + struct el3_private *lp = netdev_priv(dev); + int err; + static const char * const if_names[] = { + "10baseT", "AUI", "undefined", "BNC" + }; + + spin_lock_init(&lp->lock); + + if (dev->mem_start & 0x05) { /* xcvr codes 1/3/4/12 */ + dev->if_port = (dev->mem_start & 0x0f); + } else { /* xcvr codes 0/8 */ + /* use eeprom value, but save user's full-duplex selection */ + dev->if_port |= (dev->mem_start & 0x08); + } + + /* The EL3-specific entries in the device structure. */ + dev->netdev_ops = &netdev_ops; + dev->watchdog_timeo = TX_TIMEOUT; + dev->ethtool_ops = ðtool_ops; + + err = register_netdev(dev); + if (err) { + pr_err("Failed to register 3c5x9 at %#3.3lx, IRQ %d.\n", + dev->base_addr, dev->irq); + release_region(dev->base_addr, EL3_IO_EXTENT); + return err; + } + + pr_info("%s: 3c5x9 found at %#3.3lx, %s port, address %pM, IRQ %d.\n", + dev->name, dev->base_addr, if_names[(dev->if_port & 0x03)], + dev->dev_addr, dev->irq); + + return 0; + +} + +static void el3_common_remove (struct net_device *dev) +{ + unregister_netdev (dev); + release_region(dev->base_addr, EL3_IO_EXTENT); + free_netdev (dev); +} + +#ifdef CONFIG_EISA +static int el3_eisa_probe(struct device *device) +{ + short i; + int ioaddr, irq, if_port; + __be16 phys_addr[3]; + struct net_device *dev = NULL; + struct eisa_device *edev; + int err; + + /* Yeepee, The driver framework is calling us ! */ + edev = to_eisa_device (device); + ioaddr = edev->base_addr; + + if (!request_region(ioaddr, EL3_IO_EXTENT, "3c579-eisa")) + return -EBUSY; + + /* Change the register set to the configuration window 0. */ + outw(SelectWindow | 0, ioaddr + 0xC80 + EL3_CMD); + + irq = inw(ioaddr + WN0_IRQ) >> 12; + if_port = inw(ioaddr + 6)>>14; + for (i = 0; i < 3; i++) + phys_addr[i] = htons(read_eeprom(ioaddr, i)); + + /* Restore the "Product ID" to the EEPROM read register. */ + read_eeprom(ioaddr, 3); + + dev = alloc_etherdev(sizeof (struct el3_private)); + if (dev == NULL) { + release_region(ioaddr, EL3_IO_EXTENT); + return -ENOMEM; + } + + SET_NETDEV_DEV(dev, device); + + el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_EISA); + eisa_set_drvdata (edev, dev); + err = el3_common_init(dev); + + if (err) { + eisa_set_drvdata (edev, NULL); + free_netdev(dev); + return err; + } + + el3_devs[el3_cards++] = dev; + return 0; +} +#endif + +/* This remove works for all device types. + * + * The net dev must be stored in the driver data field */ +static int el3_device_remove(struct device *device) +{ + struct net_device *dev; + + dev = dev_get_drvdata(device); + + el3_common_remove (dev); + return 0; +} + +/* Read a word from the EEPROM using the regular EEPROM access register. + Assume that we are in register window zero. + */ +static ushort read_eeprom(int ioaddr, int index) +{ + outw(EEPROM_READ + index, ioaddr + 10); + /* Pause for at least 162 us. for the read to take place. + Some chips seem to require much longer */ + mdelay(2); + return inw(ioaddr + 12); +} + +/* Read a word from the EEPROM when in the ISA ID probe state. */ +static ushort id_read_eeprom(int index) +{ + int bit, word = 0; + + /* Issue read command, and pause for at least 162 us. for it to complete. + Assume extra-fast 16Mhz bus. */ + outb(EEPROM_READ + index, id_port); + + /* Pause for at least 162 us. for the read to take place. */ + /* Some chips seem to require much longer */ + mdelay(4); + + for (bit = 15; bit >= 0; bit--) + word = (word << 1) + (inb(id_port) & 0x01); + + if (el3_debug > 3) + pr_debug(" 3c509 EEPROM word %d %#4.4x.\n", index, word); + + return word; +} + + +static int +el3_open(struct net_device *dev) +{ + int ioaddr = dev->base_addr; + int i; + + outw(TxReset, ioaddr + EL3_CMD); + outw(RxReset, ioaddr + EL3_CMD); + outw(SetStatusEnb | 0x00, ioaddr + EL3_CMD); + + i = request_irq(dev->irq, el3_interrupt, 0, dev->name, dev); + if (i) + return i; + + EL3WINDOW(0); + if (el3_debug > 3) + pr_debug("%s: Opening, IRQ %d status@%x %4.4x.\n", dev->name, + dev->irq, ioaddr + EL3_STATUS, inw(ioaddr + EL3_STATUS)); + + el3_up(dev); + + if (el3_debug > 3) + pr_debug("%s: Opened 3c509 IRQ %d status %4.4x.\n", + dev->name, dev->irq, inw(ioaddr + EL3_STATUS)); + + return 0; +} + +static void +el3_tx_timeout (struct net_device *dev, unsigned int txqueue) +{ + int ioaddr = dev->base_addr; + + /* Transmitter timeout, serious problems. */ + pr_warn("%s: transmit timed out, Tx_status %2.2x status %4.4x Tx FIFO room %d\n", + dev->name, inb(ioaddr + TX_STATUS), inw(ioaddr + EL3_STATUS), + inw(ioaddr + TX_FREE)); + dev->stats.tx_errors++; + netif_trans_update(dev); /* prevent tx timeout */ + /* Issue TX_RESET and TX_START commands. */ + outw(TxReset, ioaddr + EL3_CMD); + outw(TxEnable, ioaddr + EL3_CMD); + netif_wake_queue(dev); +} + + +static netdev_tx_t +el3_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct el3_private *lp = netdev_priv(dev); + int ioaddr = dev->base_addr; + unsigned long flags; + + netif_stop_queue (dev); + + dev->stats.tx_bytes += skb->len; + + if (el3_debug > 4) { + pr_debug("%s: el3_start_xmit(length = %u) called, status %4.4x.\n", + dev->name, skb->len, inw(ioaddr + EL3_STATUS)); + } + /* + * We lock the driver against other processors. Note + * we don't need to lock versus the IRQ as we suspended + * that. This means that we lose the ability to take + * an RX during a TX upload. That sucks a bit with SMP + * on an original 3c509 (2K buffer) + * + * Using disable_irq stops us crapping on other + * time sensitive devices. + */ + + spin_lock_irqsave(&lp->lock, flags); + + /* Put out the doubleword header... */ + outw(skb->len, ioaddr + TX_FIFO); + outw(0x00, ioaddr + TX_FIFO); + /* ... and the packet rounded to a doubleword. */ + outsl(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2); + + if (inw(ioaddr + TX_FREE) > 1536) + netif_start_queue(dev); + else + /* Interrupt us when the FIFO has room for max-sized packet. */ + outw(SetTxThreshold + 1536, ioaddr + EL3_CMD); + + spin_unlock_irqrestore(&lp->lock, flags); + + dev_consume_skb_any (skb); + + /* Clear the Tx status stack. */ + { + short tx_status; + int i = 4; + + while (--i > 0 && (tx_status = inb(ioaddr + TX_STATUS)) > 0) { + if (tx_status & 0x38) dev->stats.tx_aborted_errors++; + if (tx_status & 0x30) outw(TxReset, ioaddr + EL3_CMD); + if (tx_status & 0x3C) outw(TxEnable, ioaddr + EL3_CMD); + outb(0x00, ioaddr + TX_STATUS); /* Pop the status stack. */ + } + } + return NETDEV_TX_OK; +} + +/* The EL3 interrupt handler. */ +static irqreturn_t +el3_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct el3_private *lp; + int ioaddr, status; + int i = max_interrupt_work; + + lp = netdev_priv(dev); + spin_lock(&lp->lock); + + ioaddr = dev->base_addr; + + if (el3_debug > 4) { + status = inw(ioaddr + EL3_STATUS); + pr_debug("%s: interrupt, status %4.4x.\n", dev->name, status); + } + + while ((status = inw(ioaddr + EL3_STATUS)) & + (IntLatch | RxComplete | StatsFull)) { + + if (status & RxComplete) + el3_rx(dev); + + if (status & TxAvailable) { + if (el3_debug > 5) + pr_debug(" TX room bit was handled.\n"); + /* There's room in the FIFO for a full-sized packet. */ + outw(AckIntr | TxAvailable, ioaddr + EL3_CMD); + netif_wake_queue (dev); + } + if (status & (AdapterFailure | RxEarly | StatsFull | TxComplete)) { + /* Handle all uncommon interrupts. */ + if (status & StatsFull) /* Empty statistics. */ + update_stats(dev); + if (status & RxEarly) { /* Rx early is unused. */ + el3_rx(dev); + outw(AckIntr | RxEarly, ioaddr + EL3_CMD); + } + if (status & TxComplete) { /* Really Tx error. */ + short tx_status; + int i = 4; + + while (--i>0 && (tx_status = inb(ioaddr + TX_STATUS)) > 0) { + if (tx_status & 0x38) dev->stats.tx_aborted_errors++; + if (tx_status & 0x30) outw(TxReset, ioaddr + EL3_CMD); + if (tx_status & 0x3C) outw(TxEnable, ioaddr + EL3_CMD); + outb(0x00, ioaddr + TX_STATUS); /* Pop the status stack. */ + } + } + if (status & AdapterFailure) { + /* Adapter failure requires Rx reset and reinit. */ + outw(RxReset, ioaddr + EL3_CMD); + /* Set the Rx filter to the current state. */ + outw(SetRxFilter | RxStation | RxBroadcast + | (dev->flags & IFF_ALLMULTI ? RxMulticast : 0) + | (dev->flags & IFF_PROMISC ? RxProm : 0), + ioaddr + EL3_CMD); + outw(RxEnable, ioaddr + EL3_CMD); /* Re-enable the receiver. */ + outw(AckIntr | AdapterFailure, ioaddr + EL3_CMD); + } + } + + if (--i < 0) { + pr_err("%s: Infinite loop in interrupt, status %4.4x.\n", + dev->name, status); + /* Clear all interrupts. */ + outw(AckIntr | 0xFF, ioaddr + EL3_CMD); + break; + } + /* Acknowledge the IRQ. */ + outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD); /* Ack IRQ */ + } + + if (el3_debug > 4) { + pr_debug("%s: exiting interrupt, status %4.4x.\n", dev->name, + inw(ioaddr + EL3_STATUS)); + } + spin_unlock(&lp->lock); + return IRQ_HANDLED; +} + + +#ifdef CONFIG_NET_POLL_CONTROLLER +/* + * Polling receive - used by netconsole and other diagnostic tools + * to allow network i/o with interrupts disabled. + */ +static void el3_poll_controller(struct net_device *dev) +{ + disable_irq(dev->irq); + el3_interrupt(dev->irq, dev); + enable_irq(dev->irq); +} +#endif + +static struct net_device_stats * +el3_get_stats(struct net_device *dev) +{ + struct el3_private *lp = netdev_priv(dev); + unsigned long flags; + + /* + * This is fast enough not to bother with disable IRQ + * stuff. + */ + + spin_lock_irqsave(&lp->lock, flags); + update_stats(dev); + spin_unlock_irqrestore(&lp->lock, flags); + return &dev->stats; +} + +/* Update statistics. We change to register window 6, so this should be run + single-threaded if the device is active. This is expected to be a rare + operation, and it's simpler for the rest of the driver to assume that + window 1 is always valid rather than use a special window-state variable. + */ +static void update_stats(struct net_device *dev) +{ + int ioaddr = dev->base_addr; + + if (el3_debug > 5) + pr_debug(" Updating the statistics.\n"); + /* Turn off statistics updates while reading. */ + outw(StatsDisable, ioaddr + EL3_CMD); + /* Switch to the stats window, and read everything. */ + EL3WINDOW(6); + dev->stats.tx_carrier_errors += inb(ioaddr + 0); + dev->stats.tx_heartbeat_errors += inb(ioaddr + 1); + /* Multiple collisions. */ inb(ioaddr + 2); + dev->stats.collisions += inb(ioaddr + 3); + dev->stats.tx_window_errors += inb(ioaddr + 4); + dev->stats.rx_fifo_errors += inb(ioaddr + 5); + dev->stats.tx_packets += inb(ioaddr + 6); + /* Rx packets */ inb(ioaddr + 7); + /* Tx deferrals */ inb(ioaddr + 8); + inw(ioaddr + 10); /* Total Rx and Tx octets. */ + inw(ioaddr + 12); + + /* Back to window 1, and turn statistics back on. */ + EL3WINDOW(1); + outw(StatsEnable, ioaddr + EL3_CMD); +} + +static int +el3_rx(struct net_device *dev) +{ + int ioaddr = dev->base_addr; + short rx_status; + + if (el3_debug > 5) + pr_debug(" In rx_packet(), status %4.4x, rx_status %4.4x.\n", + inw(ioaddr+EL3_STATUS), inw(ioaddr+RX_STATUS)); + while ((rx_status = inw(ioaddr + RX_STATUS)) > 0) { + if (rx_status & 0x4000) { /* Error, update stats. */ + short error = rx_status & 0x3800; + + outw(RxDiscard, ioaddr + EL3_CMD); + dev->stats.rx_errors++; + switch (error) { + case 0x0000: dev->stats.rx_over_errors++; break; + case 0x0800: dev->stats.rx_length_errors++; break; + case 0x1000: dev->stats.rx_frame_errors++; break; + case 0x1800: dev->stats.rx_length_errors++; break; + case 0x2000: dev->stats.rx_frame_errors++; break; + case 0x2800: dev->stats.rx_crc_errors++; break; + } + } else { + short pkt_len = rx_status & 0x7ff; + struct sk_buff *skb; + + skb = netdev_alloc_skb(dev, pkt_len + 5); + if (el3_debug > 4) + pr_debug("Receiving packet size %d status %4.4x.\n", + pkt_len, rx_status); + if (skb != NULL) { + skb_reserve(skb, 2); /* Align IP on 16 byte */ + + /* 'skb->data' points to the start of sk_buff data area. */ + insl(ioaddr + RX_FIFO, skb_put(skb,pkt_len), + (pkt_len + 3) >> 2); + + outw(RxDiscard, ioaddr + EL3_CMD); /* Pop top Rx packet. */ + skb->protocol = eth_type_trans(skb,dev); + netif_rx(skb); + dev->stats.rx_bytes += pkt_len; + dev->stats.rx_packets++; + continue; + } + outw(RxDiscard, ioaddr + EL3_CMD); + dev->stats.rx_dropped++; + if (el3_debug) + pr_debug("%s: Couldn't allocate a sk_buff of size %d.\n", + dev->name, pkt_len); + } + inw(ioaddr + EL3_STATUS); /* Delay. */ + while (inw(ioaddr + EL3_STATUS) & 0x1000) + pr_debug(" Waiting for 3c509 to discard packet, status %x.\n", + inw(ioaddr + EL3_STATUS) ); + } + + return 0; +} + +/* + * Set or clear the multicast filter for this adaptor. + */ +static void +set_multicast_list(struct net_device *dev) +{ + unsigned long flags; + struct el3_private *lp = netdev_priv(dev); + int ioaddr = dev->base_addr; + int mc_count = netdev_mc_count(dev); + + if (el3_debug > 1) { + static int old; + if (old != mc_count) { + old = mc_count; + pr_debug("%s: Setting Rx mode to %d addresses.\n", + dev->name, mc_count); + } + } + spin_lock_irqsave(&lp->lock, flags); + if (dev->flags&IFF_PROMISC) { + outw(SetRxFilter | RxStation | RxMulticast | RxBroadcast | RxProm, + ioaddr + EL3_CMD); + } + else if (mc_count || (dev->flags&IFF_ALLMULTI)) { + outw(SetRxFilter | RxStation | RxMulticast | RxBroadcast, ioaddr + EL3_CMD); + } + else + outw(SetRxFilter | RxStation | RxBroadcast, ioaddr + EL3_CMD); + spin_unlock_irqrestore(&lp->lock, flags); +} + +static int +el3_close(struct net_device *dev) +{ + int ioaddr = dev->base_addr; + struct el3_private *lp = netdev_priv(dev); + + if (el3_debug > 2) + pr_debug("%s: Shutting down ethercard.\n", dev->name); + + el3_down(dev); + + free_irq(dev->irq, dev); + /* Switching back to window 0 disables the IRQ. */ + EL3WINDOW(0); + if (lp->type != EL3_EISA) { + /* But we explicitly zero the IRQ line select anyway. Don't do + * it on EISA cards, it prevents the module from getting an + * IRQ after unload+reload... */ + outw(0x0f00, ioaddr + WN0_IRQ); + } + + return 0; +} + +static int +el3_link_ok(struct net_device *dev) +{ + int ioaddr = dev->base_addr; + u16 tmp; + + EL3WINDOW(4); + tmp = inw(ioaddr + WN4_MEDIA); + EL3WINDOW(1); + return tmp & (1<<11); +} + +static void +el3_netdev_get_ecmd(struct net_device *dev, struct ethtool_link_ksettings *cmd) +{ + u16 tmp; + int ioaddr = dev->base_addr; + u32 supported; + + EL3WINDOW(0); + /* obtain current transceiver via WN4_MEDIA? */ + tmp = inw(ioaddr + WN0_ADDR_CONF); + switch (tmp >> 14) { + case 0: + cmd->base.port = PORT_TP; + break; + case 1: + cmd->base.port = PORT_AUI; + break; + case 3: + cmd->base.port = PORT_BNC; + break; + default: + break; + } + + cmd->base.duplex = DUPLEX_HALF; + supported = 0; + tmp = inw(ioaddr + WN0_CONF_CTRL); + if (tmp & (1<<13)) + supported |= SUPPORTED_AUI; + if (tmp & (1<<12)) + supported |= SUPPORTED_BNC; + if (tmp & (1<<9)) { + supported |= SUPPORTED_TP | SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full; /* hmm... */ + EL3WINDOW(4); + tmp = inw(ioaddr + WN4_NETDIAG); + if (tmp & FD_ENABLE) + cmd->base.duplex = DUPLEX_FULL; + } + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + cmd->base.speed = SPEED_10; + EL3WINDOW(1); +} + +static int +el3_netdev_set_ecmd(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) +{ + u16 tmp; + int ioaddr = dev->base_addr; + + if (cmd->base.speed != SPEED_10) + return -EINVAL; + if ((cmd->base.duplex != DUPLEX_HALF) && + (cmd->base.duplex != DUPLEX_FULL)) + return -EINVAL; + + /* change XCVR type */ + EL3WINDOW(0); + tmp = inw(ioaddr + WN0_ADDR_CONF); + switch (cmd->base.port) { + case PORT_TP: + tmp &= ~(3<<14); + dev->if_port = 0; + break; + case PORT_AUI: + tmp |= (1<<14); + dev->if_port = 1; + break; + case PORT_BNC: + tmp |= (3<<14); + dev->if_port = 3; + break; + default: + return -EINVAL; + } + + outw(tmp, ioaddr + WN0_ADDR_CONF); + if (dev->if_port == 3) { + /* fire up the DC-DC convertor if BNC gets enabled */ + tmp = inw(ioaddr + WN0_ADDR_CONF); + if (tmp & (3 << 14)) { + outw(StartCoax, ioaddr + EL3_CMD); + udelay(800); + } else + return -EIO; + } + + EL3WINDOW(4); + tmp = inw(ioaddr + WN4_NETDIAG); + if (cmd->base.duplex == DUPLEX_FULL) + tmp |= FD_ENABLE; + else + tmp &= ~FD_ENABLE; + outw(tmp, ioaddr + WN4_NETDIAG); + EL3WINDOW(1); + + return 0; +} + +static void el3_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + strscpy(info->driver, DRV_NAME, sizeof(info->driver)); +} + +static int el3_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + struct el3_private *lp = netdev_priv(dev); + + spin_lock_irq(&lp->lock); + el3_netdev_get_ecmd(dev, cmd); + spin_unlock_irq(&lp->lock); + return 0; +} + +static int el3_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) +{ + struct el3_private *lp = netdev_priv(dev); + int ret; + + spin_lock_irq(&lp->lock); + ret = el3_netdev_set_ecmd(dev, cmd); + spin_unlock_irq(&lp->lock); + return ret; +} + +static u32 el3_get_link(struct net_device *dev) +{ + struct el3_private *lp = netdev_priv(dev); + u32 ret; + + spin_lock_irq(&lp->lock); + ret = el3_link_ok(dev); + spin_unlock_irq(&lp->lock); + return ret; +} + +static u32 el3_get_msglevel(struct net_device *dev) +{ + return el3_debug; +} + +static void el3_set_msglevel(struct net_device *dev, u32 v) +{ + el3_debug = v; +} + +static const struct ethtool_ops ethtool_ops = { + .get_drvinfo = el3_get_drvinfo, + .get_link = el3_get_link, + .get_msglevel = el3_get_msglevel, + .set_msglevel = el3_set_msglevel, + .get_link_ksettings = el3_get_link_ksettings, + .set_link_ksettings = el3_set_link_ksettings, +}; + +static void +el3_down(struct net_device *dev) +{ + int ioaddr = dev->base_addr; + + netif_stop_queue(dev); + + /* Turn off statistics ASAP. We update lp->stats below. */ + outw(StatsDisable, ioaddr + EL3_CMD); + + /* Disable the receiver and transmitter. */ + outw(RxDisable, ioaddr + EL3_CMD); + outw(TxDisable, ioaddr + EL3_CMD); + + if (dev->if_port == 3) + /* Turn off thinnet power. Green! */ + outw(StopCoax, ioaddr + EL3_CMD); + else if (dev->if_port == 0) { + /* Disable link beat and jabber, if_port may change here next open(). */ + EL3WINDOW(4); + outw(inw(ioaddr + WN4_MEDIA) & ~MEDIA_TP, ioaddr + WN4_MEDIA); + } + + outw(SetIntrEnb | 0x0000, ioaddr + EL3_CMD); + + update_stats(dev); +} + +static void +el3_up(struct net_device *dev) +{ + int i, sw_info, net_diag; + int ioaddr = dev->base_addr; + + /* Activating the board required and does no harm otherwise */ + outw(0x0001, ioaddr + 4); + + /* Set the IRQ line. */ + outw((dev->irq << 12) | 0x0f00, ioaddr + WN0_IRQ); + + /* Set the station address in window 2 each time opened. */ + EL3WINDOW(2); + + for (i = 0; i < 6; i++) + outb(dev->dev_addr[i], ioaddr + i); + + if ((dev->if_port & 0x03) == 3) /* BNC interface */ + /* Start the thinnet transceiver. We should really wait 50ms...*/ + outw(StartCoax, ioaddr + EL3_CMD); + else if ((dev->if_port & 0x03) == 0) { /* 10baseT interface */ + /* Combine secondary sw_info word (the adapter level) and primary + sw_info word (duplex setting plus other useless bits) */ + EL3WINDOW(0); + sw_info = (read_eeprom(ioaddr, 0x14) & 0x400f) | + (read_eeprom(ioaddr, 0x0d) & 0xBff0); + + EL3WINDOW(4); + net_diag = inw(ioaddr + WN4_NETDIAG); + net_diag = (net_diag | FD_ENABLE); /* temporarily assume full-duplex will be set */ + pr_info("%s: ", dev->name); + switch (dev->if_port & 0x0c) { + case 12: + /* force full-duplex mode if 3c5x9b */ + if (sw_info & 0x000f) { + pr_cont("Forcing 3c5x9b full-duplex mode"); + break; + } + fallthrough; + case 8: + /* set full-duplex mode based on eeprom config setting */ + if ((sw_info & 0x000f) && (sw_info & 0x8000)) { + pr_cont("Setting 3c5x9b full-duplex mode (from EEPROM configuration bit)"); + break; + } + fallthrough; + default: + /* xcvr=(0 || 4) OR user has an old 3c5x9 non "B" model */ + pr_cont("Setting 3c5x9/3c5x9B half-duplex mode"); + net_diag = (net_diag & ~FD_ENABLE); /* disable full duplex */ + } + + outw(net_diag, ioaddr + WN4_NETDIAG); + pr_cont(" if_port: %d, sw_info: %4.4x\n", dev->if_port, sw_info); + if (el3_debug > 3) + pr_debug("%s: 3c5x9 net diag word is now: %4.4x.\n", dev->name, net_diag); + /* Enable link beat and jabber check. */ + outw(inw(ioaddr + WN4_MEDIA) | MEDIA_TP, ioaddr + WN4_MEDIA); + } + + /* Switch to the stats window, and clear all stats by reading. */ + outw(StatsDisable, ioaddr + EL3_CMD); + EL3WINDOW(6); + for (i = 0; i < 9; i++) + inb(ioaddr + i); + inw(ioaddr + 10); + inw(ioaddr + 12); + + /* Switch to register set 1 for normal use. */ + EL3WINDOW(1); + + /* Accept b-case and phys addr only. */ + outw(SetRxFilter | RxStation | RxBroadcast, ioaddr + EL3_CMD); + outw(StatsEnable, ioaddr + EL3_CMD); /* Turn on statistics. */ + + outw(RxEnable, ioaddr + EL3_CMD); /* Enable the receiver. */ + outw(TxEnable, ioaddr + EL3_CMD); /* Enable transmitter. */ + /* Allow status bits to be seen. */ + outw(SetStatusEnb | 0xff, ioaddr + EL3_CMD); + /* Ack all pending events, and set active indicator mask. */ + outw(AckIntr | IntLatch | TxAvailable | RxEarly | IntReq, + ioaddr + EL3_CMD); + outw(SetIntrEnb | IntLatch|TxAvailable|TxComplete|RxComplete|StatsFull, + ioaddr + EL3_CMD); + + netif_start_queue(dev); +} + +/* Power Management support functions */ +#ifdef CONFIG_PM + +static int +el3_suspend(struct device *pdev, pm_message_t state) +{ + unsigned long flags; + struct net_device *dev; + struct el3_private *lp; + int ioaddr; + + dev = dev_get_drvdata(pdev); + lp = netdev_priv(dev); + ioaddr = dev->base_addr; + + spin_lock_irqsave(&lp->lock, flags); + + if (netif_running(dev)) + netif_device_detach(dev); + + el3_down(dev); + outw(PowerDown, ioaddr + EL3_CMD); + + spin_unlock_irqrestore(&lp->lock, flags); + return 0; +} + +static int +el3_resume(struct device *pdev) +{ + unsigned long flags; + struct net_device *dev; + struct el3_private *lp; + int ioaddr; + + dev = dev_get_drvdata(pdev); + lp = netdev_priv(dev); + ioaddr = dev->base_addr; + + spin_lock_irqsave(&lp->lock, flags); + + outw(PowerUp, ioaddr + EL3_CMD); + EL3WINDOW(0); + el3_up(dev); + + if (netif_running(dev)) + netif_device_attach(dev); + + spin_unlock_irqrestore(&lp->lock, flags); + return 0; +} + +#endif /* CONFIG_PM */ + +module_param(debug,int, 0); +module_param_hw_array(irq, int, irq, NULL, 0); +module_param(max_interrupt_work, int, 0); +MODULE_PARM_DESC(debug, "debug level (0-6)"); +MODULE_PARM_DESC(irq, "IRQ number(s) (assigned)"); +MODULE_PARM_DESC(max_interrupt_work, "maximum events handled per interrupt"); +#ifdef CONFIG_PNP +module_param(nopnp, int, 0); +MODULE_PARM_DESC(nopnp, "disable ISA PnP support (0-1)"); +#endif /* CONFIG_PNP */ +MODULE_DESCRIPTION("3Com Etherlink III (3c509, 3c509B, 3c529, 3c579) ethernet driver"); +MODULE_LICENSE("GPL"); + +static int __init el3_init_module(void) +{ + int ret = 0; + + if (debug >= 0) + el3_debug = debug; + +#ifdef CONFIG_PNP + if (!nopnp) { + ret = pnp_register_driver(&el3_pnp_driver); + if (!ret) + pnp_registered = 1; + } +#endif + /* Select an open I/O location at 0x1*0 to do ISA contention select. */ + /* Start with 0x110 to avoid some sound cards.*/ + for (id_port = 0x110 ; id_port < 0x200; id_port += 0x10) { + if (!request_region(id_port, 1, "3c509-control")) + continue; + outb(0x00, id_port); + outb(0xff, id_port); + if (inb(id_port) & 0x01) + break; + else + release_region(id_port, 1); + } + if (id_port >= 0x200) { + id_port = 0; + pr_err("No I/O port available for 3c509 activation.\n"); + } else { + ret = isa_register_driver(&el3_isa_driver, EL3_MAX_CARDS); + if (!ret) + isa_registered = 1; + } +#ifdef CONFIG_EISA + ret = eisa_driver_register(&el3_eisa_driver); + if (!ret) + eisa_registered = 1; +#endif + +#ifdef CONFIG_PNP + if (pnp_registered) + ret = 0; +#endif + if (isa_registered) + ret = 0; +#ifdef CONFIG_EISA + if (eisa_registered) + ret = 0; +#endif + return ret; +} + +static void __exit el3_cleanup_module(void) +{ +#ifdef CONFIG_PNP + if (pnp_registered) + pnp_unregister_driver(&el3_pnp_driver); +#endif + if (isa_registered) + isa_unregister_driver(&el3_isa_driver); + if (id_port) + release_region(id_port, 1); +#ifdef CONFIG_EISA + if (eisa_registered) + eisa_driver_unregister(&el3_eisa_driver); +#endif +} + +module_init (el3_init_module); +module_exit (el3_cleanup_module); diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig index 399cb6c56198..81db16744f94 100644 --- a/drivers/net/ethernet/3com/Kconfig +++ b/drivers/net/ethernet/3com/Kconfig @@ -17,6 +17,20 @@ config NET_VENDOR_3COM if NET_VENDOR_3COM +config EL3 + tristate "3c509/3c579 \"EtherLink III\" support" + depends on (ISA || EISA) + help + If you have a network (Ethernet) card belonging to the 3Com + EtherLinkIII series, say Y here. + + If your card is not working you may need to use the DOS + setup disk to disable Plug & Play mode, and to select the default + media type. + + To compile this driver as a module, choose M here. The module + will be called 3c509. + config VORTEX tristate "3c590/3c900 series (592/595/597) \"Vortex/Boomerang\" support" depends on (PCI || EISA) && HAS_IOPORT_MAP diff --git a/drivers/net/ethernet/3com/Makefile b/drivers/net/ethernet/3com/Makefile index 5c4d07f1d456..2c65e472196f 100644 --- a/drivers/net/ethernet/3com/Makefile +++ b/drivers/net/ethernet/3com/Makefile @@ -3,5 +3,6 @@ # Makefile for the 3Com Ethernet device drivers # +obj-$(CONFIG_EL3) += 3c509.o obj-$(CONFIG_VORTEX) += 3c59x.o obj-$(CONFIG_TYPHOON) += typhoon.o -- cgit v1.2.3 From 029a6b3a14bf02e6f59ce6ecd10f9d003334c612 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 20 May 2026 12:18:53 +0100 Subject: ethernet: 3c509: Fix AUI transceiver type selection The transceiver type is held in bits 15:14 of the Address Configuration Register, with the values of 0b00, 0b01, and 0b11 denoting TP, AUI, and BNC types respectively. Therefore switching from BNC to AUI requires bits to be cleared before setting bit 14 or the setting won't change. NB this has always been wrong ever since this code was added in 2.5.42. Signed-off-by: Maciej W. Rozycki Link: https://patch.msgid.link/alpine.DEB.2.21.2605201205160.1450@angie.orcam.me.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/3com/3c509.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index fb68339e1511..67b9a3f4de5e 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -1099,6 +1099,7 @@ el3_netdev_set_ecmd(struct net_device *dev, dev->if_port = 0; break; case PORT_AUI: + tmp &= ~(3<<14); tmp |= (1<<14); dev->if_port = 1; break; -- cgit v1.2.3 From 240117bb51b95ce93ec28c7c9439c9a87d7b120c Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 20 May 2026 12:18:57 +0100 Subject: ethernet: 3c509: Add GPL 2.0 SPDX license identifier This driver has landed with Linux 0.99.13k, which was covered by the GNU General Public License version 2, and no further conditions as to licensing terms have been specified within the copyright notice included with the driver itself. Signed-off-by: Maciej W. Rozycki Link: https://patch.msgid.link/alpine.DEB.2.21.2605201206370.1450@angie.orcam.me.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/3com/3c509.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index 67b9a3f4de5e..6ebd3358e31b 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* 3c509.c: A 3c509 EtherLink3 ethernet driver for linux. */ /* Written 1993-2000 by Donald Becker. -- cgit v1.2.3 From 75756cb4b2aa148816b32d7d40c1f79f9a11eef6 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 20 May 2026 12:19:02 +0100 Subject: ethernet: 3c509: Update documentation to match MAINTAINERS There has been apparently a single message only ever publicly posted by David Ruggiero, back in 2002, which added this documentation piece among others, and MAINTAINERS was never updated accordingly. It is therefore doubtful that his maintainer status has actually come into effect. Just replace the reference then so as not to confuse people. Signed-off-by: Maciej W. Rozycki Link: https://patch.msgid.link/alpine.DEB.2.21.2605201207380.1450@angie.orcam.me.uk Signed-off-by: Jakub Kicinski --- Documentation/networking/device_drivers/ethernet/3com/3c509.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/device_drivers/ethernet/3com/3c509.rst b/Documentation/networking/device_drivers/ethernet/3com/3c509.rst index 47f706bacdd9..a8c5e5e6841d 100644 --- a/Documentation/networking/device_drivers/ethernet/3com/3c509.rst +++ b/Documentation/networking/device_drivers/ethernet/3com/3c509.rst @@ -12,7 +12,7 @@ release 1.0 28 February 2002 Current maintainer (corrections to): - David Ruggiero + Maciej W. Rozycki Introduction ============ -- cgit v1.2.3 From 014767c709a44b4e0a0bf70ee9101fb73f4e288b Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 20 May 2026 12:19:06 +0100 Subject: ethernet: 3c509: Fix most coding style issues Update the driver for our current coding style according to output from `checkpatch.pl' and manual code review, where no change to binary code results, as indicated by `objdump -dr'. Exceptions are as follows: - incomplete reverse xmas tree in set_multicast_list(), as that would change binary output, - referring el3_start_xmit() verbatim rather than via `__func__' with pr_debug(), likewise, - a bunch of pr_cont() calls, likewise, - a long udelay() call in el3_netdev_set_ecmd() made under a spinlock, likewise plus it's not eligible for conversion to a sleep in the first place, - a blank line at the start of a block in el3_interrupt(), to improve readability where the first statement would otherwise visually merge with the controlling expression of the enclosing `while' statement. These issues are benign and depending on circumstances may be adressed with suitable code refactoring later on. Signed-off-by: Maciej W. Rozycki Link: https://patch.msgid.link/alpine.DEB.2.21.2605201208280.1450@angie.orcam.me.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/3com/3c509.c | 845 +++++++++++++++++++++----------------- 1 file changed, 469 insertions(+), 376 deletions(-) diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index 6ebd3358e31b..f23be7425daf 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -1,94 +1,99 @@ // SPDX-License-Identifier: GPL-2.0 /* 3c509.c: A 3c509 EtherLink3 ethernet driver for linux. */ /* - Written 1993-2000 by Donald Becker. - - Copyright 1994-2000 by Donald Becker. - Copyright 1993 United States Government as represented by the - Director, National Security Agency. This software may be used and - distributed according to the terms of the GNU General Public License, - incorporated herein by reference. - - This driver is for the 3Com EtherLinkIII series. - - The author may be reached as becker@scyld.com, or C/O - Scyld Computing Corporation - 410 Severn Ave., Suite 210 - Annapolis MD 21403 - - Known limitations: - Because of the way 3c509 ISA detection works it's difficult to predict - a priori which of several ISA-mode cards will be detected first. - - This driver does not use predictive interrupt mode, resulting in higher - packet latency but lower overhead. If interrupts are disabled for an - unusually long time it could also result in missed packets, but in - practice this rarely happens. - - - FIXES: - Alan Cox: Removed the 'Unexpected interrupt' bug. - Michael Meskes: Upgraded to Donald Becker's version 1.07. - Alan Cox: Increased the eeprom delay. Regardless of - what the docs say some people definitely - get problems with lower (but in card spec) - delays - v1.10 4/21/97 Fixed module code so that multiple cards may be detected, - other cleanups. -djb - Andrea Arcangeli: Upgraded to Donald Becker's version 1.12. - Rick Payne: Fixed SMP race condition - v1.13 9/8/97 Made 'max_interrupt_work' an insmod-settable variable -djb - v1.14 10/15/97 Avoided waiting..discard message for fast machines -djb - v1.15 1/31/98 Faster recovery for Tx errors. -djb - v1.16 2/3/98 Different ID port handling to avoid sound cards. -djb - v1.18 12Mar2001 Andrew Morton - - Avoid bogus detect of 3c590's (Andrzej Krzysztofowicz) - - Reviewed against 1.18 from scyld.com - v1.18a 17Nov2001 Jeff Garzik - - ethtool support - v1.18b 1Mar2002 Zwane Mwaikambo - - Power Management support - v1.18c 1Mar2002 David Ruggiero - - Full duplex support - v1.19 16Oct2002 Zwane Mwaikambo - - Additional ethtool features - v1.19a 28Oct2002 Davud Ruggiero - - Increase *read_eeprom udelay to workaround oops with 2 cards. - v1.19b 08Nov2002 Marc Zyngier - - Introduce driver model for EISA cards. - v1.20 04Feb2008 Ondrej Zary - - convert to isa_driver and pnp_driver and some cleanups -*/ + * Written 1993-2000 by Donald Becker. + * + * Copyright 1994-2000 by Donald Becker. + * Copyright 1993 United States Government as represented by the + * Director, National Security Agency. This software may be used and + * distributed according to the terms of the GNU General Public License, + * incorporated herein by reference. + * + * This driver is for the 3Com EtherLinkIII series. + * + * The author may be reached as becker@scyld.com, or C/O + * Scyld Computing Corporation + * 410 Severn Ave., Suite 210 + * Annapolis MD 21403 + * + * Known limitations: + * Because of the way 3c509 ISA detection works it's difficult to predict + * a priori which of several ISA-mode cards will be detected first. + * + * This driver does not use predictive interrupt mode, resulting in higher + * packet latency but lower overhead. If interrupts are disabled for an + * unusually long time it could also result in missed packets, but in + * practice this rarely happens. + * + * + * FIXES: + * Alan Cox: Removed the 'Unexpected interrupt' bug. + * Michael Meskes: Upgraded to Donald Becker's version 1.07. + * Alan Cox: Increased the eeprom delay. Regardless of + * what the docs say some people definitely + * get problems with lower (but in card spec) + * delays. + * v1.10 4/21/97 Fixed module code so that multiple cards may be + * detected, other cleanups. -djb + * Andrea Arcangeli: Upgraded to Donald Becker's version 1.12. + * Rick Payne: Fixed SMP race condition. + * v1.13 9/8/97 Made 'max_interrupt_work' an insmod-settable + * variable. -djb + * v1.14 10/15/97 Avoided waiting..discard message for fast + * machines. -djb + * v1.15 1/31/98 Faster recovery for Tx errors. -djb + * v1.16 2/3/98 Different ID port handling to avoid sound + * cards. -djb + * v1.18 12Mar2001 Andrew Morton + * - Avoid bogus detect of 3c590's (Andrzej Krzysztofowicz) + * - Reviewed against 1.18 from scyld.com + * v1.18a 17Nov2001 Jeff Garzik + * - ethtool support. + * v1.18b 1Mar2002 Zwane Mwaikambo + * - Power Management support. + * v1.18c 1Mar2002 David Ruggiero + * - Full duplex support. + * v1.19 16Oct2002 Zwane Mwaikambo + * - Additional ethtool features. + * v1.19a 28Oct2002 David Ruggiero + * - Increase *read_eeprom udelay to workaround oops with + * 2 cards. + * v1.19b 08Nov2002 Marc Zyngier + * - Introduce driver model for EISA cards. + * v1.20 04Feb2008 Ondrej Zary + * - convert to isa_driver and pnp_driver and some + * cleanups. + */ #define DRV_NAME "3c509" /* A few values that may be tweaked. */ /* Time in jiffies before concluding the transmitter is hung. */ -#define TX_TIMEOUT (400*HZ/1000) +#define TX_TIMEOUT (400 * HZ / 1000) -#include -#include -#include -#include -#include +#include +#include /* for udelay() */ +#include +#include #include +#include +#include #include -#include #include +#include +#include +#include +#include +#include #include -#include #include +#include #include -#include /* for udelay() */ #include -#include -#include -#include -#include - +#include #include -#include + #include #ifdef EL3_DEBUG @@ -98,14 +103,15 @@ static int el3_debug = 2; #endif /* Used to do a global count of all the cards in the system. Must be - * a global variable so that the eisa probe routines can increment - * it */ -static int el3_cards = 0; + * a global variable so that the eisa probe routines can increment it. + */ +static int el3_cards; #define EL3_MAX_CARDS 8 /* To minimize the size of the driver source I only define operating - constants if they are used several times. You'll need the manual - anyway if you want to understand driver details. */ + * constants if they are used several times. You'll need the manual + * anyway if you want to understand driver details. + */ /* Offsets from base I/O address. */ #define EL3_DATA 0x00 #define EL3_CMD 0x0e @@ -114,60 +120,90 @@ static int el3_cards = 0; #define EL3_IO_EXTENT 16 -#define EL3WINDOW(win_num) outw(SelectWindow + (win_num), ioaddr + EL3_CMD) - +#define EL3WINDOW(win_num) outw(SELECT_WINDOW + (win_num), ioaddr + EL3_CMD) /* The top five bits written to EL3_CMD are a command, the lower - 11 bits are the parameter, if applicable. */ + * 11 bits are the parameter, if applicable. + */ enum c509cmd { - TotalReset = 0<<11, SelectWindow = 1<<11, StartCoax = 2<<11, - RxDisable = 3<<11, RxEnable = 4<<11, RxReset = 5<<11, RxDiscard = 8<<11, - TxEnable = 9<<11, TxDisable = 10<<11, TxReset = 11<<11, - FakeIntr = 12<<11, AckIntr = 13<<11, SetIntrEnb = 14<<11, - SetStatusEnb = 15<<11, SetRxFilter = 16<<11, SetRxThreshold = 17<<11, - SetTxThreshold = 18<<11, SetTxStart = 19<<11, StatsEnable = 21<<11, - StatsDisable = 22<<11, StopCoax = 23<<11, PowerUp = 27<<11, - PowerDown = 28<<11, PowerAuto = 29<<11}; + TOTAL_RESET = 0 << 11, + SELECT_WINDOW = 1 << 11, + START_COAX = 2 << 11, + RX_DISABLE = 3 << 11, + RX_ENABLE = 4 << 11, + RX_RESET = 5 << 11, + RX_DISCARD = 8 << 11, + TX_ENABLE = 9 << 11, + TX_DISABLE = 10 << 11, + TX_RESET = 11 << 11, + FAKE_INTR = 12 << 11, + ACK_INTR = 13 << 11, + SET_INTR_ENB = 14 << 11, + SET_STATUS_ENB = 15 << 11, + SET_RX_FILTER = 16 << 11, + SET_RX_THRESHOLD = 17 << 11, + SET_TX_THRESHOLD = 18 << 11, + SET_TX_START = 19 << 11, + STATS_ENABLE = 21 << 11, + STATS_DISABLE = 22 << 11, + STOP_COAX = 23 << 11, + POWER_UP = 27 << 11, + POWER_DOWN = 28 << 11, + POWER_AUTO = 29 << 11, +}; enum c509status { - IntLatch = 0x0001, AdapterFailure = 0x0002, TxComplete = 0x0004, - TxAvailable = 0x0008, RxComplete = 0x0010, RxEarly = 0x0020, - IntReq = 0x0040, StatsFull = 0x0080, CmdBusy = 0x1000, }; + INT_LATCH = 0x0001, + ADAPTER_FAILURE = 0x0002, + TX_COMPLETE = 0x0004, + TX_AVAILABLE = 0x0008, + RX_COMPLETE = 0x0010, + RX_EARLY = 0x0020, + INT_REQ = 0x0040, + STATS_FULL = 0x0080, + CMD_BUSY = 0x1000, +}; -/* The SetRxFilter command accepts the following classes: */ -enum RxFilter { - RxStation = 1, RxMulticast = 2, RxBroadcast = 4, RxProm = 8 }; +/* The SET_RX_FILTER command accepts the following classes: */ +enum rx_filter { + RX_STATION = 1, + RX_MULTICAST = 2, + RX_BROADCAST = 4, + RX_PROM = 8, +}; /* Register window 1 offsets, the window used in normal operation. */ #define TX_FIFO 0x00 #define RX_FIFO 0x00 -#define RX_STATUS 0x08 -#define TX_STATUS 0x0B -#define TX_FREE 0x0C /* Remaining free bytes in Tx buffer. */ - -#define WN0_CONF_CTRL 0x04 /* Window 0: Configuration control register */ -#define WN0_ADDR_CONF 0x06 /* Window 0: Address configuration register */ -#define WN0_IRQ 0x08 /* Window 0: Set IRQ line in bits 12-15. */ -#define WN4_MEDIA 0x0A /* Window 4: Various transcvr/media bits. */ -#define MEDIA_TP 0x00C0 /* Enable link beat and jabber for 10baseT. */ -#define WN4_NETDIAG 0x06 /* Window 4: Net diagnostic */ -#define FD_ENABLE 0x8000 /* Enable full-duplex ("external loopback") */ +#define RX_STATUS 0x08 +#define TX_STATUS 0x0B +#define TX_FREE 0x0C /* Remaining free bytes in Tx buffer. */ + +#define WN0_CONF_CTRL 0x04 /* Window 0: Configuration control register. */ +#define WN0_ADDR_CONF 0x06 /* Window 0: Address configuration register. */ +#define WN0_IRQ 0x08 /* Window 0: Set IRQ line in bits 12-15. */ +#define WN4_MEDIA 0x0A /* Window 4: Various transcvr/media bits. */ +#define MEDIA_TP 0x00C0 /* Enable link beat and jabber for 10baseT. */ +#define WN4_NETDIAG 0x06 /* Window 4: Net diagnostic. */ +#define FD_ENABLE 0x8000 /* Enable full-duplex ("external loopback"). */ /* * Must be a power of two (we use a binary and in the - * circular queue) + * circular queue). */ #define SKB_QUEUE_SIZE 64 enum el3_cardtype { EL3_ISA, EL3_PNP, EL3_EISA }; struct el3_private { + /* for device access */ spinlock_t lock; /* skb send-queue */ int head, size; struct sk_buff *queue[SKB_QUEUE_SIZE]; enum el3_cardtype type; }; + static int id_port; static int current_tag; static struct net_device *el3_devs[EL3_MAX_CARDS]; @@ -193,7 +229,7 @@ static struct net_device_stats *el3_get_stats(struct net_device *dev); static int el3_rx(struct net_device *dev); static int el3_close(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void el3_tx_timeout (struct net_device *dev, unsigned int txqueue); +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue); static void el3_down(struct net_device *dev); static void el3_up(struct net_device *dev); static const struct ethtool_ops ethtool_ops; @@ -205,24 +241,23 @@ static int el3_resume(struct device *); #define el3_resume NULL #endif - -/* generic device remove for all device types */ -static int el3_device_remove (struct device *device); +/* Generic device remove for all device types. */ +static int el3_device_remove(struct device *device); #ifdef CONFIG_NET_POLL_CONTROLLER static void el3_poll_controller(struct net_device *dev); #endif -/* Return 0 on success, 1 on error, 2 when found already detected PnP card */ +/* Return 0 on success, 1 on error, 2 when found already detected PnP card. */ static int el3_isa_id_sequence(__be16 *phys_addr) { short lrs_state = 0xff; int i; /* ISA boards are detected by sending the ID sequence to the - ID_PORT. We find cards past the first by setting the 'current_tag' - on cards as they are found. Cards with their tag set will not - respond to subsequent ID sequences. */ - + * ID_PORT. We find cards past the first by setting the 'current_tag' + * on cards as they are found. Cards with their tag set will not + * respond to subsequent ID sequences. + */ outb(0x00, id_port); outb(0x00, id_port); for (i = 0; i < 255; i++) { @@ -238,24 +273,33 @@ static int el3_isa_id_sequence(__be16 *phys_addr) if (id_read_eeprom(7) != 0x6d50) return 1; /* Read in EEPROM data, which does contention-select. - Only the lowest address board will stay "on-line". - 3Com got the byte order backwards. */ + * Only the lowest address board will stay "on-line". + * 3Com got the byte order backwards. + */ for (i = 0; i < 3; i++) phys_addr[i] = htons(id_read_eeprom(i)); #ifdef CONFIG_PNP if (!nopnp) { /* The ISA PnP 3c509 cards respond to the ID sequence too. - This check is needed in order not to register them twice. */ + * This check is needed in order not to register them twice. + */ for (i = 0; i < el3_cards; i++) { struct el3_private *lp = netdev_priv(el3_devs[i]); + if (lp->type == EL3_PNP && - ether_addr_equal((u8 *)phys_addr, el3_devs[i]->dev_addr)) { + ether_addr_equal((u8 *)phys_addr, + el3_devs[i]->dev_addr)) { if (el3_debug > 3) pr_debug("3c509 with address %02x %02x %02x %02x %02x %02x was found by ISAPnP\n", - phys_addr[0] & 0xff, phys_addr[0] >> 8, - phys_addr[1] & 0xff, phys_addr[1] >> 8, - phys_addr[2] & 0xff, phys_addr[2] >> 8); - /* Set the adaptor tag so that the next card can be found. */ + phys_addr[0] & 0xff, + phys_addr[0] >> 8, + phys_addr[1] & 0xff, + phys_addr[1] >> 8, + phys_addr[2] & 0xff, + phys_addr[2] >> 8); + /* Set the adaptor tag so that the next card + * can be found. + */ outb(0xd0 + ++current_tag, id_port); return 2; } @@ -263,7 +307,6 @@ static int el3_isa_id_sequence(__be16 *phys_addr) } #endif /* CONFIG_PNP */ return 0; - } static void el3_dev_fill(struct net_device *dev, __be16 *phys_addr, int ioaddr, @@ -280,8 +323,8 @@ static void el3_dev_fill(struct net_device *dev, __be16 *phys_addr, int ioaddr, static int el3_isa_match(struct device *pdev, unsigned int ndev) { - struct net_device *dev; int ioaddr, isa_irq, if_port, err; + struct net_device *dev; unsigned int iobase; __be16 phys_addr[3]; @@ -335,8 +378,7 @@ static int el3_isa_match(struct device *pdev, unsigned int ndev) return 1; } -static void el3_isa_remove(struct device *pdev, - unsigned int ndev) +static void el3_isa_remove(struct device *pdev, unsigned int ndev) { el3_device_remove(pdev); dev_set_drvdata(pdev, NULL); @@ -384,6 +426,7 @@ static struct isa_driver el3_isa_driver = { .name = "3c509" }, }; + static int isa_registered; #ifdef CONFIG_PNP @@ -401,10 +444,10 @@ MODULE_DEVICE_TABLE(pnp, el3_pnp_ids); static int el3_pnp_probe(struct pnp_dev *pdev, const struct pnp_device_id *id) { - short i; + struct net_device *dev = NULL; int ioaddr, irq, if_port; __be16 phys_addr[3]; - struct net_device *dev = NULL; + short i; int err; ioaddr = pnp_port_start(pdev, 0); @@ -464,6 +507,7 @@ static struct pnp_driver el3_pnp_driver = { .resume = el3_pnp_resume, #endif }; + static int pnp_registered; #endif /* CONFIG_PNP */ @@ -480,7 +524,7 @@ static const struct eisa_device_id el3_eisa_ids[] = { }; MODULE_DEVICE_TABLE(eisa, el3_eisa_ids); -static int el3_eisa_probe (struct device *device); +static int el3_eisa_probe(struct device *device); static struct eisa_driver el3_eisa_driver = { .id_table = el3_eisa_ids, @@ -492,17 +536,18 @@ static struct eisa_driver el3_eisa_driver = { .resume = el3_resume, } }; + static int eisa_registered; #endif static const struct net_device_ops netdev_ops = { - .ndo_open = el3_open, - .ndo_stop = el3_close, - .ndo_start_xmit = el3_start_xmit, - .ndo_get_stats = el3_get_stats, + .ndo_open = el3_open, + .ndo_stop = el3_close, + .ndo_start_xmit = el3_start_xmit, + .ndo_get_stats = el3_get_stats, .ndo_set_rx_mode = set_multicast_list, - .ndo_tx_timeout = el3_tx_timeout, - .ndo_set_mac_address = eth_mac_addr, + .ndo_tx_timeout = el3_tx_timeout, + .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = el3_poll_controller, @@ -511,11 +556,11 @@ static const struct net_device_ops netdev_ops = { static int el3_common_init(struct net_device *dev) { - struct el3_private *lp = netdev_priv(dev); - int err; - static const char * const if_names[] = { + static const char *const if_names[] = { "10baseT", "AUI", "undefined", "BNC" }; + struct el3_private *lp = netdev_priv(dev); + int err; spin_lock_init(&lp->lock); @@ -534,56 +579,55 @@ static int el3_common_init(struct net_device *dev) err = register_netdev(dev); if (err) { pr_err("Failed to register 3c5x9 at %#3.3lx, IRQ %d.\n", - dev->base_addr, dev->irq); + dev->base_addr, dev->irq); release_region(dev->base_addr, EL3_IO_EXTENT); return err; } pr_info("%s: 3c5x9 found at %#3.3lx, %s port, address %pM, IRQ %d.\n", - dev->name, dev->base_addr, if_names[(dev->if_port & 0x03)], - dev->dev_addr, dev->irq); + dev->name, dev->base_addr, if_names[(dev->if_port & 0x03)], + dev->dev_addr, dev->irq); return 0; - } -static void el3_common_remove (struct net_device *dev) +static void el3_common_remove(struct net_device *dev) { - unregister_netdev (dev); + unregister_netdev(dev); release_region(dev->base_addr, EL3_IO_EXTENT); - free_netdev (dev); + free_netdev(dev); } #ifdef CONFIG_EISA static int el3_eisa_probe(struct device *device) { - short i; - int ioaddr, irq, if_port; - __be16 phys_addr[3]; struct net_device *dev = NULL; struct eisa_device *edev; + int ioaddr, irq, if_port; + __be16 phys_addr[3]; + short i; int err; /* Yeepee, The driver framework is calling us ! */ - edev = to_eisa_device (device); + edev = to_eisa_device(device); ioaddr = edev->base_addr; if (!request_region(ioaddr, EL3_IO_EXTENT, "3c579-eisa")) return -EBUSY; /* Change the register set to the configuration window 0. */ - outw(SelectWindow | 0, ioaddr + 0xC80 + EL3_CMD); + outw(SELECT_WINDOW | 0, ioaddr + 0xC80 + EL3_CMD); irq = inw(ioaddr + WN0_IRQ) >> 12; - if_port = inw(ioaddr + 6)>>14; + if_port = inw(ioaddr + 6) >> 14; for (i = 0; i < 3; i++) phys_addr[i] = htons(read_eeprom(ioaddr, i)); /* Restore the "Product ID" to the EEPROM read register. */ read_eeprom(ioaddr, 3); - dev = alloc_etherdev(sizeof (struct el3_private)); - if (dev == NULL) { + dev = alloc_etherdev(sizeof(struct el3_private)); + if (!dev) { release_region(ioaddr, EL3_IO_EXTENT); return -ENOMEM; } @@ -591,11 +635,11 @@ static int el3_eisa_probe(struct device *device) SET_NETDEV_DEV(dev, device); el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_EISA); - eisa_set_drvdata (edev, dev); + eisa_set_drvdata(edev, dev); err = el3_common_init(dev); if (err) { - eisa_set_drvdata (edev, NULL); + eisa_set_drvdata(edev, NULL); free_netdev(dev); return err; } @@ -607,25 +651,27 @@ static int el3_eisa_probe(struct device *device) /* This remove works for all device types. * - * The net dev must be stored in the driver data field */ + * The net dev must be stored in the driver data field. + */ static int el3_device_remove(struct device *device) { struct net_device *dev; dev = dev_get_drvdata(device); - el3_common_remove (dev); + el3_common_remove(dev); return 0; } /* Read a word from the EEPROM using the regular EEPROM access register. - Assume that we are in register window zero. + * Assume that we are in register window zero. */ static ushort read_eeprom(int ioaddr, int index) { outw(EEPROM_READ + index, ioaddr + 10); - /* Pause for at least 162 us. for the read to take place. - Some chips seem to require much longer */ + /* Pause for at least 162 us for the read to take place. + * Some chips seem to require much longer. + */ mdelay(2); return inw(ioaddr + 12); } @@ -635,12 +681,14 @@ static ushort id_read_eeprom(int index) { int bit, word = 0; - /* Issue read command, and pause for at least 162 us. for it to complete. - Assume extra-fast 16Mhz bus. */ + /* Issue read command, and pause for at least 162 us for it to + * complete. Assume extra-fast 16MHz bus. + */ outb(EEPROM_READ + index, id_port); - /* Pause for at least 162 us. for the read to take place. */ - /* Some chips seem to require much longer */ + /* Pause for at least 162 us for the read to take place. + * Some chips seem to require much longer. + */ mdelay(4); for (bit = 15; bit >= 0; bit--) @@ -652,16 +700,14 @@ static ushort id_read_eeprom(int index) return word; } - -static int -el3_open(struct net_device *dev) +static int el3_open(struct net_device *dev) { int ioaddr = dev->base_addr; int i; - outw(TxReset, ioaddr + EL3_CMD); - outw(RxReset, ioaddr + EL3_CMD); - outw(SetStatusEnb | 0x00, ioaddr + EL3_CMD); + outw(TX_RESET, ioaddr + EL3_CMD); + outw(RX_RESET, ioaddr + EL3_CMD); + outw(SET_STATUS_ENB | 0x00, ioaddr + EL3_CMD); i = request_irq(dev->irq, el3_interrupt, 0, dev->name, dev); if (i) @@ -669,20 +715,20 @@ el3_open(struct net_device *dev) EL3WINDOW(0); if (el3_debug > 3) - pr_debug("%s: Opening, IRQ %d status@%x %4.4x.\n", dev->name, - dev->irq, ioaddr + EL3_STATUS, inw(ioaddr + EL3_STATUS)); + pr_debug("%s: Opening, IRQ %d status@%x %4.4x.\n", + dev->name, dev->irq, + ioaddr + EL3_STATUS, inw(ioaddr + EL3_STATUS)); el3_up(dev); if (el3_debug > 3) pr_debug("%s: Opened 3c509 IRQ %d status %4.4x.\n", - dev->name, dev->irq, inw(ioaddr + EL3_STATUS)); + dev->name, dev->irq, inw(ioaddr + EL3_STATUS)); return 0; } -static void -el3_tx_timeout (struct net_device *dev, unsigned int txqueue) +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue) { int ioaddr = dev->base_addr; @@ -693,33 +739,31 @@ el3_tx_timeout (struct net_device *dev, unsigned int txqueue) dev->stats.tx_errors++; netif_trans_update(dev); /* prevent tx timeout */ /* Issue TX_RESET and TX_START commands. */ - outw(TxReset, ioaddr + EL3_CMD); - outw(TxEnable, ioaddr + EL3_CMD); + outw(TX_RESET, ioaddr + EL3_CMD); + outw(TX_ENABLE, ioaddr + EL3_CMD); netif_wake_queue(dev); } - -static netdev_tx_t -el3_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t el3_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct el3_private *lp = netdev_priv(dev); int ioaddr = dev->base_addr; unsigned long flags; - netif_stop_queue (dev); + netif_stop_queue(dev); dev->stats.tx_bytes += skb->len; if (el3_debug > 4) { pr_debug("%s: el3_start_xmit(length = %u) called, status %4.4x.\n", - dev->name, skb->len, inw(ioaddr + EL3_STATUS)); + dev->name, skb->len, inw(ioaddr + EL3_STATUS)); } /* * We lock the driver against other processors. Note * we don't need to lock versus the IRQ as we suspended * that. This means that we lose the ability to take * an RX during a TX upload. That sucks a bit with SMP - * on an original 3c509 (2K buffer) + * on an original 3c509 (2K buffer). * * Using disable_irq stops us crapping on other * time sensitive devices. @@ -733,39 +777,43 @@ el3_start_xmit(struct sk_buff *skb, struct net_device *dev) /* ... and the packet rounded to a doubleword. */ outsl(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2); - if (inw(ioaddr + TX_FREE) > 1536) + if (inw(ioaddr + TX_FREE) > 1536) { netif_start_queue(dev); - else + } else { /* Interrupt us when the FIFO has room for max-sized packet. */ - outw(SetTxThreshold + 1536, ioaddr + EL3_CMD); + outw(SET_TX_THRESHOLD + 1536, ioaddr + EL3_CMD); + } spin_unlock_irqrestore(&lp->lock, flags); - dev_consume_skb_any (skb); + dev_consume_skb_any(skb); /* Clear the Tx status stack. */ { short tx_status; int i = 4; - while (--i > 0 && (tx_status = inb(ioaddr + TX_STATUS)) > 0) { - if (tx_status & 0x38) dev->stats.tx_aborted_errors++; - if (tx_status & 0x30) outw(TxReset, ioaddr + EL3_CMD); - if (tx_status & 0x3C) outw(TxEnable, ioaddr + EL3_CMD); - outb(0x00, ioaddr + TX_STATUS); /* Pop the status stack. */ + while (--i > 0 && (tx_status = inb(ioaddr + TX_STATUS)) > 0) { + if (tx_status & 0x38) + dev->stats.tx_aborted_errors++; + if (tx_status & 0x30) + outw(TX_RESET, ioaddr + EL3_CMD); + if (tx_status & 0x3C) + outw(TX_ENABLE, ioaddr + EL3_CMD); + /* Pop the status stack. */ + outb(0x00, ioaddr + TX_STATUS); } } return NETDEV_TX_OK; } /* The EL3 interrupt handler. */ -static irqreturn_t -el3_interrupt(int irq, void *dev_id) +static irqreturn_t el3_interrupt(int irq, void *dev_id) { struct net_device *dev = dev_id; + int i = max_interrupt_work; struct el3_private *lp; int ioaddr, status; - int i = max_interrupt_work; lp = netdev_priv(dev); spin_lock(&lp->lock); @@ -778,70 +826,89 @@ el3_interrupt(int irq, void *dev_id) } while ((status = inw(ioaddr + EL3_STATUS)) & - (IntLatch | RxComplete | StatsFull)) { + (INT_LATCH | RX_COMPLETE | STATS_FULL)) { - if (status & RxComplete) + if (status & RX_COMPLETE) el3_rx(dev); - if (status & TxAvailable) { + if (status & TX_AVAILABLE) { if (el3_debug > 5) pr_debug(" TX room bit was handled.\n"); /* There's room in the FIFO for a full-sized packet. */ - outw(AckIntr | TxAvailable, ioaddr + EL3_CMD); - netif_wake_queue (dev); + outw(ACK_INTR | TX_AVAILABLE, ioaddr + EL3_CMD); + netif_wake_queue(dev); } - if (status & (AdapterFailure | RxEarly | StatsFull | TxComplete)) { + if (status & + (ADAPTER_FAILURE | RX_EARLY | STATS_FULL | TX_COMPLETE)) { /* Handle all uncommon interrupts. */ - if (status & StatsFull) /* Empty statistics. */ + if (status & STATS_FULL) { + /* Empty statistics. */ update_stats(dev); - if (status & RxEarly) { /* Rx early is unused. */ + } + if (status & RX_EARLY) { + /* Rx early is unused. */ el3_rx(dev); - outw(AckIntr | RxEarly, ioaddr + EL3_CMD); + outw(ACK_INTR | RX_EARLY, ioaddr + EL3_CMD); } - if (status & TxComplete) { /* Really Tx error. */ + if (status & TX_COMPLETE) { + /* Really Tx error. */ short tx_status; int i = 4; - while (--i>0 && (tx_status = inb(ioaddr + TX_STATUS)) > 0) { - if (tx_status & 0x38) dev->stats.tx_aborted_errors++; - if (tx_status & 0x30) outw(TxReset, ioaddr + EL3_CMD); - if (tx_status & 0x3C) outw(TxEnable, ioaddr + EL3_CMD); - outb(0x00, ioaddr + TX_STATUS); /* Pop the status stack. */ + while (--i > 0 && + ((tx_status = inb(ioaddr + TX_STATUS)) + > 0)) { + if (tx_status & 0x38) + dev->stats.tx_aborted_errors++; + if (tx_status & 0x30) + outw(TX_RESET, + ioaddr + EL3_CMD); + if (tx_status & 0x3C) + outw(TX_ENABLE, + ioaddr + EL3_CMD); + /* Pop the status stack. */ + outb(0x00, ioaddr + TX_STATUS); } } - if (status & AdapterFailure) { - /* Adapter failure requires Rx reset and reinit. */ - outw(RxReset, ioaddr + EL3_CMD); + if (status & ADAPTER_FAILURE) { + /* Adapter failure requires Rx reset + * and reinit. + */ + outw(RX_RESET, ioaddr + EL3_CMD); /* Set the Rx filter to the current state. */ - outw(SetRxFilter | RxStation | RxBroadcast - | (dev->flags & IFF_ALLMULTI ? RxMulticast : 0) - | (dev->flags & IFF_PROMISC ? RxProm : 0), - ioaddr + EL3_CMD); - outw(RxEnable, ioaddr + EL3_CMD); /* Re-enable the receiver. */ - outw(AckIntr | AdapterFailure, ioaddr + EL3_CMD); + outw((SET_RX_FILTER | RX_STATION | + RX_BROADCAST | + (dev->flags & IFF_ALLMULTI ? + RX_MULTICAST : 0) | + (dev->flags & IFF_PROMISC ? + RX_PROM : 0)), + ioaddr + EL3_CMD); + /* Re-enable the receiver. */ + outw(RX_ENABLE, ioaddr + EL3_CMD); + outw(ACK_INTR | ADAPTER_FAILURE, + ioaddr + EL3_CMD); } } if (--i < 0) { pr_err("%s: Infinite loop in interrupt, status %4.4x.\n", - dev->name, status); + dev->name, status); /* Clear all interrupts. */ - outw(AckIntr | 0xFF, ioaddr + EL3_CMD); + outw(ACK_INTR | 0xFF, ioaddr + EL3_CMD); break; } /* Acknowledge the IRQ. */ - outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD); /* Ack IRQ */ + outw(ACK_INTR | INT_REQ | INT_LATCH, ioaddr + EL3_CMD); } if (el3_debug > 4) { pr_debug("%s: exiting interrupt, status %4.4x.\n", dev->name, - inw(ioaddr + EL3_STATUS)); + inw(ioaddr + EL3_STATUS)); } spin_unlock(&lp->lock); return IRQ_HANDLED; } - #ifdef CONFIG_NET_POLL_CONTROLLER /* * Polling receive - used by netconsole and other diagnostic tools @@ -855,28 +922,23 @@ static void el3_poll_controller(struct net_device *dev) } #endif -static struct net_device_stats * -el3_get_stats(struct net_device *dev) +static struct net_device_stats *el3_get_stats(struct net_device *dev) { struct el3_private *lp = netdev_priv(dev); unsigned long flags; - /* - * This is fast enough not to bother with disable IRQ - * stuff. - */ - + /* This is fast enough not to bother with disable IRQ stuff. */ spin_lock_irqsave(&lp->lock, flags); update_stats(dev); spin_unlock_irqrestore(&lp->lock, flags); return &dev->stats; } -/* Update statistics. We change to register window 6, so this should be run - single-threaded if the device is active. This is expected to be a rare - operation, and it's simpler for the rest of the driver to assume that - window 1 is always valid rather than use a special window-state variable. - */ +/* Update statistics. We change to register window 6, so this should be run + * single-threaded if the device is active. This is expected to be a rare + * operation, and it's simpler for the rest of the driver to assume that + * window 1 is always valid rather than use a special window-state variable. + */ static void update_stats(struct net_device *dev) { int ioaddr = dev->base_addr; @@ -884,10 +946,10 @@ static void update_stats(struct net_device *dev) if (el3_debug > 5) pr_debug(" Updating the statistics.\n"); /* Turn off statistics updates while reading. */ - outw(StatsDisable, ioaddr + EL3_CMD); + outw(STATS_DISABLE, ioaddr + EL3_CMD); /* Switch to the stats window, and read everything. */ EL3WINDOW(6); - dev->stats.tx_carrier_errors += inb(ioaddr + 0); + dev->stats.tx_carrier_errors += inb(ioaddr + 0); dev->stats.tx_heartbeat_errors += inb(ioaddr + 1); /* Multiple collisions. */ inb(ioaddr + 2); dev->stats.collisions += inb(ioaddr + 3); @@ -901,31 +963,42 @@ static void update_stats(struct net_device *dev) /* Back to window 1, and turn statistics back on. */ EL3WINDOW(1); - outw(StatsEnable, ioaddr + EL3_CMD); + outw(STATS_ENABLE, ioaddr + EL3_CMD); } -static int -el3_rx(struct net_device *dev) +static int el3_rx(struct net_device *dev) { int ioaddr = dev->base_addr; short rx_status; if (el3_debug > 5) pr_debug(" In rx_packet(), status %4.4x, rx_status %4.4x.\n", - inw(ioaddr+EL3_STATUS), inw(ioaddr+RX_STATUS)); + inw(ioaddr + EL3_STATUS), inw(ioaddr + RX_STATUS)); while ((rx_status = inw(ioaddr + RX_STATUS)) > 0) { - if (rx_status & 0x4000) { /* Error, update stats. */ + if (rx_status & 0x4000) { + /* Error, update stats. */ short error = rx_status & 0x3800; - outw(RxDiscard, ioaddr + EL3_CMD); + outw(RX_DISCARD, ioaddr + EL3_CMD); dev->stats.rx_errors++; switch (error) { - case 0x0000: dev->stats.rx_over_errors++; break; - case 0x0800: dev->stats.rx_length_errors++; break; - case 0x1000: dev->stats.rx_frame_errors++; break; - case 0x1800: dev->stats.rx_length_errors++; break; - case 0x2000: dev->stats.rx_frame_errors++; break; - case 0x2800: dev->stats.rx_crc_errors++; break; + case 0x0000: + dev->stats.rx_over_errors++; + break; + case 0x0800: + dev->stats.rx_length_errors++; + break; + case 0x1000: + dev->stats.rx_frame_errors++; + break; + case 0x1800: + dev->stats.rx_length_errors++; + break; + case 0x2000: + dev->stats.rx_frame_errors++; + break; + case 0x2800: + dev->stats.rx_crc_errors++; break; } } else { short pkt_len = rx_status & 0x7ff; @@ -934,49 +1007,51 @@ el3_rx(struct net_device *dev) skb = netdev_alloc_skb(dev, pkt_len + 5); if (el3_debug > 4) pr_debug("Receiving packet size %d status %4.4x.\n", - pkt_len, rx_status); - if (skb != NULL) { - skb_reserve(skb, 2); /* Align IP on 16 byte */ - - /* 'skb->data' points to the start of sk_buff data area. */ - insl(ioaddr + RX_FIFO, skb_put(skb,pkt_len), - (pkt_len + 3) >> 2); - - outw(RxDiscard, ioaddr + EL3_CMD); /* Pop top Rx packet. */ - skb->protocol = eth_type_trans(skb,dev); + pkt_len, rx_status); + if (skb) { + /* Align IP on 16 byte. */ + skb_reserve(skb, 2); + + /* 'skb->data' points to the start of sk_buff + * data area. + */ + insl(ioaddr + RX_FIFO, skb_put(skb, pkt_len), + (pkt_len + 3) >> 2); + + /* Pop top Rx packet. */ + outw(RX_DISCARD, ioaddr + EL3_CMD); + skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); dev->stats.rx_bytes += pkt_len; dev->stats.rx_packets++; continue; } - outw(RxDiscard, ioaddr + EL3_CMD); + outw(RX_DISCARD, ioaddr + EL3_CMD); dev->stats.rx_dropped++; if (el3_debug) pr_debug("%s: Couldn't allocate a sk_buff of size %d.\n", - dev->name, pkt_len); + dev->name, pkt_len); } - inw(ioaddr + EL3_STATUS); /* Delay. */ + inw(ioaddr + EL3_STATUS); /* Delay. */ while (inw(ioaddr + EL3_STATUS) & 0x1000) pr_debug(" Waiting for 3c509 to discard packet, status %x.\n", - inw(ioaddr + EL3_STATUS) ); + inw(ioaddr + EL3_STATUS)); } return 0; } -/* - * Set or clear the multicast filter for this adaptor. - */ -static void -set_multicast_list(struct net_device *dev) +/* Set or clear the multicast filter for this adaptor. */ +static void set_multicast_list(struct net_device *dev) { - unsigned long flags; struct el3_private *lp = netdev_priv(dev); int ioaddr = dev->base_addr; int mc_count = netdev_mc_count(dev); + unsigned long flags; if (el3_debug > 1) { static int old; + if (old != mc_count) { old = mc_count; pr_debug("%s: Setting Rx mode to %d addresses.\n", @@ -984,23 +1059,24 @@ set_multicast_list(struct net_device *dev) } } spin_lock_irqsave(&lp->lock, flags); - if (dev->flags&IFF_PROMISC) { - outw(SetRxFilter | RxStation | RxMulticast | RxBroadcast | RxProm, - ioaddr + EL3_CMD); - } - else if (mc_count || (dev->flags&IFF_ALLMULTI)) { - outw(SetRxFilter | RxStation | RxMulticast | RxBroadcast, ioaddr + EL3_CMD); + if (dev->flags & IFF_PROMISC) { + outw((SET_RX_FILTER | RX_STATION | RX_MULTICAST | + RX_BROADCAST | RX_PROM), + ioaddr + EL3_CMD); + } else if (mc_count || (dev->flags & IFF_ALLMULTI)) { + outw(SET_RX_FILTER | RX_STATION | RX_MULTICAST | RX_BROADCAST, + ioaddr + EL3_CMD); + } else { + outw(SET_RX_FILTER | RX_STATION | RX_BROADCAST, + ioaddr + EL3_CMD); } - else - outw(SetRxFilter | RxStation | RxBroadcast, ioaddr + EL3_CMD); spin_unlock_irqrestore(&lp->lock, flags); } -static int -el3_close(struct net_device *dev) +static int el3_close(struct net_device *dev) { - int ioaddr = dev->base_addr; struct el3_private *lp = netdev_priv(dev); + int ioaddr = dev->base_addr; if (el3_debug > 2) pr_debug("%s: Shutting down ethercard.\n", dev->name); @@ -1013,15 +1089,15 @@ el3_close(struct net_device *dev) if (lp->type != EL3_EISA) { /* But we explicitly zero the IRQ line select anyway. Don't do * it on EISA cards, it prevents the module from getting an - * IRQ after unload+reload... */ + * IRQ after unload+reload... + */ outw(0x0f00, ioaddr + WN0_IRQ); } return 0; } -static int -el3_link_ok(struct net_device *dev) +static int el3_link_ok(struct net_device *dev) { int ioaddr = dev->base_addr; u16 tmp; @@ -1029,18 +1105,18 @@ el3_link_ok(struct net_device *dev) EL3WINDOW(4); tmp = inw(ioaddr + WN4_MEDIA); EL3WINDOW(1); - return tmp & (1<<11); + return tmp & (1 << 11); } -static void -el3_netdev_get_ecmd(struct net_device *dev, struct ethtool_link_ksettings *cmd) +static void el3_netdev_get_ecmd(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { - u16 tmp; int ioaddr = dev->base_addr; u32 supported; + u16 tmp; EL3WINDOW(0); - /* obtain current transceiver via WN4_MEDIA? */ + /* Obtain current transceiver via WN4_MEDIA? */ tmp = inw(ioaddr + WN0_ADDR_CONF); switch (tmp >> 14) { case 0: @@ -1059,13 +1135,13 @@ el3_netdev_get_ecmd(struct net_device *dev, struct ethtool_link_ksettings *cmd) cmd->base.duplex = DUPLEX_HALF; supported = 0; tmp = inw(ioaddr + WN0_CONF_CTRL); - if (tmp & (1<<13)) + if (tmp & (1 << 13)) supported |= SUPPORTED_AUI; - if (tmp & (1<<12)) + if (tmp & (1 << 12)) supported |= SUPPORTED_BNC; - if (tmp & (1<<9)) { + if (tmp & (1 << 9)) { supported |= SUPPORTED_TP | SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full; /* hmm... */ + SUPPORTED_10baseT_Full; /* hmm... */ EL3WINDOW(4); tmp = inw(ioaddr + WN4_NETDIAG); if (tmp & FD_ENABLE) @@ -1078,17 +1154,15 @@ el3_netdev_get_ecmd(struct net_device *dev, struct ethtool_link_ksettings *cmd) EL3WINDOW(1); } -static int -el3_netdev_set_ecmd(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) +static int el3_netdev_set_ecmd(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { - u16 tmp; int ioaddr = dev->base_addr; + u16 tmp; if (cmd->base.speed != SPEED_10) return -EINVAL; - if ((cmd->base.duplex != DUPLEX_HALF) && - (cmd->base.duplex != DUPLEX_FULL)) + if (cmd->base.duplex != DUPLEX_HALF && cmd->base.duplex != DUPLEX_FULL) return -EINVAL; /* change XCVR type */ @@ -1096,16 +1170,16 @@ el3_netdev_set_ecmd(struct net_device *dev, tmp = inw(ioaddr + WN0_ADDR_CONF); switch (cmd->base.port) { case PORT_TP: - tmp &= ~(3<<14); + tmp &= ~(3 << 14); dev->if_port = 0; break; case PORT_AUI: - tmp &= ~(3<<14); - tmp |= (1<<14); + tmp &= ~(3 << 14); + tmp |= 1 << 14; dev->if_port = 1; break; case PORT_BNC: - tmp |= (3<<14); + tmp |= 3 << 14; dev->if_port = 3; break; default: @@ -1114,13 +1188,14 @@ el3_netdev_set_ecmd(struct net_device *dev, outw(tmp, ioaddr + WN0_ADDR_CONF); if (dev->if_port == 3) { - /* fire up the DC-DC convertor if BNC gets enabled */ + /* Fire up the DC-DC converter if BNC gets enabled. */ tmp = inw(ioaddr + WN0_ADDR_CONF); if (tmp & (3 << 14)) { - outw(StartCoax, ioaddr + EL3_CMD); + outw(START_COAX, ioaddr + EL3_CMD); udelay(800); - } else + } else { return -EIO; + } } EL3WINDOW(4); @@ -1135,7 +1210,8 @@ el3_netdev_set_ecmd(struct net_device *dev, return 0; } -static void el3_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +static void el3_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) { strscpy(info->driver, DRV_NAME, sizeof(info->driver)); } @@ -1193,41 +1269,41 @@ static const struct ethtool_ops ethtool_ops = { .set_link_ksettings = el3_set_link_ksettings, }; -static void -el3_down(struct net_device *dev) +static void el3_down(struct net_device *dev) { int ioaddr = dev->base_addr; netif_stop_queue(dev); /* Turn off statistics ASAP. We update lp->stats below. */ - outw(StatsDisable, ioaddr + EL3_CMD); + outw(STATS_DISABLE, ioaddr + EL3_CMD); /* Disable the receiver and transmitter. */ - outw(RxDisable, ioaddr + EL3_CMD); - outw(TxDisable, ioaddr + EL3_CMD); + outw(RX_DISABLE, ioaddr + EL3_CMD); + outw(TX_DISABLE, ioaddr + EL3_CMD); - if (dev->if_port == 3) + if (dev->if_port == 3) { /* Turn off thinnet power. Green! */ - outw(StopCoax, ioaddr + EL3_CMD); - else if (dev->if_port == 0) { - /* Disable link beat and jabber, if_port may change here next open(). */ + outw(STOP_COAX, ioaddr + EL3_CMD); + } else if (dev->if_port == 0) { + /* Disable link beat and jabber, if_port may change here next + * open(). + */ EL3WINDOW(4); outw(inw(ioaddr + WN4_MEDIA) & ~MEDIA_TP, ioaddr + WN4_MEDIA); } - outw(SetIntrEnb | 0x0000, ioaddr + EL3_CMD); + outw(SET_INTR_ENB | 0x0000, ioaddr + EL3_CMD); update_stats(dev); } -static void -el3_up(struct net_device *dev) +static void el3_up(struct net_device *dev) { - int i, sw_info, net_diag; int ioaddr = dev->base_addr; + int i, sw_info, net_diag; - /* Activating the board required and does no harm otherwise */ + /* Activating the board required and does no harm otherwise. */ outw(0x0001, ioaddr + 4); /* Set the IRQ line. */ @@ -1239,51 +1315,67 @@ el3_up(struct net_device *dev) for (i = 0; i < 6; i++) outb(dev->dev_addr[i], ioaddr + i); - if ((dev->if_port & 0x03) == 3) /* BNC interface */ - /* Start the thinnet transceiver. We should really wait 50ms...*/ - outw(StartCoax, ioaddr + EL3_CMD); - else if ((dev->if_port & 0x03) == 0) { /* 10baseT interface */ - /* Combine secondary sw_info word (the adapter level) and primary - sw_info word (duplex setting plus other useless bits) */ + if ((dev->if_port & 0x03) == 3) { + /* BNC interface */ + + /* Start the thinnet transceiver. We should really wait + * 50ms... + */ + outw(START_COAX, ioaddr + EL3_CMD); + } else if ((dev->if_port & 0x03) == 0) { + /* 10baseT interface */ + + /* Combine secondary sw_info word (the adapter level) and + * primary sw_info word (duplex setting plus other useless + * bits). + */ EL3WINDOW(0); sw_info = (read_eeprom(ioaddr, 0x14) & 0x400f) | - (read_eeprom(ioaddr, 0x0d) & 0xBff0); + (read_eeprom(ioaddr, 0x0d) & 0xBff0); EL3WINDOW(4); net_diag = inw(ioaddr + WN4_NETDIAG); - net_diag = (net_diag | FD_ENABLE); /* temporarily assume full-duplex will be set */ + /* Temporarily assume full-duplex will be set. */ + net_diag = (net_diag | FD_ENABLE); pr_info("%s: ", dev->name); switch (dev->if_port & 0x0c) { - case 12: - /* force full-duplex mode if 3c5x9b */ - if (sw_info & 0x000f) { - pr_cont("Forcing 3c5x9b full-duplex mode"); - break; - } - fallthrough; - case 8: - /* set full-duplex mode based on eeprom config setting */ - if ((sw_info & 0x000f) && (sw_info & 0x8000)) { - pr_cont("Setting 3c5x9b full-duplex mode (from EEPROM configuration bit)"); - break; - } - fallthrough; - default: - /* xcvr=(0 || 4) OR user has an old 3c5x9 non "B" model */ - pr_cont("Setting 3c5x9/3c5x9B half-duplex mode"); - net_diag = (net_diag & ~FD_ENABLE); /* disable full duplex */ + case 12: + /* Force full-duplex mode if 3c5x9b. */ + if (sw_info & 0x000f) { + pr_cont("Forcing 3c5x9b full-duplex mode"); + break; + } + fallthrough; + case 8: + /* Set full-duplex mode based on eeprom config + * setting. + */ + if ((sw_info & 0x000f) && (sw_info & 0x8000)) { + pr_cont("Setting 3c5x9b full-duplex mode (from EEPROM configuration bit)"); + break; + } + fallthrough; + default: + /* xcvr = (0 || 4) OR user has an old 3c5x9 non "B" + * model. + */ + pr_cont("Setting 3c5x9/3c5x9B half-duplex mode"); + /* Disable full duplex. */ + net_diag = (net_diag & ~FD_ENABLE); } outw(net_diag, ioaddr + WN4_NETDIAG); - pr_cont(" if_port: %d, sw_info: %4.4x\n", dev->if_port, sw_info); + pr_cont(" if_port: %d, sw_info: %4.4x\n", + dev->if_port, sw_info); if (el3_debug > 3) - pr_debug("%s: 3c5x9 net diag word is now: %4.4x.\n", dev->name, net_diag); + pr_debug("%s: 3c5x9 net diag word is now: %4.4x.\n", + dev->name, net_diag); /* Enable link beat and jabber check. */ outw(inw(ioaddr + WN4_MEDIA) | MEDIA_TP, ioaddr + WN4_MEDIA); } /* Switch to the stats window, and clear all stats by reading. */ - outw(StatsDisable, ioaddr + EL3_CMD); + outw(STATS_DISABLE, ioaddr + EL3_CMD); EL3WINDOW(6); for (i = 0; i < 9; i++) inb(ioaddr + i); @@ -1294,18 +1386,22 @@ el3_up(struct net_device *dev) EL3WINDOW(1); /* Accept b-case and phys addr only. */ - outw(SetRxFilter | RxStation | RxBroadcast, ioaddr + EL3_CMD); - outw(StatsEnable, ioaddr + EL3_CMD); /* Turn on statistics. */ - - outw(RxEnable, ioaddr + EL3_CMD); /* Enable the receiver. */ - outw(TxEnable, ioaddr + EL3_CMD); /* Enable transmitter. */ + outw(SET_RX_FILTER | RX_STATION | RX_BROADCAST, ioaddr + EL3_CMD); + /* Turn on statistics. */ + outw(STATS_ENABLE, ioaddr + EL3_CMD); + + /* Enable the receiver. */ + outw(RX_ENABLE, ioaddr + EL3_CMD); + /* Enable transmitter. */ + outw(TX_ENABLE, ioaddr + EL3_CMD); /* Allow status bits to be seen. */ - outw(SetStatusEnb | 0xff, ioaddr + EL3_CMD); + outw(SET_STATUS_ENB | 0xff, ioaddr + EL3_CMD); /* Ack all pending events, and set active indicator mask. */ - outw(AckIntr | IntLatch | TxAvailable | RxEarly | IntReq, - ioaddr + EL3_CMD); - outw(SetIntrEnb | IntLatch|TxAvailable|TxComplete|RxComplete|StatsFull, - ioaddr + EL3_CMD); + outw(ACK_INTR | INT_LATCH | TX_AVAILABLE | RX_EARLY | INT_REQ, + ioaddr + EL3_CMD); + outw((SET_INTR_ENB | INT_LATCH | TX_AVAILABLE | TX_COMPLETE | + RX_COMPLETE | STATS_FULL), + ioaddr + EL3_CMD); netif_start_queue(dev); } @@ -1313,12 +1409,11 @@ el3_up(struct net_device *dev) /* Power Management support functions */ #ifdef CONFIG_PM -static int -el3_suspend(struct device *pdev, pm_message_t state) +static int el3_suspend(struct device *pdev, pm_message_t state) { - unsigned long flags; struct net_device *dev; struct el3_private *lp; + unsigned long flags; int ioaddr; dev = dev_get_drvdata(pdev); @@ -1331,18 +1426,17 @@ el3_suspend(struct device *pdev, pm_message_t state) netif_device_detach(dev); el3_down(dev); - outw(PowerDown, ioaddr + EL3_CMD); + outw(POWER_DOWN, ioaddr + EL3_CMD); spin_unlock_irqrestore(&lp->lock, flags); return 0; } -static int -el3_resume(struct device *pdev) +static int el3_resume(struct device *pdev) { - unsigned long flags; struct net_device *dev; struct el3_private *lp; + unsigned long flags; int ioaddr; dev = dev_get_drvdata(pdev); @@ -1351,7 +1445,7 @@ el3_resume(struct device *pdev) spin_lock_irqsave(&lp->lock, flags); - outw(PowerUp, ioaddr + EL3_CMD); + outw(POWER_UP, ioaddr + EL3_CMD); EL3WINDOW(0); el3_up(dev); @@ -1364,7 +1458,7 @@ el3_resume(struct device *pdev) #endif /* CONFIG_PM */ -module_param(debug,int, 0); +module_param(debug, int, 0); module_param_hw_array(irq, int, irq, NULL, 0); module_param(max_interrupt_work, int, 0); MODULE_PARM_DESC(debug, "debug level (0-6)"); @@ -1393,15 +1487,14 @@ static int __init el3_init_module(void) #endif /* Select an open I/O location at 0x1*0 to do ISA contention select. */ /* Start with 0x110 to avoid some sound cards.*/ - for (id_port = 0x110 ; id_port < 0x200; id_port += 0x10) { + for (id_port = 0x110; id_port < 0x200; id_port += 0x10) { if (!request_region(id_port, 1, "3c509-control")) continue; outb(0x00, id_port); outb(0xff, id_port); if (inb(id_port) & 0x01) break; - else - release_region(id_port, 1); + release_region(id_port, 1); } if (id_port >= 0x200) { id_port = 0; @@ -1446,5 +1539,5 @@ static void __exit el3_cleanup_module(void) #endif } -module_init (el3_init_module); -module_exit (el3_cleanup_module); +module_init(el3_init_module); +module_exit(el3_cleanup_module); -- cgit v1.2.3 From 8f0f5c4fb9df0e19a341e0c6ed8dc4fda9124f03 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Thu, 21 May 2026 13:49:14 +0900 Subject: tracing: Do not call map->ops->elt_free() if elt_alloc() fails In paths where tracing_map_elt_alloc() failed to allocate objects, the map->ops->elt_alloc() call was never successful. In this case, map->ops->elt_free() should not be called. Link: https://sashiko.dev/#/patchset/20260520223101.34710-1-rosenp%40gmail.com Cc: stable@vger.kernel.org Cc: Tom Zanussi Cc: Mathieu Desnoyers Cc: Rosen Penev Reported-by: Sashiko Fixes: 2734b629525a ("tracing: Add per-element variable support to tracing_map") Link: https://patch.msgid.link/177933895460.108746.5396070821443932634.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt --- kernel/trace/tracing_map.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index bf1a507695b6..0dd7927df22a 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -386,13 +386,11 @@ static void tracing_map_elt_init_fields(struct tracing_map_elt *elt) } } -static void tracing_map_elt_free(struct tracing_map_elt *elt) +static void __tracing_map_elt_free(struct tracing_map_elt *elt) { if (!elt) return; - if (elt->map->ops && elt->map->ops->elt_free) - elt->map->ops->elt_free(elt); kfree(elt->fields); kfree(elt->vars); kfree(elt->var_set); @@ -400,6 +398,17 @@ static void tracing_map_elt_free(struct tracing_map_elt *elt) kfree(elt); } +static void tracing_map_elt_free(struct tracing_map_elt *elt) +{ + if (!elt) + return; + + /* Only objects initialized with alloc_elt() should be passed to free_elt().*/ + if (elt->map->ops && elt->map->ops->elt_free) + elt->map->ops->elt_free(elt); + __tracing_map_elt_free(elt); +} + static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map) { struct tracing_map_elt *elt; @@ -444,7 +453,7 @@ static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map) } return elt; free: - tracing_map_elt_free(elt); + __tracing_map_elt_free(elt); return ERR_PTR(err); } -- cgit v1.2.3 From 9a1730245e416d11ad5c0f2c100061d61cc43f60 Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Wed, 20 May 2026 20:43:20 +0200 Subject: net: bcmgenet: keep RBUF EEE/PM disabled Setting RBUF_EEE_EN | RBUF_PM_EN in RBUF_ENERGY_CTRL breaks the RX path on GENET hardware once MAC EEE becomes active. RX traffic stops flowing while the link stays up and the usual descriptor/RX error counters remain quiet. In that state the MAC still accepts frames (rbuf_ovflow_cnt keeps climbing) but RBUF no longer forwards them to DMA, so rx_packets is no longer incremented at the netdev level. On some boards the corruption ends up as a paging fault in skb_release_data via bcmgenet_rx_poll on an LPI exit. Reproduced on Pi 4B (BCM2711 + BCM54213PE) and confirmed by Florian Fainelli on an internal Broadcom 4908-family board with the same crash signature. RBUF_PM_EN is not publicly documented. This shows up more often now that phy_support_eee() enables EEE by default, but it also affects older kernels as soon as TX LPI is turned on via ethtool, so it is not specific to recent changes. Always clear RBUF_EEE_EN | RBUF_PM_EN in bcmgenet_eee_enable_set so the bits stay off across resets. UMAC and TBUF setup is left alone so TX-side EEE keeps working. Link: https://github.com/raspberrypi/linux/issues/7304 Fixes: 6ef398ea60d9 ("net: bcmgenet: add EEE support") Cc: stable@vger.kernel.org Signed-off-by: Nicolai Buchwitz Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20260520184320.652053-1-nb@tipi-net.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 54f71b1e85fc..7c11cf916762 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1368,13 +1368,12 @@ void bcmgenet_eee_enable_set(struct net_device *dev, bool enable) reg &= ~(TBUF_EEE_EN | TBUF_PM_EN); bcmgenet_writel(reg, priv->base + off); - /* Do the same for thing for RBUF */ + /* RBUF EEE/PM can break the RX path on GENET. Keep it disabled. */ reg = bcmgenet_rbuf_readl(priv, RBUF_ENERGY_CTRL); - if (enable) - reg |= RBUF_EEE_EN | RBUF_PM_EN; - else + if (reg & (RBUF_EEE_EN | RBUF_PM_EN)) { reg &= ~(RBUF_EEE_EN | RBUF_PM_EN); - bcmgenet_rbuf_writel(priv, reg, RBUF_ENERGY_CTRL); + bcmgenet_rbuf_writel(priv, reg, RBUF_ENERGY_CTRL); + } if (!enable && priv->clk_eee_enabled) { clk_disable_unprepare(priv->clk_eee); -- cgit v1.2.3 From 979c017803c40829b03acd9e5236e354b7622360 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Mon, 18 May 2026 14:34:47 -0400 Subject: l2tp: use list_del_rcu in l2tp_session_unhash An unprivileged local user can pin a host CPU indefinitely in l2tp_session_get_by_ifname() by issuing L2TP_CMD_SESSION_GET on L2TP_ATTR_IFNAME concurrently with L2TP_CMD_SESSION_CREATE and L2TP_CMD_SESSION_DELETE on the same tunnel. All three commands take GENL_UNS_ADMIN_PERM, so CAP_NET_ADMIN in the netns user namespace suffices; on any host that has l2tp_core loaded the trigger is reachable from a standard `unshare -Urn` sandbox. l2tp_session_unhash() removes a session from tunnel->session_list with list_del_init(), but that list is walked by l2tp_session_get_by_ifname() with list_for_each_entry_rcu() under rcu_read_lock_bh(). list_del_init() leaves the deleted entry's next/prev self-pointing; a reader that has loaded the entry and then advances pos->list.next reads &session->list, container_of()s back to the same session, and list_for_each_entry_rcu() never reaches the list head. The CPU stays in strcmp() inside the walker, with BH and preemption disabled, so RCU grace periods on the host stall behind it and the wedged thread cannot be killed (SIGKILL is delivered on syscall return). Use list_del_rcu() to match the existing list_add_rcu() in l2tp_session_register(); the deleted session remains visible to in-flight walkers with consistent next/prev pointers until kfree_rcu() in l2tp_session_free() releases it. tunnel->session_list has exactly one list_del_init() call site; the list_del_init (&session->clist) at l2tp_core.c:533 operates on the per-collision list, which is not walked under RCU. list_empty(&session->list) is not used anywhere in net/l2tp/ after the unhash point, so dropping the post-delete self-init is safe; the fix has no userspace-visible behavior change. Fixes: 89b768ec2dfef ("l2tp: use rcu list add/del when updating lists") Cc: stable@vger.kernel.org # 6.11+ Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260518183447.64078-1-michael.bommarito@gmail.com Signed-off-by: Jakub Kicinski --- net/l2tp/l2tp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 157fc23ce4e1..1455f67e01dd 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1360,7 +1360,7 @@ static void l2tp_session_unhash(struct l2tp_session *session) spin_lock_bh(&pn->l2tp_session_idr_lock); /* Remove from the per-tunnel list */ - list_del_init(&session->list); + list_del_rcu(&session->list); /* Remove from per-net IDR */ if (tunnel->version == L2TP_HDR_VER_3) { -- cgit v1.2.3 From bdd39576bf50a50bdafe3da968fd271bc674a48f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 May 2026 11:42:07 +0000 Subject: net: bridge: prevent too big nested attributes in br_fill_linkxstats() After commit ff205bf8c554 ("netlink: add one debug check in nla_nest_end()") syzbot found that br_fill_linkxstats() can send corrupted netlink packets. Make sure the nested attribute size is bounded. Fixes: a60c090361ea ("bridge: netlink: export per-vlan stats") Reported-by: syzbot+a35f9259d08f907c06e6@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6a0b0da3.050a0220.175f0c.0000.GAE@google.com/ Signed-off-by: Eric Dumazet Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260520114207.1394241-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/bridge/br_netlink.c | 10 ++++++++++ net/core/rtnetlink.c | 5 +++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 6fd5386a1d64..c04a4d0889ae 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1824,6 +1824,7 @@ static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, int *prividx, int attr) { + unsigned int limit = U16_MAX - nla_total_size(0); struct nlattr *nla __maybe_unused; struct net_bridge_port *p = NULL; struct net_bridge_vlan_group *vg; @@ -1841,6 +1842,7 @@ static int br_fill_linkxstats(struct sk_buff *skb, p = br_port_get_rtnl(dev); if (!p) return 0; + limit -= nla_total_size_64bit(sizeof(p->stp_xstats)); br = p->br; vg = nbp_vlan_group(p); break; @@ -1855,6 +1857,9 @@ static int br_fill_linkxstats(struct sk_buff *skb, if (vg) { u16 pvid; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + limit -= nla_total_size_64bit(sizeof(struct br_mcast_stats)); +#endif pvid = br_get_pvid(vg); list_for_each_entry(v, &vg->vlan_list, vlist) { struct bridge_vlan_xstats vxi; @@ -1862,6 +1867,11 @@ static int br_fill_linkxstats(struct sk_buff *skb, if (++vl_idx < *prividx) continue; + + if (skb_tail_pointer(skb) - (unsigned char *)nest + + nla_total_size(sizeof(vxi)) >= limit) + goto nla_put_failure; + memset(&vxi, 0, sizeof(vxi)); vxi.vid = v->vid; vxi.flags = v->flags; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index df042da422ef..511c25bf6f2a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6328,8 +6328,9 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 0, &filters, &idxattr, &prividx, extack); if (err < 0) { - /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */ - WARN_ON(err == -EMSGSIZE); + /* -EMSGSIZE implies BUG in if_nlmsg_stats_size + * or a too big nested attribute. + */ kfree_skb(nskb); } else { err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); -- cgit v1.2.3 From 8c84c5ec4aaff6ad7aac49935e050fed6b360a28 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 20 May 2026 14:44:13 +0800 Subject: net: enetc: fix incorrect mailbox message status returned to VFs There are two cases where VFs receive an incorrect success status from the PF mailbox message handler, misleading them into believing their requests have been fulfilled: In enetc_msg_handle_rxmsg(), *status is pre-initialized to ENETC_MSG_CMD_STATUS_OK. When an unsupported command type is received, the default case only logs an error without updating *status, so it remains as ENETC_MSG_CMD_STATUS_OK. In enetc_msg_pf_set_vf_primary_mac_addr(), when the PF has already assigned a MAC address for the VF (ENETC_VF_FLAG_PF_SET_MAC is set), the function rejects the request but returns ENETC_MSG_CMD_STATUS_OK instead of ENETC_MSG_CMD_STATUS_FAIL. Therefore, correct the status value for the two cases mentioned above. Fixes: beb74ac878c8 ("enetc: Add vf to pf messaging support") Signed-off-by: Wei Fang Reviewed-by: Harshitha Ramamurthy Link: https://patch.msgid.link/20260520064421.91569-2-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index a12fd54a475f..27d4bb65e017 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -493,11 +493,13 @@ static u16 enetc_msg_pf_set_vf_primary_mac_addr(struct enetc_pf *pf, return ENETC_MSG_CMD_STATUS_FAIL; addr = cmd->mac.sa_data; - if (vf_state->flags & ENETC_VF_FLAG_PF_SET_MAC) + if (vf_state->flags & ENETC_VF_FLAG_PF_SET_MAC) { dev_warn(dev, "Attempt to override PF set mac addr for VF%d\n", vf_id); - else - enetc_pf_set_primary_mac_addr(&pf->si->hw, vf_id + 1, addr); + return ENETC_MSG_CMD_STATUS_FAIL; + } + + enetc_pf_set_primary_mac_addr(&pf->si->hw, vf_id + 1, addr); return ENETC_MSG_CMD_STATUS_OK; } @@ -509,7 +511,6 @@ void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int vf_id, u16 *status) struct enetc_msg_cmd_header *cmd_hdr; u16 cmd_type; - *status = ENETC_MSG_CMD_STATUS_OK; cmd_hdr = (struct enetc_msg_cmd_header *)msg->vaddr; cmd_type = cmd_hdr->type; @@ -518,6 +519,7 @@ void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int vf_id, u16 *status) *status = enetc_msg_pf_set_vf_primary_mac_addr(pf, vf_id); break; default: + *status = ENETC_MSG_CMD_STATUS_FAIL; dev_err(dev, "command not supported (cmd_type: 0x%x)\n", cmd_type); } -- cgit v1.2.3 From 5027266dea471e140f93dd534845c9c4f43219a3 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 20 May 2026 14:44:14 +0800 Subject: net: enetc: fix missing error code when pf->vf_state allocation fails In enetc_pf_probe(), when the memory allocation for pf->vf_state fails, the code jumps to the error handling label but the variable 'err' is not assigned an appropriate error code beforehand. This causes the function to return 0 (success) on an allocation failure path, misleading the caller into thinking the probe succeeded. So set err to -ENOMEM before jumping to the error handling label when the allocation for pf->vf_state returns NULL. Fixes: e15c5506dd39 ("net: enetc: allocate vf_state during PF probes") Signed-off-by: Wei Fang Reviewed-by: Harshitha Ramamurthy Link: https://patch.msgid.link/20260520064421.91569-3-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 27d4bb65e017..b743b6d33ccc 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -962,8 +962,10 @@ static int enetc_pf_probe(struct pci_dev *pdev, if (pf->total_vfs) { pf->vf_state = kzalloc_objs(struct enetc_vf_state, pf->total_vfs); - if (!pf->vf_state) + if (!pf->vf_state) { + err = -ENOMEM; goto err_alloc_vf_state; + } } err = enetc_setup_mac_addresses(node, pf); -- cgit v1.2.3 From 4a995d37b537f437daa01752d39cf44c6ba9ee2c Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 20 May 2026 14:44:15 +0800 Subject: net: enetc: add ratelimiting to VF mailbox error messages Sashiko reported that a buggy or malicious guest VM can flood the host kernel log by repeatedly sending VF-to-PF messages at a high rate, degrading host performance and hiding important system logs [1]. Fix by replacing dev_err()/dev_warn() with dev_err_ratelimited(), limiting output to the default kernel ratelimit. This ensures errors are still logged for debugging while preventing log flooding attacks. Link: https://sashiko.dev/#/patchset/20260511080805.2052495-1-wei.fang%40nxp.com #1 Fixes: beb74ac878c8 ("enetc: Add vf to pf messaging support") Signed-off-by: Wei Fang Reviewed-by: Harshitha Ramamurthy Link: https://patch.msgid.link/20260520064421.91569-4-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index b743b6d33ccc..dea3a92c4722 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -494,8 +494,9 @@ static u16 enetc_msg_pf_set_vf_primary_mac_addr(struct enetc_pf *pf, addr = cmd->mac.sa_data; if (vf_state->flags & ENETC_VF_FLAG_PF_SET_MAC) { - dev_warn(dev, "Attempt to override PF set mac addr for VF%d\n", - vf_id); + dev_err_ratelimited(dev, + "VF%d attempted to override PF set MAC\n", + vf_id); return ENETC_MSG_CMD_STATUS_FAIL; } @@ -520,8 +521,9 @@ void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int vf_id, u16 *status) break; default: *status = ENETC_MSG_CMD_STATUS_FAIL; - dev_err(dev, "command not supported (cmd_type: 0x%x)\n", - cmd_type); + dev_err_ratelimited(dev, + "command not supported (cmd_type: 0x%x)\n", + cmd_type); } } -- cgit v1.2.3 From c666fa632fe628c34904bcd59aeb96bf08e40d31 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 20 May 2026 14:44:16 +0800 Subject: net: enetc: fix TOCTOU race and validate VF MAC address Sashiko reported that the PF driver accepts arbitrary MAC address from from VF mailbox messages without proper validation, creating a security vulnerability [1]. In enetc_msg_pf_set_vf_primary_mac_addr(), the MAC address is extracted directly from the message buffer (cmd->mac.sa_data) and programmed into hardware via pf->ops->set_si_primary_mac() without any validity checks. A malicious VF can configure a multicast, broadcast, or all-zero MAC address. Therefore, a validation to check the MAC address provided by VF is required. However, simply checking the MAC address is not enough, because it also has the potential TOCTOU race [2]: The code reads the MAC address from the DMA buffer to validate it via is_valid_ether_addr(), if validation passes, reads the same DMA buffer a second time when calling enetc_pf_set_primary_mac_addr() to program the hardware. A malicious VF can exploit this window by overwriting the MAC address in the DMA buffer between the validation check and the hardware programming, bypassing the validation entirely. Therefore, allocate a local buffer in enetc_msg_handle_rxmsg() and copy the message content from the DMA buffer via memcpy() before processing. This ensures the PF operates on a stable snapshot that the VF cannot modify. Link: https://sashiko.dev/#/patchset/20260511080805.2052495-1-wei.fang%40nxp.com #1 Link: https://sashiko.dev/#/patchset/20260513103021.2190593-1-wei.fang%40nxp.com #2 Fixes: beb74ac878c8 ("enetc: Add vf to pf messaging support") Signed-off-by: Wei Fang Reviewed-by: Harshitha Ramamurthy Link: https://patch.msgid.link/20260520064421.91569-5-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 39 +++++++++++++++++++------ 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index dea3a92c4722..09c642040892 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -478,21 +478,24 @@ static void enetc_configure_port(struct enetc_pf *pf) /* Messaging */ static u16 enetc_msg_pf_set_vf_primary_mac_addr(struct enetc_pf *pf, - int vf_id) + int vf_id, void *msg) { struct enetc_vf_state *vf_state = &pf->vf_state[vf_id]; - struct enetc_msg_swbd *msg = &pf->rxmsg[vf_id]; - struct enetc_msg_cmd_set_primary_mac *cmd; + struct enetc_msg_cmd_set_primary_mac *cmd = msg; struct device *dev = &pf->si->pdev->dev; - u16 cmd_id; + u16 cmd_id = cmd->header.id; char *addr; - cmd = (struct enetc_msg_cmd_set_primary_mac *)msg->vaddr; - cmd_id = cmd->header.id; if (cmd_id != ENETC_MSG_CMD_MNG_ADD) return ENETC_MSG_CMD_STATUS_FAIL; addr = cmd->mac.sa_data; + if (!is_valid_ether_addr(addr)) { + dev_err_ratelimited(dev, "VF%d attempted to set invalid MAC\n", + vf_id); + return ENETC_MSG_CMD_STATUS_FAIL; + } + if (vf_state->flags & ENETC_VF_FLAG_PF_SET_MAC) { dev_err_ratelimited(dev, "VF%d attempted to override PF set MAC\n", @@ -507,17 +510,33 @@ static u16 enetc_msg_pf_set_vf_primary_mac_addr(struct enetc_pf *pf, void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int vf_id, u16 *status) { - struct enetc_msg_swbd *msg = &pf->rxmsg[vf_id]; + struct enetc_msg_swbd *msg_swbd = &pf->rxmsg[vf_id]; struct device *dev = &pf->si->pdev->dev; struct enetc_msg_cmd_header *cmd_hdr; u16 cmd_type; + u8 *msg; - cmd_hdr = (struct enetc_msg_cmd_header *)msg->vaddr; + msg = kzalloc_objs(*msg, msg_swbd->size); + if (!msg) { + dev_err_ratelimited(dev, + "Failed to allocate message buffer\n"); + *status = ENETC_MSG_CMD_STATUS_FAIL; + return; + } + + /* Currently, only ENETC_MSG_CMD_MNG_MAC command is supported, so + * only sizeof(struct enetc_msg_cmd_set_primary_mac) bytes need to + * be copied. This data already includes the cmd_type field, so it + * can correctly return an error code. + */ + memcpy(msg, msg_swbd->vaddr, + sizeof(struct enetc_msg_cmd_set_primary_mac)); + cmd_hdr = (struct enetc_msg_cmd_header *)msg; cmd_type = cmd_hdr->type; switch (cmd_type) { case ENETC_MSG_CMD_MNG_MAC: - *status = enetc_msg_pf_set_vf_primary_mac_addr(pf, vf_id); + *status = enetc_msg_pf_set_vf_primary_mac_addr(pf, vf_id, msg); break; default: *status = ENETC_MSG_CMD_STATUS_FAIL; @@ -525,6 +544,8 @@ void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int vf_id, u16 *status) "command not supported (cmd_type: 0x%x)\n", cmd_type); } + + kfree(msg); } #ifdef CONFIG_PCI_IOV -- cgit v1.2.3 From f262f5d893327a7131ed25ac8dd01ed7024bcc18 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 20 May 2026 14:44:17 +0800 Subject: net: enetc: fix race condition in VF MAC address configuration Sashiko reported a potential race condition between the VF message handler and administrative VF MAC configuration from the host [1]. The VF message handler (enetc_msg_pf_set_vf_primary_mac_addr) runs asynchronously in a workqueue context and accesses vf_state->flags without any locking. Concurrently, the host can administratively change the VF MAC address via enetc_pf_set_vf_mac(), which executes under RTNL lock and modifies both vf_state->flags and hardware registers. This creates two race windows: 1) TOCTOU race on vf_state->flags: The check of ENETC_VF_FLAG_PF_SET_MAC and subsequent MAC programming are not atomic, allowing the flag state to change between check and use. 2) Torn MAC address writes: Hardware MAC programming requires multiple non-atomic register writes (__raw_writel for lower 32 bits and __raw_writew for upper 16 bits). Concurrent updates from VF mailbox and PF admin paths can interleave these operations, resulting in a corrupted MAC address being programmed into the hardware. Fix by introducing a per-VF mutex to serialize access to vf_state and hardware MAC register updates. Both enetc_pf_set_vf_mac() and enetc_msg_pf_set_vf_primary_mac_addr() now acquire this lock before accessing vf_state->flags or programming the MAC address, ensuring atomic read-modify-write sequences and preventing register write interleaving. Link: https://sashiko.dev/#/patchset/20260511080805.2052495-1-wei.fang%40nxp.com #1 Fixes: beb74ac878c8 ("enetc: Add vf to pf messaging support") Signed-off-by: Wei Fang Reviewed-by: Harshitha Ramamurthy Link: https://patch.msgid.link/20260520064421.91569-6-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 10 ++++++++++ drivers/net/ethernet/freescale/enetc/enetc_pf.h | 1 + 2 files changed, 11 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 09c642040892..8e11a023d516 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -252,8 +252,12 @@ static int enetc_pf_set_vf_mac(struct net_device *ndev, int vf, u8 *mac) return -EADDRNOTAVAIL; vf_state = &pf->vf_state[vf]; + + mutex_lock(&vf_state->lock); vf_state->flags |= ENETC_VF_FLAG_PF_SET_MAC; enetc_pf_set_primary_mac_addr(&priv->si->hw, vf + 1, mac); + mutex_unlock(&vf_state->lock); + return 0; } @@ -496,7 +500,9 @@ static u16 enetc_msg_pf_set_vf_primary_mac_addr(struct enetc_pf *pf, return ENETC_MSG_CMD_STATUS_FAIL; } + mutex_lock(&vf_state->lock); if (vf_state->flags & ENETC_VF_FLAG_PF_SET_MAC) { + mutex_unlock(&vf_state->lock); dev_err_ratelimited(dev, "VF%d attempted to override PF set MAC\n", vf_id); @@ -504,6 +510,7 @@ static u16 enetc_msg_pf_set_vf_primary_mac_addr(struct enetc_pf *pf, } enetc_pf_set_primary_mac_addr(&pf->si->hw, vf_id + 1, addr); + mutex_unlock(&vf_state->lock); return ENETC_MSG_CMD_STATUS_OK; } @@ -989,6 +996,9 @@ static int enetc_pf_probe(struct pci_dev *pdev, err = -ENOMEM; goto err_alloc_vf_state; } + + for (int i = 0; i < pf->total_vfs; i++) + mutex_init(&pf->vf_state[i].lock); } err = enetc_setup_mac_addresses(node, pf); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.h b/drivers/net/ethernet/freescale/enetc/enetc_pf.h index ae407e9e9ee7..35d484858c7b 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.h @@ -14,6 +14,7 @@ enum enetc_vf_flags { }; struct enetc_vf_state { + struct mutex lock; /* Prevent concurrent access */ enum enetc_vf_flags flags; }; -- cgit v1.2.3 From adb4599979cd00d5d426f26cf78b65264217e35b Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 20 May 2026 14:44:18 +0800 Subject: net: enetc: fix DMA write to freed memory in enetc_msg_free_mbx() The teardown sequence in enetc_msg_psi_free() frees the DMA buffer before clearing the device's DMA address registers. If a VF sends a message or a pending DMA transfer completes within this window, the hardware will perform a DMA write into the kernel memory that has already been returned to the allocator. The result is silent memory corruption that can affect arbitrary kernel data structures. Therefore, clear the DMA address registers before the DMA buffer is freed. Fixes: beb74ac878c8 ("enetc: Add vf to pf messaging support") Signed-off-by: Wei Fang Reviewed-by: Harshitha Ramamurthy Link: https://patch.msgid.link/20260520064421.91569-7-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_msg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_msg.c b/drivers/net/ethernet/freescale/enetc/enetc_msg.c index 40d22ebe9224..b4d7457097e6 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_msg.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_msg.c @@ -96,12 +96,12 @@ static void enetc_msg_free_mbx(struct enetc_si *si, int idx) struct enetc_hw *hw = &si->hw; struct enetc_msg_swbd *msg; + enetc_wr(hw, ENETC_PSIVMSGRCVAR0(idx), 0); + enetc_wr(hw, ENETC_PSIVMSGRCVAR1(idx), 0); + msg = &pf->rxmsg[idx]; dma_free_coherent(&si->pdev->dev, msg->size, msg->vaddr, msg->dma); memset(msg, 0, sizeof(*msg)); - - enetc_wr(hw, ENETC_PSIVMSGRCVAR0(idx), 0); - enetc_wr(hw, ENETC_PSIVMSGRCVAR1(idx), 0); } int enetc_msg_psi_init(struct enetc_pf *pf) -- cgit v1.2.3 From f8ae63de2a872fa3b68c287c35379f6d73d38a5d Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 20 May 2026 14:44:19 +0800 Subject: net: enetc: fix unbounded loop and interrupt handling in VF-to-PF messaging The enetc_msg_task() function has several issues that need to be addressed: 1. Unbounded loop causing potential DoS: enetc_msg_task() processes VF-to-PF mailbox messages in an unbounded for(;;) loop that keeps polling ENETC_PSIMSGRR until no MR bits are set. A malicious guest VM can exploit this by continuously sending messages at a high rate - immediately sending a new message as soon as the PF acknowledges the previous one. Since the worker thread never yields or enforces a processing budget, the mr_mask check frequently evaluates to non-zero, causing the PF to spin indefinitely and starving other tasks. Fix this by replacing the unbounded loop with a single snapshot read at task entry. The task processes only the VFs whose MR bits were set at that point, then re-enables message interrupts before returning. This bounds work per invocation to at most num_vfs iterations. No messages are lost because the message interrupt is disabled in enetc_msg_psi_msix() before scheduling enetc_msg_task(), so any new messages arriving during processing will trigger a fresh interrupt once re-enabled, scheduling another task invocation. 2. Write order of ENETC_PSIIDR and ENETC_PSIMSGRR: Both ENETC_PSIIDR and ENETC_PSIMSGRR contain MR bits indicating messages have been received from VSIs, but only ENETC_PSIIDR trigger the CPU interrupt. Previously, ENETC_PSIMSGRR was written before ENETC_PSIIDR. Writing ENETC_PSIMSGRR returns the message code to the VSI in its upper 16 bits, signaling to the VF that message processing is complete and it may send the next message. If the VF sends a new message before ENETC_PSIIDR is written, the subsequent w1c write to ENETC_PSIIDR would inadvertently clear the MR bit set by the new message, causing the interrupt to be lost and the new message to go unprocessed. Therefore, write ENETC_PSIIDR first to clear the interrupt source, then write ENETC_PSIMSGRR to acknowledge the message to the VSI. 3. Check both ENETC_PSIMSGRR and ENETC_PSIIDR for mr_status: The write order change above introduces a potential race: if a VF sends a new message in the window between the ENETC_PSIIDR w1c and the ENETC_PSIMSGRR w1c, the ENETC_PSIMSGRR MR bit for the new message may not be set. If mr_status was derived solely from ENETC_PSIMSGRR, this message would never be detected despite ENETC_PSIIDR retaining its MR bit, leading to an unacknowledged interrupt storm. Fix this by computing mr_status as the union of both ENETC_PSIMSGRR and ENETC_PSIIDR MR bits, ensuring all pending messages are detected regardless of which register reflects the new message state. Additionally, rename the per-register MR macros (ENETC_PSI*_MR_MASK, ENETC_PSI*_MR) to register-agnostic names (ENETC_PSIMR_MASK, ENETC_PSIMR_BIT) since the MR bit layout is shared across ENETC_PSIMSGRR, ENETC_PSIIER, and ENETC_PSIIDR. Make the mask macro dynamic based on the actual number of active VFs rather than hardcoded. Fixes: beb74ac878c8 ("enetc: Add vf to pf messaging support") Signed-off-by: Wei Fang Link: https://patch.msgid.link/20260520064421.91569-8-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_hw.h | 15 ++++-- drivers/net/ethernet/freescale/enetc/enetc_msg.c | 65 ++++++++++++++---------- 2 files changed, 49 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 662e4fbafb74..e58cc81d199d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -56,11 +56,21 @@ static inline u32 enetc_vsi_set_msize(u32 size) } #define ENETC_PSIMSGRR 0x204 -#define ENETC_PSIMSGRR_MR_MASK GENMASK(2, 1) -#define ENETC_PSIMSGRR_MR(n) BIT((n) + 1) /* n = VSI index */ #define ENETC_PSIVMSGRCVAR0(n) (0x210 + (n) * 0x8) /* n = VSI index */ #define ENETC_PSIVMSGRCVAR1(n) (0x214 + (n) * 0x8) +/* Message received mask, n is the active number of VSIs. + * It is available for ENETC_PSIMSGRR, ENETC_PSIIER, and + * ENETC_PSIIDR registers. + */ +#define ENETC_PSIMR_MASK(n) \ + ({ typeof(n) _n = (n); (_n) ? GENMASK((_n), 1) : 0; }) + +/* Message received bit, n is VSI index. It is available for + * ENETC_PSIMSGRR, ENETC_PSIIER, and ENETC_PSIIDR registers. + */ +#define ENETC_PSIMR_BIT(n) BIT((n) + 1) + #define ENETC_VSIMSGSR 0x204 /* RO */ #define ENETC_VSIMSGSR_MB BIT(0) #define ENETC_VSIMSGSR_MS BIT(1) @@ -94,7 +104,6 @@ static inline u32 enetc_vsi_set_msize(u32 size) #define ENETC_SICAPR1 0x904 #define ENETC_PSIIER 0xa00 -#define ENETC_PSIIER_MR_MASK GENMASK(2, 1) #define ENETC_PSIIDR 0xa08 #define ENETC_SITXIDR 0xa18 #define ENETC_SIRXIDR 0xa28 diff --git a/drivers/net/ethernet/freescale/enetc/enetc_msg.c b/drivers/net/ethernet/freescale/enetc/enetc_msg.c index b4d7457097e6..3136e8321e4d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_msg.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_msg.c @@ -3,18 +3,25 @@ #include "enetc_pf.h" -static void enetc_msg_disable_mr_int(struct enetc_hw *hw) +static void enetc_msg_disable_mr_int(struct enetc_pf *pf) { - u32 psiier = enetc_rd(hw, ENETC_PSIIER); + struct enetc_hw *hw = &pf->si->hw; + u32 psiier; + + psiier = enetc_rd(hw, ENETC_PSIIER) & ~ENETC_PSIMR_MASK(pf->num_vfs); + /* disable MR int source(s) */ - enetc_wr(hw, ENETC_PSIIER, psiier & ~ENETC_PSIIER_MR_MASK); + enetc_wr(hw, ENETC_PSIIER, psiier); } -static void enetc_msg_enable_mr_int(struct enetc_hw *hw) +static void enetc_msg_enable_mr_int(struct enetc_pf *pf) { - u32 psiier = enetc_rd(hw, ENETC_PSIIER); + struct enetc_hw *hw = &pf->si->hw; + u32 psiier; - enetc_wr(hw, ENETC_PSIIER, psiier | ENETC_PSIIER_MR_MASK); + psiier = enetc_rd(hw, ENETC_PSIIER) | ENETC_PSIMR_MASK(pf->num_vfs); + + enetc_wr(hw, ENETC_PSIIER, psiier); } static irqreturn_t enetc_msg_psi_msix(int irq, void *data) @@ -22,7 +29,7 @@ static irqreturn_t enetc_msg_psi_msix(int irq, void *data) struct enetc_si *si = (struct enetc_si *)data; struct enetc_pf *pf = enetc_si_priv(si); - enetc_msg_disable_mr_int(&si->hw); + enetc_msg_disable_mr_int(pf); schedule_work(&pf->msg_task); return IRQ_HANDLED; @@ -31,33 +38,35 @@ static irqreturn_t enetc_msg_psi_msix(int irq, void *data) static void enetc_msg_task(struct work_struct *work) { struct enetc_pf *pf = container_of(work, struct enetc_pf, msg_task); + u32 mr_mask = ENETC_PSIMR_MASK(pf->num_vfs); struct enetc_hw *hw = &pf->si->hw; - unsigned long mr_mask; + u32 mr_status; int i; - for (;;) { - mr_mask = enetc_rd(hw, ENETC_PSIMSGRR) & ENETC_PSIMSGRR_MR_MASK; - if (!mr_mask) { - /* re-arm MR interrupts, w1c the IDR reg */ - enetc_wr(hw, ENETC_PSIIDR, ENETC_PSIIER_MR_MASK); - enetc_msg_enable_mr_int(hw); - return; - } + mr_status = (enetc_rd(hw, ENETC_PSIMSGRR) & mr_mask) | + (enetc_rd(hw, ENETC_PSIIDR) & mr_mask); + if (!mr_status) + goto out; + + for (i = 0; i < pf->num_vfs; i++) { + u32 psimsgrr; + u16 msg_code; - for (i = 0; i < pf->num_vfs; i++) { - u32 psimsgrr; - u16 msg_code; + if (!(ENETC_PSIMR_BIT(i) & mr_status)) + continue; - if (!(ENETC_PSIMSGRR_MR(i) & mr_mask)) - continue; + enetc_msg_handle_rxmsg(pf, i, &msg_code); - enetc_msg_handle_rxmsg(pf, i, &msg_code); + /* w1c to clear the corresponding VF MR bit */ + enetc_wr(hw, ENETC_PSIIDR, ENETC_PSIMR_BIT(i)); - psimsgrr = ENETC_SIMSGSR_SET_MC(msg_code); - psimsgrr |= ENETC_PSIMSGRR_MR(i); /* w1c */ - enetc_wr(hw, ENETC_PSIMSGRR, psimsgrr); - } + psimsgrr = ENETC_SIMSGSR_SET_MC(msg_code); + psimsgrr |= ENETC_PSIMR_BIT(i); /* w1c */ + enetc_wr(hw, ENETC_PSIMSGRR, psimsgrr); } + +out: + enetc_msg_enable_mr_int(pf); } /* Init */ @@ -133,7 +142,7 @@ int enetc_msg_psi_init(struct enetc_pf *pf) } /* enable MR interrupts */ - enetc_msg_enable_mr_int(&si->hw); + enetc_msg_enable_mr_int(pf); return 0; @@ -154,7 +163,7 @@ void enetc_msg_psi_free(struct enetc_pf *pf) cancel_work_sync(&pf->msg_task); /* disable MR interrupts */ - enetc_msg_disable_mr_int(&si->hw); + enetc_msg_disable_mr_int(pf); for (i = 0; i < pf->num_vfs; i++) enetc_msg_free_mbx(si, i); -- cgit v1.2.3 From 54362b0176080b905dbd0651ee3dbb295da41541 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 20 May 2026 14:44:20 +0800 Subject: net: enetc: fix init and teardown order to prevent use of unsafe resources Sashiko reported a potential issue in enetc_msg_psi_init() where the IRQ handler is registered before DMA resources are fully initialized [1]. The current initialization sequence is: 1. request_irq(enetc_msg_psi_msix) <- IRQ handler registered 2. INIT_WORK(&pf->msg_task, ...) <- work_struct initialized 3. enetc_msg_alloc_mbx() <- mailbox DMA allocated This ordering is unsafe because if a spurious interrupt or pending interrupt from a previous device state fires immediately after request_irq() returns, the registered ISR enetc_msg_psi_msix() will execute and unconditionally call: schedule_work(&pf->msg_task) At this point, pf->msg_task has not been initialized by INIT_WORK(), so the work_struct contains garbage values in its internal linked list pointers (work_struct->entry). Passing an uninitialized work_struct to schedule_work() could corrupt the kernel's workqueue linked lists, potentially leading to: - Kernel panic in __queue_work() - Memory corruption in workqueue data structures - System deadlock or undefined behavior Additionally, even if the work_struct was initialized, the mailbox DMA buffers (pf->rxmsg[]) may not yet be allocated when the work handler enetc_msg_task() runs, resulting in NULL pointer dereference. Fix by reordering the initialization sequence to ensure all resources are properly initialized before the interrupt handler can execute: 1. enetc_msg_alloc_mbx() <- Allocate all mailboxes 2. INIT_WORK(&pf->msg_task, ...) <- Initialize work first 3. request_irq(enetc_msg_psi_msix) <- Register IRQ last 4. Configure hardware & enable MR interrupts This guarantees that when enetc_msg_psi_msix() runs: - pf->msg_task is properly initialized (safe for schedule_work) - pf->rxmsg[] buffers are allocated (safe for work handler access) - Hardware is configured appropriately As the inverse of enetc_msg_psi_init(), enetc_msg_psi_free() also has similar problems. For example, if a pending interrupt fires between enetc_msg_free_mbx() and free_irq(), the ISR enetc_msg_psi_msix() may schedule the work handler again via schedule_work(), which could then access already-freed DMA buffers (pf->rxmsg[]), leading to use-after-free and potential memory corruption. Therefore, the order of enetc_msg_psi_free() is adjusted: 1. enetc_msg_disable_mr_int() <- Stop new interrupts first 2. free_irq() <- Ensure no IRQ handler can run 3. cancel_work_sync() <- Wait for any pending work 4. enetc_msg_disable_mr_int() <- Re-disable in case work re-enabled it 5. enetc_msg_free_mbx() <- Safe to free DMA buffers now Link: https://sashiko.dev/#/patchset/20260511080805.2052495-1-wei.fang%40nxp.com #1 Fixes: beb74ac878c8 ("enetc: Add vf to pf messaging support") Signed-off-by: Wei Fang Reviewed-by: Harshitha Ramamurthy Link: https://patch.msgid.link/20260520064421.91569-9-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_msg.c | 35 ++++++++++++------------ 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_msg.c b/drivers/net/ethernet/freescale/enetc/enetc_msg.c index 3136e8321e4d..c09635e7eb3d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_msg.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_msg.c @@ -118,6 +118,15 @@ int enetc_msg_psi_init(struct enetc_pf *pf) struct enetc_si *si = pf->si; int vector, i, err; + for (i = 0; i < pf->num_vfs; i++) { + err = enetc_msg_alloc_mbx(si, i); + if (err) + goto free_mbx; + } + + /* initialize PSI mailbox */ + INIT_WORK(&pf->msg_task, enetc_msg_task); + /* register message passing interrupt handler */ snprintf(pf->msg_int_name, sizeof(pf->msg_int_name), "%s-vfmsg", si->ndev->name); @@ -126,32 +135,21 @@ int enetc_msg_psi_init(struct enetc_pf *pf) if (err) { dev_err(&si->pdev->dev, "PSI messaging: request_irq() failed!\n"); - return err; + goto free_mbx; } /* set one IRQ entry for PSI message receive notification (SI int) */ enetc_wr(&si->hw, ENETC_SIMSIVR, ENETC_SI_INT_IDX); - /* initialize PSI mailbox */ - INIT_WORK(&pf->msg_task, enetc_msg_task); - - for (i = 0; i < pf->num_vfs; i++) { - err = enetc_msg_alloc_mbx(si, i); - if (err) - goto err_init_mbx; - } - /* enable MR interrupts */ enetc_msg_enable_mr_int(pf); return 0; -err_init_mbx: +free_mbx: for (i--; i >= 0; i--) enetc_msg_free_mbx(si, i); - free_irq(vector, si); - return err; } @@ -160,14 +158,17 @@ void enetc_msg_psi_free(struct enetc_pf *pf) struct enetc_si *si = pf->si; int i; + /* disable MR interrupts */ + enetc_msg_disable_mr_int(pf); + + /* de-register message passing interrupt handler */ + free_irq(pci_irq_vector(si->pdev, ENETC_SI_INT_IDX), si); + cancel_work_sync(&pf->msg_task); - /* disable MR interrupts */ + /* MR interrupts may be re-enabled by workqueue */ enetc_msg_disable_mr_int(pf); for (i = 0; i < pf->num_vfs; i++) enetc_msg_free_mbx(si, i); - - /* de-register message passing interrupt handler */ - free_irq(pci_irq_vector(si->pdev, ENETC_SI_INT_IDX), si); } -- cgit v1.2.3 From 9e68817f12d5935dbf73f2fe6e6299644f6de1b6 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 20 May 2026 14:44:21 +0800 Subject: net: enetc: avoid VF->PF mailbox timeout during SR-IOV teardown During SR-IOV teardown, enetc_msg_psi_free() disables the MR interrupt before pci_disable_sriov() removes the VFs. If a VF sends a mailbox message during this window, the PF cannot receive it, causing the VF to timeout waiting for a reply. Since the timeout occurs during SR-IOV teardown when the VF is about to be removed anyway, it has no functional impact on operation. However, more messages will be added in the future, some visible error logs may confuse users. So fix it by calling pci_disable_sriov() first to remove all VFs, then safely clean up the mailbox resources. This eliminates the race window where VFs could send messages to an unresponsive PF. Fixes: beb74ac878c8 ("enetc: Add vf to pf messaging support") Signed-off-by: Wei Fang Reviewed-by: Harshitha Ramamurthy Link: https://patch.msgid.link/20260520064421.91569-10-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 8e11a023d516..3206b3daa1a0 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -563,9 +563,9 @@ static int enetc_sriov_configure(struct pci_dev *pdev, int num_vfs) int err; if (!num_vfs) { + pci_disable_sriov(pdev); enetc_msg_psi_free(pf); pf->num_vfs = 0; - pci_disable_sriov(pdev); } else { pf->num_vfs = num_vfs; -- cgit v1.2.3 From d1ebfce2c1d161186a82e77590bf7da2ea1bce91 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 17 May 2026 20:11:50 -0400 Subject: smb: client: require net admin for CIFS SWN netlink CIFS_GENL_CMD_SWN_NOTIFY is the userspace witness-notify command. The intended sender is the cifs.witness helper, but the generic-netlink operation currently has no capability flag, so any local process can send RESOURCE_CHANGE or CLIENT_MOVE notifications to the in-kernel witness handler. The same family exposes CIFS_GENL_MCGRP_SWN without multicast-group capability flags. Register messages sent to that group include the witness registration id and, for NTLM-authenticated mounts, the username, domain, and password attributes copied from the CIFS session. An unprivileged local process should not be able to join that group and receive those messages. Require CAP_NET_ADMIN for incoming SWN_NOTIFY commands with GENL_ADMIN_PERM, and require CAP_NET_ADMIN over the network namespace for joining the SWN multicast group with GENL_MCAST_CAP_NET_ADMIN. The cifs.witness service runs with the privileges needed for both operations. Fixes: fed979a7e082 ("cifs: Set witness notification handler for messages from userspace daemon") Cc: stable@vger.kernel.org Signed-off-by: Michael Bommarito Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Steve French --- fs/smb/client/netlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/netlink.c b/fs/smb/client/netlink.c index 147d9409252c..0dd10913c37a 100644 --- a/fs/smb/client/netlink.c +++ b/fs/smb/client/netlink.c @@ -33,13 +33,17 @@ static const struct nla_policy cifs_genl_policy[CIFS_GENL_ATTR_MAX + 1] = { static const struct genl_ops cifs_genl_ops[] = { { .cmd = CIFS_GENL_CMD_SWN_NOTIFY, + .flags = GENL_ADMIN_PERM, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = cifs_swn_notify, }, }; static const struct genl_multicast_group cifs_genl_mcgrps[] = { - [CIFS_GENL_MCGRP_SWN] = { .name = CIFS_GENL_MCGRP_SWN_NAME }, + [CIFS_GENL_MCGRP_SWN] = { + .name = CIFS_GENL_MCGRP_SWN_NAME, + .flags = GENL_MCAST_CAP_NET_ADMIN, + }, }; struct genl_family cifs_genl_family = { -- cgit v1.2.3 From dcd4313f0987d69c4134c12bbe3a8cdf795f6c1e Mon Sep 17 00:00:00 2001 From: Fredric Cover Date: Wed, 13 May 2026 13:19:15 -0700 Subject: smb: client: change allocation requirements in DUP_CTX_STR macro Currently, the macro DUP_CTX_STR allocates new_ctx->field using GFP_ATOMIC. DUP_CTX_STR is only used in smb3_fs_context_dup(), which is never called in an atomic context. Using GFP_ATOMIC puts unnecessary pressure on emergency memory pools. Change GFP_ATOMIC to GFP_KERNEL. Signed-off-by: Fredric Cover Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 4c8b1b9ade8b..2f86158f85d7 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -420,7 +420,7 @@ static int parse_symlink_flavor(struct fs_context *fc, char *value, #define DUP_CTX_STR(field) \ do { \ if (ctx->field) { \ - new_ctx->field = kstrdup(ctx->field, GFP_ATOMIC); \ + new_ctx->field = kstrdup(ctx->field, GFP_KERNEL); \ if (new_ctx->field == NULL) { \ smb3_cleanup_fs_context_contents(new_ctx); \ return -ENOMEM; \ -- cgit v1.2.3 From 0c1a9dce208b4dc265925898e5da98934f7f9266 Mon Sep 17 00:00:00 2001 From: Samuele Mariotti Date: Thu, 21 May 2026 12:59:11 +0200 Subject: sched_ext: Fix spurious WARN on stale ops_state in ops_dequeue() ops_dequeue() can race with finish_dispatch() and spuriously trigger the "queued task must be in BPF scheduler's custody" warning. ops_dequeue() snapshots p->scx.ops_state via atomic_long_read_acquire() and then, in the SCX_OPSS_QUEUED arm, asserts that SCX_TASK_IN_CUSTODY is set. The two reads are not atomic w.r.t. a concurrent finish_dispatch() running on another CPU: CPU 1 CPU 2 ===== ===== dequeue_task_scx() ops_dequeue() opss = read_acquire(ops_state) = SCX_OPSS_QUEUED finish_dispatch() cmpxchg ops_state: SCX_OPSS_QUEUED -> SCX_OPSS_DISPATCHING [succeeds] dispatch_enqueue(SCX_DSQ_GLOBAL, SCX_ENQ_CLEAR_OPSS) call_task_dequeue() p->scx.flags &= ~SCX_TASK_IN_CUSTODY WARN_ON_ONCE(!(p->scx.flags & SCX_TASK_IN_CUSTODY)) /* opss is stale: QUEUED, * but task already claimed */ set_release(ops_state, SCX_OPSS_NONE) The race has been observed via two distinct call chains: the most common goes through sched_setaffinity(), a rarer variant through sched_change_begin(). For SCX_DSQ_GLOBAL / SCX_DSQ_BYPASS, dispatch_enqueue() clears SCX_TASK_IN_CUSTODY before clearing ops_state to SCX_OPSS_NONE (intentional, to avoid concurrent non-atomic RMW of p->scx.flags against ops_dequeue()). The window between those two writes is exactly what ops_dequeue() observes as "QUEUED without custody". The observed state is not actually inconsistent, it just means CPU 1 has already claimed the task and the QUEUED value held by CPU 2 is stale. Re-read ops_state in that case; the next read is guaranteed to return SCX_OPSS_DISPATCHING or SCX_OPSS_NONE, both of which exit the switch cleanly. The retry is bounded: once IN_CUSTODY is cleared, ops_state has already advanced past QUEUED for this dispatch cycle, and a fresh QUEUED would require re-enqueue under p's rq lock, which CPU 2 holds. Changes in v2: - Use READ_ONCE() for p->scx.flags to ensure fresh reads and prevent compiler reordering in the lockless path - Add cpu_relax() to reduce power consumption and improve performance during the spin-wait - Use unlikely() to optimize branch prediction for the common case - Expand the in-code comment to document the race condition and bounded retry guarantee Fixes: ebf1ccff79c4 ("sched_ext: Fix ops.dequeue() semantics") Suggested-by: Andrea Righi Signed-off-by: Samuele Mariotti Signed-off-by: Paolo Valente Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 547ca398f646..c1762420cc35 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2078,6 +2078,7 @@ static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags) /* dequeue is always temporary, don't reset runnable_at */ clr_task_runnable(p, false); +retry: /* acquire ensures that we see the preceding updates on QUEUED */ opss = atomic_long_read_acquire(&p->scx.ops_state); @@ -2091,8 +2092,20 @@ static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags) */ BUG(); case SCX_OPSS_QUEUED: - /* A queued task must always be in BPF scheduler's custody */ - WARN_ON_ONCE(!(p->scx.flags & SCX_TASK_IN_CUSTODY)); + /* + * A queued task must always be in BPF scheduler's custody. If + * SCX_TASK_IN_CUSTODY is clear, finish_dispatch() on another + * CPU has already passed call_task_dequeue() (which clears the + * flag), but has not yet written SCX_OPSS_NONE. That final + * store does not require this rq's lock, so retrying with + * cpu_relax() is bounded: we will observe NONE (or DISPATCHING, + * handled by the fallthrough) on a subsequent iteration. + */ + if (unlikely(!(READ_ONCE(p->scx.flags) & SCX_TASK_IN_CUSTODY))) { + cpu_relax(); + goto retry; + } + if (atomic_long_try_cmpxchg(&p->scx.ops_state, &opss, SCX_OPSS_NONE)) break; -- cgit v1.2.3 From fb6988b83b4cafe8db63999c1ddff1b7c66d2ff5 Mon Sep 17 00:00:00 2001 From: Florian Schmaus Date: Thu, 7 May 2026 10:48:54 +0200 Subject: kunit: fix use-after-free in debugfs when using kunit.filter When the kernel is booted with a kunit filter (e.g., kunit.filter="speed!=slow"), the kunit executor dynamically allocates copies of the filtered test suites using kmalloc/kmemdup. During the initial boot execution, kunit_debugfs_create_suite() creates debugfs files (such as /sys/kernel/debug/kunit//run) and permanently stores a pointer to the dynamically allocated suite in the inode's i_private field. Previously, the executor freed this dynamically allocated suite_set immediately after executing the boot-time tests. Because the debugfs nodes were not destroyed, any subsequent interaction with the debugfs `run` file from userspace triggered a use-after-free (UAF). On systems with architectural capabilities, like CHERI RISC-V, this resulted in an immediate fatal hardware exception due to the invalidation of the capability tags on the reclaimed memory. On other architectures, it resulted in silent memory corruption. Fix this UAF by properly coupling the lifetime of the filtered suite memory allocation to the lifetime of the kunit subsystem and its associated VFS nodes. Ownership of the boot-time suite_set is now transferred to a global tracker ('kunit_boot_suites'), and the memory is cleanly released in kunit_exit() during module teardown. Link: https://lore.kernel.org/r/20260507084854.233984-1-florian.schmaus@codasip.com Fixes: e2219db280e3 ("kunit: add debugfs /sys/kernel/debug/kunit//results display") Signed-off-by: Florian Schmaus Reviewed-by: Martin Kaiser Reviewed-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/test.h | 1 + lib/kunit/executor.c | 19 ++++++++++++++++--- lib/kunit/test.c | 1 + 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/include/kunit/test.h b/include/kunit/test.h index 9cd1594ab697..ce0573e196ce 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -613,6 +613,7 @@ unsigned long kunit_vm_mmap(struct kunit *test, struct file *file, unsigned long offset); void kunit_cleanup(struct kunit *test); +void kunit_free_boot_suites(void); void __printf(2, 3) kunit_log_append(struct string_stream *log, const char *fmt, ...); diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index 1fef217de11d..b0f8a41d61d3 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -15,6 +15,16 @@ extern struct kunit_suite * const __kunit_suites_end[]; extern struct kunit_suite * const __kunit_init_suites_start[]; extern struct kunit_suite * const __kunit_init_suites_end[]; +static struct kunit_suite_set kunit_boot_suites; + +void kunit_free_boot_suites(void) +{ + if (kunit_boot_suites.start) { + kunit_free_suite_set(kunit_boot_suites); + kunit_boot_suites = (struct kunit_suite_set){ NULL, NULL }; + } +} + static char *action_param; module_param_named(action, action_param, charp, 0400); @@ -411,9 +421,12 @@ int kunit_run_all_tests(void) pr_err("kunit executor: unknown action '%s'\n", action_param); free_out: - if (filter_glob_param || filter_param) - kunit_free_suite_set(suite_set); - else if (init_num_suites > 0) + if (filter_glob_param || filter_param) { + if (err) + kunit_free_suite_set(suite_set); + else + kunit_boot_suites = suite_set; + } else if (init_num_suites > 0) /* Don't use kunit_free_suite_set because suites aren't individually allocated */ kfree(suite_set.start); diff --git a/lib/kunit/test.c b/lib/kunit/test.c index 41e1c89799b6..99773e000e1b 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -1075,6 +1075,7 @@ static void __exit kunit_exit(void) kunit_bus_shutdown(); kunit_debugfs_cleanup(); + kunit_free_boot_suites(); } module_exit(kunit_exit); -- cgit v1.2.3 From f706e6a4ce75585af979aec3dcbdce68bc76306b Mon Sep 17 00:00:00 2001 From: Dhabaleshwar Das Date: Thu, 21 May 2026 00:00:00 +0530 Subject: accel/rocket: fix UAF via dangling GEM handle in create_bo rocket_ioctl_create_bo() inserts a GEM handle into the file's IDR via drm_gem_handle_create() early on, then performs several operations that can fail (sgt allocation, drm_mm insert, iommu_map). If any fail after the handle is live, the error path calls drm_gem_shmem_object_free() which kfree's the object without removing the handle from the IDR. This leaves a dangling handle pointing to freed slab memory. Any subsequent ioctl using that handle (PREP_BO, FINI_BO, SUBMIT) calls drm_gem_object_lookup() and dereferences freed memory (UAF). Fix by moving drm_gem_handle_create() to after all fallible operations succeed, matching the pattern used by panfrost, lima, and etnaviv. Also fix drm_mm_insert_node_generic() whose return value was silently overwritten by iommu_map_sgtable() on the next line. Add the missing error check. [tomeu: Move handle creation to the very end] Fixes: 658ebeac3351 ("accel/rocket: Add IOCTL for BO creation") Reported-by: Dhabaleshwar Das Signed-off-by: Dhabaleshwar Das Reviewed-by: Tomeu Vizoso Link: https://patch.msgid.link/20260521165720.2113571-1-tomeu@tomeuvizoso.net Signed-off-by: Tomeu Vizoso --- drivers/accel/rocket/rocket_gem.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/accel/rocket/rocket_gem.c b/drivers/accel/rocket/rocket_gem.c index c8084719208a..a5fffa51ff35 100644 --- a/drivers/accel/rocket/rocket_gem.c +++ b/drivers/accel/rocket/rocket_gem.c @@ -79,11 +79,6 @@ int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file * rkt_obj->size = args->size; rkt_obj->offset = 0; - ret = drm_gem_handle_create(file, gem_obj, &args->handle); - drm_gem_object_put(gem_obj); - if (ret) - goto err; - sgt = drm_gem_shmem_get_pages_sgt(shmem_obj); if (IS_ERR(sgt)) { ret = PTR_ERR(sgt); @@ -95,6 +90,8 @@ int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file * rkt_obj->size, PAGE_SIZE, 0, 0); mutex_unlock(&rocket_priv->mm_lock); + if (ret) + goto err; ret = iommu_map_sgtable(rocket_priv->domain->domain, rkt_obj->mm.start, @@ -112,8 +109,18 @@ int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file * args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); args->dma_address = rkt_obj->mm.start; + ret = drm_gem_handle_create(file, gem_obj, &args->handle); + if (ret) + goto err_unmap; + + drm_gem_object_put(gem_obj); + return 0; +err_unmap: + iommu_unmap(rocket_priv->domain->domain, + rkt_obj->mm.start, rkt_obj->size); + err_remove_node: mutex_lock(&rocket_priv->mm_lock); drm_mm_remove_node(&rkt_obj->mm); -- cgit v1.2.3 From e97ff8b62d4690c69297f0f6de874f0564cc01a4 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Wed, 20 May 2026 20:00:44 +0200 Subject: io_uring/nop: pass all errors to userspace This fixes an inconsistency where io_nop() called req_set_fail() based on ret, but passed just nop->result to userspace. Originally, ret is a even copy of nop->result, but is set to an error when such happens subsequently. Now that's also passed to userspace. Fixes: a85f31052bce ("io_uring/nop: add support for testing registered files and buffers") Signed-off-by: Alexander A. Klimov Link: https://patch.msgid.link/20260520180045.538533-1-grandmaster@al2klimov.de Signed-off-by: Jens Axboe --- io_uring/nop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/nop.c b/io_uring/nop.c index 3caf07878f8a..f5c9969e7f64 100644 --- a/io_uring/nop.c +++ b/io_uring/nop.c @@ -79,9 +79,9 @@ done: if (ret < 0) req_set_fail(req); if (nop->flags & IORING_NOP_CQE32) - io_req_set_res32(req, nop->result, 0, nop->extra1, nop->extra2); + io_req_set_res32(req, ret, 0, nop->extra1, nop->extra2); else - io_req_set_res(req, nop->result, 0); + io_req_set_res(req, ret, 0); if (nop->flags & IORING_NOP_TW) { req->io_task_work.func = io_req_task_complete; io_req_task_work_add(req); -- cgit v1.2.3 From a8878e19d2f5205ad1f170fc230c2cc25a3b9390 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 20 May 2026 15:35:31 -0700 Subject: accel/amdxdna: Block running when IOMMU is off The AIE2 device firmware requires IOMMU on. Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/5319 Reviewed-by: Mario Limonciello Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260520223531.1403302-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index f1ac4e00bd9f..4500b9ccb02e 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -511,6 +511,11 @@ static int aie2_init(struct amdxdna_dev *xdna) return -EINVAL; } + if (!xdna->group) { + XDNA_ERR(xdna, "Running without IOMMU not supported"); + return -EINVAL; + } + ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL); if (!ndev) return -ENOMEM; -- cgit v1.2.3 From c9b7598eb013c6dbf2526dc050364bd8dc24f0d3 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Wed, 20 May 2026 23:31:15 +0300 Subject: irqchip/renesas-rzt2h: Use pm_runtime_put_sync() in probe error path pm_runtime_put() may trigger the idle check after pm_runtime_disable() is run as part of devm_pm_runtime_enable()'s cleanup action, leaving runtime PM active. Use pm_runtime_put_sync() to ensure the idle check runs synchronously. Fixes: 13e7b3305b64 ("irqchip: Add RZ/{T2H,N2H} Interrupt Controller (ICU) driver") Signed-off-by: Cosmin Tanislav Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20260520203117.1516442-2-cosmin-gabriel.tanislav.xa@renesas.com --- drivers/irqchip/irq-renesas-rzt2h.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-renesas-rzt2h.c b/drivers/irqchip/irq-renesas-rzt2h.c index 53cf80e1155a..ecb69da55508 100644 --- a/drivers/irqchip/irq-renesas-rzt2h.c +++ b/drivers/irqchip/irq-renesas-rzt2h.c @@ -265,7 +265,7 @@ static int rzt2h_icu_init(struct platform_device *pdev, struct device_node *pare irq_domain = irq_domain_create_hierarchy(parent_domain, 0, RZT2H_ICU_NUM_IRQ, dev_fwnode(dev), &rzt2h_icu_domain_ops, priv); if (!irq_domain) { - pm_runtime_put(dev); + pm_runtime_put_sync(dev); return -ENOMEM; } -- cgit v1.2.3 From fd9b9204f30e0463e1abec222aeec33b98571b71 Mon Sep 17 00:00:00 2001 From: Jairaj Arava Date: Wed, 20 May 2026 14:08:13 +0800 Subject: ASoC: Intel: sof_sdw: Add support for nvlrvp in NVL platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an entry in the soundwire quirk table for novalake boards to support NVL RVP Signed-off-by: Jairaj Arava Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://patch.msgid.link/20260520060814.2024852-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index c18ec607e029..ce7718338e6b 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -837,6 +837,14 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { SOF_BT_OFFLOAD_SSP(2) | SOF_SSP_BT_OFFLOAD_PRESENT), }, + /* Novalake devices*/ + { + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_nvlrvp"), + }, + .driver_data = (void *)(SOC_SDW_PCH_DMIC), + }, {} }; -- cgit v1.2.3 From 2b8305f24a61b290f3258a3368be548f17451533 Mon Sep 17 00:00:00 2001 From: Balamurugan C Date: Wed, 20 May 2026 14:11:43 +0800 Subject: ASoC: Intel: soc-acpi: Add entry for sof_es8336 in NVL match table. Adding ES83x6 I2S codec support for NVL platforms and entry in match table. Signed-off-by: Balamurugan C Reviewed-by: Liam Girdwood Signed-off-by: Bard Liao Link: https://patch.msgid.link/20260520061143.2024963-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-nvl-match.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/soc/intel/common/soc-acpi-intel-nvl-match.c b/sound/soc/intel/common/soc-acpi-intel-nvl-match.c index b8695d47e55b..217272260803 100644 --- a/sound/soc/intel/common/soc-acpi-intel-nvl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-nvl-match.c @@ -10,7 +10,20 @@ #include #include "soc-acpi-intel-sdw-mockup-match.h" +static const struct snd_soc_acpi_codecs nvl_essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, +}; + struct snd_soc_acpi_mach snd_soc_acpi_intel_nvl_machines[] = { + { + .comp_ids = &nvl_essx_83x6, + .drv_name = "sof-essx8336", + .sof_tplg_filename = "sof-nvl-es8336", /* the tplg suffix is added at run time */ + .tplg_quirk_mask = SND_SOC_ACPI_TPLG_INTEL_SSP_NUMBER | + SND_SOC_ACPI_TPLG_INTEL_SSP_MSB | + SND_SOC_ACPI_TPLG_INTEL_DMIC_NUMBER, + }, {}, }; EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_nvl_machines); -- cgit v1.2.3 From e0fb794d67f86726817756bcc25c628f4894df29 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 20 May 2026 17:36:29 +0100 Subject: ASoC: soc-acpi-intel-ptl-match: Make Chrome matches conditional For PTL onwards Cirrus are intending to rely on function topologies, rather than using a match table for each system type. Chrome systems tend to have custom magic in the topology and thus need to load a specific file. This causes problems as these system can have the same layout as generic laptops causing the match to apply to other laptops. Add a DMI quirk that forces these matches to only apply to specific devices. Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20260520163631.3300102-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-ptl-match.c | 1 + sound/soc/intel/common/soc-acpi-intel-sdca-quirks.c | 16 ++++++++++++++++ sound/soc/intel/common/soc-acpi-intel-sdca-quirks.h | 1 + 3 files changed, 18 insertions(+) diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index ad3af8834e43..c6bf70e39397 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -632,6 +632,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_sdw_machines[] = { .link_mask = BIT(2) | BIT(3), .links = ptl_cs42l43_agg_l3_cs35l56_l2, .drv_name = "sof_sdw", + .machine_check = snd_soc_acpi_intel_no_function_topology, .sof_tplg_filename = "sof-ptl-cs42l43-agg-l3-cs35l56-l2.tplg", }, { diff --git a/sound/soc/intel/common/soc-acpi-intel-sdca-quirks.c b/sound/soc/intel/common/soc-acpi-intel-sdca-quirks.c index 3eaa058f8460..7caabc501b16 100644 --- a/sound/soc/intel/common/soc-acpi-intel-sdca-quirks.c +++ b/sound/soc/intel/common/soc-acpi-intel-sdca-quirks.c @@ -6,6 +6,7 @@ * */ +#include #include #include #include @@ -37,6 +38,21 @@ bool snd_soc_acpi_intel_sdca_is_device_rt712_vb(void *arg) } EXPORT_SYMBOL_NS(snd_soc_acpi_intel_sdca_is_device_rt712_vb, "SND_SOC_ACPI_INTEL_SDCA_QUIRKS"); +static const struct dmi_system_id function_topology_quirk_table[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Google"), + }, + }, + {} +}; + +bool snd_soc_acpi_intel_no_function_topology(void *arg) +{ + return !!dmi_check_system(function_topology_quirk_table); +} +EXPORT_SYMBOL_NS(snd_soc_acpi_intel_no_function_topology, "SND_SOC_ACPI_INTEL_SDCA_QUIRKS"); + MODULE_DESCRIPTION("ASoC ACPI Intel SDCA quirks"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("SND_SOC_SDCA"); diff --git a/sound/soc/intel/common/soc-acpi-intel-sdca-quirks.h b/sound/soc/intel/common/soc-acpi-intel-sdca-quirks.h index bead5ec6243f..2ea0a1881c4b 100644 --- a/sound/soc/intel/common/soc-acpi-intel-sdca-quirks.h +++ b/sound/soc/intel/common/soc-acpi-intel-sdca-quirks.h @@ -10,5 +10,6 @@ #define _SND_SOC_ACPI_INTEL_SDCA_QUIRKS bool snd_soc_acpi_intel_sdca_is_device_rt712_vb(void *arg); +bool snd_soc_acpi_intel_no_function_topology(void *arg); #endif -- cgit v1.2.3 From 45cf24da0a10203890fae4bd10ca5dbfca430324 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 20 May 2026 17:36:30 +0100 Subject: ASoC: Intel: soc-acpi-intel-ptl-match: Remove unnecessary cs42l43 match For PTL onwards Cirrus are intending to rely on function topologies, rather than using a match table for each system type. Remove this unnecessary match table entry. Having the match entries can mean that systems match when they should use function topologies instead, resulting in incorrect audio configurations. Although, admittedly this is not too likely with this 6x amp configuration as those are quite rare, but best to follow best practice. Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20260520163631.3300102-3-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-ptl-match.c | 118 ---------------------- 1 file changed, 118 deletions(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index c6bf70e39397..f7694b2a2b02 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -92,48 +92,6 @@ static const struct snd_soc_acpi_endpoint spk_r_endpoint = { .group_id = 1, }; -static const struct snd_soc_acpi_endpoint spk_1_endpoint = { - .num = 0, - .aggregated = 1, - .group_position = 1, - .group_id = 1, -}; - -static const struct snd_soc_acpi_endpoint spk_2_endpoint = { - .num = 0, - .aggregated = 1, - .group_position = 2, - .group_id = 1, -}; - -static const struct snd_soc_acpi_endpoint spk_3_endpoint = { - .num = 0, - .aggregated = 1, - .group_position = 3, - .group_id = 1, -}; - -static const struct snd_soc_acpi_endpoint spk_4_endpoint = { - .num = 0, - .aggregated = 1, - .group_position = 4, - .group_id = 1, -}; - -static const struct snd_soc_acpi_endpoint spk_5_endpoint = { - .num = 0, - .aggregated = 1, - .group_position = 5, - .group_id = 1, -}; - -static const struct snd_soc_acpi_endpoint spk_6_endpoint = { - .num = 0, - .aggregated = 1, - .group_position = 6, - .group_id = 1, -}; - static const struct snd_soc_acpi_endpoint jack_dmic_endpoints[] = { /* Jack Endpoint */ { @@ -202,15 +160,6 @@ static const struct snd_soc_acpi_endpoint cs42l43_amp_spkagg_endpoints[] = { }, }; -static const struct snd_soc_acpi_adr_device cs42l43_2_adr[] = { - { - .adr = 0x00023001fa424301ull, - .num_endpoints = ARRAY_SIZE(cs42l43_amp_spkagg_endpoints), - .endpoints = cs42l43_amp_spkagg_endpoints, - .name_prefix = "cs42l43" - } -}; - static const struct snd_soc_acpi_adr_device cs42l43_3_agg_adr[] = { { .adr = 0x00033001FA424301ull, @@ -235,48 +184,6 @@ static const struct snd_soc_acpi_adr_device cs35l56_2_lr_adr[] = { } }; -static const struct snd_soc_acpi_adr_device cs35l56_1_3amp_adr[] = { - { - .adr = 0x00013001fa355601ull, - .num_endpoints = 1, - .endpoints = &spk_1_endpoint, - .name_prefix = "AMP1" - }, - { - .adr = 0x00013101fa355601ull, - .num_endpoints = 1, - .endpoints = &spk_2_endpoint, - .name_prefix = "AMP2" - }, - { - .adr = 0x00013201fa355601ull, - .num_endpoints = 1, - .endpoints = &spk_3_endpoint, - .name_prefix = "AMP3" - } -}; - -static const struct snd_soc_acpi_adr_device cs35l56_3_3amp_adr[] = { - { - .adr = 0x00033301fa355601ull, - .num_endpoints = 1, - .endpoints = &spk_4_endpoint, - .name_prefix = "AMP4" - }, - { - .adr = 0x00033401fa355601ull, - .num_endpoints = 1, - .endpoints = &spk_5_endpoint, - .name_prefix = "AMP5" - }, - { - .adr = 0x00033501fa355601ull, - .num_endpoints = 1, - .endpoints = &spk_6_endpoint, - .name_prefix = "AMP6" - } -}; - static const struct snd_soc_acpi_adr_device rt711_sdca_0_adr[] = { { .adr = 0x000030025D071101ull, @@ -408,25 +315,6 @@ static const struct snd_soc_acpi_link_adr ptl_cs42l43_agg_l3_cs35l56_l2[] = { {} }; -static const struct snd_soc_acpi_link_adr ptl_cs42l43_l2_cs35l56x6_l13[] = { - { - .mask = BIT(2), - .num_adr = ARRAY_SIZE(cs42l43_2_adr), - .adr_d = cs42l43_2_adr, - }, - { - .mask = BIT(1), - .num_adr = ARRAY_SIZE(cs35l56_1_3amp_adr), - .adr_d = cs35l56_1_3amp_adr, - }, - { - .mask = BIT(3), - .num_adr = ARRAY_SIZE(cs35l56_3_3amp_adr), - .adr_d = cs35l56_3_3amp_adr, - }, - {} -}; - static const struct snd_soc_acpi_link_adr ptl_rt722_l0_rt1320_l23[] = { { .mask = BIT(0), @@ -599,12 +487,6 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_sdw_machines[] = { .sof_tplg_filename = "sof-ptl-rt713-l3-rt1320-l1.tplg", .get_function_tplg_files = sof_sdw_get_tplg_files, }, - { - .link_mask = BIT(1) | BIT(2) | BIT(3), - .links = ptl_cs42l43_l2_cs35l56x6_l13, - .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-ptl-cs42l43-l2-cs35l56x6-l13.tplg", - }, { .link_mask = BIT(0) | BIT(2) | BIT(3), .links = ptl_rt722_l0_rt1320_l23, -- cgit v1.2.3 From 09e8f9a9aa19aa8c1b0cc7a0ebc68f6ecf86a660 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Thu, 21 May 2026 20:37:12 +0900 Subject: ASoC: codecs: pcm512x: fix null-ptr dereference in pcm512x_overclock_xxx_put() In the pcm512x chipset driver, pcm512x_overclock_xxx_put() is defined as a general mixer kcontrol instead of a DAPM kcontrol, so struct snd_soc_dapm_context must not be accessed via snd_soc_dapm_kcontrol_to_dapm(). This causes a NULL pointer dereference, so it must be modified to use snd_soc_component_to_dapm(). Cc: stable@kernel.org Closes: https://github.com/raspberrypi/linux/issues/7242 Fixes: 02dbbb7e982a ("ASoC: codecs: pcm512x: convert to snd_soc_dapm_xxx()") Signed-off-by: Jeongjun Park Link: https://patch.msgid.link/20260521113712.227438-1-aha310510@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/pcm512x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c index a70e8ea166dc..fdef98ce52f1 100644 --- a/sound/soc/codecs/pcm512x.c +++ b/sound/soc/codecs/pcm512x.c @@ -235,7 +235,7 @@ static int pcm512x_overclock_pll_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_to_dapm(kcontrol); + struct snd_soc_dapm_context *dapm = snd_soc_component_to_dapm(component); struct pcm512x_priv *pcm512x = snd_soc_component_get_drvdata(component); switch (snd_soc_dapm_get_bias_level(dapm)) { @@ -264,7 +264,7 @@ static int pcm512x_overclock_dsp_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_to_dapm(kcontrol); + struct snd_soc_dapm_context *dapm = snd_soc_component_to_dapm(component); struct pcm512x_priv *pcm512x = snd_soc_component_get_drvdata(component); switch (snd_soc_dapm_get_bias_level(dapm)) { @@ -293,7 +293,7 @@ static int pcm512x_overclock_dac_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_to_dapm(kcontrol); + struct snd_soc_dapm_context *dapm = snd_soc_component_to_dapm(component); struct pcm512x_priv *pcm512x = snd_soc_component_get_drvdata(component); switch (snd_soc_dapm_get_bias_level(dapm)) { -- cgit v1.2.3 From dc278e9bf2b9513a763353e6b9cc21e0f532954e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 21 May 2026 12:02:53 -0700 Subject: blk-mq: pop cached request if it is usable When submitting a bio to blk-mq, if the task should sleep after peeking a cached request, but before it pops it, the plug flushes and calls blk_mq_free_plug_rqs, freeing the cached_rqs. This creates a use-after-free bug. Fix this by popping the cached request before any possible blocking calls if it is suitable for use. Popping this request first holds a queue reference, so avoid any serialization races with queue freezes and can safely proceed with dispatching that request to the driver. This potentially increases a timing window from when a driver wants to freeze its queue to when requests stop being dispatched. That scenario is off the fast path though, and drivers need to appropriately handle requests during a freeze request anyway. The downside is the popped element needs to be individually freed when we performed a bio plug merge. The cached request would have had to be freed later anyway, but this patch does it inline with building the plug list instead of after flushing it. Fixes: b0077e269f6c1 ("blk-mq: make sure active queue usage is held for bio_integrity_prep()") Fixes: 7b4f36cd22a65 ("block: ensure we hold a queue reference when using queue limits") Signed-off-by: Keith Busch Link: https://patch.msgid.link/20260521190253.242065-1-kbusch@meta.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index d0c37daf568f..28c2d931e75e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3077,7 +3077,7 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q, /* * Check if there is a suitable cached request and return it. */ -static struct request *blk_mq_peek_cached_request(struct blk_plug *plug, +static struct request *blk_mq_get_cached_request(struct blk_plug *plug, struct request_queue *q, blk_opf_t opf) { enum hctx_type type = blk_mq_get_hctx_type(opf); @@ -3093,27 +3093,10 @@ static struct request *blk_mq_peek_cached_request(struct blk_plug *plug, return NULL; if (op_is_flush(rq->cmd_flags) != op_is_flush(opf)) return NULL; + rq_list_pop(&plug->cached_rqs); return rq; } -static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug, - struct bio *bio) -{ - if (rq_list_pop(&plug->cached_rqs) != rq) - WARN_ON_ONCE(1); - - /* - * If any qos ->throttle() end up blocking, we will have flushed the - * plug and hence killed the cached_rq list as well. Pop this entry - * before we throttle. - */ - rq_qos_throttle(rq->q, bio); - - blk_mq_rq_time_init(rq, blk_time_get_ns()); - rq->cmd_flags = bio->bi_opf; - INIT_LIST_HEAD(&rq->queuelist); -} - static bool bio_unaligned(const struct bio *bio, struct request_queue *q) { unsigned int bs_mask = queue_logical_block_size(q) - 1; @@ -3152,7 +3135,7 @@ void blk_mq_submit_bio(struct bio *bio) /* * If the plug has a cached request for this queue, try to use it. */ - rq = blk_mq_peek_cached_request(plug, q, bio->bi_opf); + rq = blk_mq_get_cached_request(plug, q, bio->bi_opf); /* * A BIO that was released from a zone write plug has already been @@ -3211,7 +3194,10 @@ void blk_mq_submit_bio(struct bio *bio) new_request: if (rq) { - blk_mq_use_cached_rq(rq, plug, bio); + rq_qos_throttle(rq->q, bio); + blk_mq_rq_time_init(rq, blk_time_get_ns()); + rq->cmd_flags = bio->bi_opf; + INIT_LIST_HEAD(&rq->queuelist); } else { rq = blk_mq_get_new_requests(q, plug, bio); if (unlikely(!rq)) { @@ -3257,12 +3243,10 @@ new_request: return; queue_exit: - /* - * Don't drop the queue reference if we were trying to use a cached - * request and thus didn't acquire one. - */ if (!rq) blk_queue_exit(q); + else + blk_mq_free_request(rq); } #ifdef CONFIG_BLK_MQ_STACKING -- cgit v1.2.3 From 27cd2dde35b2c3b8659fa18f6a935c61fedee5c1 Mon Sep 17 00:00:00 2001 From: Zhengyu He Date: Thu, 21 May 2026 22:44:45 +0800 Subject: spi: dt-bindings: fsl-qspi: support SpacemiT K3 Add the SpacemiT K3 QSPI compatible to the fsl-qspi binding. K3 and K1 use the same QSPI controller, so document the K3 compatible with "spacemit,k1-qspi" as fallback. Signed-off-by: Cody Kang Signed-off-by: Zhengyu He Acked-by: Conor Dooley Link: https://patch.msgid.link/20260521-k3-pico-itx-qspi-v2-for-next-20260521-v2-1-52bce26e5fd8@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/fsl,spi-fsl-qspi.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/spi/fsl,spi-fsl-qspi.yaml b/Documentation/devicetree/bindings/spi/fsl,spi-fsl-qspi.yaml index 1d10cfbad86c..504df31a4f90 100644 --- a/Documentation/devicetree/bindings/spi/fsl,spi-fsl-qspi.yaml +++ b/Documentation/devicetree/bindings/spi/fsl,spi-fsl-qspi.yaml @@ -20,6 +20,9 @@ properties: - fsl,ls1021a-qspi - fsl,ls2080a-qspi - spacemit,k1-qspi + - items: + - const: spacemit,k3-qspi + - const: spacemit,k1-qspi - items: - enum: - fsl,ls1043a-qspi -- cgit v1.2.3 From ea25e3c7915b24e0ef93ee85190f3fada037dfb1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 21 Apr 2026 12:11:25 -0400 Subject: sunrpc: prevent out-of-bounds read in __cache_seq_start() Commit 7b546bd89975 ("sunrpc/cache: improve RCU safety in cache_list walking.") changed the tail of __cache_seq_start() to unconditionally store *pos = ((long long)hash << 32) + 1 before returning, dropping a prior "hash >= cd->hash_size" guard. When the while loop exits because every remaining bucket was empty, hash equals cd->hash_size, so the stored *pos is one position past the table's last valid bucket. seq_read_iter() caches that index in m->index. A subsequent pread(2) at the same file offset skips traverse() and hands the stored index back to __cache_seq_start(), which decodes hash = cd->hash_size and dereferences cd->hash_table[cd->hash_size] -- one hlist_head past the end of the kzalloc'd table. KASAN reports an eight-byte slab-out-of-bounds read at the tail of the 2048-byte hash_table allocation for the NFSD export cache (EXPORT_HASHMAX * sizeof(struct hlist_head) == 256 * 8). Reject an input hash that is out of range before touching the hash table. cache_seq_next() already bounds-checks its own loop; the start routine needs to be symmetric. Reported-by: syzbot+60cfa08822470bbebe44@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=60cfa08822470bbebe44 Fixes: 7b546bd89975 ("sunrpc/cache: improve RCU safety in cache_list walking.") Reviewed-by: Benjamin Coddington Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- net/sunrpc/cache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index b5474ce534fb..27dd6b58b8ff 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1348,6 +1348,9 @@ static void *__cache_seq_start(struct seq_file *m, loff_t *pos) hash = n >> 32; entry = n & ((1LL<<32) - 1); + if (hash >= cd->hash_size) + return NULL; + hlist_for_each_entry_rcu(ch, &cd->hash_table[hash], cache_list) if (!entry--) return ch; -- cgit v1.2.3 From fc151100098d2899b7aed99aa1bcfe27bf00d58d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 21 Apr 2026 15:20:21 -0400 Subject: NFSD: Report whether fh_key was actually updated The nfsd_ctl_fh_key_set tracepoint was introduced to capture operator activity on the filehandle signing key. Earlier revisions logged the key bytes verbatim; the version that landed hashes the 16 key bytes through crc32_le and stores the result. CRC32 is a linear projection of its input rather than a one-way function, and truncating any hash of fixed-size secret material leaves the key recoverable under offline brute force when the threat model includes an attacker with access to the trace ring. The operational question the fingerprint was meant to answer is whether a NFSD_CMD_THREADS_SET call that carries an NFSD_A_SERVER_FH_KEY attribute actually replaced the active key or re-installed the value already in place. Answer that question directly: compare the incoming key bytes against the current nn->fh_key inside nfsd_nl_fh_key_set() and surface a single bit to the tracepoint. The event now prints "updated" when the stored key changed and "unmodified" otherwise. A first set that fails kmalloc reports "unmodified" because no key was installed. Reported-by: jaeyeong Fixes: 62346217fd72 ("NFSD: Add a key for signing filehandles") Cc: Benjamin Coddington Reviewed-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 18 ++++++++++++++---- fs/nfsd/trace.h | 16 +++++++--------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 39e7012a60d8..04e3954d54bd 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1594,16 +1594,27 @@ out_unlock: static int nfsd_nl_fh_key_set(const struct nlattr *attr, struct nfsd_net *nn) { siphash_key_t *fh_key = nn->fh_key; + u64 k0, k1; + bool changed; + + k0 = get_unaligned_le64(nla_data(attr)); + k1 = get_unaligned_le64(nla_data(attr) + 8); if (!fh_key) { fh_key = kmalloc(sizeof(siphash_key_t), GFP_KERNEL); - if (!fh_key) + if (!fh_key) { + trace_nfsd_ctl_fh_key_set(false, -ENOMEM); return -ENOMEM; + } nn->fh_key = fh_key; + changed = true; + } else { + changed = fh_key->key[0] != k0 || fh_key->key[1] != k1; } - fh_key->key[0] = get_unaligned_le64(nla_data(attr)); - fh_key->key[1] = get_unaligned_le64(nla_data(attr) + 8); + fh_key->key[0] = k0; + fh_key->key[1] = k1; + trace_nfsd_ctl_fh_key_set(changed, 0); return 0; } @@ -1682,7 +1693,6 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) attr = info->attrs[NFSD_A_SERVER_FH_KEY]; if (attr) { ret = nfsd_nl_fh_key_set(attr, nn); - trace_nfsd_ctl_fh_key_set((const char *)nn->fh_key, ret); if (ret) goto out_unlock; } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 5ad38f50836d..b631a472222b 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -2243,23 +2243,21 @@ TRACE_EVENT(nfsd_end_grace, TRACE_EVENT(nfsd_ctl_fh_key_set, TP_PROTO( - const char *key, + bool changed, int result ), - TP_ARGS(key, result), + TP_ARGS(changed, result), TP_STRUCT__entry( - __field(u32, key_hash) + __field(bool, changed) __field(int, result) ), TP_fast_assign( - if (key) - __entry->key_hash = ~crc32_le(0xFFFFFFFF, key, 16); - else - __entry->key_hash = 0; + __entry->changed = changed; __entry->result = result; ), - TP_printk("key=0x%08x result=%d", - __entry->key_hash, __entry->result + TP_printk("key %s, result=%d", + __entry->changed ? "updated" : "unmodified", + __entry->result ) ); -- cgit v1.2.3 From 0b474240327cebeff08ad429e8ed3cfc6c8ee816 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 28 Apr 2026 15:47:44 -0400 Subject: lockd: fix TEST handling when not all permissions are available. The F_GETLK fcntl can work with either read access or write access or both. It can query F_RDLCK and F_WRLCK locks in either case. However lockd currently treats F_GETLK similar to F_SETLK in that read access is required to query an F_RDLCK lock and write access is required to query a F_WRLCK lock. This is wrong and can cause problems - e.g. when qemu accesses a read-only (e.g. iso) filesystem image over NFS (though why it queries if it can get a write lock - I don't know. But it does, and this works with local filesystems). So we need TEST requests to be handled differently. To do this: - change nlm_do_fopen() to accept O_RDWR as a mode and in that case succeed if either a O_RDONLY or O_WRONLY file can be opened. - change nlm_lookup_file() to accept a mode argument from caller, instead of deducing base on lock time, and pass that on to nlm_do_fopen() - change nlm4svc_retrieve_args() and nlmsvc_retrieve_args() to detect TEST requests and pass O_RDWR as a mode to nlm_lookup_file, passing the same mode as before for other requests. Also set lock->fl.c.flc_file to whichever file is available for TEST requests. - change nlmsvc_testlock() to also not calculate the mode, but to use whatever was stored in lock->fl.c.flc_file. This behaviour of lockd - requesting O_WRONLY access to TEST for exclusive locks - has been present at least since git history began. However it was hidden until recently because knfsd ignored the access requested by lockd and required only READ access for all locking requests (unless the underlying filesystem provided an f_op->open function which checked access permissions). The commit mentioned in Fixes: below changed nfsd_permission() to NOT override the access request for LOCK requests and this exposed the bug that we are now fixing. Note that there is another issue that this patch does not address. The flock(.., LOCK_EX) call is permitted on a read-only file descriptor. Linux NFS maps this to NLM locking as whole-file byte-range locks. nfsd will see this as though it were fcntl( F_SETLK (F_WRLCK)) and will now require write access, which it might not be able to get. It is not clear if this is a problem in practice, or what the best solution might be. So no attempt is made to address it. Reported-by: Tj Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1128861 Fixes: 4cc9b9f2bf4d ("nfsd: refine and rename NFSD_MAY_LOCK") Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/lockd/lockd.h | 2 +- fs/lockd/svc4proc.c | 9 +++++++-- fs/lockd/svclock.c | 4 +--- fs/lockd/svcproc.c | 15 ++++++++++++--- fs/lockd/svcsubs.c | 31 +++++++++++++++++++++---------- 5 files changed, 42 insertions(+), 19 deletions(-) diff --git a/fs/lockd/lockd.h b/fs/lockd/lockd.h index a7c85ab6d4b5..1db6cb352542 100644 --- a/fs/lockd/lockd.h +++ b/fs/lockd/lockd.h @@ -332,7 +332,7 @@ int nlmsvc_dispatch(struct svc_rqst *rqstp); * File handling for the server personality */ __be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **, - struct nlm_lock *); + struct nlm_lock *, int); void nlm_release_file(struct nlm_file *); void nlmsvc_put_lockowner(struct nlm_lockowner *); void nlmsvc_release_lockowner(struct nlm_lock *); diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 5de41e249534..41cab858de57 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -146,8 +146,11 @@ nlm4svc_lookup_file(struct svc_rqst *rqstp, struct nlm_host *host, struct nlm_lock *lock, struct nlm_file **filp, struct nlm4_lock *xdr_lock, unsigned char type) { + bool is_test = (rqstp->rq_proc == NLMPROC4_TEST || + rqstp->rq_proc == NLMPROC4_TEST_MSG); struct file_lock *fl = &lock->fl; struct nlm_file *file = NULL; + int mode; __be32 error; if (xdr_lock->fh.len > NFS_MAXFHSIZE) @@ -170,7 +173,8 @@ nlm4svc_lookup_file(struct svc_rqst *rqstp, struct nlm_host *host, fl->c.flc_type = type; lockd_set_file_lock_range4(fl, lock->lock_start, lock->lock_len); - error = nlm_lookup_file(rqstp, &file, lock); + mode = is_test ? O_RDWR : lock_to_openmode(fl); + error = nlm_lookup_file(rqstp, &file, lock, mode); switch (error) { case nlm_granted: break; @@ -184,7 +188,8 @@ nlm4svc_lookup_file(struct svc_rqst *rqstp, struct nlm_host *host, *filp = file; fl->c.flc_flags = FL_POSIX; - fl->c.flc_file = file->f_file[lock_to_openmode(fl)]; + fl->c.flc_file = is_test ? nlmsvc_file_file(file) + : file->f_file[mode]; fl->c.flc_pid = current->tgid; fl->fl_lmops = &nlmsvc_lock_operations; nlmsvc_locks_init_private(fl, host, (pid_t)lock->svid); diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index ee23f5802af1..44bc20837062 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -613,7 +613,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_lock *conflock) { int error; - int mode; __be32 ret; dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", @@ -631,14 +630,13 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } - mode = lock_to_openmode(&lock->fl); locks_init_lock(&conflock->fl); /* vfs_test_lock only uses start, end, and owner, but tests flc_file */ conflock->fl.c.flc_file = lock->fl.c.flc_file; conflock->fl.fl_start = lock->fl.fl_start; conflock->fl.fl_end = lock->fl.fl_end; conflock->fl.c.flc_owner = lock->fl.c.flc_owner; - error = vfs_test_lock(file->f_file[mode], &conflock->fl); + error = vfs_test_lock(lock->fl.c.flc_file, &conflock->fl); if (error) { ret = nlm_lck_denied_nolocks; goto out; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 749abf8886ba..c0a3487719e2 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -68,6 +68,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_host *host = NULL; struct nlm_file *file = NULL; struct nlm_lock *lock = &argp->lock; + bool is_test = (rqstp->rq_proc == NLMPROC_TEST || + rqstp->rq_proc == NLMPROC_TEST_MSG); int mode; __be32 error = 0; @@ -83,15 +85,22 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain file pointer. Not used by FREE_ALL call. */ if (filp != NULL) { - error = cast_status(nlm_lookup_file(rqstp, &file, lock)); + mode = lock_to_openmode(&lock->fl); + + if (is_test) + mode = O_RDWR; + + error = cast_status(nlm_lookup_file(rqstp, &file, lock, mode)); if (error != 0) goto no_locks; *filp = file; /* Set up the missing parts of the file_lock structure */ - mode = lock_to_openmode(&lock->fl); lock->fl.c.flc_flags = FL_POSIX; - lock->fl.c.flc_file = file->f_file[mode]; + if (is_test) + lock->fl.c.flc_file = nlmsvc_file_file(file); + else + lock->fl.c.flc_file = file->f_file[mode]; lock->fl.c.flc_pid = current->tgid; lock->fl.fl_lmops = &nlmsvc_lock_operations; nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 71eaec5ed8d7..976cc66d0c19 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -83,23 +83,36 @@ int lock_to_openmode(struct file_lock *lock) * * We have to make sure we have the right credential to open * the file. + * + * @mode is O_RDONLY, O_WRONLY, or O_RDWR. O_RDWR means success + * is achieved with EITHER O_RDONLY or O_WRONLY; it does not + * require both. */ static __be32 nlm_do_fopen(struct svc_rqst *rqstp, struct nlm_file *file, int mode) { - struct file **fp = &file->f_file[mode]; - __be32 nlmerr = nlm_granted; + __be32 nlmerr = nlm__int__failed; + __be32 deferred = 0; int error; + int m; + + for (m = O_RDONLY; m <= O_WRONLY; m++) { + struct file **fp = &file->f_file[m]; + + if (mode != O_RDWR && mode != m) + continue; + if (*fp) + return nlm_granted; - if (*fp) - return nlmerr; + error = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, m); + if (!error) + return nlm_granted; - error = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, mode); - if (error) { dprintk("lockd: open failed (errno %d)\n", error); switch (error) { case -EWOULDBLOCK: nlmerr = nlm__int__drop_reply; + deferred = nlmerr; break; case -ESTALE: nlmerr = nlm__int__stale_fh; @@ -110,7 +123,7 @@ static __be32 nlm_do_fopen(struct svc_rqst *rqstp, } } - return nlmerr; + return deferred ? deferred : nlmerr; } /* @@ -119,17 +132,15 @@ static __be32 nlm_do_fopen(struct svc_rqst *rqstp, */ __be32 nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, - struct nlm_lock *lock) + struct nlm_lock *lock, int mode) { struct nlm_file *file; unsigned int hash; __be32 nfserr; - int mode; nlm_debug_print_fh("nlm_lookup_file", &lock->fh); hash = file_hash(&lock->fh); - mode = lock_to_openmode(&lock->fl); /* Lock file table */ mutex_lock(&nlm_file_mutex); -- cgit v1.2.3 From b60621c5121c9435eda99af7dc2100f5c0f88695 Mon Sep 17 00:00:00 2001 From: Emily Ehlert Date: Mon, 18 May 2026 13:59:56 +0000 Subject: KVM: x86: Fix ERAPS RAP clear on INVPCID single-context invalidation Use kvm_register_mark_dirty() instead of kvm_register_is_dirty() to actually mark VCPU_EXREG_ERAPS as dirty when emulating INVPCID_TYPE_SINGLE_CTXT. kvm_register_is_dirty() is a read-only predicate whose return value is discarded, making the call a no-op. Without this fix, a single-context INVPCID will not trigger a RAP clear on the next VMRUN, breaking the ERAPS security guarantee. Fixes: db5e82496492 ("KVM: SVM: Virtualize and advertise support for ERAPS") Signed-off-by: Emily Ehlert Link: https://patch.msgid.link/20260518135956.82569-1-ehemily@amazon.de Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e01d6984ed04..108318e1b3f0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -14330,7 +14330,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) * the RAP (Return Address Predicator). */ if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS)) - kvm_register_is_dirty(vcpu, VCPU_EXREG_ERAPS); + kvm_register_mark_dirty(vcpu, VCPU_EXREG_ERAPS); kvm_invalidate_pcid(vcpu, operand.pcid); return kvm_skip_emulated_instruction(vcpu); -- cgit v1.2.3 From a9e18aa3263f356edae305e29830e5fe63d8597a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 15 May 2026 10:15:36 -0700 Subject: KVM: SVM: Flush the current TLB when transitioning from xAVIC => x2AVIC Flush the current TLB when xAVIC *or* x2AVIC is activated, as KVM is (apparently) responsible for purging TLB entries when transitioning from xAVIC to x2AVIC. The APM says a whole lot of nothing about TLB flushing with respect to (x2)AVIC, but empirical data strongly suggests hardware also does a whole lot of nothing. Failure to flush the TLB when enabling x2AVIC can lead to guest accesses to the APIC base address getting incorrectly redirected to the virtual APIC page. The flaw most visibly manifests as failures in KVM-Unit-Test's verify_disabled_apic_mmio() testcase when x2APIC is enabled (though for reasons unknown, the test only reliably fails with EFI builds). Fixes: 0ccf3e7cb95a ("KVM: SVM: Flush the "current" TLB when activating AVIC") Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode") Cc: stable@vger.kernel.org Cc: Naveen N Rao (AMD) Link: https://patch.msgid.link/20260515171536.1841645-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/avic.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index adf211860949..e8bd60156941 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -206,6 +206,35 @@ static void avic_activate_vmcb(struct vcpu_svm *svm) svm_clr_intercept(svm, INTERCEPT_CR8_WRITE); + /* + * Flush the TLB when enabling (x2)AVIC and when transitioning between + * xAVIC and x2AVIC, as the CPU may have inserted a TLB entry for the + * "wrong" mapping. + * + * KVM uses a per-VM "scratch" page to back the APIC memslot, because + * KVM also uses per-VM page tables *and* maintains the page table (NPT + * or shadow page) mappings for said memslot even if one or more vCPUs + * have their local APIC hardware-disabled or are in x2APIC mode, i.e. + * even if one or more vCPUs' APIC MMIO BAR is effectively disabled. + * + * If xAVIC is fully enabled, hardware ignores the physical address in + * KVM's page tables, i.e. in the leaf SPTE for the APIC memslot, and + * instead redirects the access to the AVIC backing page, i.e. to the + * vCPU's virtual APIC page. If xAVIC is not enabled (APIC is either + * hardware-disabled or in x2APIC mode), then guest accesses will use + * the page table mapping verbatim, i.e. will access the per-VM scratch + * page, as normal memory. + * + * In both cases, the CPU is allowed to cache TLB entries for the APIC + * base GPA. So, KVM needs to flush the TLB when enabling xAVIC, as + * accesses need to be redirected to the virtual APIC page, but the TLB + * may contain entries pointing at the scratch page. KVM also needs to + * flush the TLB when enabling x2AVIC, as accesses need to go to the + * scratch page, but the TLB may contain entries tagged as xAVIC, i.e. + * entries pointing to the vCPU's virtual APIC page. + */ + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu); + /* * Note: KVM supports hybrid-AVIC mode, where KVM emulates x2APIC MSR * accesses, while interrupt injection to a running vCPU can be @@ -219,12 +248,6 @@ static void avic_activate_vmcb(struct vcpu_svm *svm) /* Disabling MSR intercept for x2APIC registers */ avic_set_x2apic_msr_interception(svm, false); } else { - /* - * Flush the TLB, the guest may have inserted a non-APIC - * mapping into the TLB while AVIC was disabled. - */ - kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu); - /* Enabling MSR intercept for x2APIC registers */ avic_set_x2apic_msr_interception(svm, true); } -- cgit v1.2.3 From 3515503322f4819277091839eed46b695096aca5 Mon Sep 17 00:00:00 2001 From: Junyi Liu Date: Mon, 18 May 2026 23:27:19 +0900 Subject: ksmbd: fix durable reconnect error path file lifetime After a durable reconnect succeeds, ksmbd_reopen_durable_fd() republishes the same ksmbd_file into the session volatile-id table. If smb2_open() then takes a later error path, cleanup first calls ksmbd_fd_put(work, fp) and then unconditionally calls ksmbd_put_durable_fd(dh_info.fp). In this case fp and dh_info.fp are the same object. The first put drops the reconnect lookup reference, but the final durable put can run __ksmbd_close_fd(NULL, fp). Because the final close is not session-aware, it can free the file object without removing the volatile-id entry that was just published into the session table. Use the session-aware put for the final reconnect drop when the reconnect had already succeeded and the error path is cleaning up the republished file. Earlier reconnect failures, before fp is assigned to dh_info.fp, keep using the durable-only put path. Fixes: 1baff47b81f9 ("ksmbd: fix use-after-free in smb2_open during durable reconnect") Signed-off-by: Junyi Liu Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 62d4399a993d..5128a693aca6 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3804,8 +3804,19 @@ err_out2: ksmbd_debug(SMB, "Error response: %x\n", rsp->hdr.Status); } - if (dh_info.reconnected) - ksmbd_put_durable_fd(dh_info.fp); + if (dh_info.reconnected) { + /* + * If reconnect succeeded, fp was republished in the + * session file table. On a later error, ksmbd_fd_put() + * above drops the session reference; drop the durable + * lookup reference through the same session-aware path so + * final close removes the volatile id before freeing fp. + */ + if (rc && fp == dh_info.fp) + ksmbd_fd_put(work, dh_info.fp); + else + ksmbd_put_durable_fd(dh_info.fp); + } kfree(name); kfree(lc); -- cgit v1.2.3 From 69f030cf95488ae1186c72ac8c66fd279664ea7f Mon Sep 17 00:00:00 2001 From: Junyi Liu Date: Tue, 19 May 2026 16:12:04 +0900 Subject: ksmbd: validate SID in parent security descriptor during ACL inheritance Introduce smb_validate_ntsd_sid() helper to safely validate Owner SID and Group SID inside the NT Security Descriptor (smb_ntsd) retrieved from the parent directory. Cc: stable@vger.kernel.org Signed-off-by: Junyi Liu Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smbacl.c | 66 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 9161e9d7ed24..c2d9be52a311 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1096,6 +1096,40 @@ static int smb_append_inherited_ace(struct smb_ace **ace, int *nt_size, return 0; } +static int smb_validate_ntsd_sid(struct smb_ntsd *pntsd, size_t pntsd_size, + unsigned int sid_offset, struct smb_sid **sid, + size_t *sid_size) +{ + size_t sid_end; + + *sid = NULL; + *sid_size = 0; + + if (!sid_offset) + return 0; + + if (sid_offset < sizeof(struct smb_ntsd) || + check_add_overflow(sid_offset, (size_t)CIFS_SID_BASE_SIZE, + &sid_end) || + sid_end > pntsd_size) + return -EINVAL; + + *sid = (struct smb_sid *)((char *)pntsd + sid_offset); + if ((*sid)->num_subauth > SID_MAX_SUB_AUTHORITIES) + return -EINVAL; + + if (check_add_overflow((size_t)CIFS_SID_BASE_SIZE, + sizeof(__le32) * (size_t)(*sid)->num_subauth, + &sid_end)) + return -EINVAL; + + if (sid_offset > pntsd_size || sid_end > pntsd_size - sid_offset) + return -EINVAL; + + *sid_size = sid_end; + return 0; +} + int smb_inherit_dacl(struct ksmbd_conn *conn, const struct path *path, unsigned int uid, unsigned int gid) @@ -1108,28 +1142,28 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, struct dentry *parent = path->dentry->d_parent; struct mnt_idmap *idmap = mnt_idmap(path->mnt); int inherited_flags = 0, flags = 0, i, nt_size = 0, pdacl_size; - int rc = 0, pntsd_type, pntsd_size, acl_len, aces_size; + int rc = 0, pntsd_type, ppntsd_size, acl_len, aces_size; unsigned int dacloffset; size_t dacl_struct_end; u16 num_aces, ace_cnt = 0; char *aces_base; bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode); - pntsd_size = ksmbd_vfs_get_sd_xattr(conn, idmap, + ppntsd_size = ksmbd_vfs_get_sd_xattr(conn, idmap, parent, &parent_pntsd); - if (pntsd_size <= 0) + if (ppntsd_size <= 0) return -ENOENT; dacloffset = le32_to_cpu(parent_pntsd->dacloffset); if (!dacloffset || check_add_overflow(dacloffset, sizeof(struct smb_acl), &dacl_struct_end) || - dacl_struct_end > (size_t)pntsd_size) { + dacl_struct_end > (size_t)ppntsd_size) { rc = -EINVAL; goto free_parent_pntsd; } parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset); - acl_len = pntsd_size - dacloffset; + acl_len = ppntsd_size - dacloffset; num_aces = le16_to_cpu(parent_pdacl->num_aces); pntsd_type = le16_to_cpu(parent_pntsd->type); pdacl_size = le16_to_cpu(parent_pdacl->size); @@ -1243,19 +1277,19 @@ pass: struct smb_ntsd *pntsd; struct smb_acl *pdacl; struct smb_sid *powner_sid = NULL, *pgroup_sid = NULL; - int powner_sid_size = 0, pgroup_sid_size = 0, pntsd_size; + size_t powner_sid_size = 0, pgroup_sid_size = 0, pntsd_size; size_t pntsd_alloc_size; - if (parent_pntsd->osidoffset) { - powner_sid = (struct smb_sid *)((char *)parent_pntsd + - le32_to_cpu(parent_pntsd->osidoffset)); - powner_sid_size = 1 + 1 + 6 + (powner_sid->num_subauth * 4); - } - if (parent_pntsd->gsidoffset) { - pgroup_sid = (struct smb_sid *)((char *)parent_pntsd + - le32_to_cpu(parent_pntsd->gsidoffset)); - pgroup_sid_size = 1 + 1 + 6 + (pgroup_sid->num_subauth * 4); - } + rc = smb_validate_ntsd_sid(parent_pntsd, ppntsd_size, + le32_to_cpu(parent_pntsd->osidoffset), + &powner_sid, &powner_sid_size); + if (rc) + goto free_aces_base; + rc = smb_validate_ntsd_sid(parent_pntsd, ppntsd_size, + le32_to_cpu(parent_pntsd->gsidoffset), + &pgroup_sid, &pgroup_sid_size); + if (rc) + goto free_aces_base; if (check_add_overflow(sizeof(struct smb_ntsd), (size_t)powner_sid_size, -- cgit v1.2.3 From 4ec9c8e023c79f613fe4d5ad8cc737112efb2e44 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Mon, 18 May 2026 15:23:22 +0000 Subject: smb/server: promote S_DEL_ON_CLS to S_DEL_PENDING when close Reproducer: 1. server: systemctl start ksmbd 2. client: mount -t cifs //${server_ip}/export /mnt 3. client: C program: openat(AT_FDCWD, "/mnt", O_RDWR | O_TMPFILE, 0600) Do not treat `FILE_DELETE_ON_CLOSE_LE` as delete pending while files remain open. This patch fixes xfstests generic/004. Cc: stable@vger.kernel.org Link: https://chenxiaosong.com/en/smb-xfstests-generic-004.html Co-developed-by: Huiwen He Signed-off-by: Huiwen He Signed-off-by: ChenXiaoSong Tested-by: Steve French Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs_cache.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 913164c958b1..5a232d94f567 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -211,7 +211,7 @@ int ksmbd_query_inode_status(struct dentry *dentry) return ret; down_read(&ci->m_lock); - if (ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS)) + if (ci->m_flags & S_DEL_PENDING) ret = KSMBD_INODE_STATUS_PENDING_DELETE; else ret = KSMBD_INODE_STATUS_OK; @@ -227,7 +227,7 @@ bool ksmbd_inode_pending_delete(struct ksmbd_file *fp) int ret; down_read(&ci->m_lock); - ret = (ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS)); + ret = (ci->m_flags & S_DEL_PENDING); up_read(&ci->m_lock); return ret; @@ -395,12 +395,20 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp) } } + down_write(&ci->m_lock); + /* Promote S_DEL_ON_CLS to S_DEL_PENDING when close */ + if (ci->m_flags & S_DEL_ON_CLS) { + ci->m_flags &= ~S_DEL_ON_CLS; + ci->m_flags |= S_DEL_PENDING; + } + up_write(&ci->m_lock); + if (atomic_dec_and_test(&ci->m_count)) { bool do_unlink = false; down_write(&ci->m_lock); - if (ci->m_flags & (S_DEL_ON_CLS | S_DEL_PENDING)) { - ci->m_flags &= ~(S_DEL_ON_CLS | S_DEL_PENDING); + if (ci->m_flags & S_DEL_PENDING) { + ci->m_flags &= ~S_DEL_PENDING; do_unlink = true; } up_write(&ci->m_lock); -- cgit v1.2.3 From 7734b168cad167a43e5bda19bc8ccc65669ec964 Mon Sep 17 00:00:00 2001 From: Simon Schuster Date: Thu, 21 May 2026 14:46:28 +0200 Subject: MAINTAINERS: arch/nios2: Add Simon Schuster as co-maintainer Add Simon Schuster as a co-maintainer for the nios2 architecture and mark it as supported. Signed-off-by: Simon Schuster Acked-by: Arnd Bergmann Signed-off-by: Dinh Nguyen --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2fb1c75afd16..ab70c209bec2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18898,7 +18898,8 @@ F: drivers/hid/hid-nintendo* NIOS2 ARCHITECTURE M: Dinh Nguyen -S: Maintained +M: Simon Schuster +S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git F: arch/nios2/ -- cgit v1.2.3 From e90ef85ada857819313000cc50c6edfcddec6850 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 May 2026 14:23:55 +0200 Subject: nios2: Implement _THIS_IP_ using inline asm Both GCC [1] and Clang [2] consider the generic version of _THIS_IP_ to be broken: #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) In particular, the address of a label is only expected to be used with a computed goto. While the generic version more or less works today, it is known to be brittle and may break with current and future optimizations. For example, Clang -O2 always returns 1 when this function is inlined: static inline unsigned long get_ip(void) { return ({ __label__ __here; __here: (unsigned long)&&__here; }); } Fix it by overriding _THIS_IP_ in (which is included by ) using an architecture-specific inline asm version. Additionally, avoiding taking the address of a label prevents compilers from emitting spurious indirect branch targets (e.g. ENDBR or BTI) under control-flow integrity schemes. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120071 [1] Link: https://github.com/llvm/llvm-project/issues/138272 [2] Signed-off-by: Marco Elver Reviewed-by: David Laight Signed-off-by: Dinh Nguyen --- arch/nios2/include/asm/linkage.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/nios2/include/asm/linkage.h b/arch/nios2/include/asm/linkage.h index 211302301a8a..c4073235852b 100644 --- a/arch/nios2/include/asm/linkage.h +++ b/arch/nios2/include/asm/linkage.h @@ -12,4 +12,6 @@ #define __ALIGN .align 4 #define __ALIGN_STR ".align 4" +#define _THIS_IP_ ({ unsigned long __ip; asm volatile("nextpc %0" : "=r" (__ip)); __ip; }) + #endif -- cgit v1.2.3 From 83ec6eeb74a592e6568cb0723bac99fb8b3810b4 Mon Sep 17 00:00:00 2001 From: Ian Ray Date: Wed, 6 May 2026 09:33:35 +0300 Subject: MAINTAINERS: .mailmap: update after GEHC spin-off Update my email address from @ge.com to @gehealthcare.com after GE HealthCare was spun-off from GE. Link: https://lore.kernel.org/20260506063335.3-1-ian.ray@gehealthcare.com Signed-off-by: Ian Ray Reviewed-by: Luca Ceresoli Cc: Neil Armstrong Signed-off-by: Andrew Morton --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index bd6a72f29d9c..de41cfdae5d0 100644 --- a/.mailmap +++ b/.mailmap @@ -339,6 +339,7 @@ Henrik Rydberg Herbert Xu Huacai Chen Huacai Chen +Ian Ray Ignat Korchagin Igor Korotin Ike Panhc diff --git a/MAINTAINERS b/MAINTAINERS index 10e8253181d3..9eb15dacb939 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16504,7 +16504,7 @@ F: drivers/usb/mtu3/ MEGACHIPS STDPXXXX-GE-B850V3-FW LVDS/DP++ BRIDGES M: Peter Senna Tschudin -M: Ian Ray +M: Ian Ray M: Martyn Welch S: Maintained F: Documentation/devicetree/bindings/display/bridge/megachips-stdpxxxx-ge-b850v3-fw.txt -- cgit v1.2.3 From 83f9efcce93f8574be2279090ee2aec58b86cda7 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 12 May 2026 17:06:43 +0100 Subject: Revert "mm/hugetlbfs: update hugetlbfs to use mmap_prepare" This reverts commit ea52cb24cd3f ("mm/hugetlbfs: update hugetlbfs to use mmap_prepare") with conflict resolution to account for changes in commit ea52cb24cd3f ("mm/hugetlbfs: update hugetlbfs to use mmap_prepare"). The patch incorrectly handled hugetlb VMA lock allocation at the mmap_prepare stage, where a failed allocation occurring after mmap_prepare is called might result in the lock leaking. There is no risk of a merge causing a similar issues, as VMA_DONTEXPAND_BIT is set for hugetlb mappings. As a first step in addressing this issue, simply revert the change so we can rework how we do this having corrected the underlying issues. We maintain the VMA flags changes as best we can, accounting for the fact that we were working with a VMA descriptor previously and propagating like-for-like changes for this. Note that we invoke vma_set_flags() and do not call vma_start_write() as vm_flags_set() does. This is OK as it's being done in an .mmap hook where the VMA is not yet linked into the tree so nobody else can be accessing it. Link: https://lore.kernel.org/20260512160643.266960-1-ljs@kernel.org Fixes: ea52cb24cd3f ("mm/hugetlbfs: update hugetlbfs to use mmap_prepare") Signed-off-by: Lorenzo Stoakes Reported-by: Mingyu Wang <25181214217@stu.xidian.edu.cn> Closes: https://lore.kernel.org/linux-mm/20260425070700.562229-1-25181214217@stu.xidian.edu.cn/ Acked-by: Muchun Song Acked-by: Oscar Salvador Cc: David Hildenbrand Cc: Liam R. Howlett Cc: Pedro Falcato Cc: Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 46 ++++++++------------------- include/linux/hugetlb.h | 8 +---- include/linux/hugetlb_inline.h | 14 ++------- mm/hugetlb.c | 71 +++++++++++++++++------------------------- 4 files changed, 45 insertions(+), 94 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8b05bec08e04..78d61bf2bd9b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -96,15 +96,8 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = { #define PGOFF_LOFFT_MAX \ (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1))) -static int hugetlb_file_mmap_prepare_success(const struct vm_area_struct *vma) +static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) { - /* Unfortunate we have to reassign vma->vm_private_data. */ - return hugetlb_vma_lock_alloc((struct vm_area_struct *)vma); -} - -static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc) -{ - struct file *file = desc->file; struct inode *inode = file_inode(file); loff_t len, vma_len; int ret; @@ -119,8 +112,8 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc) * way when do_mmap unwinds (may be important on powerpc * and ia64). */ - vma_desc_set_flags(desc, VMA_HUGETLB_BIT, VMA_DONTEXPAND_BIT); - desc->vm_ops = &hugetlb_vm_ops; + vma_set_flags(vma, VMA_HUGETLB_BIT, VMA_DONTEXPAND_BIT); + vma->vm_ops = &hugetlb_vm_ops; /* * page based offset in vm_pgoff could be sufficiently large to @@ -129,16 +122,16 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc) * sizeof(unsigned long). So, only check in those instances. */ if (sizeof(unsigned long) == sizeof(loff_t)) { - if (desc->pgoff & PGOFF_LOFFT_MAX) + if (vma->vm_pgoff & PGOFF_LOFFT_MAX) return -EINVAL; } /* must be huge page aligned */ - if (desc->pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) + if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) return -EINVAL; - vma_len = (loff_t)vma_desc_size(desc); - len = vma_len + ((loff_t)desc->pgoff << PAGE_SHIFT); + vma_len = (loff_t)(vma->vm_end - vma->vm_start); + len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); /* check for overflow */ if (len < vma_len) return -EINVAL; @@ -148,7 +141,7 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc) ret = -ENOMEM; - vma_flags = desc->vma_flags; + vma_flags = vma->flags; /* * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip * reserving here. Note: only for SHM hugetlbfs file, the inode @@ -158,30 +151,17 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc) vma_flags_set(&vma_flags, VMA_NORESERVE_BIT); if (hugetlb_reserve_pages(inode, - desc->pgoff >> huge_page_order(h), - len >> huge_page_shift(h), desc, - vma_flags) < 0) + vma->vm_pgoff >> huge_page_order(h), + len >> huge_page_shift(h), vma, + vma_flags) < 0) goto out; ret = 0; - if (vma_desc_test(desc, VMA_WRITE_BIT) && inode->i_size < len) + if (vma_test(vma, VMA_WRITE_BIT) && inode->i_size < len) i_size_write(inode, len); out: inode_unlock(inode); - if (!ret) { - /* Allocate the VMA lock after we set it up. */ - desc->action.success_hook = hugetlb_file_mmap_prepare_success; - /* - * We cannot permit the rmap finding this VMA in the time - * between the VMA being inserted into the VMA tree and the - * completion/success hook being invoked. - * - * This is because we establish a per-VMA hugetlb lock which can - * be raced by rmap. - */ - desc->action.hide_from_rmap_until_complete = true; - } return ret; } @@ -1227,7 +1207,7 @@ static void init_once(void *foo) static const struct file_operations hugetlbfs_file_operations = { .read_iter = hugetlbfs_read_iter, - .mmap_prepare = hugetlbfs_file_mmap_prepare, + .mmap = hugetlbfs_file_mmap, .fsync = noop_fsync, .get_unmapped_area = hugetlb_get_unmapped_area, .llseek = default_llseek, diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 93418625d3c5..5957bc25efa8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -148,7 +148,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, struct folio **foliop); #endif /* CONFIG_USERFAULTFD */ long hugetlb_reserve_pages(struct inode *inode, long from, long to, - struct vm_area_desc *desc, vma_flags_t vma_flags); + struct vm_area_struct *vma, vma_flags_t vma_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list); @@ -276,7 +276,6 @@ long hugetlb_change_protection(struct vm_area_struct *vma, void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); void fixup_hugetlb_reservations(struct vm_area_struct *vma); void hugetlb_split(struct vm_area_struct *vma, unsigned long addr); -int hugetlb_vma_lock_alloc(struct vm_area_struct *vma); unsigned int arch_hugetlb_cma_order(void); @@ -469,11 +468,6 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {} -static inline int hugetlb_vma_lock_alloc(struct vm_area_struct *vma) -{ - return 0; -} - #endif /* !CONFIG_HUGETLB_PAGE */ #ifndef pgd_write diff --git a/include/linux/hugetlb_inline.h b/include/linux/hugetlb_inline.h index 565b473fd135..5c29cd3223a1 100644 --- a/include/linux/hugetlb_inline.h +++ b/include/linux/hugetlb_inline.h @@ -6,23 +6,13 @@ #ifdef CONFIG_HUGETLB_PAGE -static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags) -{ - return !!(vm_flags & VM_HUGETLB); -} - static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags) { - return vma_flags_test_any(flags, VMA_HUGETLB_BIT); + return vma_flags_test(flags, VMA_HUGETLB_BIT); } #else -static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags) -{ - return false; -} - static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags) { return false; @@ -32,7 +22,7 @@ static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags) static inline bool is_vm_hugetlb_page(const struct vm_area_struct *vma) { - return is_vm_hugetlb_flags(vma->vm_flags); + return is_vma_hugetlb_flags(&vma->flags); } #endif diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f24bf49be047..4b80b167cc9c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -116,6 +116,7 @@ struct mutex *hugetlb_fault_mutex_table __ro_after_init; /* Forward declaration */ static int hugetlb_acct_memory(struct hstate *h, long delta); static void hugetlb_vma_lock_free(struct vm_area_struct *vma); +static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); static void hugetlb_unshare_pmds(struct vm_area_struct *vma, unsigned long start, unsigned long end, bool take_locks); @@ -413,21 +414,17 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma) } } -/* - * vma specific semaphore used for pmd sharing and fault/truncation - * synchronization - */ -int hugetlb_vma_lock_alloc(struct vm_area_struct *vma) +static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) { struct hugetlb_vma_lock *vma_lock; /* Only establish in (flags) sharable vmas */ if (!vma || !(vma->vm_flags & VM_MAYSHARE)) - return 0; + return; /* Should never get here with non-NULL vm_private_data */ if (vma->vm_private_data) - return -EINVAL; + return; vma_lock = kmalloc_obj(*vma_lock); if (!vma_lock) { @@ -442,15 +439,13 @@ int hugetlb_vma_lock_alloc(struct vm_area_struct *vma) * allocation failure. */ pr_warn_once("HugeTLB: unable to allocate vma specific lock\n"); - return -EINVAL; + return; } kref_init(&vma_lock->refs); init_rwsem(&vma_lock->rw_sema); vma_lock->vma = vma; vma->vm_private_data = vma_lock; - - return 0; } /* Helper that removes a struct file_region from the resv_map cache and returns @@ -1147,28 +1142,20 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma) } } -static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags) +static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map) { VM_WARN_ON_ONCE_VMA(!is_vm_hugetlb_page(vma), vma); - VM_WARN_ON_ONCE_VMA(vma->vm_flags & VM_MAYSHARE, vma); + VM_WARN_ON_ONCE_VMA(vma_test(vma, VMA_MAYSHARE_BIT), vma); - set_vma_private_data(vma, get_vma_private_data(vma) | flags); + set_vma_private_data(vma, (unsigned long)map); } -static void set_vma_desc_resv_map(struct vm_area_desc *desc, struct resv_map *map) -{ - VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags)); - VM_WARN_ON_ONCE(vma_desc_test(desc, VMA_MAYSHARE_BIT)); - - desc->private_data = map; -} - -static void set_vma_desc_resv_flags(struct vm_area_desc *desc, unsigned long flags) +static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags) { - VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags)); - VM_WARN_ON_ONCE(vma_desc_test(desc, VMA_MAYSHARE_BIT)); + VM_WARN_ON_ONCE_VMA(!is_vm_hugetlb_page(vma), vma); + VM_WARN_ON_ONCE_VMA(vma_test(vma, VMA_MAYSHARE_BIT), vma); - desc->private_data = (void *)((unsigned long)desc->private_data | flags); + set_vma_private_data(vma, get_vma_private_data(vma) | flags); } static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag) @@ -1178,13 +1165,6 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag) return (get_vma_private_data(vma) & flag) != 0; } -static bool is_vma_desc_resv_set(struct vm_area_desc *desc, unsigned long flag) -{ - VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags)); - - return ((unsigned long)desc->private_data) & flag; -} - bool __vma_private_lock(struct vm_area_struct *vma) { return !(vma->vm_flags & VM_MAYSHARE) && @@ -6553,7 +6533,7 @@ next: long hugetlb_reserve_pages(struct inode *inode, long from, long to, - struct vm_area_desc *desc, + struct vm_area_struct *vma, vma_flags_t vma_flags) { long chg = -1, add = -1, spool_resv, gbl_resv; @@ -6570,6 +6550,12 @@ long hugetlb_reserve_pages(struct inode *inode, return -EINVAL; } + /* + * vma specific semaphore used for pmd sharing and fault/truncation + * synchronization + */ + hugetlb_vma_lock_alloc(vma); + /* * Only apply hugepage reservation if asked. At fault time, an * attempt will be made for VM_NORESERVE to allocate a page @@ -6582,9 +6568,9 @@ long hugetlb_reserve_pages(struct inode *inode, * Shared mappings base their reservation on the number of pages that * are already allocated on behalf of the file. Private mappings need * to reserve the full area even if read-only as mprotect() may be - * called to make the mapping read-write. Assume !desc is a shm mapping + * called to make the mapping read-write. Assume !vma is a shm mapping */ - if (!desc || vma_desc_test(desc, VMA_MAYSHARE_BIT)) { + if (!vma || vma_test(vma, VMA_MAYSHARE_BIT)) { /* * resv_map can not be NULL as hugetlb_reserve_pages is only * called for inodes for which resv_maps were created (see @@ -6603,8 +6589,8 @@ long hugetlb_reserve_pages(struct inode *inode, chg = to - from; - set_vma_desc_resv_map(desc, resv_map); - set_vma_desc_resv_flags(desc, HPAGE_RESV_OWNER); + set_vma_resv_map(vma, resv_map); + set_vma_resv_flags(vma, HPAGE_RESV_OWNER); } if (chg < 0) { @@ -6618,7 +6604,7 @@ long hugetlb_reserve_pages(struct inode *inode, if (err < 0) goto out_err; - if (desc && !vma_desc_test(desc, VMA_MAYSHARE_BIT) && h_cg) { + if (vma && !vma_test(vma, VMA_MAYSHARE_BIT) && h_cg) { /* For private mappings, the hugetlb_cgroup uncharge info hangs * of the resv_map. */ @@ -6655,7 +6641,7 @@ long hugetlb_reserve_pages(struct inode *inode, * consumed reservations are stored in the map. Hence, nothing * else has to be done for private mappings here */ - if (!desc || vma_desc_test(desc, VMA_MAYSHARE_BIT)) { + if (!vma || vma_test(vma, VMA_MAYSHARE_BIT)) { add = region_add(resv_map, from, to, regions_needed, h, h_cg); if (unlikely(add < 0)) { @@ -6719,15 +6705,16 @@ out_uncharge_cgroup: hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h), chg * pages_per_huge_page(h), h_cg); out_err: - if (!desc || vma_desc_test(desc, VMA_MAYSHARE_BIT)) + hugetlb_vma_lock_free(vma); + if (!vma || vma_test(vma, VMA_MAYSHARE_BIT)) /* Only call region_abort if the region_chg succeeded but the * region_add failed or didn't run. */ if (chg >= 0 && add < 0) region_abort(resv_map, from, to, regions_needed); - if (desc && is_vma_desc_resv_set(desc, HPAGE_RESV_OWNER)) { + if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { kref_put(&resv_map->refs, resv_map_release); - set_vma_desc_resv_map(desc, NULL); + set_vma_resv_map(vma, NULL); } return err; } -- cgit v1.2.3 From fa0b9b2b7ae3539908d69c2b9ac0d144d9bc5139 Mon Sep 17 00:00:00 2001 From: Linpu Yu Date: Sun, 10 May 2026 13:43:30 +0800 Subject: ipc: limit next_id allocation to the valid ID range The checkpoint/restore sysctl path can request the next SysV IPC id through ids->next_id. ipc_idr_alloc() currently forwards that request to idr_alloc() with an open-ended upper bound. If the valid tail of the SysV IPC id space is full, the allocation can spill beyond ipc_mni. The returned SysV IPC id still uses the normal index encoding, so later lookup and removal can target the wrong slot. This leaves the real IDR entry behind and breaks the IDR state for the object. The bug is in ipc_idr_alloc() in the checkpoint/restore path. 1. ids->next_id is passed to: idr_alloc(&ids->ipcs_idr, new, ipcid_to_idx(next_id), 0, ...) 2. The zero upper bound makes the allocation effectively open-ended. Once the valid SysV IPC tail is occupied, idr_alloc() can spill past ipc_mni and allocate an entry beyond the valid IPC id range. 3. The new object id is still encoded with the narrower SysV IPC index width: new->id = (new->seq << ipcmni_seq_shift()) + idx 4. Later removal goes through ipc_rmid(), which uses: ipcid_to_idx(ipcp->id) That truncates the real IDR index. An object actually stored at a high index can then be removed as if it lived at a low in-range index. 5. For shared memory, shm_destroy() frees the current object anyway, but the real high IDR slot is left behind as a dangling pointer. 6. A subsequent walk of /proc/sysvipc/shm reaches the stale IDR entry and dereferences freed memory. Prevent this by bounding the requested allocation to ipc_mni so the checkpoint/restore path fails once the valid range is exhausted. Link: https://lore.kernel.org/cover.1778336914.git.linpu5433@gmail.com Link: https://lore.kernel.org/2eebe949bfa7d1f6e13b5be6a92c64c850ce9d45.1778336914.git.linpu5433@gmail.com Fixes: 03f595668017 ("ipc: add sysctl to specify desired next object id") Signed-off-by: Linpu Yu Signed-off-by: Ren Wei Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Cc: Kees Cook Cc: Stanislav Kinsbursky Cc: Davidlohr Bueso Cc: Signed-off-by: Andrew Morton --- ipc/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipc/util.c b/ipc/util.c index 9eb89820594e..1737d776bc08 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -253,7 +253,7 @@ static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new) } else { new->seq = ipcid_to_seqx(next_id); idx = idr_alloc(&ids->ipcs_idr, new, ipcid_to_idx(next_id), - 0, GFP_NOWAIT); + ipc_mni, GFP_NOWAIT); } if (idx >= 0) new->id = (new->seq << ipcmni_seq_shift()) + idx; -- cgit v1.2.3 From 3b041514cb6eae45869b020f743c14d983363222 Mon Sep 17 00:00:00 2001 From: "Pratyush Yadav (Google)" Date: Tue, 5 May 2026 15:39:20 +0200 Subject: memfd: deny writeable mappings when implying SEAL_WRITE When SEAL_EXEC is added, SEAL_WRITE is implied to make W^X. But the implied seal is set after the check that makes sure the memfd can not have any writable mappings. This means one can use SEAL_EXEC to apply SEAL_WRITE while having writeable mappings. This breaks the contract that SEAL_WRITE provides and can be used by an attacker to pass a memfd that appears to be write sealed but can still be modified arbitrarily. Fix this by adding the implied seals before the call for mapping_deny_writable() is done. Link: https://lore.kernel.org/20260505133922.797635-1-pratyush@kernel.org Fixes: c4f75bc8bd6b ("mm/memfd: add write seals when apply SEAL_EXEC to executable memfd") Signed-off-by: Pratyush Yadav (Google) Reviewed-by: Pasha Tatashin Acked-by: Jeff Xu Cc: Baolin Wang Cc: Brendan Jackman Cc: Greg Thelen Cc: Hugh Dickins Cc: Kees Cook Cc: "David Hildenbrand (Arm)" Cc: Signed-off-by: Andrew Morton --- mm/memfd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/memfd.c b/mm/memfd.c index fb425f4e315f..abe13b291ddc 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -283,6 +283,12 @@ int memfd_add_seals(struct file *file, unsigned int seals) goto unlock; } + /* + * SEAL_EXEC implies SEAL_WRITE, making W^X from the start. + */ + if (seals & F_SEAL_EXEC && inode->i_mode & 0111) + seals |= F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_FUTURE_WRITE; + if ((seals & F_SEAL_WRITE) && !(*file_seals & F_SEAL_WRITE)) { error = mapping_deny_writable(file->f_mapping); if (error) @@ -295,12 +301,6 @@ int memfd_add_seals(struct file *file, unsigned int seals) } } - /* - * SEAL_EXEC implies SEAL_WRITE, making W^X from the start. - */ - if (seals & F_SEAL_EXEC && inode->i_mode & 0111) - seals |= F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_FUTURE_WRITE; - *file_seals |= seals; error = 0; -- cgit v1.2.3 From bf62f69574b19720ae5fbbbcdf24a0c4e3e05e43 Mon Sep 17 00:00:00 2001 From: Richard Chang Date: Tue, 12 May 2026 07:49:18 +0000 Subject: zram: fix use-after-free in zram_writeback_endio A crash was observed in zram_writeback_endio due to a NULL pointer dereference in wake_up. The root cause is a race condition between the bio completion handler (zram_writeback_endio) and the writeback task. In zram_writeback_endio, wake_up() is called on &wb_ctl->done_wait after releasing wb_ctl->done_lock. This creates a race window where the writeback task can see num_inflight become 0, return, and free wb_ctl before zram_writeback_endio calls wake_up(). CPU 0 (zram_writeback_endio) CPU 1 (writeback_store) ============================ ============================ zram_writeback_slots zram_submit_wb_request zram_submit_wb_request wait_event(wb_ctl->done_wait) spin_lock(&wb_ctl->done_lock); list_add(&req->entry, &wb_ctl->done_reqs); spin_unlock(&wb_ctl->done_lock); wake_up(&wb_ctl->done_wait); zram_complete_done_reqs spin_lock(&wb_ctl->done_lock); list_add(&req->entry, &wb_ctl->done_reqs); spin_unlock(&wb_ctl->done_lock); while (num_inflight) > 0) spin_lock(&wb_ctl->done_lock); list_del(&req->entry); spin_unlock(&wb_ctl->done_lock); // num_inflight becomes 0 atomic_dec(num_inflight); // Leave zram_writeback_slots // Free wb_ctl release_wb_ctl(wb_ctl); // UAF crash! wake_up(&wb_ctl->done_wait); This patch fixes this race by using RCU. By protecting wb_ctl with rcu_read_lock() in zram_writeback_endio and using kfree_rcu() to free it, we ensure that wb_ctl remains valid during the execution of zram_writeback_endio. Link: https://lore.kernel.org/20260512074918.2606208-1-richardycc@google.com Fixes: f405066a1f0d ("zram: introduce writeback bio batching") Signed-off-by: Richard Chang Suggested-by: Sergey Senozhatsky Suggested-by: Minchan Kim Acked-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Brian Geffon Cc: Jens Axboe Cc: Martin Liu Cc: wang wei Cc: Signed-off-by: Andrew Morton --- drivers/block/zram/zram_drv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index aebc710f0d6a..07111455eecf 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "zram_drv.h" @@ -504,6 +505,7 @@ struct zram_wb_ctl { wait_queue_head_t done_wait; spinlock_t done_lock; atomic_t num_inflight; + struct rcu_head rcu; }; struct zram_wb_req { @@ -847,7 +849,7 @@ static void release_wb_ctl(struct zram_wb_ctl *wb_ctl) release_wb_req(req); } - kfree(wb_ctl); + kfree_rcu(wb_ctl, rcu); } static struct zram_wb_ctl *init_wb_ctl(struct zram *zram) @@ -964,11 +966,13 @@ static void zram_writeback_endio(struct bio *bio) struct zram_wb_ctl *wb_ctl = bio->bi_private; unsigned long flags; + rcu_read_lock(); spin_lock_irqsave(&wb_ctl->done_lock, flags); list_add(&req->entry, &wb_ctl->done_reqs); spin_unlock_irqrestore(&wb_ctl->done_lock, flags); wake_up(&wb_ctl->done_wait); + rcu_read_unlock(); } static void zram_submit_wb_request(struct zram *zram, -- cgit v1.2.3 From 3f8968e9cbf95d5d87d32218906cab0b9b9eddbe Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Mon, 18 May 2026 12:06:56 +0530 Subject: mm/rmap: initialize nr_pages to 1 at loop start in try_to_unmap_one Initialize nr_pages to 1 at the start of each loop iteration, like folio_referenced_one() does. Without this, nr_pages computed by a previous folio_unmap_pte_batch() call can be reused on a later iteration that does not run folio_unmap_pte_batch() again. mmap a 64K large folio with MAP_ANONYMOUS | MAP_DROPPABLE, then call madvise(MADV_FREE), then make the last page device-exclusive via HMM_DMIRROR_EXCLUSIVE. Trigger node reclaim through sysfs. Now, in try_to_unmap_one(), we will first clear the first 15 out of 16 entries mapping the lazyfree folio. This will set nr_pages to 15. In the next pvmw walk, this nr_pages gets reused on a device-exclusive pte, thus potentially corrupting folio refcount/mapcount. At the moment, I have a userspace program which can make the kernel spit out a trace, but the blow up is in folio_referenced_one(), because there are existing bugs in the interaction between device-private and rmap (which too I am investigating). I did a one liner kernel change to avoid going into folio_referenced_one(), and the kernel blows up at folio_remove_rmap_ptes in try_to_unmap_one which is what I wanted. Note that the bug is there not since file folio batching but lazyfree folio batching, since device-exclusive only works for anonymous folios. Userspace visible effect is simply kernel crashing somewhere due to refcount/mapcount corruption. Link: https://lore.kernel.org/20260518063656.3721056-1-dev.jain@arm.com Fixes: 354dffd29575 ("mm: support batched unmap for lazyfree large folios during reclamation") Signed-off-by: Dev Jain Acked-by: Barry Song Acked-by: David Hildenbrand (Arm) Reviewed-by: Lorenzo Stoakes Cc: Anshuman Khandual Cc: Barry Song Cc: Dev Jain Cc: Harry Yoo Cc: Jann Horn Cc: Liam R. Howlett Cc: Rik van Riel Cc: Ryan Roberts Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/rmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/rmap.c b/mm/rmap.c index 78b7fb5f367c..99e1b3dc390b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2030,6 +2030,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, mmu_notifier_invalidate_range_start(&range); while (page_vma_mapped_walk(&pvmw)) { + nr_pages = 1; + /* * If the folio is in an mlock()d vma, we must not swap it out. */ -- cgit v1.2.3 From 441f92f7d386b85bad16de49db95a307cba048a2 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 18 May 2026 08:25:58 -0700 Subject: mm/damon/sysfs-schemes: delete tried region in regions_rmdirs() DAMON sysfs maintains the DAMOS tried region directory objects via a linked list. When the user requests refresh of the directories, DAMON sysfs removes all the region directories first, and then generate updated regions directory on the empty space. The removal function (damon_sysfs_scheme_regions_rm_dirs()) only puts the kobj objects. Deletion of the container region object from the linked list is done inside the kobj release callback function. If somehow the callback invocation is delayed, the list will contain regions list that gonna be freed. If the updated region directories creation is started in this situation, the list can be corrupted and use-after-free can happen. Because the kobj objects are managed by only DAMON sysfs, the issue cannot happen in normal situation. But, such delays can be made on kernels that built with CONFIG_DEBUG_KOBJECT_RELEASE. On the kernel, the issue can indeed be reproduced like below. # damo start --damos_action stat # cd /sys/kernel/mm/damon/admin/kdamonds/0/ # for i in {1..10}; do echo update_schemes_tried_regions > state; done # dmesg | grep underflow [ 89.296152] refcount_t: underflow; use-after-free. Fix the issue by removing the region object from the list when decrementing the reference count. Also update damos_sysfs_populate_region_dir() to add the region object to the list only after the kobject_init_and_add() is success, so that fail of kobject_init_and_add() is not leaving the deallocated object on the list. The issue was discovered [1] by Sashiko. Link: https://lore.kernel.org/20260518152559.93038-1-sj@kernel.org Link: https://lore.kernel.org/20260513011920.119183-1-sj@kernel.org [1] Fixes: 9277d0367ba1 ("mm/damon/sysfs-schemes: implement scheme region directory") Signed-off-by: SeongJae Park Cc: # 6.2.x Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 04746cbb3327..a8014780edae 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -88,7 +88,6 @@ static void damon_sysfs_scheme_region_release(struct kobject *kobj) struct damon_sysfs_scheme_region *region = container_of(kobj, struct damon_sysfs_scheme_region, kobj); - list_del(®ion->list); kfree(region); } @@ -164,7 +163,7 @@ static void damon_sysfs_scheme_regions_rm_dirs( struct damon_sysfs_scheme_region *r, *next; list_for_each_entry_safe(r, next, ®ions->regions_list, list) { - /* release function deletes it from the list */ + list_del(&r->list); kobject_put(&r->kobj); regions->nr_regions--; } @@ -2928,14 +2927,15 @@ void damos_sysfs_populate_region_dir(struct damon_sysfs_schemes *sysfs_schemes, if (!region) return; region->sz_filter_passed = sz_filter_passed; - list_add_tail(®ion->list, &sysfs_regions->regions_list); - sysfs_regions->nr_regions++; if (kobject_init_and_add(®ion->kobj, &damon_sysfs_scheme_region_ktype, &sysfs_regions->kobj, "%d", sysfs_regions->nr_regions++)) { kobject_put(®ion->kobj); + return; } + list_add_tail(®ion->list, &sysfs_regions->regions_list); + sysfs_regions->nr_regions++; } int damon_sysfs_schemes_clear_regions( -- cgit v1.2.3 From e16f17a9c5af50221184d1ef4be4056bf3c4209e Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 18 May 2026 10:28:19 +0200 Subject: mm: memcontrol: propagate NMI slab stats to memcg vmstats flush_nmi_stats() drains per-node NMI slab atomics into the per-node lruvec_stats, but does not propagate them to the memcg-level vmstats. For non NMI case, account_slab_nmi_safe() calls mod_memcg_lruvec_state() which updates both per-node lruvec_stats and memcg-level vmstats, so flush_nmi_stats() needs to flush to per-node lruvec_stats as well as memcg-level vmstats. So fix this by flushing to the memcg-level vmstats for NMI too. Link: https://lore.kernel.org/20260518082830.599102-1-alex@ghiti.fr Fixes: 940b01fc8dc1 ("memcg: nmi safe memcg stats for specific archs") Signed-off-by: Alexandre Ghiti Acked-by: Shakeel Butt Acked-by: Johannes Weiner Reviewed-by: Harry Yoo (Oracle) Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Signed-off-by: Andrew Morton --- mm/memcontrol.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c03d4787d466..507be1cca392 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4352,6 +4352,9 @@ static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent, lstats->state[index] += slab; if (plstats) plstats->state_pending[index] += slab; + memcg->vmstats->state[index] += slab; + if (parent) + parent->vmstats->state_pending[index] += slab; } if (atomic_read(&pn->slab_unreclaimable)) { int slab = atomic_xchg(&pn->slab_unreclaimable, 0); @@ -4360,6 +4363,9 @@ static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent, lstats->state[index] += slab; if (plstats) plstats->state_pending[index] += slab; + memcg->vmstats->state[index] += slab; + if (parent) + parent->vmstats->state_pending[index] += slab; } } } -- cgit v1.2.3 From 09e7827e785729f391c8d46dc71becce70d296ab Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Mon, 16 Mar 2026 20:49:56 +0530 Subject: kernel/fork: validate exit_signal in kernel_clone() When a child process exits, it sends exit_signal to its parent via do_notify_parent(). The clone() syscall constructs exit_signal as: (lower_32_bits(clone_flags) & CSIGNAL) CSIGNAL is 0xff, so values in the range 65-255 are possible. However, valid_signal() only accepts signals up to _NSIG (64 on x86_64). A non-zero non-valid exit_signal acts the same as exit_signal == 0: the parent process is not signaled when the child terminates. The syzkaller reproducer triggers this by calling clone() with flags=0x80, resulting in exit_signal = (0x80 & CSIGNAL) = 128, which exceeds _NSIG and is not a valid signal. The v1 of this patch added the check only in the clone() syscall handler, which is incomplete. kernel_clone() has other callers such as sys_ia32_clone() which would remain unprotected. Move the check to kernel_clone() to cover all callers. Since the valid_signal() check is now in kernel_clone() and covers all callers including clone3(), the same check in copy_clone_args_from_user() becomes redundant and is removed. The higher 32bits check for clone3() is kept as it is clone3() specific. Note that this is a user-visible change: previously, passing an invalid exit_signal to clone() was silently accepted. The man page for clone() does not document any defined behavior for invalid exit_signal values, so rejecting them with -EINVAL is the correct behavior. It is unlikely that any sane application relies on passing an invalid exit_signal. [oleg@redhat.com: the comment above kernel_clone() should be updated] Link: https://lore.kernel.org/abwvgU17W8wuW2-J@redhat.com Link: https://lore.kernel.org/20260316151956.563558-1-kartikey406@gmail.com Fixes: 3f2c788a1314 ("fork: prevent accidental access to clone3 features") Signed-off-by: Deepanshu Kartikey Signed-off-by: Oleg Nesterov Reported-by: syzbot+bbe6b99feefc3a0842de@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=bbe6b99feefc3a0842de Tested-by: syzbot+bbe6b99feefc3a0842de@syzkaller.appspotmail.com Link: https://lore.kernel.org/all/20260307064202.353405-1-kartikey406@gmail.com/T/ [v1] Link: https://lore.kernel.org/all/20260316104536.558108-1-kartikey406@gmail.com/T/ [v2] Acked-by: Oleg Nesterov Acked-by: Michal Hocko Cc: Ben Segall Cc: Christian Brauner Cc: David Hildenbrand Cc: Dietmar Eggemann Cc: Ingo Molnar Cc: Juri Lelli Cc: Kees Cook Cc: Liam Howlett Cc: Lorenzo Stoakes (Oracle) Cc: Mel Gorman Cc: Mike Rapoport Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Valentin Schneider Cc: Vincent Guittot Cc: Vlastimil Babka Cc: Tetsuo Handa Signed-off-by: Andrew Morton --- kernel/fork.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 5f3fdfdb14c7..8ac38beae360 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2664,8 +2664,6 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) * * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. - * - * args->exit_signal is expected to be checked for sanity by the caller. */ pid_t kernel_clone(struct kernel_clone_args *args) { @@ -2700,6 +2698,9 @@ pid_t kernel_clone(struct kernel_clone_args *args) (args->pidfd == args->parent_tid)) return -EINVAL; + if (!valid_signal(args->exit_signal)) + return -EINVAL; + /* * Determine whether and which event to report to ptracer. When * called from kernel_thread or CLONE_UNTRACED is explicitly @@ -2898,11 +2899,9 @@ static noinline int copy_clone_args_from_user(struct kernel_clone_args *kargs, return -EINVAL; /* - * Verify that higher 32bits of exit_signal are unset and that - * it is a valid signal + * Verify that higher 32bits of exit_signal are unset */ - if (unlikely((args.exit_signal & ~((u64)CSIGNAL)) || - !valid_signal(args.exit_signal))) + if (unlikely(args.exit_signal & ~((u64)CSIGNAL))) return -EINVAL; if ((args.flags & CLONE_INTO_CGROUP) && -- cgit v1.2.3 From 2c6f81d58741349298f51ff697d988cb42881453 Mon Sep 17 00:00:00 2001 From: Sunny Patel Date: Fri, 1 May 2026 17:21:16 +0530 Subject: mm/migrate_device: fix pgtable leak in migrate_vma_insert_huge_pmd_page When migrate_vma_insert_huge_pmd_page() jumps to unlock_abort due to a PMD check failure, the pgtable allocated earlier via pte_alloc_one() is never freed, causing a memory leak. Added free_abort label to release the pgtable in error path. Link: https://lore.kernel.org/20260501115122.23288-1-nueralspacetech@gmail.com Fixes: a30b48bf1b24 ("mm/migrate_device: implement THP migration of zone device pages") Signed-off-by: Sunny Patel Acked-by: David Hildenbrand (Arm) Reviewed-by: Huang Ying Cc: Alistair Popple Cc: Balbir Singh Cc: Byungchul Park Cc: Gregory Price Cc: Joshua Hahn Cc: Matthew Brost Cc: Rakie Kim Cc: Zi Yan Cc: Signed-off-by: Andrew Morton --- mm/migrate_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/migrate_device.c b/mm/migrate_device.c index ab49d4dcdb60..19cd14b34114 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -840,7 +840,7 @@ static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate, } else { if (folio_is_zone_device(folio) && !folio_is_device_coherent(folio)) { - goto abort; + goto free_abort; } entry = folio_mk_pmd(folio, vma->vm_page_prot); if (vma->vm_flags & VM_WRITE) @@ -893,6 +893,8 @@ static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate, unlock_abort: spin_unlock(ptl); +free_abort: + pte_free(vma->vm_mm, pgtable); abort: for (i = 0; i < HPAGE_PMD_NR; i++) src[i] &= ~MIGRATE_PFN_MIGRATE; -- cgit v1.2.3 From f0af98ff6b3077278974a460becbd05bbc710e60 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Sat, 25 Apr 2026 16:06:48 +0300 Subject: MAINTAINERS, mailmap: change email for Eugen Hristev Replace old bouncing emails with ehristev@kernel.org Link: https://lore.kernel.org/20260425-eh-mailmap-v1-1-58788d401eef@kernel.org Signed-off-by: Eugen Hristev Signed-off-by: Andrew Morton --- .mailmap | 5 +++-- MAINTAINERS | 12 ++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.mailmap b/.mailmap index de41cfdae5d0..0b51bce3c05a 100644 --- a/.mailmap +++ b/.mailmap @@ -263,8 +263,9 @@ Enric Balletbo i Serra Enric Balletbo i Serra Erik Kaneda Ethan Carter Edwards Ethan Edwards -Eugen Hristev -Eugen Hristev +Eugen Hristev +Eugen Hristev +Eugen Hristev Evgeniy Polyakov Ezequiel Garcia Faith Ekstrand diff --git a/MAINTAINERS b/MAINTAINERS index 9eb15dacb939..8cf9ba51d981 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10832,7 +10832,7 @@ F: include/linux/generic-radix-tree.h F: lib/generic-radix-tree.c GENERIC RESISTIVE TOUCHSCREEN ADC DRIVER -M: Eugen Hristev +M: Eugen Hristev L: linux-input@vger.kernel.org S: Maintained F: drivers/input/touchscreen/resistive-adc-touch.c @@ -17343,7 +17343,7 @@ F: Documentation/devicetree/bindings/sound/mikroe,mikroe-proto.txt F: sound/soc/atmel MICROCHIP CSI2DC DRIVER -M: Eugen Hristev +M: Eugen Hristev L: linux-media@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/media/microchip,csi2dc.yaml @@ -17370,7 +17370,7 @@ F: drivers/i2c/busses/i2c-at91-*.c F: drivers/i2c/busses/i2c-at91.h MICROCHIP ISC DRIVER -M: Eugen Hristev +M: Eugen Hristev L: linux-media@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/media/atmel,isc.yaml @@ -17382,7 +17382,7 @@ F: drivers/staging/media/deprecated/atmel/atmel-sama*-isc* F: include/linux/atmel-isc-media.h MICROCHIP ISI DRIVER -M: Eugen Hristev +M: Eugen Hristev L: linux-media@vger.kernel.org S: Supported F: drivers/media/platform/atmel/atmel-isi.c @@ -17572,7 +17572,7 @@ F: Documentation/devicetree/bindings/display/bridge/microchip,sam9x75-lvds.yaml F: drivers/gpu/drm/bridge/microchip-lvds.c MICROCHIP SAMA5D2-COMPATIBLE ADC DRIVER -M: Eugen Hristev +M: Eugen Hristev L: linux-iio@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/iio/adc/atmel,sama5d2-adc.yaml @@ -24123,7 +24123,7 @@ F: drivers/mmc/host/sdhci* SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) MICROCHIP DRIVER M: Aubin Constans -R: Eugen Hristev +R: Eugen Hristev L: linux-mmc@vger.kernel.org S: Supported F: drivers/mmc/host/sdhci-of-at91.c -- cgit v1.2.3 From 04aa71da5f35aacdc9ae9cb5150947daa624f641 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Fri, 15 May 2026 17:30:09 +0200 Subject: mm/vmalloc: do not trigger BUG() on BH disabled context __get_vm_area_node() currently triggers a BUG() if in_interrupt() returns true. However, in_interrupt() also reports true when BH are disabled. The bridge code can call rhashtable_lookup_insert_fast() with bottom halves disabled: __vlan_add() -> br_fdb_add_local() spin_lock_bh(&br->hash_lock); <-- Disable BH -> fdb_add_local() -> fdb_create() -> rhashtable_lookup_insert_fast() -> kvmalloc() -> vmalloc() -> __get_vm_area_node() -> BUG_ON(in_interrupt()) spin_unlock_bh(&br->hash_lock) this triggers the BUG() despite the caller not being in NMI or hard IRQ context. Replace the in_interrupt() check with in_nmi() || in_hardirq(). Link: https://lore.kernel.org/20260515153009.2296191-1-urezki@gmail.com Fixes: c6307674ed82 ("mm: kvmalloc: add non-blocking support for vmalloc") Signed-off-by: Uladzislau Rezki (Sony) Cc: Ido Schimmel Reported-by: syzbot+8b12fc6e0fb139765b58@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/69ff8c7c.050a0220.1036b8.000b.GAE@google.com/ Reviewed-by: Baoquan He Cc: Signed-off-by: Andrew Morton --- mm/vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index c31a8615a832..bb6ae08d18f5 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3203,7 +3203,7 @@ struct vm_struct *__get_vm_area_node(unsigned long size, struct vm_struct *area; unsigned long requested_size = size; - BUG_ON(in_interrupt()); + BUG_ON(in_nmi() || in_hardirq()); size = ALIGN(size, 1ul << shift); if (unlikely(!size)) return NULL; -- cgit v1.2.3 From 54cf41c969da6637cce790b7400da1451609db9b Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Fri, 15 May 2026 12:47:01 +0900 Subject: Revert "mm: introduce a new page type for page pool in page type" This reverts commit db359fccf212 ("mm: introduce a new page type for page pool in page type") and a part of 735a309b4bfb9e ("net: add net_iov_init() and use it to initialize ->page_type"). Netpp page_type'ed pages might be used in mapping so as to use @_mapcount. However, since @page_type and @_mapcount are union'ed in struct page, these two can't be used at the same time. Revert the commit introducing page_type for Netpp for now. The patch will be retried once @page_type and @_mapcount get allowed to be used at the same time. The revert also includes removal of @page_type initialization part introduced by commit 735a309b4bfb9e ("net: add net_iov_init() and use it to initialize ->page_type"), which will be restored on the retry. Link: https://lore.kernel.org/20260515034701.17027-1-byungchul@sk.com Fixes: db359fccf212 ("mm: introduce a new page type for page pool in page type") Signed-off-by: Byungchul Park Reported-by: Dragos Tatulea Closes: https://lore.kernel.org/all/982b9bc1-0a0a-4fc5-8e3a-3672db2b29a1@nvidia.com Acked-by: Jakub Kicinski Acked-by: David Hildenbrand (Arm) Acked-by: Harry Yoo (Oracle) Reviewed-by: Lorenzo Stoakes Cc: Alexei Starovoitov Cc: Baolin Wang Cc: Brendan Jackman Cc: David S. Miller Cc: Eric Dumazet Cc: Ilias Apalodimas Cc: Jesper Dangaard Brouer Cc: Johannes Weiner Cc: John Fastabend Cc: Leon Romanovsky Cc: Liam R. Howlett Cc: Mark Bloch Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Paolo Abeni Cc: Pavel Begunkov Cc: Saeed Mahameed Cc: Simon Horman Cc: Stanislav Fomichev Cc: Suren Baghdasaryan Cc: Tariq Toukan Cc: Toke Hoiland-Jorgensen Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 2 +- include/linux/mm.h | 27 +++++++++++++++++++++--- include/linux/page-flags.h | 6 ------ include/net/netmem.h | 19 ++--------------- mm/page_alloc.c | 13 ++++-------- net/core/netmem_priv.h | 23 +++++++++++--------- net/core/page_pool.c | 24 ++------------------- 7 files changed, 46 insertions(+), 68 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 190b8b66b3ce..d3bab198c99c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -708,7 +708,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); page = xdpi.page.page; - /* No need to check PageNetpp() as we + /* No need to check page_pool_page_is_pp() as we * know this is a page_pool page. */ page_pool_recycle_direct(pp_page_to_nmdesc(page)->pp, diff --git a/include/linux/mm.h b/include/linux/mm.h index af23453e9dbd..06bbe9eba636 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -5174,9 +5174,10 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); * DMA mapping IDs for page_pool * * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and - * stashes it in the upper bits of page->pp_magic. Non-PP pages can have - * arbitrary kernel pointers stored in the same field as pp_magic (since - * it overlaps with page->lru.next), so we must ensure that we cannot + * stashes it in the upper bits of page->pp_magic. We always want to be able to + * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP + * pages can have arbitrary kernel pointers stored in the same field as pp_magic + * (since it overlaps with page->lru.next), so we must ensure that we cannot * mistake a valid kernel pointer with any of the values we write into this * field. * @@ -5211,6 +5212,26 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); #define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \ PP_DMA_INDEX_SHIFT) +/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is + * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for + * the head page of compound page and bit 1 for pfmemalloc page, as well as the + * bits used for the DMA index. page_is_pfmemalloc() is checked in + * __page_pool_put_page() to avoid recycling the pfmemalloc page. + */ +#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL) + +#ifdef CONFIG_PAGE_POOL +static inline bool page_pool_page_is_pp(const struct page *page) +{ + return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE; +} +#else +static inline bool page_pool_page_is_pp(const struct page *page) +{ + return false; +} +#endif + #define PAGE_SNAPSHOT_FAITHFUL (1 << 0) #define PAGE_SNAPSHOT_PG_BUDDY (1 << 1) #define PAGE_SNAPSHOT_PG_IDLE (1 << 2) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 0e03d816e8b9..7223f6f4e2b4 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -923,7 +923,6 @@ enum pagetype { PGTY_zsmalloc = 0xf6, PGTY_unaccepted = 0xf7, PGTY_large_kmalloc = 0xf8, - PGTY_netpp = 0xf9, PGTY_mapcount_underflow = 0xff }; @@ -1056,11 +1055,6 @@ PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc) PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted) PAGE_TYPE_OPS(LargeKmalloc, large_kmalloc, large_kmalloc) -/* - * Marks page_pool allocated pages. - */ -PAGE_TYPE_OPS(Netpp, netpp, netpp) - /** * PageHuge - Determine if the page belongs to hugetlbfs * @page: The page to test. diff --git a/include/net/netmem.h b/include/net/netmem.h index 78fe51e5756b..bccacd21b6c3 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -94,20 +94,10 @@ enum net_iov_type { */ struct net_iov { struct netmem_desc desc; - unsigned int page_type; enum net_iov_type type; struct net_iov_area *owner; }; -/* Make sure 'the offset of page_type in struct page == the offset of - * type in struct net_iov'. - */ -#define NET_IOV_ASSERT_OFFSET(pg, iov) \ - static_assert(offsetof(struct page, pg) == \ - offsetof(struct net_iov, iov)) -NET_IOV_ASSERT_OFFSET(page_type, page_type); -#undef NET_IOV_ASSERT_OFFSET - struct net_iov_area { /* Array of net_iovs for this area. */ struct net_iov *niovs; @@ -127,11 +117,7 @@ static inline unsigned int net_iov_idx(const struct net_iov *niov) return niov - net_iov_owner(niov)->niovs; } -/* Initialize a niov: stamp the owning area, the memory provider type, - * and the page_type "no type" sentinel expected by the page-type API - * (see PAGE_TYPE_OPS in ) so that - * page_pool_set_pp_info() can later call __SetPageNetpp() on a niov - * cast to struct page. +/* Initialize a niov: stamp the owning area, the memory provider type. */ static inline void net_iov_init(struct net_iov *niov, struct net_iov_area *owner, @@ -139,7 +125,6 @@ static inline void net_iov_init(struct net_iov *niov, { niov->owner = owner; niov->type = type; - niov->page_type = UINT_MAX; } /* netmem */ @@ -245,7 +230,7 @@ static inline unsigned long netmem_pfn_trace(netmem_ref netmem) */ #define pp_page_to_nmdesc(p) \ ({ \ - DEBUG_NET_WARN_ON_ONCE(!PageNetpp(p)); \ + DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p)); \ __pp_page_to_nmdesc(p); \ }) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 23c7298d3be2..d49c254174da 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1035,6 +1035,7 @@ static inline bool page_expected_state(struct page *page, #ifdef CONFIG_MEMCG page->memcg_data | #endif + page_pool_page_is_pp(page) | (page->flags.f & check_flags))) return false; @@ -1061,6 +1062,8 @@ static const char *page_bad_reason(struct page *page, unsigned long flags) if (unlikely(page->memcg_data)) bad_reason = "page still charged to cgroup"; #endif + if (unlikely(page_pool_page_is_pp(page))) + bad_reason = "page_pool leak"; return bad_reason; } @@ -1377,17 +1380,9 @@ __always_inline bool __free_pages_prepare(struct page *page, mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1); folio->mapping = NULL; } - if (unlikely(page_has_type(page))) { - /* networking expects to clear its page type before releasing */ - if (is_check_pages_enabled()) { - if (unlikely(PageNetpp(page))) { - bad_page(page, "page_pool leak"); - return false; - } - } + if (unlikely(page_has_type(page))) /* Reset the page_type (which overlays _mapcount) */ page->page_type = UINT_MAX; - } if (is_check_pages_enabled()) { if (free_page_is_bad(page)) diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h index 3e6fde8f1726..23175cb2bd86 100644 --- a/net/core/netmem_priv.h +++ b/net/core/netmem_priv.h @@ -8,18 +8,21 @@ static inline unsigned long netmem_get_pp_magic(netmem_ref netmem) return netmem_to_nmdesc(netmem)->pp_magic & ~PP_DMA_INDEX_MASK; } -static inline bool netmem_is_pp(netmem_ref netmem) +static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic) +{ + netmem_to_nmdesc(netmem)->pp_magic |= pp_magic; +} + +static inline void netmem_clear_pp_magic(netmem_ref netmem) { - struct page *page; + WARN_ON_ONCE(netmem_to_nmdesc(netmem)->pp_magic & PP_DMA_INDEX_MASK); - /* XXX: Now that the offset of page_type is shared between - * struct page and net_iov, just cast the netmem to struct page - * unconditionally by clearing NET_IOV if any, no matter whether - * it comes from struct net_iov or struct page. This should be - * adjusted once the offset is no longer shared. - */ - page = (struct page *)((__force unsigned long)netmem & ~NET_IOV); - return PageNetpp(page); + netmem_to_nmdesc(netmem)->pp_magic = 0; +} + +static inline bool netmem_is_pp(netmem_ref netmem) +{ + return (netmem_get_pp_magic(netmem) & PP_MAGIC_MASK) == PP_SIGNATURE; } static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 6e576dec80db..8171d1173221 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -707,18 +707,8 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict) void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem) { - struct page *page; - netmem_set_pp(netmem, pool); - - /* XXX: Now that the offset of page_type is shared between - * struct page and net_iov, just cast the netmem to struct page - * unconditionally by clearing NET_IOV if any, no matter whether - * it comes from struct net_iov or struct page. This should be - * adjusted once the offset is no longer shared. - */ - page = (struct page *)((__force unsigned long)netmem & ~NET_IOV); - __SetPageNetpp(page); + netmem_or_pp_magic(netmem, PP_SIGNATURE); /* Ensuring all pages have been split into one fragment initially: * page_pool_set_pp_info() is only called once for every page when it @@ -733,17 +723,7 @@ void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem) void page_pool_clear_pp_info(netmem_ref netmem) { - struct page *page; - - /* XXX: Now that the offset of page_type is shared between - * struct page and net_iov, just cast the netmem to struct page - * unconditionally by clearing NET_IOV if any, no matter whether - * it comes from struct net_iov or struct page. This should be - * adjusted once the offset is no longer shared. - */ - page = (struct page *)((__force unsigned long)netmem & ~NET_IOV); - __ClearPageNetpp(page); - + netmem_clear_pp_magic(netmem); netmem_set_pp(netmem, NULL); } -- cgit v1.2.3 From e3ef9a28f558d1cbf0b42d6dcd16c60da557562b Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 22 May 2026 15:05:07 +0800 Subject: LoongArch: kprobes: Use larch_insn_text_copy() to patch instructions On SMP systems, kprobe handlers would occasionally fail to execute on certain CPU cores. The issue is hard to reproduce and typically occurs randomly under high system load. The root cause is a software-side instruction hazard. According to the LoongArch Reference Manual, while the cache coherency is maintained by hardware, software must explicitly use the "IBAR" instruction to ensure the instruction fetch unit (IFU) observes the effects of recent stores. The current arch_arm_kprobe() and arch_disarm_kprobe() only execute the "IBAR" barrier (via flush_insn_slot -> local_flush_icache_range) on the local CPU. This leaves a vulnerable window where remote CPU cores may continue executing stale instructions from their pipelines or prefetch buffers, as they have not executed an "IBAR" since the code modification. Switch to larch_insn_text_copy() to fix this: 1. Synchronization: It uses stop_machine_cpuslocked() to synchronize all online CPUs, ensuring no CPU is executing the target code area during modification. 2. Visibility: By passing cpu_online_mask to stop_machine_cpuslocked(), the callback text_copy_cb() is executed on all online cores. Each CPU core invokes local_flush_icache_range() to execute "IBAR", clearing instruction hazards system-wide and ensuring the "break" instruction is visible to the fetch units of all cores. 3. Robustness: It properly manages memory write permissions (ROX/RW) for the kernel text segment during patching, ensuring compatibility with CONFIG_STRICT_KERNEL_RWX. Cc: # 6.18+ Fixes: 6d4cc40fb5f5 ("LoongArch: Add kprobes support") Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/kprobes.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/kernel/kprobes.c b/arch/loongarch/kernel/kprobes.c index 8ba391cfabb0..04b5b05715cd 100644 --- a/arch/loongarch/kernel/kprobes.c +++ b/arch/loongarch/kernel/kprobes.c @@ -60,16 +60,18 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe); /* Install breakpoint in text */ void arch_arm_kprobe(struct kprobe *p) { - *p->addr = KPROBE_BP_INSN; - flush_insn_slot(p); + u32 insn = KPROBE_BP_INSN; + + larch_insn_text_copy(p->addr, &insn, LOONGARCH_INSN_SIZE); } NOKPROBE_SYMBOL(arch_arm_kprobe); /* Remove breakpoint from text */ void arch_disarm_kprobe(struct kprobe *p) { - *p->addr = p->opcode; - flush_insn_slot(p); + u32 insn = p->opcode; + + larch_insn_text_copy(p->addr, &insn, LOONGARCH_INSN_SIZE); } NOKPROBE_SYMBOL(arch_disarm_kprobe); -- cgit v1.2.3 From 1c856e158fd34ef2c4475a81c1dc386329989938 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 22 May 2026 15:05:07 +0800 Subject: LoongArch: kprobes: Fix handling of fatal unrecoverable recursions KPROBE_HIT_SS and KPROBE_REENTER are two types of fatal recursions that can not be safely recovered in kprobes. KPROBE_HIT_SS means that a kprobe is hit during single-stepping. At this point, the architecture-specific single-step context is already active. Nested single-stepping would corrupt the state, as the kprobe control block (kcb) and hardware registers cannot safely store multiple levels of stepping state. KPROBE_REENTER means that a third-level recursion occurs when a probe is hit while the system is already handling a nested probe (second- level). The kcb only provides a single slot (prev_kprobe) to backup the state. When a third probe is hit, there is no more space to save the state without corrupting the first-level backup. Kprobes work by replacing instructions with breakpoints. In order to execute the original instruction and continue, it must be moved to a temporary "single-step" slot. Since there is no backup space left to set up this slot safely, the CPU would be forced to return to the same original breakpoint address, triggering an endless loop. Currently, the code only prints a warning and returns. This leads to an infinite re-entry loop as the CPU repeatedly hits the same trap and a "stuck" CPU core because preemption was disabled at the start of the handler and never re-enabled in this early return path. Fix the logic by: 1. Merging KPROBE_HIT_SS and KPROBE_REENTER cases, as both represent fatal recursions that cannot be safely recovered. 2. Replacing WARN_ON_ONCE() with BUG() to terminate the system. This aligns LoongArch with other architectures (x86, arm64, riscv) and prevents stack overflow while providing diagnostic information. Fixes: 6d4cc40fb5f5 ("LoongArch: Add kprobes support") Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/kprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/kprobes.c b/arch/loongarch/kernel/kprobes.c index 04b5b05715cd..1985ed30dd16 100644 --- a/arch/loongarch/kernel/kprobes.c +++ b/arch/loongarch/kernel/kprobes.c @@ -186,16 +186,16 @@ static bool reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { switch (kcb->kprobe_status) { - case KPROBE_HIT_SS: case KPROBE_HIT_SSDONE: case KPROBE_HIT_ACTIVE: kprobes_inc_nmissed_count(p); setup_singlestep(p, regs, kcb, 1); break; + case KPROBE_HIT_SS: case KPROBE_REENTER: pr_warn("Failed to recover from reentered kprobes.\n"); dump_kprobe(p); - WARN_ON_ONCE(1); + BUG(); break; default: WARN_ON(1); -- cgit v1.2.3 From 4a09f4a23a3003d31f8545dd0770f2b3b0f54d8b Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Fri, 22 May 2026 15:05:12 +0800 Subject: LoongArch: KVM: Move some variable declarations to paravirt.h Some variables relative with paravirt feature are declared in the header file asm/qspinlock.h, however this file can be included only when option CONFIG_SMP is on. There is compiling warnings if CONFIG_SMP is off since variables are not declared. Move these variable declarations to header file asm/paravirt.h to avoid compiling warnings. Fixes: c43dce6f13fb ("LoongArch: KVM: Make vcpu_is_preempted() as a macro rather than function") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202605061313.O8Hswm2b-lkp@intel.com/ Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/paravirt.h | 6 ++++++ arch/loongarch/include/asm/qspinlock.h | 5 +---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h index 0111f0ad5f73..acae1c5e5f88 100644 --- a/arch/loongarch/include/asm/paravirt.h +++ b/arch/loongarch/include/asm/paravirt.h @@ -4,6 +4,12 @@ #ifdef CONFIG_PARAVIRT +#include + +DECLARE_STATIC_KEY_FALSE(virt_preempt_key); +DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); +DECLARE_PER_CPU(struct kvm_steal_time, steal_time); + int __init pv_ipi_init(void); int __init pv_time_init(void); int __init pv_spinlock_init(void); diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h index 0ee15b3b3937..fbfc6be82f26 100644 --- a/arch/loongarch/include/asm/qspinlock.h +++ b/arch/loongarch/include/asm/qspinlock.h @@ -3,12 +3,9 @@ #define _ASM_LOONGARCH_QSPINLOCK_H #include -#include +#include #ifdef CONFIG_PARAVIRT -DECLARE_STATIC_KEY_FALSE(virt_preempt_key); -DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); -DECLARE_PER_CPU(struct kvm_steal_time, steal_time); #define virt_spin_lock virt_spin_lock -- cgit v1.2.3 From 2982e599fff6faa21c8df147d96fc7af6c1a2f24 Mon Sep 17 00:00:00 2001 From: e521588 Date: Wed, 20 May 2026 09:27:17 +0200 Subject: esp: fix page frag reference leak on skb_to_sgvec failure In esp_output_tail(), when esp->inplace is false, the old skb page frags are replaced with a new page from the xfrm page_frag cache. The source scatterlist (sg) is built from the old frags before the replacement, and esp_ssg_unref() is responsible for releasing the old page references after the crypto operation completes. However, if the second skb_to_sgvec() call (which builds the destination scatterlist from the new page) fails, the code jumps to error_free which only calls kfree(tmp). The old page frag references captured in the source scatterlist are never released: 1. sg[] is built from old frags via skb_to_sgvec() (no extra get_page) 2. nr_frags is set to 1 and frag[0] is replaced with the new page 3. Second skb_to_sgvec() fails -> goto error_free 4. kfree(tmp) frees the sg[] memory but old frags are not unref'd 5. kfree_skb() only releases frag[0] (the new page), not the old ones Fix this by adding a bool parameter to esp_ssg_unref() that, when true, unconditionally unrefs the source scatterlist frags without checking req->src and req->dst, since those fields are not yet initialized by aead_request_set_crypt() at the point of the error. Existing callers pass false to preserve the original behavior. The same issue exists in both esp4 and esp6 as the code is identical. Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible") Fixes: 03e2a30f6a27 ("esp6: Avoid skb_cow_data whenever possible") Signed-off-by: Alessandro Schino <7991aleschino@gmail.com> Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 12 +++++++----- net/ipv6/esp6.c | 12 +++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 6a5febbdbee4..8314d7bddcb7 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -96,7 +96,7 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, __alignof__(struct scatterlist)); } -static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) +static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb, bool already_unref) { struct crypto_aead *aead = x->data; int extralen = 0; @@ -113,7 +113,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) /* Unref skb_frag_pages in the src scatterlist if necessary. * Skip the first sg which comes from skb->data. */ - if (req->src != req->dst) + if (already_unref || req->src != req->dst) for (sg = sg_next(req->src); sg; sg = sg_next(sg)) skb_page_unref(page_to_netmem(sg_page(sg)), skb->pp_recycle); @@ -220,7 +220,7 @@ static void esp_output_done(void *data, int err) } tmp = ESP_SKB_CB(skb)->tmp; - esp_ssg_unref(x, tmp, skb); + esp_ssg_unref(x, tmp, skb, false); kfree(tmp); if (xo && (xo->flags & XFRM_DEV_RESUME)) { @@ -569,8 +569,10 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * err = skb_to_sgvec(skb, dsg, (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); - if (unlikely(err < 0)) + if (unlikely(err < 0)) { + esp_ssg_unref(x, tmp, skb, true); goto error_free; + } } if ((x->props.flags & XFRM_STATE_ESN)) @@ -602,7 +604,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * } if (sg != dsg) - esp_ssg_unref(x, tmp, skb); + esp_ssg_unref(x, tmp, skb, false); if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) err = esp_output_tail_tcp(x, skb); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9c06c5a1419d..9d0c4957ac62 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -113,7 +113,7 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, __alignof__(struct scatterlist)); } -static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) +static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb, bool already_unref) { struct crypto_aead *aead = x->data; int extralen = 0; @@ -130,7 +130,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) /* Unref skb_frag_pages in the src scatterlist if necessary. * Skip the first sg which comes from skb->data. */ - if (req->src != req->dst) + if (already_unref || req->src != req->dst) for (sg = sg_next(req->src); sg; sg = sg_next(sg)) skb_page_unref(page_to_netmem(sg_page(sg)), skb->pp_recycle); @@ -254,7 +254,7 @@ static void esp_output_done(void *data, int err) } tmp = ESP_SKB_CB(skb)->tmp; - esp_ssg_unref(x, tmp, skb); + esp_ssg_unref(x, tmp, skb, false); kfree(tmp); esp_output_encap_csum(skb); @@ -600,8 +600,10 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info err = skb_to_sgvec(skb, dsg, (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); - if (unlikely(err < 0)) + if (unlikely(err < 0)) { + esp_ssg_unref(x, tmp, skb, true); goto error_free; + } } if ((x->props.flags & XFRM_STATE_ESN)) @@ -634,7 +636,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info } if (sg != dsg) - esp_ssg_unref(x, tmp, skb); + esp_ssg_unref(x, tmp, skb, false); if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) err = esp_output_tail_tcp(x, skb); -- cgit v1.2.3 From dfa0d7b0ff1eb6b2c416b8fdb9b4f2cefba57a40 Mon Sep 17 00:00:00 2001 From: Jingguo Tan Date: Mon, 18 May 2026 17:06:48 +0800 Subject: xfrm: esp: restore combined single-frag length gate The ESP out-of-place fast path appends the trailer in esp_output_head() before esp_output_tail() allocates the destination page frag. The head-side gate currently checks skb->data_len and tailen separately, but the tail code allocates a single destination frag from the combined post-trailer skb->data_len. Reject the page-frag fast path when the combined aligned length exceeds a page. Otherwise skb_page_frag_refill() may fall back to a single page while the destination sg still spans the combined skb->data_len. Restore this combined-length page gate for both IPv4 and IPv6. Fixes: 5bd8baab087d ("esp: limit skb_page_frag_refill use to a single page") Cc: stable@vger.kernel.org Signed-off-by: Lin Ma Signed-off-by: Chenyuan Mi Signed-off-by: Jingguo Tan Reviewed-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 4 ++-- net/ipv6/esp6.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 8314d7bddcb7..5d3a8656687e 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -419,8 +419,8 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * return err; } - if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE || - ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE) + if (ALIGN(skb->data_len + tailen, L1_CACHE_BYTES) > + PAGE_SIZE) goto cow; if (!skb_cloned(skb)) { diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9d0c4957ac62..b963b8e72604 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -448,8 +448,8 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info return err; } - if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE || - ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE) + if (ALIGN(skb->data_len + tailen, L1_CACHE_BYTES) > + PAGE_SIZE) goto cow; if (!skb_cloned(skb)) { -- cgit v1.2.3 From e194ce048f5a6c549b3a23a8c568c6470f40f772 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Thu, 9 Apr 2026 10:11:04 +0000 Subject: usb: musb: omap2430: Fix use-after-free in omap2430_probe() In omap2430_probe(), of_node_put(np) is called prematurely before the last access to np, leading to a use-after-free if the node's reference count drops to zero. Move the of_node_put() calls after the last use of np in both the success and error paths. Fixes: ffbe2feac59b ("usb: musb: omap2430: Fix probe regression for missing resources") Cc: stable Signed-off-by: Wentao Liang Link: https://patch.msgid.link/20260409101104.480623-1-vulab@iscas.ac.cn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/omap2430.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 48bb9bfb2204..333ab79f0ca9 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -337,7 +337,6 @@ static int omap2430_probe(struct platform_device *pdev) } else { device_set_of_node_from_dev(&musb->dev, &pdev->dev); } - of_node_put(np); glue->dev = &pdev->dev; glue->musb = musb; @@ -455,6 +454,7 @@ static int omap2430_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to register musb device\n"); goto err_disable_rpm; } + of_node_put(np); return 0; @@ -464,6 +464,7 @@ err_put_control_otghs: if (!IS_ERR(glue->control_otghs)) put_device(glue->control_otghs); err_put_musb: + of_node_put(np); platform_device_put(musb); return ret; -- cgit v1.2.3 From 4f88d65def6f3c90121601b4f62a4c967f3063a6 Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Mon, 13 Apr 2026 22:21:19 +0800 Subject: usb: gadget: f_hid: fix device reference leak in hidg_alloc() hidg_alloc() initializes hidg->dev with device_initialize() before calling dev_set_name(). If dev_set_name() fails, the function currently jumps to err_unlock and returns without calling put_device(). This leaves the device reference unbalanced and prevents hidg_release() from being called. Calling put_device() here is also safe, since hidg_release() only frees resources owned by hidg. The issue was identified by a static analysis tool I developed and confirmed by manual review. Route the dev_set_name() failure path through err_put_device so the device reference is dropped properly. Fixes: 89ff3dfac604 ("usb: gadget: f_hid: fix f_hidg lifetime vs cdev") Cc: stable Reviewed-by: Johan Hovold Signed-off-by: Guangshuo Li Reviewed-by: Johan Hovold johan@kernel.org Link: https://patch.msgid.link/20260413142119.2977716-1-lgs201920130244@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index c5a12a6760ea..3c6b43d06a6d 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -1622,7 +1622,7 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) hidg->dev.devt = MKDEV(major, opts->minor); ret = dev_set_name(&hidg->dev, "hidg%d", opts->minor); if (ret) - goto err_unlock; + goto err_put_device; hidg->bInterfaceSubClass = opts->subclass; hidg->bInterfaceProtocol = opts->protocol; @@ -1659,7 +1659,6 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) err_put_device: put_device(&hidg->dev); -err_unlock: mutex_unlock(&opts->lock); return ERR_PTR(ret); } -- cgit v1.2.3 From c8547c74988e0b5f4cbb1b895e2a57aae084f070 Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Mon, 27 Apr 2026 23:36:51 +0800 Subject: usb: gadget: net2280: Fix double free in probe error path usb_initialize_gadget() installs gadget_release() as the release callback for the embedded gadget device. The struct net2280 instance is therefore released through gadget_release() when the gadget device's last reference is dropped. The probe error path calls net2280_remove(), which tears down the partially initialized device and drops the gadget reference with usb_put_gadget(). Calling kfree(dev) afterwards can free the same object again. Drop the explicit kfree() and let the gadget device release callback handle the final free. This issue was found by a static analysis tool I am developing. Fixes: f770fbec4165 ("USB: UDC: net2280: Fix memory leaks") Cc: stable Signed-off-by: Guangshuo Li Reviewed-by: Alan Stern Link: https://patch.msgid.link/20260427153651.337846-1-lgs201920130244@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/net2280.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index d02765bd49ce..7c5f30cfd24d 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -3790,10 +3790,8 @@ static int net2280_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; done: - if (dev) { + if (dev) net2280_remove(pdev); - kfree(dev); - } return retval; } -- cgit v1.2.3 From 68aa70648b625fa684bc0b71bbfd905f4943ca20 Mon Sep 17 00:00:00 2001 From: Kai Aizen Date: Thu, 30 Apr 2026 20:56:43 +0300 Subject: usb: gadget: uvc: hold opts->lock across XU walks in uvc_function_bind uvc_function_bind() walks &opts->extension_units twice without holding opts->lock: - directly, for the iExtension string-descriptor fixup loop; - indirectly, four times via uvc_copy_descriptors() (once per speed), where the helper iterates uvc->desc.extension_units (which aliases &opts->extension_units) to size and emit XU descriptors. The configfs side (uvcg_extension_make / uvcg_extension_drop, in drivers/usb/gadget/function/uvc_configfs.c) takes opts->lock around its list_add_tail / list_del operations. A privileged userspace process that holds the configfs subtree open and writes the gadget UDC name to bind the function while concurrently rmdir()'ing an extensions subdir can race uvcg_extension_drop() against the bind-time list walks and dereference a freed struct uvcg_extension. Hold opts->lock from the start of the XU string-descriptor fixup through the last uvc_copy_descriptors() call, releasing on the descriptor-error path via a new error_unlock label that drops the lock before falling through to the existing error label. This matches the locking discipline of the configfs callbacks and removes the only remaining unsynchronised reader of the XU list during bind. Reachability: only privileged processes that can mount configfs and write to gadget UDC files can trigger the race, so this is a correctness fix rather than a security boundary. Fixes: 0525210c9840 ("usb: gadget: uvc: Allow definition of XUs in configfs") Cc: stable Signed-off-by: Kai Aizen Link: https://patch.msgid.link/20260430175643.67120-1-kai.aizen.dev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uvc.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 8d404d88391c..73dc7e42875f 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -768,6 +768,16 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) uvc_hs_streaming_ep.bEndpointAddress = uvc->video.ep->address; uvc_ss_streaming_ep.bEndpointAddress = uvc->video.ep->address; + /* + * Hold opts->lock across both the XU string-descriptor fixup below and + * the descriptor-copy block further down. Without this, configfs + * uvcg_extension_drop() (which takes opts->lock) can race with the + * list_for_each_entry() walks here and inside uvc_copy_descriptors(), + * leading to a UAF on a freed struct uvcg_extension. See + * drivers/usb/gadget/function/uvc_configfs.c::uvcg_extension_drop(). + */ + mutex_lock(&opts->lock); + /* * XUs can have an arbitrary string descriptor describing them. If they * have one pick up the ID. @@ -785,7 +795,7 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) ARRAY_SIZE(uvc_en_us_strings)); if (IS_ERR(us)) { ret = PTR_ERR(us); - goto error; + goto error_unlock; } uvc_iad.iFunction = opts->iad_index ? cdev->usb_strings[opts->iad_index].id : @@ -799,14 +809,14 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) /* Allocate interface IDs. */ if ((ret = usb_interface_id(c, f)) < 0) - goto error; + goto error_unlock; uvc_iad.bFirstInterface = ret; uvc_control_intf.bInterfaceNumber = ret; uvc->control_intf = ret; opts->control_interface = ret; if ((ret = usb_interface_id(c, f)) < 0) - goto error; + goto error_unlock; uvc_streaming_intf_alt0.bInterfaceNumber = ret; uvc_streaming_intf_alt1.bInterfaceNumber = ret; uvc->streaming_intf = ret; @@ -817,30 +827,32 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) if (IS_ERR(f->fs_descriptors)) { ret = PTR_ERR(f->fs_descriptors); f->fs_descriptors = NULL; - goto error; + goto error_unlock; } f->hs_descriptors = uvc_copy_descriptors(uvc, USB_SPEED_HIGH); if (IS_ERR(f->hs_descriptors)) { ret = PTR_ERR(f->hs_descriptors); f->hs_descriptors = NULL; - goto error; + goto error_unlock; } f->ss_descriptors = uvc_copy_descriptors(uvc, USB_SPEED_SUPER); if (IS_ERR(f->ss_descriptors)) { ret = PTR_ERR(f->ss_descriptors); f->ss_descriptors = NULL; - goto error; + goto error_unlock; } f->ssp_descriptors = uvc_copy_descriptors(uvc, USB_SPEED_SUPER_PLUS); if (IS_ERR(f->ssp_descriptors)) { ret = PTR_ERR(f->ssp_descriptors); f->ssp_descriptors = NULL; - goto error; + goto error_unlock; } + mutex_unlock(&opts->lock); + /* Preallocate control endpoint request. */ uvc->control_req = usb_ep_alloc_request(cdev->gadget->ep0, GFP_KERNEL); uvc->control_buf = kmalloc(UVC_MAX_REQUEST_SIZE, GFP_KERNEL); @@ -872,6 +884,8 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) return 0; +error_unlock: + mutex_unlock(&opts->lock); v4l2_error: v4l2_device_unregister(&uvc->v4l2_dev); error: -- cgit v1.2.3 From 5a4c828b8b29b47534814ade26d9aee09d5101fc Mon Sep 17 00:00:00 2001 From: Wei-Cheng Chen Date: Tue, 5 May 2026 19:26:30 +0800 Subject: xhci: tegra: Fix ghost USB device on dual-role port unplug When a USB device is unplugged from the dual-role port, the device-mode path in tegra_xhci_id_work() explicitly clears both SS and HS port power via direct hub_control ClearPortFeature(POWER) calls. This preempts the xHCI controller's normal disconnect processing -- PORT_CSC is never generated, the USB core never sees the disconnect, and the device remains in its internal tree as a ghost visible in lsusb. Add an otg_set_port_power flag to control whether the dual-role switch path performs explicit port power management. SoCs that need it (Tegra124 / Tegra210 / Tegra186) set the flag; later SoCs (Tegra194 and beyond) rely on the PHY mode change to handle disconnect naturally and skip all port power calls. Within the port power path, otg_reset_sspi additionally gates the SSPI reset sequence on host-mode entry for SoCs that require it. Flags set per SoC: Tegra124, Tegra186 -> otg_set_port_power Tegra210 -> otg_set_port_power, otg_reset_sspi Tegra194 and later -> (none) Fixes: f836e7843036 ("usb: xhci-tegra: Add OTG support") Cc: stable Signed-off-by: Wei-Cheng Chen Link: https://patch.msgid.link/20260505112630.217704-1-weichengc@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 73 ++++++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index d2214d309e96..d5637b376367 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -247,6 +247,7 @@ struct tegra_xusb_soc { bool has_ipfs; bool lpm_support; bool otg_reset_sspi; + bool otg_set_port_power; bool has_bar2; }; @@ -1352,12 +1353,13 @@ static void tegra_xhci_id_work(struct work_struct *work) struct tegra_xusb_mbox_msg msg; struct phy *phy = tegra_xusb_get_phy(tegra, "usb2", tegra->otg_usb2_port); + bool host_mode = tegra->host_mode; u32 status; int ret; - dev_dbg(tegra->dev, "host mode %s\n", str_on_off(tegra->host_mode)); + dev_dbg(tegra->dev, "host mode %s\n", str_on_off(host_mode)); - if (tegra->host_mode) + if (host_mode) phy_set_mode_ext(phy, PHY_MODE_USB_OTG, USB_ROLE_HOST); else phy_set_mode_ext(phy, PHY_MODE_USB_OTG, USB_ROLE_NONE); @@ -1366,41 +1368,43 @@ static void tegra_xhci_id_work(struct work_struct *work) tegra->otg_usb2_port); pm_runtime_get_sync(tegra->dev); - if (tegra->host_mode) { - /* switch to host mode */ - if (tegra->otg_usb3_port >= 0) { - if (tegra->soc->otg_reset_sspi) { - /* set PP=0 */ - tegra_xhci_hc_driver.hub_control( - xhci->shared_hcd, GetPortStatus, - 0, tegra->otg_usb3_port+1, - (char *) &status, sizeof(status)); - if (status & USB_SS_PORT_STAT_POWER) - tegra_xhci_set_port_power(tegra, false, - false); - - /* reset OTG port SSPI */ - msg.cmd = MBOX_CMD_RESET_SSPI; - msg.data = tegra->otg_usb3_port+1; - - ret = tegra_xusb_mbox_send(tegra, &msg); - if (ret < 0) { - dev_info(tegra->dev, - "failed to RESET_SSPI %d\n", - ret); + if (tegra->soc->otg_set_port_power) { + if (host_mode) { + /* switch to host mode */ + if (tegra->otg_usb3_port >= 0) { + if (tegra->soc->otg_reset_sspi) { + /* set PP=0 */ + tegra_xhci_hc_driver.hub_control( + xhci->shared_hcd, GetPortStatus, + 0, tegra->otg_usb3_port+1, + (char *) &status, sizeof(status)); + if (status & USB_SS_PORT_STAT_POWER) + tegra_xhci_set_port_power(tegra, false, + false); + + /* reset OTG port SSPI */ + msg.cmd = MBOX_CMD_RESET_SSPI; + msg.data = tegra->otg_usb3_port+1; + + ret = tegra_xusb_mbox_send(tegra, &msg); + if (ret < 0) { + dev_info(tegra->dev, + "failed to RESET_SSPI %d\n", + ret); + } } - } - tegra_xhci_set_port_power(tegra, false, true); - } + tegra_xhci_set_port_power(tegra, false, true); + } - tegra_xhci_set_port_power(tegra, true, true); + tegra_xhci_set_port_power(tegra, true, true); - } else { - if (tegra->otg_usb3_port >= 0) - tegra_xhci_set_port_power(tegra, false, false); + } else { + if (tegra->otg_usb3_port >= 0) + tegra_xhci_set_port_power(tegra, false, false); - tegra_xhci_set_port_power(tegra, true, false); + tegra_xhci_set_port_power(tegra, true, false); + } } pm_runtime_put_autosuspend(tegra->dev); } @@ -2553,6 +2557,7 @@ static const struct tegra_xusb_soc tegra124_soc = { .scale_ss_clock = true, .has_ipfs = true, .otg_reset_sspi = false, + .otg_set_port_power = true, .ops = &tegra124_ops, .mbox = { .cmd = 0xe4, @@ -2593,6 +2598,7 @@ static const struct tegra_xusb_soc tegra210_soc = { .scale_ss_clock = false, .has_ipfs = true, .otg_reset_sspi = true, + .otg_set_port_power = true, .ops = &tegra124_ops, .mbox = { .cmd = 0xe4, @@ -2640,6 +2646,7 @@ static const struct tegra_xusb_soc tegra186_soc = { .scale_ss_clock = false, .has_ipfs = false, .otg_reset_sspi = false, + .otg_set_port_power = true, .ops = &tegra124_ops, .mbox = { .cmd = 0xe4, @@ -2673,6 +2680,7 @@ static const struct tegra_xusb_soc tegra194_soc = { .scale_ss_clock = false, .has_ipfs = false, .otg_reset_sspi = false, + .otg_set_port_power = false, .ops = &tegra124_ops, .mbox = { .cmd = 0x68, @@ -2708,6 +2716,7 @@ static const struct tegra_xusb_soc tegra234_soc = { .scale_ss_clock = false, .has_ipfs = false, .otg_reset_sspi = false, + .otg_set_port_power = false, .ops = &tegra234_ops, .mbox = { .cmd = XUSB_BAR2_ARU_MBOX_CMD, -- cgit v1.2.3 From 52f2ad3f7e5eb3b5908e1d685d4342519dc9cfcd Mon Sep 17 00:00:00 2001 From: Heitor Alves de Siqueira Date: Tue, 5 May 2026 15:56:03 -0300 Subject: usb: usbtmc: check URB actual_length for interrupt-IN notifications USBTMC devices can use an optional interrupt endpoint for notification messages. These typically contain two-byte headers indicating the payload format, but the driver does not check if these headers are present before accessing the data buffers. In cases where the URB actual_length is not enough to fit these headers, the driver will either cause an out-of-bounds read, or consume stale leftover data from a previous notification. Fix by checking if actual_data contains enough bytes for the headers, otherwise resubmit URB to the interrupt endpoint. Fixes: dbf3e7f654c0 ("Implement an ioctl to support the USMTMC-USB488 READ_STATUS_BYTE operation.") Reported-by: syzbot+abbfd103085885cf16a2@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=abbfd103085885cf16a2 Cc: stable Suggested-by: Michal Pecio Signed-off-by: Heitor Alves de Siqueira Link: https://patch.msgid.link/20260505-usbtmc-iin-size-v3-1-a36113f62db7@igalia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index bd9347804dec..e15efd0c5ca7 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -2306,6 +2306,14 @@ static void usbtmc_interrupt(struct urb *urb) switch (status) { case 0: /* SUCCESS */ + /* ensure at least two bytes of headers were transferred */ + if (urb->actual_length < 2) { + dev_warn(dev, + "actual length %d not sufficient for interrupt headers\n", + urb->actual_length); + goto exit; + } + /* check for valid STB notification */ if (data->iin_buffer[0] > 0x81) { data->bNotify1 = data->iin_buffer[0]; -- cgit v1.2.3 From 121d2f682ba912b1427cddca7cf84840f41cc620 Mon Sep 17 00:00:00 2001 From: Heitor Alves de Siqueira Date: Tue, 5 May 2026 15:56:04 -0300 Subject: usb: usbtmc: reject interrupt endpoints with small wMaxPacketSize The USB488 subclass specification requires interrupt wMaxPacketSize to be 0x02, unless the device sends vendor-specific notifications. Endpoints that advertise less than 2 bytes for wMaxPacketSize are unlikely to work with the current driver, as URBs will not have enough space for interrupt headers. Considering that any notification URBs will be ignored by the driver, reject these endpoints early during probe. Fixes: 041370cce889 ("USB: usbtmc: refactor endpoint retrieval") Cc: stable Suggested-by: Michal Pecio Signed-off-by: Heitor Alves de Siqueira Link: https://patch.msgid.link/20260505-usbtmc-iin-size-v3-2-a36113f62db7@igalia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index e15efd0c5ca7..af9ae55dae14 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -2440,6 +2440,12 @@ static int usbtmc_probe(struct usb_interface *intf, data->iin_ep = int_in->bEndpointAddress; data->iin_wMaxPacketSize = usb_endpoint_maxp(int_in); data->iin_interval = int_in->bInterval; + /* wMaxPacketSize should be 0x02 or more as per USB488 Table 22 */ + if (iface_desc->desc.bInterfaceProtocol == 1 && + data->iin_wMaxPacketSize < 2) { + retcode = -EINVAL; + goto err_put; + } dev_dbg(&intf->dev, "Found Int in endpoint at %u\n", data->iin_ep); } -- cgit v1.2.3 From 9ddb9c0deca48d2c2a22ebf4d2f35c925a520328 Mon Sep 17 00:00:00 2001 From: "Stephen J. Fuhry" Date: Wed, 13 May 2026 13:14:19 -0400 Subject: USB: quirks: add NO_LPM for Lenovo ThinkPad USB-C Dock Gen2 hub controllers The Lenovo ThinkPad USB-C Dock Gen2 (17ef:a391, 17ef:a392) hub controllers exhibit link instability when USB Link Power Management is enabled, similar to the dock's Ethernet adapter (17ef:a387) which already carries USB_QUIRK_NO_LPM. When the dock reconnects after a transient disconnect, the hub controllers enter LPM states between re-enumeration retries, causing repeated disconnect/reconnect cycles lasting up to two minutes. Disabling LPM for these devices restores stable enumeration. Signed-off-by: Stephen J. Fuhry Cc: stable Link: https://patch.msgid.link/20260513171419.44849-1-fuhrysteve@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 0ffdaefba508..87810eff974e 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -513,6 +513,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Lenovo ThinkPad USB-C Dock Gen2 Ethernet (RTL8153 GigE) */ { USB_DEVICE(0x17ef, 0xa387), .driver_info = USB_QUIRK_NO_LPM }, + /* Lenovo ThinkPad USB-C Dock Gen2 USB 3.1 and USB 2.0 hub controllers */ + { USB_DEVICE(0x17ef, 0xa391), .driver_info = USB_QUIRK_NO_LPM }, + { USB_DEVICE(0x17ef, 0xa392), .driver_info = USB_QUIRK_NO_LPM }, + /* BUILDWIN Photo Frame */ { USB_DEVICE(0x1908, 0x1315), .driver_info = USB_QUIRK_HONOR_BNUMINTERFACES }, -- cgit v1.2.3 From b53ebb811e00be50a779ce4e7aee604178b4a825 Mon Sep 17 00:00:00 2001 From: Sam Burkels Date: Fri, 1 May 2026 15:23:46 +0200 Subject: usb: storage: Add quirks for PNY Elite Portable SSD The PNY Elite Portable SSD (USB ID 154b:f009) is a sibling of the already-quirked PNY Pro Elite SSDs (154b:f00b and 154b:f00d). Like its siblings, it uses a Phison-based USB-SATA bridge that exhibits firmware bugs when bound to the uas driver. Without quirks, the device fails to complete READ CAPACITY commands when accessed over UAS on a SuperSpeed (USB 3) port. The device enumerates and reports as a SCSI direct-access device, but reports zero logical blocks and never finishes spin-up: usb 2-3: new SuperSpeed USB device number 8 using xhci_hcd usb 2-3: New USB device found, idVendor=154b, idProduct=f009 usb 2-3: Product: PNY ELITE PSSD usb 2-3: Manufacturer: PNY scsi host0: uas scsi 0:0:0:0: Direct-Access PNY PNY ELITE PSSD 0 sd 0:0:0:0: [sda] Spinning up disk... [...10+ seconds of polling, no progress...] sd 0:0:0:0: [sda] Read Capacity(16) failed: hostbyte=DID_ERROR sd 0:0:0:0: [sda] Read Capacity(10) failed: hostbyte=DID_ERROR sd 0:0:0:0: [sda] 0 512-byte logical blocks: (0 B/0 B) Tested each individual quirk to find the minimum that fixes this: - US_FL_NO_ATA_1X alone: device hangs on spin-up - US_FL_NO_REPORT_OPCODES alone: works on USB 2.0, hangs on USB 3.0 - US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES: works on both With both quirks the device enumerates correctly while still using the uas driver, and delivers full UAS throughput (~281 MB/s sequential read on a USB 3.0 Gen 1 port). The existing PNY Pro Elite entries (f00b, f00d) only set NO_ATA_1X, but this device additionally chokes on REPORT OPCODES under SuperSpeed. Signed-off-by: Sam Burkels Acked-by: Oliver Neukum Cc: stable Link: https://patch.msgid.link/20260501132346.86572-1-sam@1a38.nl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 939a98c2d3f7..d6f86d5db3bf 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -132,6 +132,13 @@ UNUSUAL_DEV(0x152d, 0x0583, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_REPORT_OPCODES), +/* Reported-by: Sam Burkels */ +UNUSUAL_DEV(0x154b, 0xf009, 0x0000, 0x9999, + "PNY", + "PNY ELITE PSSD", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES), + /* Reported-by: Thinh Nguyen */ UNUSUAL_DEV(0x154b, 0xf00b, 0x0000, 0x9999, "PNY", -- cgit v1.2.3 From 0b5bde9e1c4df3bbf93a01a0e00c05085f3449fd Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Fri, 8 May 2026 13:25:55 -0500 Subject: dt-bindings: usb: ti,omap4-musb: Drop duplicate 'usb-phy' property constraints The deprecated 'usb-phy' property is documented already in usb.yaml and doesn't need a type definition here. It just needs constraints on how many entries there are. As this is a host controller, reference usb-hcd.yaml which then references usb.yaml. Fixes: 70fcdc82cf4a ("dt-bindings: usb: ti,omap4-musb: convert to DT schema") Signed-off-by: Rob Herring (Arm) Link: https://patch.msgid.link/20260508182556.1759173-1-robh@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/ti,omap4-musb.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/ti,omap4-musb.yaml b/Documentation/devicetree/bindings/usb/ti,omap4-musb.yaml index a3d15f217658..e1887e490e02 100644 --- a/Documentation/devicetree/bindings/usb/ti,omap4-musb.yaml +++ b/Documentation/devicetree/bindings/usb/ti,omap4-musb.yaml @@ -81,9 +81,7 @@ properties: const: usb2-phy usb-phy: - $ref: /schemas/types.yaml#/definitions/phandle-array - description: Phandle for the PHY device. - deprecated: true + maxItems: 1 ctrl-module: $ref: /schemas/types.yaml#/definitions/phandle @@ -96,6 +94,9 @@ required: - interrupts - interrupt-names +allOf: + - $ref: usb-hcd.yaml# + unevaluatedProperties: false examples: -- cgit v1.2.3 From d96209626a29ea64666be98c30b30ac82e5f1be6 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Fri, 17 Apr 2026 12:35:52 -0400 Subject: usbip: vudc: Fix use after free bug in vudc_remove due to race condition This patch follows up Zheng Wang's 2023 report of a use-after-free in vudc_remove(). The original thread stalled on Shuah Khan's request for runtime testing of the unplug/unbind path. This patch supplies that testing and keeps Zheng's original fix shape. In vudc_probe(), v_init_timer() binds udc->tr_timer.timer to v_timer(). usbip_sockfd_store() starts the timer via v_start_timer()/v_kick_timer(). vudc_remove() can then free the containing struct vudc while the timer is still pending or executing. KASAN confirms the race on an unpatched x86_64 QEMU guest with CONFIG_KASAN=y, CONFIG_USBIP_VUDC=y, CONFIG_USB_ZERO=y, and a tight loop that repeatedly writes a socket fd to usbip_sockfd, closes the socket pair, and unbinds/rebinds usbip-vudc.0: BUG: KASAN: slab-use-after-free in __run_timer_base.part.0+0x8ba/0x8e0 Write of size 8 at addr ffff888001b80740 by task trigger_and_unb/239 Allocated by task 239: vudc_probe+0x4d/0xaa0 Freed by task 239: kfree+0x18f/0x520 device_release_driver_internal+0x388/0x540 unbind_store+0xd9/0x100 This lands in the timer core rather than v_timer() itself because the embedded timer_list is being walked after its containing struct vudc has already been freed. The underlying lifetime bug is the same one Zheng reported. With v_stop_timer() called from vudc_remove() and the timer deleted synchronously, the same harness completed 5000 bind/unbind iterations with no KASAN report. Fixes: b6a0ca111867 ("usbip: vudc: Add UDC specific ops") Cc: stable Reported-by: Zheng Wang Closes: https://lore.kernel.org/linux-usb/20230317100954.2626573-1-zyytlz.wz@163.com/ Signed-off-by: Michael Bommarito Acked-by: Shuah Khan Link: https://patch.msgid.link/20260417163552.807548-1-michael.bommarito@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_dev.c | 1 + drivers/usb/usbip/vudc_transfer.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/usbip/vudc_dev.c b/drivers/usb/usbip/vudc_dev.c index 90383107b660..c5f079c5a1ea 100644 --- a/drivers/usb/usbip/vudc_dev.c +++ b/drivers/usb/usbip/vudc_dev.c @@ -632,6 +632,7 @@ void vudc_remove(struct platform_device *pdev) { struct vudc *udc = platform_get_drvdata(pdev); + v_stop_timer(udc); usb_del_gadget_udc(&udc->gadget); cleanup_vudc_hw(udc); kfree(udc); diff --git a/drivers/usb/usbip/vudc_transfer.c b/drivers/usb/usbip/vudc_transfer.c index a4f02ea3e3ef..d4ce85c4c6a2 100644 --- a/drivers/usb/usbip/vudc_transfer.c +++ b/drivers/usb/usbip/vudc_transfer.c @@ -490,7 +490,8 @@ void v_stop_timer(struct vudc *udc) { struct transfer_timer *t = &udc->tr_timer; - /* timer itself will take care of stopping */ + /* Delete the timer synchronously before teardown frees udc. */ dev_dbg(&udc->pdev->dev, "timer stop"); + timer_delete_sync(&t->timer); t->state = VUDC_TR_STOPPED; } -- cgit v1.2.3 From f1ecb0e563595d4ba9a3b8e39ed52a3dc2d8e328 Mon Sep 17 00:00:00 2001 From: Hang Cao Date: Wed, 15 Apr 2026 14:42:38 +0800 Subject: dt-bindings: usb: Fix EIC7700 USB reset's issue The EIC7700 USB requires a USB PHY reset operation; otherwise, the USB will not work. The reason why the USB driver that was applied can work properly is that the USB PHY has already been reset in ESWIN's U-Boot. However, the proper functioning of the USB driver should not be dependent on the bootloader. Therefore, it is necessary to incorporate the USB PHY reset signal into the DT bindings. This patch does not introduce any backward incompatibility since the dts is not upstream yet. As array of reset operations are used in the driver, no modifications to the USB controller driver are needed. Fixes: c640a4239db5 ("dt-bindings: usb: Add ESWIN EIC7700 USB controller") Cc: stable Signed-off-by: Hang Cao Acked-by: Conor Dooley Link: https://patch.msgid.link/20260415064238.1784-1-caohang@eswincomputing.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/eswin,eic7700-usb.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/eswin,eic7700-usb.yaml b/Documentation/devicetree/bindings/usb/eswin,eic7700-usb.yaml index 41c3b1b98991..658260619423 100644 --- a/Documentation/devicetree/bindings/usb/eswin,eic7700-usb.yaml +++ b/Documentation/devicetree/bindings/usb/eswin,eic7700-usb.yaml @@ -41,12 +41,13 @@ properties: - const: usb_en resets: - maxItems: 2 + maxItems: 3 reset-names: items: - const: vaux - const: usb_rst + - const: usb_phy eswin,hsp-sp-csr: description: @@ -85,8 +86,8 @@ examples: interrupt-parent = <&plic>; interrupts = <85>; interrupt-names = "peripheral"; - resets = <&reset 84>, <&hspcrg 2>; - reset-names = "vaux", "usb_rst"; + resets = <&reset 84>, <&hspcrg 2>, <&hspcrg 4>; + reset-names = "vaux", "usb_rst", "usb_phy"; dr_mode = "peripheral"; maximum-speed = "high-speed"; phy_type = "utmi"; -- cgit v1.2.3 From 7d9633528dd40e33964d2dc74a5abbf5c4d116ce Mon Sep 17 00:00:00 2001 From: Seungjin Bae Date: Mon, 18 May 2026 19:43:14 -0400 Subject: usb: gadget: dummy_hcd: Reject hub port requests for non-existent ports The `dummy_hub_control()` function handles USB hub class requests to the virtual root hub. The `GetPortStatus` case returns -EPIPE for requests with `wIndex != 1`, since the virtual root hub has only a single port. However, the `ClearPortFeature` and `SetPortFeature` cases lack the same check. Fix this by extending the `wIndex != 1` rejection to both cases, matching the existing behavior of `GetPortStatus`. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Suggested-by: Alan Stern Signed-off-by: Seungjin Bae Reviewed-by: Alan Stern Link: https://patch.msgid.link/20260518234314.1889396-1-eeodqql09@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/dummy_hcd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index f094491b1041..f47903461ed5 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -2134,6 +2134,8 @@ static int dummy_hub_control( case ClearHubFeature: break; case ClearPortFeature: + if (wIndex != 1) + goto error; switch (wValue) { case USB_PORT_FEAT_SUSPEND: if (hcd->speed == HCD_USB3) { @@ -2248,6 +2250,8 @@ static int dummy_hub_control( retval = -EPIPE; break; case SetPortFeature: + if (wIndex != 1) + goto error; switch (wValue) { case USB_PORT_FEAT_LINK_STATE: if (hcd->speed != HCD_USB3) { -- cgit v1.2.3 From 4e036c10e7f4df5d951c69cc3697bc8e209c6d02 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 19 Apr 2026 12:03:59 -0400 Subject: usb: gadget: f_fs: copy only received bytes on short ep0 read ffs_ep0_read() allocates its control-OUT data buffer with kmalloc() (not kzalloc) at the Length value from the Setup packet, then copies that full len to userspace regardless of how many bytes were actually received: data = kmalloc(len, GFP_KERNEL); ... ret = __ffs_ep0_queue_wait(ffs, data, len); if ((ret > 0) && (copy_to_user(buf, data, len))) ret = -EFAULT; __ffs_ep0_queue_wait() returns req->actual, which on a short control OUT transfer is strictly less than len. The copy_to_user() call still copies len bytes, so on a short OUT the last (len - ret) bytes of the kmalloc() buffer -- uninitialised slab residue -- are delivered to the FunctionFS daemon. Short ep0 OUT completions are specified USB control-transfer behavior and are produced by in-tree UDCs: * dwc2 continues on req->actual < req->length for ep0 DATA OUT (short-not-ok is the only ep0-OUT stall path). * aspeed_udc ends ep0 OUT on rx_len < ep->ep.maxpacket. * renesas_usbf logs "ep0 short packet" and completes the request. * dwc3 stalls on short IN but not on short OUT. A short ep0 OUT is therefore not evidence of a broken UDC; it is a normal condition f_fs has to cope with. The sibling gadgetfs implementation in drivers/usb/gadget/legacy/inode.c already does this correctly via min(len, dev->req->actual) before copy_to_user(). This patch brings f_fs.c to the same safe pattern rather than trimming at a defensive layer. The bug is reached from the FunctionFS device node, which in real deployments is owned by the privileged gadget daemon (adbd, UMS, composite gadget services, etc.); it is not reachable from unprivileged userspace. Linux host stacks normally reject short-wLength control OUTs before they reach the gadget, so reproducing this required a build that bypasses that host-side check. With the bypass in place, a 1-byte payload on a 64-byte Setup produces 63 bytes of non-canary slab residue in the daemon's read buffer. Fix by copying only ret (actually received) bytes to userspace. Fixes: ddf8abd25994 ("USB: f_fs: the FunctionFS driver") Cc: stable Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260419160359.1577270-1-michael.bommarito@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 002c3441bea3..815639506520 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -619,7 +619,7 @@ static ssize_t ffs_ep0_read(struct file *file, char __user *buf, /* unlocks spinlock */ ret = __ffs_ep0_queue_wait(ffs, data, len); - if ((ret > 0) && (copy_to_user(buf, data, len))) + if ((ret > 0) && (copy_to_user(buf, data, ret))) ret = -EFAULT; goto done_mutex; -- cgit v1.2.3 From 2796646f6d892c1eb6818c7ca41fdfa12568e8d1 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 19 Apr 2026 12:12:27 -0400 Subject: usb: gadget: f_fs: serialize DMABUF cancel against request completion ffs_epfile_dmabuf_io_complete() calls usb_ep_free_request() on the completed request but leaves priv->req, the back-pointer that ffs_dmabuf_transfer() set on submission, pointing at the freed memory. A later FUNCTIONFS_DMABUF_DETACH ioctl or ffs_epfile_release() on the close path still sees priv->req non-NULL under ffs->eps_lock: if (priv->ep && priv->req) usb_ep_dequeue(priv->ep, priv->req); so usb_ep_dequeue() is called on a freed usb_request. On dummy_hcd the dequeue path only walks a live queue and pointer-compares, so the freed pointer reads without faulting and KASAN requires an explicit check at the FunctionFS call site to surface the use-after-free. On SG-capable in-tree UDCs the dequeue path dereferences the supplied request immediately: * chipidea's ep_dequeue() does container_of(req, struct ci_hw_req, req) and reads hwreq->req.status before acquiring its own lock. * cdnsp's cdnsp_gadget_ep_dequeue() reads request->status first. The narrower option of clearing priv->req via cmpxchg() in the completion does not close the race: the completion runs without eps_lock, so a cancel path holding eps_lock can still observe priv->req non-NULL, race a concurrent completion that clears and frees, and pass the freed pointer to usb_ep_dequeue(). A slightly longer fix that moves the free into the cleanup work is needed. Same class of lifetime race as the recent usbip-vudc timer fix [1]. Take eps_lock in the sole place that mutates priv->req from the callback direction by moving usb_ep_free_request() out of the completion into ffs_dmabuf_cleanup(), the existing work handler scheduled by ffs_dmabuf_signal_done() on ffs->io_completion_wq. Clear priv->req there under eps_lock before freeing, and only clear if priv->req still names our request (a subsequent ffs_dmabuf_transfer() on the same attachment may have queued a new one). This keeps the existing dummy_hcd sync-dequeue invariant: the completion callback is still invoked by the UDC without eps_lock held (dummy_hcd drops its own lock before calling the callback), and the callback now takes no f_fs lock at all. Serialization against the cancel path happens in cleanup, which runs from the workqueue with no f_fs lock held on entry. The priv ref count protects the containing ffs_dmabuf_priv: ffs_dmabuf_transfer() takes a ref via ffs_dmabuf_get(), cleanup drops it via ffs_dmabuf_put(), so priv stays live for the cleanup even after the cancel path's list_del + ffs_dmabuf_put. The ffs_dmabuf_transfer() error path no longer frees usb_req inline: fence->req and fence->ep are set before usb_ep_queue(), so ffs_dmabuf_cleanup() (scheduled by the error-path ffs_dmabuf_signal_done()) owns the free regardless of whether the queue succeeded. Reproduced under KASAN on both detach and close paths against dummy_hcd with an observability hook (kasan_check_byte(priv->req) immediately before usb_ep_dequeue) at the two FunctionFS cancel sites to surface the stale-pointer access; the hook is not part of this patch. The KASAN allocator / free stacks in the captured splats identify the same request: alloc in dummy_alloc_request, free in dummy_timer, fault reached from ffs_epfile_release (close) and from the FUNCTIONFS_DMABUF_DETACH ioctl (detach). With the patch applied, both paths are silent under the same hook. The bug is reached from the FunctionFS device node, which in real deployments is owned by the privileged gadget daemon (adbd, UMS, composite gadget services, etc.); it is not reachable from unprivileged userspace or from a USB host on the cable. FunctionFS mounts default to GLOBAL_ROOT_UID, but the filesystem supports uid=, gid=, and fmode= delegation to a non-root gadget daemon, so on real deployments the attacker may be a less-privileged service rather than root. Fixes: 7b07a2a7ca02 ("usb: gadget: functionfs: Add DMABUF import interface") Link: https://lore.kernel.org/all/20260417163552.807548-1-michael.bommarito@gmail.com/ [1] Cc: stable Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260419161227.1587668-1-michael.bommarito@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 815639506520..75912ce6ab55 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -150,6 +150,8 @@ struct ffs_dma_fence { struct dma_fence base; struct ffs_dmabuf_priv *priv; struct work_struct work; + struct usb_ep *ep; + struct usb_request *req; }; struct ffs_epfile { @@ -1385,6 +1387,21 @@ static void ffs_dmabuf_cleanup(struct work_struct *work) struct ffs_dmabuf_priv *priv = dma_fence->priv; struct dma_buf_attachment *attach = priv->attach; struct dma_fence *fence = &dma_fence->base; + struct usb_request *req = dma_fence->req; + struct usb_ep *ep = dma_fence->ep; + + /* + * eps_lock pairs with the cancel paths so they cannot pass a freed + * req to usb_ep_dequeue(). Only clear if priv->req still names ours; + * a re-queue on the same attachment may have taken that slot. + */ + spin_lock_irq(&priv->ffs->eps_lock); + if (priv->req == req) + priv->req = NULL; + spin_unlock_irq(&priv->ffs->eps_lock); + + if (ep && req) + usb_ep_free_request(ep, req); ffs_dmabuf_put(attach); dma_fence_put(fence); @@ -1414,8 +1431,8 @@ static void ffs_epfile_dmabuf_io_complete(struct usb_ep *ep, struct usb_request *req) { pr_vdebug("FFS: DMABUF transfer complete, status=%d\n", req->status); + /* req is freed by ffs_dmabuf_cleanup() under eps_lock. */ ffs_dmabuf_signal_done(req->context, req->status); - usb_ep_free_request(ep, req); } static const char *ffs_dmabuf_get_driver_name(struct dma_fence *fence) @@ -1699,6 +1716,10 @@ static int ffs_dmabuf_transfer(struct file *file, usb_req->context = fence; usb_req->complete = ffs_epfile_dmabuf_io_complete; + /* ffs_dmabuf_cleanup() frees usb_req via these two fields. */ + fence->req = usb_req; + fence->ep = ep->ep; + cookie = dma_fence_begin_signalling(); ret = usb_ep_queue(ep->ep, usb_req, GFP_ATOMIC); dma_fence_end_signalling(cookie); @@ -1708,7 +1729,6 @@ static int ffs_dmabuf_transfer(struct file *file, } else { pr_warn("FFS: Failed to queue DMABUF: %d\n", ret); ffs_dmabuf_signal_done(fence, ret); - usb_ep_free_request(ep->ep, usb_req); } spin_unlock_irq(&epfile->ffs->eps_lock); -- cgit v1.2.3 From 8f6aa392653e52a45858cff5c063df550028836b Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 27 Apr 2026 15:57:55 +0800 Subject: usb: chipidea: core: convert ci_role_switch to local variable When a system contains multiple USB controllers, the global ci_role_switch variable may be overwritten by subsequent driver initialization code. This can cause issues in the following cases: - The 2nd ci_hdrc_probe() sees ci_role_switch.fwnode as non-NULL even though the "usb-role-switch" property is not present for the controller. - When the ci_hdrc device is unbound and bound again, ci_role_switch fwnode will not be reassigned, and the old value will be used instead. Convert ci_role_switch to a local variable to fix these issues. Fixes: 05559f10ed79 ("usb: chipidea: add role switch class support") Cc: stable Acked-by: Peter Chen Reviewed-by: Frank Li Signed-off-by: Xu Yang Link: https://patch.msgid.link/20260427075755.3611217-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/core.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 7cfabb04a4fb..2ab3db3c1015 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -655,12 +655,6 @@ static enum ci_role ci_get_role(struct ci_hdrc *ci) return role; } -static struct usb_role_switch_desc ci_role_switch = { - .set = ci_usb_role_switch_set, - .get = ci_usb_role_switch_get, - .allow_userspace_control = true, -}; - static int ci_get_platdata(struct device *dev, struct ci_hdrc_platform_data *platdata) { @@ -787,9 +781,6 @@ static int ci_get_platdata(struct device *dev, cable->connected = false; } - if (device_property_read_bool(dev, "usb-role-switch")) - ci_role_switch.fwnode = dev->fwnode; - platdata->pctl = devm_pinctrl_get(dev); if (!IS_ERR(platdata->pctl)) { struct pinctrl_state *p; @@ -1033,6 +1024,7 @@ ATTRIBUTE_GROUPS(ci); static int ci_hdrc_probe(struct platform_device *pdev) { + struct usb_role_switch_desc ci_role_switch = {}; struct device *dev = &pdev->dev; struct ci_hdrc *ci; struct resource *res; @@ -1179,7 +1171,11 @@ static int ci_hdrc_probe(struct platform_device *pdev) } } - if (ci_role_switch.fwnode) { + if (device_property_read_bool(dev, "usb-role-switch")) { + ci_role_switch.set = ci_usb_role_switch_set; + ci_role_switch.get = ci_usb_role_switch_get; + ci_role_switch.allow_userspace_control = true; + ci_role_switch.fwnode = dev_fwnode(dev); ci_role_switch.driver_data = ci; ci->role_switch = usb_role_switch_register(dev, &ci_role_switch); -- cgit v1.2.3 From 9ea06a3fbf9f16e0d98c52cb3b99642be15ec281 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 20 May 2026 08:59:28 +0300 Subject: usb: dwc2: Fix use after free in debug code We're not allowed to dereference "urb" after calling usb_hcd_giveback_urb() so save the urb->status ahead of time. Fixes: 7359d482eb4d ("staging: HCD files for the DWC2 driver") Cc: stable Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/ag1NwBpqT4IEQcdJ@stanley.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/hcd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 1a763ad4f721..2414291aa908 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -4804,6 +4804,7 @@ static int _dwc2_hcd_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, struct dwc2_hsotg *hsotg = dwc2_hcd_to_hsotg(hcd); int rc; unsigned long flags; + int urb_status; dev_dbg(hsotg->dev, "DWC OTG HCD URB Dequeue\n"); dwc2_dump_urb_info(hcd, urb, "urb_dequeue"); @@ -4828,11 +4829,12 @@ static int _dwc2_hcd_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, /* Higher layer software sets URB status */ spin_unlock(&hsotg->lock); + urb_status = urb->status; usb_hcd_giveback_urb(hcd, urb, status); spin_lock(&hsotg->lock); dev_dbg(hsotg->dev, "Called usb_hcd_giveback_urb()\n"); - dev_dbg(hsotg->dev, " urb->status = %d\n", urb->status); + dev_dbg(hsotg->dev, " urb->status = %d\n", urb_status); out: spin_unlock_irqrestore(&hsotg->lock, flags); -- cgit v1.2.3 From d0f2eb4493d1c3c8fecb5eadb5c1382074873ef9 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 19 May 2026 17:01:10 +0200 Subject: KVM: s390: vsie: Fix memory leak when unshadowing When performing a partial unshadowing, the rmap was being leaked. Add the missing kfree(). Fixes: a2c17f9270cc ("KVM: s390: New gmap code") Signed-off-by: Claudio Imbrenda Reviewed-by: Christoph Schlameuss Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gmap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index 3c26e35af0ef..fd1927761980 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -1143,8 +1143,10 @@ void _gmap_handle_vsie_unshadow_event(struct gmap *parent, gfn_t gfn) } scoped_guard(spinlock, &sg->host_to_rmap_lock) head = radix_tree_delete(&sg->host_to_rmap, gfn); - gmap_for_each_rmap_safe(rmap, rnext, head) + gmap_for_each_rmap_safe(rmap, rnext, head) { gmap_unshadow_level(sg, rmap->r_gfn, rmap->level); + kfree(rmap); + } } } -- cgit v1.2.3 From 4df4b7cdf54620aa848e7d83d253bb944313f7bd Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 19 May 2026 17:01:11 +0200 Subject: KVM: s390: Fix leaking kvm_s390_mmu_cache in case of errors Fix a memory leak that can happen if gmap_ucas_map_one() or kvm_s390_mmu_cache_topup() return error values. Also fix a similar issue in gmap_set_limit(). Signed-off-by: Claudio Imbrenda Fixes: a2c17f9270cc ("KVM: s390: New gmap code") Reported-by: Jiaxin Fan Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gmap.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index fd1927761980..10c98c8cc1d8 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -125,7 +125,7 @@ struct gmap *gmap_new_child(struct gmap *parent, gfn_t limit) int gmap_set_limit(struct gmap *gmap, gfn_t limit) { - struct kvm_s390_mmu_cache *mc; + struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL; int rc, type; type = gmap_limit_to_type(limit); @@ -142,7 +142,6 @@ int gmap_set_limit(struct gmap *gmap, gfn_t limit) rc = dat_set_asce_limit(mc, &gmap->asce, type); } while (rc == -ENOMEM); - kvm_s390_free_mmu_cache(mc); return 0; } @@ -822,8 +821,8 @@ int gmap_ucas_translate(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, gpa_t int gmap_ucas_map(struct gmap *gmap, gfn_t p_gfn, gfn_t c_gfn, unsigned long count) { - struct kvm_s390_mmu_cache *mc; - int rc; + struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL; + int rc = 0; mc = kvm_s390_new_mmu_cache(); if (!mc) -- cgit v1.2.3 From 2d505c290667eba67352c5db303ec92b7de860ad Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 19 May 2026 17:01:12 +0200 Subject: KVM: s390: vsie: Fix unshadowing logic In some cases (i.e. under extreme memory pressure on the host), attempting to shadow memory will result in the same memory being unshadowed, causing a loop. Add a PGSTE bit to distinguish between shadowed memory and shadowed DAT tables, fix the unshadowing logic in _gmap_ptep_xchg() to prevent unnecessary unshadowing and perform better checks. Also fix the unshadowing logic in _gmap_crstep_xchg_atomic() which did not unshadow properly when the large page would become unprotected. Opportunistically add a check in gmap_protect_rmap() to make sure it won't be called with level == TABLE_TYPE_PAGE_TABLE. Signed-off-by: Claudio Imbrenda Fixes: a2c17f9270cc ("KVM: s390: New gmap code") Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/kvm/dat.c | 1 + arch/s390/kvm/dat.h | 3 ++- arch/s390/kvm/gaccess.c | 1 + arch/s390/kvm/gmap.c | 3 ++- arch/s390/kvm/gmap.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 63 insertions(+), 5 deletions(-) diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c index 7b8d70fe406d..4a41c0247ffa 100644 --- a/arch/s390/kvm/dat.c +++ b/arch/s390/kvm/dat.c @@ -267,6 +267,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g /* No need to take locks as the page table is not installed yet. */ pgste_init.prefix_notif = old.s.fc1.prefix_notif; pgste_init.vsie_notif = old.s.fc1.vsie_notif; + pgste_init.vsie_gmem = old.s.fc1.vsie_notif; pgste_init.pcl = uses_skeys && init.h.i; dat_init_pgstes(pt, pgste_init.val); } else { diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h index 8f8278c44879..873e13ac5a27 100644 --- a/arch/s390/kvm/dat.h +++ b/arch/s390/kvm/dat.h @@ -145,7 +145,8 @@ union pgste { unsigned long cmma_d : 1; /* Dirty flag for CMMA bits */ unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long vsie_notif : 1; /* Referenced in a shadow table */ - unsigned long : 5; + unsigned long vsie_gmem : 1; /* Contains nested guest memory */ + unsigned long : 4; unsigned long : 8; }; struct { diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index b07accd19618..4f8d5592c9a9 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1445,6 +1445,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union } else { pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false); pgste.vsie_notif = 1; + pgste.vsie_gmem = 1; } pgste_set_unlock(ptep_h, pgste); if (rc) diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index 10c98c8cc1d8..8cff0cf5ce24 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -1031,7 +1031,8 @@ int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gf union pte pte; int flags, rc; - KVM_BUG_ON(!is_shadow(sg), sg->kvm); + if (KVM_BUG_ON(!is_shadow(sg) || level <= TABLE_TYPE_PAGE_TABLE, sg->kvm)) + return -EINVAL; lockdep_assert_held(&sg->parent->children_lock); flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0); diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h index 96ee1395a592..6e51ec6066b4 100644 --- a/arch/s390/kvm/gmap.h +++ b/arch/s390/kvm/gmap.h @@ -167,6 +167,36 @@ static inline bool gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end) return _gmap_unmap_prefix(gmap, gfn, end, false); } +/** + * pte_needs_unshadow() -- Check if the pte operations triggers unshadowing. + * @oldpte: the previous value for the guest pte. + * @newpte: the new pte being set. + * @pgste: the pgste for the pte entry. + * + * If the pgste.vsie_notif bit is not set, return false: the page is not + * involved in vsie and thus should not trigger an unshadow operation. + * + * If the pgste.vsie_gmem bit is set, this pte represents shadowed guest + * memory. The access rights on g3's memory should be synchronized with g1's + * and g2's. Therefore unshadowing is triggered if the new and old pte + * differ in protection, or if the new pte is invalid. + * + * If the pgste.vsie_gmem bit is not set, this pte maps the g2 dat tables + * for g3. If the entry becomes writable or absent, it becomes impossible to + * guarantee that the shadow mapping will match g2's mapping. In that case, + * trigger an unshadow event. + * + * Return: true if an unshadow event should be triggered, otherwise false. + */ +static inline bool pte_needs_unshadow(union pte oldpte, union pte newpte, union pgste pgste) +{ + if (!pgste.vsie_notif) + return false; + if (pgste.vsie_gmem) + return (oldpte.h.p != newpte.h.p) || newpte.h.i; + return !newpte.h.p || !newpte.s.pr; +} + static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte, union pgste pgste, gfn_t gfn, bool needs_lock) { @@ -180,8 +210,9 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un pgste.prefix_notif = 0; gmap_unmap_prefix(gmap, gfn, gfn + 1); } - if (pgste.vsie_notif && (ptep->h.p != newpte.h.p || newpte.h.i)) { + if (pte_needs_unshadow(*ptep, newpte, pgste)) { pgste.vsie_notif = 0; + pgste.vsie_gmem = 0; if (needs_lock) gmap_handle_vsie_unshadow_event(gmap, gfn); else @@ -198,6 +229,30 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true); } +/** + * crste_needs_unshadow() -- Check if the crste operations triggers unshadowing. + * @oldcrste: the previous value for the crste. + * @newcrste: the new value for the crste. + * + * If the old crste did not have the vsie_notif bit set, return false: the + * page is not involved in vsie and thus should not trigger an unshadow + * operation. Conversely, if the bit is set, it can only be g3 memory, since + * dat tables are never mapped using large pages. + * + * Similar to the pgste.vsie_gmem case of pte_needs_unshadow(), if the + * protection bit is changing or the new page is invalid, trigger an + * unshadow event. Also trigger an unshadow event if the new crste does not + * have the vsie_notif bit set. + * + * Return: true if an unshadow event should be triggered, otherwise false. + */ +static inline bool crste_needs_unshadow(union crste oldcrste, union crste newcrste) +{ + if (!oldcrste.s.fc1.vsie_notif) + return false; + return (newcrste.h.p != oldcrste.h.p) || newcrste.h.i || !newcrste.s.fc1.vsie_notif; +} + static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep, union crste oldcrste, union crste newcrste, gfn_t gfn, bool needs_lock) @@ -216,8 +271,7 @@ static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, unio newcrste.s.fc1.prefix_notif = 0; gmap_unmap_prefix(gmap, gfn, gfn + align); } - if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif && - (newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) { + if (crste_leaf(oldcrste) && crste_needs_unshadow(oldcrste, newcrste)) { newcrste.s.fc1.vsie_notif = 0; if (needs_lock) gmap_handle_vsie_unshadow_event(gmap, gfn); -- cgit v1.2.3 From a488e753de5853bec2e2e4d0c5a73f25d464bd2e Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 19 May 2026 17:01:13 +0200 Subject: KVM: s390: vsie: Fix redundant rmap entries The address passed to the gmap rmap was not being masked. As a consequence several different (but functionally equivalent) rmap entries were being created for each shadowed table. Fix this by properly masking the address depending on the table level. Signed-off-by: Claudio Imbrenda Fixes: a2c17f9270cc ("KVM: s390: New gmap code") Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gmap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index 8cff0cf5ce24..957126ab991c 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -1025,6 +1025,7 @@ int gmap_insert_rmap(struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn, int level) int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn, kvm_pfn_t pfn, int level, bool wr) { + unsigned long bitmask; union crste *crstep; union pgste pgste; union pte *ptep; @@ -1041,8 +1042,9 @@ int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gf if (rc) return rc; if (level <= TABLE_TYPE_REGION1) { + bitmask = -1UL << (8 + 11 * level); scoped_guard(spinlock, &sg->host_to_rmap_lock) - rc = gmap_insert_rmap(sg, p_gfn, r_gfn, level); + rc = gmap_insert_rmap(sg, p_gfn, r_gfn & bitmask, level); } if (rc) return rc; -- cgit v1.2.3 From 9029496abfae3c208336855ae6f3e1f5f881ef76 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 19 May 2026 17:01:14 +0200 Subject: KVM: s390: Properly reset zero bit in PGSTE In case of memory pressure, it's possible that a guest page gets freed and then almost immediately reused by the guest. If CMMA is enabled, _essa_clear_cbrl() will discard all pages that are either unused or zero. If a discarded page is reused before _essa_clear_cbrl() is called, and the pgste.zero bit is not cleared, the page will be discarded despite not being unused. When calling _gmap_ptep_xchg(), always clear the pgste.zero bit. This prevents the page from being accidentally discarded when not unused. Signed-off-by: Claudio Imbrenda Fixes: a2c17f9270cc ("KVM: s390: New gmap code") Reviewed-by: Steffen Eiden Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gmap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h index 6e51ec6066b4..742e42a31744 100644 --- a/arch/s390/kvm/gmap.h +++ b/arch/s390/kvm/gmap.h @@ -220,6 +220,7 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un } if (!ptep->s.d && newpte.s.d && !newpte.s.s) SetPageDirty(pfn_to_page(newpte.h.pfra)); + pgste.zero = 0; return __dat_ptep_xchg(ptep, pgste, newpte, gfn, gmap->asce, uses_skeys(gmap)); } -- cgit v1.2.3 From 5eb070769ea5e18405535609d1d3f6886f3755bd Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Fri, 22 May 2026 17:13:58 +0800 Subject: USB: cdc-acm: Fix bit overlap and move quirk definitions to header The VENDOR_CLASS_DATA_IFACE and ALWAYS_POLL_CTRL quirk flags added in commit f58752ebcb35 ("USB: cdc-acm: Add quirks for Yoga Book 9 14IAH10 INGENIC touchscreen") were placed inside the acm_ctrl_msg() function rather than in the header with the other quirk flags. Then, their values (BIT(9) and BIT(10)) collided with NO_UNION_12 which is already BIT(9). Move the definitions to drivers/usb/class/cdc-acm.h where they belong and shift them to BIT(10) and BIT(11) to avoid the overlap. Fixes: f58752ebcb35 ("USB: cdc-acm: Add quirks for Yoga Book 9 14IAH10 INGENIC touchscreen") Cc: stable Signed-off-by: Wentao Guan Link: https://patch.msgid.link/20260522091357.1301196-1-guanwentao@uniontech.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 2 -- drivers/usb/class/cdc-acm.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 54059e4fc6ed..ddf0b5963859 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -114,8 +114,6 @@ static int acm_ctrl_msg(struct acm *acm, int request, int value, int retval; retval = usb_autopm_get_interface(acm->control); -#define VENDOR_CLASS_DATA_IFACE BIT(9) /* data interface uses vendor-specific class */ -#define ALWAYS_POLL_CTRL BIT(10) /* keep ctrl URB active even without an open TTY */ if (retval) return retval; diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index 25fd5329a878..01f448a783c0 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -115,3 +115,5 @@ struct acm { #define DISABLE_ECHO BIT(7) #define MISSING_CAP_BRK BIT(8) #define NO_UNION_12 BIT(9) +#define VENDOR_CLASS_DATA_IFACE BIT(10) /* data interface uses vendor-specific class */ +#define ALWAYS_POLL_CTRL BIT(11) /* keep ctrl URB active even without an open TTY */ -- cgit v1.2.3 From ea66be25f0e934f49d24cd0c5845d13cdba3520b Mon Sep 17 00:00:00 2001 From: Myeonghun Pak Date: Tue, 12 May 2026 15:56:57 +0900 Subject: serial: altera_jtaguart: handle uart_add_one_port() failures altera_jtaguart_probe() maps the register window before registering the UART port, but it ignores failures from uart_add_one_port(). If port registration fails, probe still returns success and the mapping remains live until a later remove path that is not part of probe failure cleanup. Return the uart_add_one_port() error and unmap the register window on that failure path. This issue was identified during our ongoing static-analysis research while reviewing kernel code. Fixes: 5bcd601049c6 ("serial: Add driver for the Altera JTAG UART") Cc: stable Co-developed-by: Ijae Kim Signed-off-by: Ijae Kim Signed-off-by: Myeonghun Pak Link: https://patch.msgid.link/20260512065837.79528-1-mhun512@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/altera_jtaguart.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c index d47a62d1c9f7..20f079fe11d8 100644 --- a/drivers/tty/serial/altera_jtaguart.c +++ b/drivers/tty/serial/altera_jtaguart.c @@ -379,6 +379,7 @@ static int altera_jtaguart_probe(struct platform_device *pdev) struct resource *res_mem; int i = pdev->id; int irq; + int ret; /* -1 emphasizes that the platform must have one port, no .N suffix */ if (i == -1) @@ -418,7 +419,11 @@ static int altera_jtaguart_probe(struct platform_device *pdev) port->flags = UPF_BOOT_AUTOCONF; port->dev = &pdev->dev; - uart_add_one_port(&altera_jtaguart_driver, port); + ret = uart_add_one_port(&altera_jtaguart_driver, port); + if (ret) { + iounmap(port->membase); + return ret; + } return 0; } -- cgit v1.2.3 From a3bb136bff5e6a5e48cdd813246c9c4686feaaa9 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 15 May 2026 12:41:21 +0000 Subject: tty: serial: samsung: Remove redundant port lock acquisition in rx helpers Sashiko identified a deadlock when the console flow is engaged [1]. When console flow control is enabled (UPF_CONS_FLOW), s3c24xx_serial_stop_tx() calls s3c24xx_serial_rx_enable() and s3c24xx_serial_start_tx() calls s3c24xx_serial_rx_disable(). The serial core framework invokes the .stop_tx() and .start_tx() callbacks with the port->lock spinlock already held. Furthermore, all internal driver paths that invoke stop_tx (such as the DMA TX completion handler s3c24xx_serial_tx_dma_complete() or the PIO TX IRQ handler s3c24xx_serial_tx_irq()) also acquire port->lock prior to calling it. (Note that s3c24xx_serial_start_tx() is only invoked by the serial core). However, s3c24xx_serial_rx_enable() and s3c24xx_serial_rx_disable() unconditionally attempt to acquire port->lock again using uart_port_lock_irqsave(). Since spinlocks are not recursive, this causes a deadlock on the same CPU when console flow control is engaged. Remove the redundant lock acquisition from both rx helper functions. Cc: stable Fixes: b497549a035c ("[ARM] S3C24XX: Split serial driver into core and per-cpu drivers") Reported-by: John Ogness Closes: https://sashiko.dev/#/patchset/20260506121606.5805-1-john.ogness%40linutronix.de [1] Signed-off-by: Tudor Ambarus Link: https://patch.msgid.link/20260515-samsung-tty-flow-control-deadlock-v1-1-93255edbc9bc@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung_tty.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index e27806bf2cf3..17cd5bb100b1 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -245,12 +245,9 @@ static bool s3c24xx_serial_txempty_nofifo(const struct uart_port *port) static void s3c24xx_serial_rx_enable(struct uart_port *port) { struct s3c24xx_uart_port *ourport = to_ourport(port); - unsigned long flags; int count = 10000; u32 ucon, ufcon; - uart_port_lock_irqsave(port, &flags); - while (--count && !s3c24xx_serial_txempty_nofifo(port)) udelay(100); @@ -263,23 +260,18 @@ static void s3c24xx_serial_rx_enable(struct uart_port *port) wr_regl(port, S3C2410_UCON, ucon); ourport->rx_enabled = 1; - uart_port_unlock_irqrestore(port, flags); } static void s3c24xx_serial_rx_disable(struct uart_port *port) { struct s3c24xx_uart_port *ourport = to_ourport(port); - unsigned long flags; u32 ucon; - uart_port_lock_irqsave(port, &flags); - ucon = rd_regl(port, S3C2410_UCON); ucon &= ~S3C2410_UCON_RXIRQMODE; wr_regl(port, S3C2410_UCON, ucon); ourport->rx_enabled = 0; - uart_port_unlock_irqrestore(port, flags); } static void s3c24xx_serial_stop_tx(struct uart_port *port) -- cgit v1.2.3 From c3cce2e67bb22a223f5b8ef05db0fcde70994068 Mon Sep 17 00:00:00 2001 From: Jacques Nilo Date: Wed, 13 May 2026 15:30:23 +0200 Subject: serial: core: introduce guard(uart_port_lock_check_sysrq_irqsave) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit uart_handle_break() and uart_prepare_sysrq_char() (in include/linux/serial_core.h) capture a SysRq character into port->sysrq_ch while the port lock is held and rely on the unlock helper -- uart_unlock_and_check_sysrq_irqrestore() -- to dispatch the captured character to handle_sysrq() on scope exit. The existing guard(uart_port_lock_irqsave) cannot be used by IRQ handlers that process RX, because its destructor calls plain uart_port_unlock_irqrestore() and silently drops port->sysrq_ch. Add a dedicated guard(uart_port_lock_check_sysrq_irqsave) variant whose destructor is the sysrq-aware unlock helper. The lock side is identical to uart_port_lock_irqsave -- only the unlock-time behaviour differs. Callers that may capture SysRq characters must use guard(uart_port_lock_check_sysrq_irqsave); the existing guard(uart_port_lock_irqsave) keeps its current plain-unlock semantics for the many callers that do not process RX. The new macro is placed after the CONFIG_MAGIC_SYSRQ_SERIAL block so both definitions of uart_unlock_and_check_sysrq_irqrestore() (sysrq enabled and disabled) are visible at expansion time. When CONFIG_MAGIC_SYSRQ_SERIAL=n the destructor degenerates to plain uart_port_unlock_irqrestore(), so there is no overhead. No functional change on its own; users are converted in the following patches. Cc: stable@vger.kernel.org Signed-off-by: Jacques Nilo Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/3849af4bc55d5d2a424fa850844e94d641b2f8a6.1778675349.git.jnilo@free.fr Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 666430b47899..110ad4e2aef9 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -1274,6 +1274,18 @@ static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port } #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ +/* + * Variant of guard(uart_port_lock_irqsave) for IRQ handlers that may capture + * a SysRq character via uart_prepare_sysrq_char(). The destructor uses the + * sysrq-aware unlock helper so that a captured port->sysrq_ch is dispatched + * to handle_sysrq() on scope exit. The plain guard variant silently drops + * sysrq_ch and must not be used by callers that process RX. + */ +DEFINE_LOCK_GUARD_1(uart_port_lock_check_sysrq_irqsave, struct uart_port, + uart_port_lock_irqsave(_T->lock, &_T->flags), + uart_unlock_and_check_sysrq_irqrestore(_T->lock, _T->flags), + unsigned long flags); + /* * We do the SysRQ and SAK checking like this... */ -- cgit v1.2.3 From 71f42b2149a1307a97165b409493665579462ea0 Mon Sep 17 00:00:00 2001 From: Jacques Nilo Date: Wed, 13 May 2026 15:30:24 +0200 Subject: serial: 8250: dispatch SysRq character in serial8250_handle_irq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit serial8250_handle_irq() captures a SysRq character into port->sysrq_ch inside serial8250_handle_irq_locked() via uart_prepare_sysrq_char() (reached from serial8250_read_char()). Dispatch of that captured character to handle_sysrq() is expected to happen at port-unlock time, through uart_unlock_and_check_sysrq[_irqrestore](). After commit 8324a54f604d ("serial: 8250: Add serial8250_handle_irq_locked()") the function was reduced to a wrapper that takes the port lock via guard(uart_port_lock_irqsave) whose destructor is plain uart_port_unlock_irqrestore(). The sysrq-aware unlock helper is no longer called, so port->sysrq_ch is captured but never dispatched: BREAK + SysRq key is consumed silently. This was the very condition Johan Hovold's 853a9ae29e978 ("serial: 8250: fix handle_irq locking", 2021) introduced uart_unlock_and_check_sysrq_irqrestore() to address. Switch to the new guard(uart_port_lock_check_sysrq_irqsave), whose destructor is the sysrq-aware unlock helper, restoring the pre-split behaviour. Update the Context: comment on serial8250_handle_irq_locked() so future HW-specific 8250 wrappers know to use the same guard or the explicit sysrq-aware unlock. Verified on RTL8196E with CONFIG_MAGIC_SYSRQ_SERIAL=y: BREAK + 'h' on the console UART produces the SysRq help dump in dmesg and the brk counter in /proc/tty/driver/serial increments correctly. Fixes: 8324a54f604d ("serial: 8250: Add serial8250_handle_irq_locked()") Cc: stable@vger.kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Jacques Nilo Link: https://patch.msgid.link/52692ae6c3501f7940347cef364ad7fcacaab7e5.1778675349.git.jnilo@free.fr Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index af78cc02f38e..c66ba714caa5 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1784,7 +1784,10 @@ static bool handle_rx_dma(struct uart_8250_port *up, unsigned int iir) } /* - * Context: port's lock must be held by the caller. + * Context: port's lock must be held by the caller. The caller must + * release it via guard(uart_port_lock_check_sysrq_irqsave) or + * uart_unlock_and_check_sysrq_irqrestore(), which captures SysRq + * character on unlock. */ void serial8250_handle_irq_locked(struct uart_port *port, unsigned int iir) { @@ -1837,7 +1840,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) if (iir & UART_IIR_NO_INT) return 0; - guard(uart_port_lock_irqsave)(port); + guard(uart_port_lock_check_sysrq_irqsave)(port); serial8250_handle_irq_locked(port, iir); return 1; -- cgit v1.2.3 From 2e211723953f7740e54b53f3d3a0d5e351a5e223 Mon Sep 17 00:00:00 2001 From: Jacques Nilo Date: Wed, 13 May 2026 15:30:25 +0200 Subject: serial: 8250_dw: dispatch SysRq character in dw8250_handle_irq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dw8250_handle_irq() calls serial8250_handle_irq_locked() with the port lock held via guard(uart_port_lock_irqsave). The guard destructor is plain uart_port_unlock_irqrestore(), so a SysRq character captured into port->sysrq_ch by uart_prepare_sysrq_char() is dropped without ever being dispatched to handle_sysrq(). This is the same regression pattern as in serial8250_handle_irq(), introduced when 883c5a2bc934 ("serial: 8250_dw: Rework dw8250_handle_irq() locking and IIR handling") moved the function to the guard()-based locking scheme without using the sysrq-aware unlock helper. Switch to guard(uart_port_lock_check_sysrq_irqsave) so that captured sysrq_ch is dispatched on scope exit, matching the fix in serial8250_handle_irq(). Fixes: 883c5a2bc934 ("serial: 8250_dw: Rework dw8250_handle_irq() locking and IIR handling") Cc: stable@vger.kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Jacques Nilo Link: https://patch.msgid.link/ed56fcaf4af24e4ed011a7bce206e0182acb761c.1778675349.git.jnilo@free.fr Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 94beadb4024d..2af0c4d0ad82 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -427,7 +427,7 @@ static int dw8250_handle_irq(struct uart_port *p) unsigned int quirks = d->pdata->quirks; unsigned int status; - guard(uart_port_lock_irqsave)(p); + guard(uart_port_lock_check_sysrq_irqsave)(p); switch (FIELD_GET(DW_UART_IIR_IID, iir)) { case UART_IIR_NO_INT: -- cgit v1.2.3 From ca904f4b42355287bc5ce8b7550ebe909cda4c2c Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 6 May 2026 23:42:31 +0100 Subject: serial: dz: Fix bootconsole message clobbering at chip reset In the DZ interface as implemented by the DC7085 gate array the serial transmitters are double buffered, meaning that at the time a transmitter is ready to accept the next character there is one in the transmit shift register still being sent to the line. Issuing a master clear at this time causes this character to be lost, so wait an extra amount of time sufficient for the transmit shift register to drain at 9600bps, which is the baud rate setting used by the firmware console. Mind the specified 1.4us TRDY recovery time in the course and continue using iob() as the completion barrier, since the platforms involved use a write buffer that can delay and combine writes, and reorder them with respect to reads regardless of the MMIO locations accessed and we still lack a platform-independent handler for that. When called from dz_serial_console_init() this is too early for fsleep() to work and even before lpj has been calculated and therefore the delay is actually not sufficient for the transmitter to drain and is merely a placeholder now. This will be addressed in a follow-up change. Fixes: e6ee512f5a77 ("dz.c: Resource management") Signed-off-by: Maciej W. Rozycki Cc: stable@vger.kernel.org # v2.6.25+ Link: https://patch.msgid.link/alpine.DEB.2.21.2605062259080.46195@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/dz.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/tty/serial/dz.c b/drivers/tty/serial/dz.c index e53c54353c3e..d14a40e8cc0a 100644 --- a/drivers/tty/serial/dz.c +++ b/drivers/tty/serial/dz.c @@ -542,10 +542,31 @@ static int dz_encode_baud_rate(unsigned int baud) static void dz_reset(struct dz_port *dport) { struct dz_mux *mux = dport->mux; + unsigned short tcr; + int loops = 10000; if (mux->initialised) return; + tcr = dz_in(dport, DZ_TCR); + + /* Do not disturb any ongoing transmissions. */ + if (dz_in(dport, DZ_CSR) & DZ_MSE) { + unsigned short csr, mask; + + mask = tcr; + while ((mask & DZ_LNENB) && loops--) { + csr = dz_in(dport, DZ_CSR); + if (!(csr & DZ_TRDY)) + continue; + mask &= ~(1 << ((csr & DZ_TLINE) >> 8)); + dz_out(dport, DZ_TCR, mask); + iob(); + udelay(2); /* 1.4us TRDY recovery. */ + } + udelay(1200); /* Transmitter drain. */ + } + dz_out(dport, DZ_CSR, DZ_CLR); while (dz_in(dport, DZ_CSR) & DZ_CLR); iob(); -- cgit v1.2.3 From 7f127b2208e5e2b817243cad41fe4211a6d5a7a3 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 6 May 2026 23:42:35 +0100 Subject: serial: dz: Fix bootconsole handover lockup Calling dz_reset() in the course of setting up the serial device causes line parameters to be reset and the transmitter disabled. We've been lucky in that no message is usually produced to the kernel log between this call and the later call to uart_set_options() in the course of console setup done by dz_serial_console_init(), or the system would hang as the console output handler in the firmware tried to access a port the transmitter of which has been disabled and line parameters messed up. This will change with the next change to the driver, so fix dz_reset() such that line parameters are set for 9600n8 console operation as with the system firmware and the transmitter re-enabled after reset. This also means dz_pm() serves no purpose anymore, so drop it. Fixes: e6ee512f5a77 ("dz.c: Resource management") Signed-off-by: Maciej W. Rozycki Cc: stable@vger.kernel.org # v2.6.25+ Link: https://patch.msgid.link/alpine.DEB.2.21.2605062302010.46195@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/dz.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/tty/serial/dz.c b/drivers/tty/serial/dz.c index d14a40e8cc0a..03f4fc9248b8 100644 --- a/drivers/tty/serial/dz.c +++ b/drivers/tty/serial/dz.c @@ -571,6 +571,18 @@ static void dz_reset(struct dz_port *dport) while (dz_in(dport, DZ_CSR) & DZ_CLR); iob(); + /* + * Set parameters across all lines such as not to interfere + * with the initial PROM-based console. Otherwise any output + * produced before the console handover would cause the system + * firmware to produce rubbish. + */ + for (int line = 0; line < DZ_NB_PORT; line++) + dz_out(dport, DZ_LPR, DZ_B9600 | DZ_CS8 | line); + + /* Re-enable transmission for the initial PROM-based console. */ + dz_out(dport, DZ_TCR, tcr); + /* Enable scanning. */ dz_out(dport, DZ_CSR, DZ_MSE); @@ -654,26 +666,6 @@ static void dz_set_termios(struct uart_port *uport, struct ktermios *termios, uart_port_unlock_irqrestore(&dport->port, flags); } -/* - * Hack alert! - * Required solely so that the initial PROM-based console - * works undisturbed in parallel with this one. - */ -static void dz_pm(struct uart_port *uport, unsigned int state, - unsigned int oldstate) -{ - struct dz_port *dport = to_dport(uport); - unsigned long flags; - - uart_port_lock_irqsave(&dport->port, &flags); - if (state < 3) - dz_start_tx(&dport->port); - else - dz_stop_tx(&dport->port); - uart_port_unlock_irqrestore(&dport->port, flags); -} - - static const char *dz_type(struct uart_port *uport) { return "DZ"; @@ -769,7 +761,6 @@ static const struct uart_ops dz_ops = { .startup = dz_startup, .shutdown = dz_shutdown, .set_termios = dz_set_termios, - .pm = dz_pm, .type = dz_type, .release_port = dz_release_port, .request_port = dz_request_port, @@ -894,10 +885,7 @@ static int __init dz_console_setup(struct console *co, char *options) if (ret) return ret; - spin_lock_init(&dport->port.lock); /* For dz_pm(). */ - dz_reset(dport); - dz_pm(uport, 0, -1); if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); -- cgit v1.2.3 From 6c05cf72e13314ce9b770b5951695dc5a2152920 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 6 May 2026 23:42:39 +0100 Subject: serial: zs: Fix bootconsole handover lockup Calling zs_reset() in the course of setting up the serial device causes line parameters to be reset and the transmitter disabled. We've been lucky in that no message is usually produced to the kernel log between this call and the later call to uart_set_options() in the course of console setup done by zs_serial_console_init(), or the system would hang as the console output handler in the firmware tried to access a port the transmitter of which has been disabled and line parameters messed up. This will change with the next change to the driver, so fix zs_reset() such that line parameters are set for 9600n8 console operation as with the system firmware and the transmitter re-enabled after reset. This also means zs_pm() serves no purpose anymore, so drop it. Fixes: 8b4a40809e53 ("zs: move to the serial subsystem") Signed-off-by: Maciej W. Rozycki Cc: stable@vger.kernel.org # v2.6.23+ Link: https://patch.msgid.link/alpine.DEB.2.21.2605062308040.46195@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/zs.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/drivers/tty/serial/zs.c b/drivers/tty/serial/zs.c index 31e12645f66b..bb10cd8aa7dc 100644 --- a/drivers/tty/serial/zs.c +++ b/drivers/tty/serial/zs.c @@ -105,18 +105,24 @@ struct zs_parms { static struct zs_scc zs_sccs[ZS_NUM_SCCS]; +/* + * Set parameters in WR5, WR12, WR13 such as not to interfere + * with the initial PROM-based console. Otherwise any output + * produced before the console handover would cause the system + * firmware to hang (TxENAB) or produce rubbish (Tx8, B9600). + */ static u8 zs_init_regs[ZS_NUM_REGS] __initdata = { 0, /* write 0 */ PAR_SPEC, /* write 1 */ 0, /* write 2 */ 0, /* write 3 */ X16CLK | SB1, /* write 4 */ - 0, /* write 5 */ + Tx8 | TxENAB, /* write 5 */ 0, 0, 0, /* write 6, 7, 8 */ MIE | DLC | NV, /* write 9 */ NRZ, /* write 10 */ TCBR | RCBR, /* write 11 */ - 0, 0, /* BRG time constant, write 12 + 13 */ + 0x16, 0x00, /* BRG time constant, write 12 + 13 */ BRSRC | BRENABL, /* write 14 */ 0, /* write 15 */ }; @@ -956,23 +962,6 @@ static void zs_set_termios(struct uart_port *uport, struct ktermios *termios, spin_unlock_irqrestore(&scc->zlock, flags); } -/* - * Hack alert! - * Required solely so that the initial PROM-based console - * works undisturbed in parallel with this one. - */ -static void zs_pm(struct uart_port *uport, unsigned int state, - unsigned int oldstate) -{ - struct zs_port *zport = to_zport(uport); - - if (state < 3) - zport->regs[5] |= TxENAB; - else - zport->regs[5] &= ~TxENAB; - write_zsreg(zport, R5, zport->regs[5]); -} - static const char *zs_type(struct uart_port *uport) { @@ -1055,7 +1044,6 @@ static const struct uart_ops zs_ops = { .startup = zs_startup, .shutdown = zs_shutdown, .set_termios = zs_set_termios, - .pm = zs_pm, .type = zs_type, .release_port = zs_release_port, .request_port = zs_request_port, @@ -1210,7 +1198,6 @@ static int __init zs_console_setup(struct console *co, char *options) return ret; zs_reset(zport); - zs_pm(uport, 0, -1); if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); -- cgit v1.2.3 From 8572955630f30948837088aa98bcbe0532d1ceac Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 6 May 2026 23:42:43 +0100 Subject: serial: zs: Switch to using channel reset Switch the driver to using the channel reset rather than hardware reset, simplifying handling by removing an interference between channels that causes the other channel to become uninitialised afterwards. There is little difference between the two kinds of reset in terms of register settings that result, and we initialise the whole register set right away anyway. However this prevents a hang from happening should the console output handler in the firmware try to access the other port whose transmitter has been disabled and line parameters messed up. For example this will happen if the keyboard port (port A) is chosen for the system console, unusually but not insanely for a headless system, as the port is wired to a standard DA-15 connector and an adapter can be easily made. Or with the next change in place this would happen for the regular console port (port B), since the keyboard port (port A) will be initialised first. Just remove the unnecessary complication then, a channel reset is good enough. We still need the initialisation marker, now per channel rather than per SCC, as for the console port zs_reset() will be called twice: once early on via zs_serial_console_init() for the console setup only, and then again via zs_config_port() as the port is associated with a TTY device. Fixes: 8b4a40809e53 ("zs: move to the serial subsystem") Signed-off-by: Maciej W. Rozycki Cc: stable@vger.kernel.org # v2.6.23+ Link: https://patch.msgid.link/alpine.DEB.2.21.2605062323430.46195@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/zs.c | 7 ++++--- drivers/tty/serial/zs.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/zs.c b/drivers/tty/serial/zs.c index bb10cd8aa7dc..71cab10a33c3 100644 --- a/drivers/tty/serial/zs.c +++ b/drivers/tty/serial/zs.c @@ -832,21 +832,22 @@ static void zs_shutdown(struct uart_port *uport) static void zs_reset(struct zs_port *zport) { + struct zs_port *zport_a = &zport->scc->zport[ZS_CHAN_A]; struct zs_scc *scc = zport->scc; int irq; unsigned long flags; spin_lock_irqsave(&scc->zlock, flags); irq = !irqs_disabled_flags(flags); - if (!scc->initialised) { + if (!zport->initialised) { /* Reset the pointer first, just in case... */ read_zsreg(zport, R0); /* And let the current transmission finish. */ zs_line_drain(zport, irq); - write_zsreg(zport, R9, FHWRES); + write_zsreg(zport, R9, zport == zport_a ? CHRA : CHRB); udelay(10); write_zsreg(zport, R9, 0); - scc->initialised = 1; + zport->initialised = 1; } load_zsregs(zport, zport->regs, irq); spin_unlock_irqrestore(&scc->zlock, flags); diff --git a/drivers/tty/serial/zs.h b/drivers/tty/serial/zs.h index 26ef8eafa1c1..8e51f847bc03 100644 --- a/drivers/tty/serial/zs.h +++ b/drivers/tty/serial/zs.h @@ -22,6 +22,7 @@ struct zs_port { struct zs_scc *scc; /* Containing SCC. */ struct uart_port port; /* Underlying UART. */ + int initialised; /* For the console port. */ int clk_mode; /* May be 1, 16, 32, or 64. */ @@ -41,7 +42,6 @@ struct zs_scc { struct zs_port zport[2]; spinlock_t zlock; atomic_t irq_guard; - int initialised; }; #endif /* __KERNEL__ */ -- cgit v1.2.3 From 5d7a49d60b8fda66da60e240fd7315232fa1754f Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 6 May 2026 23:42:48 +0100 Subject: serial: dz: Convert to use a platform device Prevent a crash from happening as the first serial port is initialised: Console: switching to colour frame buffer device 160x64 tgafb: SFB+ detected, rev=0x02 fb0: Digital ZLX-E1 frame buffer device at 0x1e000000 DECstation DZ serial driver version 1.04 CPU 0 Unable to handle kernel paging request at virtual address 000000bc, epc == 8048b3a4, ra == 80470a78 Oops[#1]: CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.19.0-dirty #35 NONE $ 0 : 00000000 1000ac00 00000004 804707ac $ 4 : 00000000 80e20850 80e20858 81000030 $ 8 : 00000000 8072c81c 00000008 fefefeff $12 : 6c616972 00000006 80c5917f 69726420 $16 : 80e20800 00000000 808f8968 80e20800 $20 : 00000000 807f5a90 808b0094 808d3bc8 $24 : 00000018 80479030 $28 : 80c2e000 80c2fd70 00000069 80470a78 Hi : 00000004 Lo : 00000000 epc : 8048b3a4 __dev_fwnode+0x0/0xc ra : 80470a78 serial_base_ctrl_add+0xa0/0x168 Status: 1000ac04 IEp Cause : 30000008 (ExcCode 02) BadVA : 000000bc PrId : 00000220 (R3000) Modules linked in: Process swapper/0 (pid: 1, threadinfo=(ptrval), task=(ptrval), tls=00000000) Stack : 00400044 00400040 8046f4cc 00000000 808a6148 808a0000 808f8968 8086983c 808e0000 8046fc84 1000ac01 00000028 80e20700 802ba3f8 80e20700 80d34a94 80c1b900 80e20700 80e20700 80e20700 80e20700 80444650 00000000 00000000 00000000 807f5a90 808b0094 80447080 00400040 808e0000 80d34a94 808a6148 80d34a94 00000004 80e20700 00000000 8076974c 80469810 80c2fe3c 1000ac01 ... Call Trace: [<8048b3a4>] __dev_fwnode+0x0/0xc [<80470a78>] serial_base_ctrl_add+0xa0/0x168 [<8046fc84>] serial_core_register_port+0x1c8/0x974 [<808c6af0>] dz_init+0x74/0xc8 [<800470e0>] do_one_initcall+0x44/0x2d4 [<808b111c>] kernel_init_freeable+0x258/0x308 [<8072e434>] kernel_init+0x20/0x114 [<80049cd0>] ret_from_kernel_thread+0x14/0x1c Code: 27bd0018 03e00008 2402ffea <8c8200bc> 03e00008 00000000 27bdffc0 afbe0038 afb30024 ---[ end trace 0000000000000000 ]--- -- where a pointer is dereferenced that has been derived from a null pointer to the port's parent device. Since no device is available with legacy probing and it's not anymore a preferable way to discover devices anyway, switch the driver to using a platform device and use it as the port's parent device. Update resource handling accordingly and only request the actual span of addresses used within the slot, which will have had its resource already requested by generic platform device code. Use platform_driver_probe() not just because the DZ device is fixed with solder on board and not straightforward to remove, but foremost because the associated TTY's major device number is the same as used by the zs driver and the first driver to claim it will prevent the other one from using it. Either one DZ device or some SCC devices will be present in a given system but never both at a time, and therefore we want the major device number to be claimed by the first driver to actually successfully bind to its device and platform_driver_probe() is a way to fulfil that. An unfortunate consequence of the switch to a platform device is we now hand the console over from the bootconsole much later in the bootstrap. The firmware console handler appears good enough though to work so late and in particular with interrupts enabled. Conversely only starting the console port so late lets the reset code fully utilise our delay handlers, so switch from udelay() to fsleep() for transmitter draining so as to avoid busy-waiting for an excessive amount of time. Fixes: 84a9582fd203 ("serial: core: Start managing serial controllers to enable runtime PM") Signed-off-by: Maciej W. Rozycki Cc: stable@vger.kernel.org # needs to use .remove_new for <= 6.10 Link: https://patch.msgid.link/alpine.DEB.2.21.2605062326540.46195@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- arch/mips/dec/platform.c | 55 +++++++++++++++++++++- drivers/tty/serial/dz.c | 116 +++++++++++++++++++++++------------------------ 2 files changed, 110 insertions(+), 61 deletions(-) diff --git a/arch/mips/dec/platform.c b/arch/mips/dec/platform.c index c4fcb8c58e01..fdecc91ee22a 100644 --- a/arch/mips/dec/platform.c +++ b/arch/mips/dec/platform.c @@ -10,6 +10,13 @@ #include #include +#include + +#include +#include +#include +#include + static struct resource dec_rtc_resources[] = { { .name = "rtc", @@ -30,11 +37,57 @@ static struct platform_device dec_rtc_device = { .num_resources = ARRAY_SIZE(dec_rtc_resources), }; +static struct resource dec_dz_resources[] = { + { .name = "dz", .flags = IORESOURCE_MEM, }, + { .name = "dz", .flags = IORESOURCE_IRQ, }, +}; + +static struct platform_device dec_dz_device = { + .name = "dz", + .id = PLATFORM_DEVID_NONE, + .resource = dec_dz_resources, + .num_resources = ARRAY_SIZE(dec_dz_resources), +}; + +static struct platform_device *dec_dz_devices[] __initdata = { + &dec_dz_device, +}; + static int __init dec_add_devices(void) { + int ret1, ret2; + int num_dz; + int irq, i; + dec_rtc_resources[0].start = RTC_PORT(0); dec_rtc_resources[0].end = RTC_PORT(0) + dec_kn_slot_size - 1; - return platform_device_register(&dec_rtc_device); + + i = 0; + irq = dec_interrupt[DEC_IRQ_DZ11]; + if (IS_ENABLED(CONFIG_32BIT) && irq >= 0) { + resource_size_t base; + + switch (mips_machtype) { + case MACH_DS23100: + case MACH_DS5100: + base = dec_kn_slot_base + KN01_DZ11; + break; + default: + base = dec_kn_slot_base + KN02_DZ11; + break; + } + dec_dz_device.resource[0].start = base; + dec_dz_device.resource[0].end = base + dec_kn_slot_size - 1; + dec_dz_device.resource[1].start = irq; + dec_dz_device.resource[1].end = irq; + i++; + } + num_dz = i; + + ret1 = platform_device_register(&dec_rtc_device); + ret2 = IS_ENABLED(CONFIG_32BIT) ? + platform_add_devices(dec_dz_devices, num_dz) : 0; + return ret1 ? ret1 : ret2; } device_initcall(dec_add_devices); diff --git a/drivers/tty/serial/dz.c b/drivers/tty/serial/dz.c index 03f4fc9248b8..39d93e9c2d15 100644 --- a/drivers/tty/serial/dz.c +++ b/drivers/tty/serial/dz.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -48,14 +49,6 @@ #include #include -#include - -#include -#include -#include -#include -#include -#include #include "dz.h" @@ -65,7 +58,9 @@ MODULE_LICENSE("GPL"); static char dz_name[] __initdata = "DECstation DZ serial driver version "; -static char dz_version[] __initdata = "1.04"; +static char dz_version[] __initdata = "1.05"; + +#define DZ_IO_SIZE 0x20 /* IOMEM space size. */ struct dz_port { struct dz_mux *mux; @@ -81,6 +76,7 @@ struct dz_mux { }; static struct dz_mux dz_mux; +static struct uart_driver dz_reg; static inline struct dz_port *to_dport(struct uart_port *uport) { @@ -564,7 +560,7 @@ static void dz_reset(struct dz_port *dport) iob(); udelay(2); /* 1.4us TRDY recovery. */ } - udelay(1200); /* Transmitter drain. */ + fsleep(1200); /* Transmitter drain. */ } dz_out(dport, DZ_CSR, DZ_CLR); @@ -681,14 +677,13 @@ static void dz_release_port(struct uart_port *uport) map_guard = atomic_add_return(-1, &mux->map_guard); if (!map_guard) - release_mem_region(uport->mapbase, dec_kn_slot_size); + release_mem_region(uport->mapbase, DZ_IO_SIZE); } static int dz_map_port(struct uart_port *uport) { if (!uport->membase) - uport->membase = ioremap(uport->mapbase, - dec_kn_slot_size); + uport->membase = ioremap(uport->mapbase, DZ_IO_SIZE); if (!uport->membase) { printk(KERN_ERR "dz: Cannot map MMIO\n"); return -ENOMEM; @@ -704,8 +699,7 @@ static int dz_request_port(struct uart_port *uport) map_guard = atomic_add_return(1, &mux->map_guard); if (map_guard == 1) { - if (!request_mem_region(uport->mapbase, dec_kn_slot_size, - "dz")) { + if (!request_mem_region(uport->mapbase, DZ_IO_SIZE, "dz")) { atomic_add(-1, &mux->map_guard); printk(KERN_ERR "dz: Unable to reserve MMIO resource\n"); @@ -716,7 +710,7 @@ static int dz_request_port(struct uart_port *uport) if (ret) { map_guard = atomic_add_return(-1, &mux->map_guard); if (!map_guard) - release_mem_region(uport->mapbase, dec_kn_slot_size); + release_mem_region(uport->mapbase, DZ_IO_SIZE); return ret; } return 0; @@ -768,20 +762,15 @@ static const struct uart_ops dz_ops = { .verify_port = dz_verify_port, }; -static void __init dz_init_ports(void) +static int __init dz_probe(struct platform_device *pdev) { - static int first = 1; - unsigned long base; + struct resource *mem_resource, *irq_resource; int line; - if (!first) - return; - first = 0; - - if (mips_machtype == MACH_DS23100 || mips_machtype == MACH_DS5100) - base = dec_kn_slot_base + KN01_DZ11; - else - base = dec_kn_slot_base + KN02_DZ11; + mem_resource = platform_get_resource(pdev, IORESOURCE_MEM, 0); + irq_resource = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!mem_resource || !irq_resource) + return -ENODEV; for (line = 0; line < DZ_NB_PORT; line++) { struct dz_port *dport = &dz_mux.dport[line]; @@ -789,14 +778,33 @@ static void __init dz_init_ports(void) dport->mux = &dz_mux; - uport->irq = dec_interrupt[DEC_IRQ_DZ11]; + uport->dev = &pdev->dev; + uport->irq = irq_resource->start; uport->fifosize = 1; uport->iotype = UPIO_MEM; uport->flags = UPF_BOOT_AUTOCONF; uport->ops = &dz_ops; uport->line = line; - uport->mapbase = base; + uport->mapbase = mem_resource->start; uport->has_sysrq = IS_ENABLED(CONFIG_SERIAL_DZ_CONSOLE); + + if (uart_add_one_port(&dz_reg, uport)) + uport->dev = NULL; + } + + return 0; +} + +static void __exit dz_remove(struct platform_device *pdev) +{ + int line; + + for (line = DZ_NB_PORT - 1; line >= 0; line--) { + struct dz_port *dport = &dz_mux.dport[line]; + struct uart_port *uport = &dport->port; + + if (uport->dev) + uart_remove_one_port(&dz_reg, uport); } } @@ -879,21 +887,14 @@ static int __init dz_console_setup(struct console *co, char *options) int bits = 8; int parity = 'n'; int flow = 'n'; - int ret; - - ret = dz_map_port(uport); - if (ret) - return ret; - - dz_reset(dport); + if (!dport->mux) + return -ENODEV; if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); - - return uart_set_options(&dport->port, co, baud, parity, bits, flow); + return uart_set_options(uport, co, baud, parity, bits, flow); } -static struct uart_driver dz_reg; static struct console dz_console = { .name = "ttyS", .write = dz_console_print, @@ -904,18 +905,6 @@ static struct console dz_console = { .data = &dz_reg, }; -static int __init dz_serial_console_init(void) -{ - if (!IOASIC) { - dz_init_ports(); - register_console(&dz_console); - return 0; - } else - return -ENXIO; -} - -console_initcall(dz_serial_console_init); - #define SERIAL_DZ_CONSOLE &dz_console #else #define SERIAL_DZ_CONSOLE NULL @@ -931,25 +920,32 @@ static struct uart_driver dz_reg = { .cons = SERIAL_DZ_CONSOLE, }; +static struct platform_driver dz_driver = { + .remove = __exit_p(dz_remove), + .driver = { .name = "dz" }, +}; + static int __init dz_init(void) { - int ret, i; - - if (IOASIC) - return -ENXIO; + int ret; printk("%s%s\n", dz_name, dz_version); - dz_init_ports(); - ret = uart_register_driver(&dz_reg); if (ret) return ret; + ret = platform_driver_probe(&dz_driver, dz_probe); + if (ret) + uart_unregister_driver(&dz_reg); - for (i = 0; i < DZ_NB_PORT; i++) - uart_add_one_port(&dz_reg, &dz_mux.dport[i].port); + return ret; +} - return 0; +static void __exit dz_exit(void) +{ + platform_driver_unregister(&dz_driver); + uart_unregister_driver(&dz_reg); } module_init(dz_init); +module_exit(dz_exit); -- cgit v1.2.3 From 7cac59d08a73cb866ec51a483a6f3fe0f531947c Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 6 May 2026 23:42:52 +0100 Subject: serial: zs: Convert to use a platform device Prevent a crash from happening as the first serial port is initialised: Console: switching to mono frame buffer device 160x64 fb0: PMAG-AA frame buffer device at tc0 DECstation Z85C30 serial driver version 0.10 CPU 0 Unable to handle kernel paging request at virtual address 0000002c, epc == 803ab00c, ra == 803aafe0 Oops[#1]: CPU: 0 PID: 1 Comm: swapper Not tainted 6.4.0-rc3-00031-g84a9582fd203-dirty #57 $ 0 : 00000000 10012c00 803aaeb0 00000000 $ 4 : 80e12f60 80e12f50 80e12f58 81000030 $ 8 : 00000000 805ff37c 00000000 33433538 $12 : 65732030 00000006 80c2915d 6c616972 $16 : 80e12f00 807b7630 00000000 00000000 $20 : 00000004 00000348 000001a0 807623b8 $24 : 00000018 00000000 $28 : 80c24000 80c25d60 8078b148 803aafe0 Hi : 00000000 Lo : 00000000 epc : 803ab00c serial_base_ctrl_add+0x78/0xf4 ra : 803aafe0 serial_base_ctrl_add+0x4c/0xf4 Status: 10012c03 KERNEL EXL IE Cause : 00000008 (ExcCode 02) BadVA : 0000002c PrId : 00000440 (R4400SC) Modules linked in: Process swapper (pid: 1, threadinfo=(ptrval), task=(ptrval), tls=00000000) Stack : 80760000 00000cc0 00400044 00400040 803aa02c 80d61ab8 00000000 807b7630 80760000 807623b8 807b7628 803aa644 80386998 00000000 80e17780 80220f68 80e17780 80d61ab8 80c17d80 80e17780 80e17780 8063c798 80e17780 80383fa0 00000010 80e17780 00000000 80386998 807a0000 00000000 00400040 8038f848 807623b8 80d61ab8 00000004 80e17780 00000000 803a68e4 80c25e2c 803bb884 ... Call Trace: [<803ab00c>] serial_base_ctrl_add+0x78/0xf4 [<803aa644>] serial_core_register_port+0x174/0x69c [<8077e9ac>] zs_init+0xc8/0xfc [<800404d4>] do_one_initcall+0x40/0x2ac [<8076cecc>] kernel_init_freeable+0x1e4/0x270 [<80605bec>] kernel_init+0x20/0x108 [<800431e8>] ret_from_kernel_thread+0x14/0x1c Code: 2442aeb0 ae120024 ae0200d0 <8c67002c> 50e00001 8c670000 3c06806e 3c05806e afb30010 ---[ end trace 0000000000000000 ]--- (report at the offending commit) -- where a pointer is dereferenced that has been derived from a null pointer to the port's parent device. Since no device is available with legacy probing and it's not anymore a preferable way to discover devices anyway, switch the driver to using a platform device and use it as the port's parent device. Update resource handling accordingly and only request the actual span of addresses used within the slot, which will have had its resource already requested by generic platform device code. Use platform_driver_probe() not just because SCC devices are fixed with solder on board and not straightforward to remove, but foremost because the associated TTY's major device number is the same as used by the dz driver and the first driver to claim it will prevent the other one from using it. Either one DZ device or some SCC devices will be present in a given system but never both at a time, and therefore we want the major device number to be claimed by the first driver to actually successfully bind to its device and platform_driver_probe() is a way to fulfil that. An unfortunate consequence of the switch to a platform device is we now hand the console over from the bootconsole much later in the bootstrap. The firmware console handler appears good enough though to work so late and in particular with interrupts enabled. Since there is one way only remaining to reach zs_reset() now, remove the port initialisation marker as no longer needed and go through the channel reset unconditionally. Fixes: 84a9582fd203 ("serial: core: Start managing serial controllers to enable runtime PM") Signed-off-by: Maciej W. Rozycki Cc: stable@vger.kernel.org # needs to use .remove_new for <= 6.10 Link: https://patch.msgid.link/alpine.DEB.2.21.2605062328480.46195@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- arch/mips/dec/platform.c | 60 ++++++++++++++- drivers/tty/serial/zs.c | 192 ++++++++++++++++++----------------------------- drivers/tty/serial/zs.h | 1 - 3 files changed, 129 insertions(+), 124 deletions(-) diff --git a/arch/mips/dec/platform.c b/arch/mips/dec/platform.c index fdecc91ee22a..723ce16cbfc0 100644 --- a/arch/mips/dec/platform.c +++ b/arch/mips/dec/platform.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -53,10 +54,37 @@ static struct platform_device *dec_dz_devices[] __initdata = { &dec_dz_device, }; +static struct resource dec_zs_resources[][2] = { + { + { .name = "scc0", .flags = IORESOURCE_MEM, }, + { .name = "scc0", .flags = IORESOURCE_IRQ, }, + }, + { + { .name = "scc1", .flags = IORESOURCE_MEM, }, + { .name = "scc1", .flags = IORESOURCE_IRQ, }, + }, +}; + +static struct platform_device dec_zs_device[] = { + { + .name = "zs", + .id = 0, + .resource = dec_zs_resources[0], + .num_resources = ARRAY_SIZE(dec_zs_resources[0]), + }, + { + .name = "zs", + .id = 1, + .resource = dec_zs_resources[1], + .num_resources = ARRAY_SIZE(dec_zs_resources[1]), + }, +}; + static int __init dec_add_devices(void) { - int ret1, ret2; - int num_dz; + struct platform_device *dec_zs_devices[ARRAY_SIZE(dec_zs_device)]; + int ret1, ret2, ret3; + int num_dz, num_zs; int irq, i; dec_rtc_resources[0].start = RTC_PORT(0); @@ -84,10 +112,36 @@ static int __init dec_add_devices(void) } num_dz = i; + i = 0; + irq = dec_interrupt[DEC_IRQ_SCC0]; + if (irq >= 0) { + resource_size_t base = dec_kn_slot_base + IOASIC_SCC0; + + dec_zs_device[i].resource[0].start = base; + dec_zs_device[i].resource[0].end = base + dec_kn_slot_size - 1; + dec_zs_device[i].resource[1].start = irq; + dec_zs_device[i].resource[1].end = irq; + dec_zs_devices[i] = &dec_zs_device[i]; + i++; + } + irq = dec_interrupt[DEC_IRQ_SCC1]; + if (irq >= 0) { + resource_size_t base = dec_kn_slot_base + IOASIC_SCC1; + + dec_zs_device[i].resource[0].start = base; + dec_zs_device[i].resource[0].end = base + dec_kn_slot_size - 1; + dec_zs_device[i].resource[1].start = irq; + dec_zs_device[i].resource[1].end = irq; + dec_zs_devices[i] = &dec_zs_device[i]; + i++; + } + num_zs = i; + ret1 = platform_device_register(&dec_rtc_device); ret2 = IS_ENABLED(CONFIG_32BIT) ? platform_add_devices(dec_dz_devices, num_dz) : 0; - return ret1 ? ret1 : ret2; + ret3 = platform_add_devices(dec_zs_devices, num_zs); + return ret1 ? ret1 : ret2 ? ret2 : ret3; } device_initcall(dec_add_devices); diff --git a/drivers/tty/serial/zs.c b/drivers/tty/serial/zs.c index 71cab10a33c3..8f92b4129a38 100644 --- a/drivers/tty/serial/zs.c +++ b/drivers/tty/serial/zs.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -66,10 +67,6 @@ #include -#include -#include -#include - #include "zs.h" @@ -79,7 +76,7 @@ MODULE_LICENSE("GPL"); static char zs_name[] __initdata = "DECstation Z85C30 serial driver version "; -static char zs_version[] __initdata = "0.10"; +static char zs_version[] __initdata = "0.11"; /* * It would be nice to dynamically allocate everything that @@ -98,12 +95,8 @@ static char zs_version[] __initdata = "0.10"; #define to_zport(uport) container_of(uport, struct zs_port, port) -struct zs_parms { - resource_size_t scc[ZS_NUM_SCCS]; - int irq[ZS_NUM_SCCS]; -}; - static struct zs_scc zs_sccs[ZS_NUM_SCCS]; +static struct uart_driver zs_reg; /* * Set parameters in WR5, WR12, WR13 such as not to interfere @@ -839,16 +832,15 @@ static void zs_reset(struct zs_port *zport) spin_lock_irqsave(&scc->zlock, flags); irq = !irqs_disabled_flags(flags); - if (!zport->initialised) { - /* Reset the pointer first, just in case... */ - read_zsreg(zport, R0); - /* And let the current transmission finish. */ - zs_line_drain(zport, irq); - write_zsreg(zport, R9, zport == zport_a ? CHRA : CHRB); - udelay(10); - write_zsreg(zport, R9, 0); - zport->initialised = 1; - } + + /* Reset the pointer first, just in case... */ + read_zsreg(zport, R0); + /* And let the current transmission finish. */ + zs_line_drain(zport, irq); + write_zsreg(zport, R9, zport == zport_a ? CHRA : CHRB); + udelay(10); + write_zsreg(zport, R9, 0); + load_zsregs(zport, zport->regs, irq); spin_unlock_irqrestore(&scc->zlock, flags); } @@ -1055,63 +1047,62 @@ static const struct uart_ops zs_ops = { /* * Initialize Z85C30 port structures. */ -static int __init zs_probe_sccs(void) +static int __init zs_probe(struct platform_device *pdev) { - static int probed; - struct zs_parms zs_parms; - int chip, side, irq; - int n_chips = 0; + struct resource *mem_resource, *irq_resource; + int chip, side; int i; - if (probed) - return 0; + mem_resource = platform_get_resource(pdev, IORESOURCE_MEM, 0); + irq_resource = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!mem_resource || !irq_resource) + return -ENODEV; - irq = dec_interrupt[DEC_IRQ_SCC0]; - if (irq >= 0) { - zs_parms.scc[n_chips] = IOASIC_SCC0; - zs_parms.irq[n_chips] = dec_interrupt[DEC_IRQ_SCC0]; - n_chips++; - } - irq = dec_interrupt[DEC_IRQ_SCC1]; - if (irq >= 0) { - zs_parms.scc[n_chips] = IOASIC_SCC1; - zs_parms.irq[n_chips] = dec_interrupt[DEC_IRQ_SCC1]; - n_chips++; - } - if (!n_chips) - return -ENXIO; - - probed = 1; - - for (chip = 0; chip < n_chips; chip++) { - spin_lock_init(&zs_sccs[chip].zlock); - for (side = 0; side < ZS_NUM_CHAN; side++) { - struct zs_port *zport = &zs_sccs[chip].zport[side]; - struct uart_port *uport = &zport->port; - - zport->scc = &zs_sccs[chip]; - zport->clk_mode = 16; - - uport->has_sysrq = IS_ENABLED(CONFIG_SERIAL_ZS_CONSOLE); - uport->irq = zs_parms.irq[chip]; - uport->uartclk = ZS_CLOCK; - uport->fifosize = 1; - uport->iotype = UPIO_MEM; - uport->flags = UPF_BOOT_AUTOCONF; - uport->ops = &zs_ops; - uport->line = chip * ZS_NUM_CHAN + side; - uport->mapbase = dec_kn_slot_base + - zs_parms.scc[chip] + - (side ^ ZS_CHAN_B) * ZS_CHAN_IO_SIZE; - - for (i = 0; i < ZS_NUM_REGS; i++) - zport->regs[i] = zs_init_regs[i]; - } + chip = pdev->id; + spin_lock_init(&zs_sccs[chip].zlock); + for (side = 0; side < ZS_NUM_CHAN; side++) { + struct zs_port *zport = &zs_sccs[chip].zport[side]; + struct uart_port *uport = &zport->port; + + zport->scc = &zs_sccs[chip]; + zport->clk_mode = 16; + + uport->dev = &pdev->dev; + uport->has_sysrq = IS_ENABLED(CONFIG_SERIAL_ZS_CONSOLE); + uport->irq = irq_resource->start; + uport->uartclk = ZS_CLOCK; + uport->fifosize = 1; + uport->iotype = UPIO_MEM; + uport->flags = UPF_BOOT_AUTOCONF; + uport->ops = &zs_ops; + uport->line = chip * ZS_NUM_CHAN + side; + uport->mapbase = mem_resource->start + + (side ^ ZS_CHAN_B) * ZS_CHAN_IO_SIZE; + + for (i = 0; i < ZS_NUM_REGS; i++) + zport->regs[i] = zs_init_regs[i]; + + if (uart_add_one_port(&zs_reg, uport)) + uport->dev = NULL; } return 0; } +static void __exit zs_remove(struct platform_device *pdev) +{ + int chip, side; + + chip = pdev->id; + for (side = ZS_NUM_CHAN - 1; side >= 0; side--) { + struct zs_port *zport = &zs_sccs[chip].zport[side]; + struct uart_port *uport = &zport->port; + + if (uport->dev) + uart_remove_one_port(&zs_reg, uport); + } +} + #ifdef CONFIG_SERIAL_ZS_CONSOLE static void zs_console_putchar(struct uart_port *uport, unsigned char ch) @@ -1192,20 +1183,14 @@ static int __init zs_console_setup(struct console *co, char *options) int bits = 8; int parity = 'n'; int flow = 'n'; - int ret; - - ret = zs_map_port(uport); - if (ret) - return ret; - - zs_reset(zport); + if (!zport->scc) + return -ENODEV; if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); return uart_set_options(uport, co, baud, parity, bits, flow); } -static struct uart_driver zs_reg; static struct console zs_console = { .name = "ttyS", .write = zs_console_write, @@ -1216,23 +1201,6 @@ static struct console zs_console = { .data = &zs_reg, }; -/* - * Register console. - */ -static int __init zs_serial_console_init(void) -{ - int ret; - - ret = zs_probe_sccs(); - if (ret) - return ret; - register_console(&zs_console); - - return 0; -} - -console_initcall(zs_serial_console_init); - #define SERIAL_ZS_CONSOLE &zs_console #else #define SERIAL_ZS_CONSOLE NULL @@ -1248,47 +1216,31 @@ static struct uart_driver zs_reg = { .cons = SERIAL_ZS_CONSOLE, }; +static struct platform_driver zs_driver = { + .remove = __exit_p(zs_remove), + .driver = { .name = "zs" }, +}; + /* zs_init inits the driver. */ static int __init zs_init(void) { - int i, ret; + int ret; pr_info("%s%s\n", zs_name, zs_version); - /* Find out how many Z85C30 SCCs we have. */ - ret = zs_probe_sccs(); - if (ret) - return ret; - ret = uart_register_driver(&zs_reg); if (ret) return ret; + ret = platform_driver_probe(&zs_driver, zs_probe); + if (ret) + uart_unregister_driver(&zs_reg); - for (i = 0; i < ZS_NUM_SCCS * ZS_NUM_CHAN; i++) { - struct zs_scc *scc = &zs_sccs[i / ZS_NUM_CHAN]; - struct zs_port *zport = &scc->zport[i % ZS_NUM_CHAN]; - struct uart_port *uport = &zport->port; - - if (zport->scc) - uart_add_one_port(&zs_reg, uport); - } - - return 0; + return ret; } static void __exit zs_exit(void) { - int i; - - for (i = ZS_NUM_SCCS * ZS_NUM_CHAN - 1; i >= 0; i--) { - struct zs_scc *scc = &zs_sccs[i / ZS_NUM_CHAN]; - struct zs_port *zport = &scc->zport[i % ZS_NUM_CHAN]; - struct uart_port *uport = &zport->port; - - if (zport->scc) - uart_remove_one_port(&zs_reg, uport); - } - + platform_driver_unregister(&zs_driver); uart_unregister_driver(&zs_reg); } diff --git a/drivers/tty/serial/zs.h b/drivers/tty/serial/zs.h index 8e51f847bc03..e0d3c189b33f 100644 --- a/drivers/tty/serial/zs.h +++ b/drivers/tty/serial/zs.h @@ -22,7 +22,6 @@ struct zs_port { struct zs_scc *scc; /* Containing SCC. */ struct uart_port port; /* Underlying UART. */ - int initialised; /* For the console port. */ int clk_mode; /* May be 1, 16, 32, or 64. */ -- cgit v1.2.3 From e4240d8845445d58b4b96f7066adfe175a61bd0c Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 6 May 2026 23:42:56 +0100 Subject: serial: dz: Enable modular build Enable modular build since the driver now has a proper module_exit() handler. There's nothing specific to DZ hardware to prevent driver unloading and reloading from working. Signed-off-by: Maciej W. Rozycki Link: https://patch.msgid.link/alpine.DEB.2.21.2605062331420.46195@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 9aa61c93d7bc..ec284aceb909 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -334,7 +334,7 @@ config SERIAL_MAX310X Say Y here if you want to support this ICs. config SERIAL_DZ - bool "DECstation DZ serial driver" + tristate "DECstation DZ serial driver" depends on MACH_DECSTATION && 32BIT select SERIAL_CORE default y -- cgit v1.2.3 From 4c19719eb8b8df08c5bec7c499f73ddaea6f09fc Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 14 Apr 2026 12:02:34 +0000 Subject: rust_binder: avoid calling pending_oneway_finished() on TF_UPDATE_TXN When an outdated transaction is removed from `oneway_todo` due to `TF_UPDATE_TXN`, its `Allocation` is dropped. The current implementation of `Allocation::drop` calls `pending_oneway_finished()`, assuming the transaction was executed. This leads to premature execution of the next queued one-way transaction. Fix this by taking the `oneway_node` from the `Allocation` of the outdated transaction before it is dropped. This prevents `Allocation::drop` from signaling completion. We do not call `take_oneway_node()` from `Transaction::cancel` because it's actually correct to call `pending_oneway_finished()` on cancel if the transaction did not come from `oneway_todo`. This ensures that if `BINDER_THREAD_EXIT` is invoked and cancels a oneway transaction, then the next transaction is taken from `oneway_todo`. This bug does not lead to any issues in the kernel, but may lead to Binder delivering transactions to userspace earlier than userspace expected to receive them. Cc: stable Fixes: eafedbc7c050 ("rust_binder: add Rust Binder driver") Assisted-by: Antigravity:gemini Signed-off-by: Alice Ryhl Acked-by: Carlos Llamas Link: https://patch.msgid.link/20260414-tf-update-txn-fix-v1-1-d2b83303acc9@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder/allocation.rs | 8 ++++++++ drivers/android/binder/transaction.rs | 11 ++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder/allocation.rs b/drivers/android/binder/allocation.rs index 0cab959e4b7e..b7b05e72970a 100644 --- a/drivers/android/binder/allocation.rs +++ b/drivers/android/binder/allocation.rs @@ -157,6 +157,14 @@ impl Allocation { self.get_or_init_info().target_node = Some(target_node); } + pub(crate) fn take_oneway_node(&mut self) -> Option> { + if let Some(info) = self.allocation_info.as_mut() { + info.oneway_node.take() + } else { + None + } + } + /// Reserve enough space to push at least `num_fds` fds. pub(crate) fn info_add_fd_reserve(&mut self, num_fds: usize) -> Result { self.get_or_init_info() diff --git a/drivers/android/binder/transaction.rs b/drivers/android/binder/transaction.rs index 47d5e4d88b07..1d9b66920a21 100644 --- a/drivers/android/binder/transaction.rs +++ b/drivers/android/binder/transaction.rs @@ -270,7 +270,8 @@ impl Transaction { /// Not used for replies. pub(crate) fn submit(self: DLArc, info: &mut TransactionInfo) -> BinderResult { // Defined before `process_inner` so that the destructor runs after releasing the lock. - let mut _t_outdated; + let _t_outdated; + let _oneway_node; let oneway = self.flags & TF_ONE_WAY != 0; let process = self.to.clone(); @@ -287,6 +288,14 @@ impl Transaction { if let Some(t_outdated) = target_node.take_outdated_transaction(&self, &mut process_inner) { + let mut alloc_guard = t_outdated.allocation.lock(); + if let Some(alloc) = (*alloc_guard).as_mut() { + // Take the oneway node to prevent `Allocation::drop` from calling + // `pending_oneway_finished()`, which would be incorrect as this + // transaction is not being submitted. + _oneway_node = alloc.take_oneway_node(); + } + drop(alloc_guard); // Save the transaction to be dropped after locks are released. _t_outdated = t_outdated; } -- cgit v1.2.3 From f6d8fea9e3953151a4adb4f603503dc3dc9c69da Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Fri, 3 Apr 2026 18:18:58 +0000 Subject: rust_binder: Avoid holding lock when dropping delivered_death In 6c37bebd8c926, we switched to looping over the list and dropping each individual node, ostensibly without the lock held in the loop body. If the kernel were using Rust Edition 2024, the comment would be accurate, and the lock would not be held across the drop. However, the kernel is currently using 2021, so tail expression lifetime extension results in the lock being held across the drop. Explicitly binding the expression result to a variable makes the lockguard no longer part of a tail expression, causing the lock to be dropped before entering the loop body. This was detected via `CONFIG_PROVE_LOCKING` identifying an invalid wait context at the drop site. Reported-by: David Stevens Signed-off-by: Matthew Maurer Cc: stable Fixes: 6c37bebd8c92 ("rust_binder: avoid mem::take on delivered_deaths") Reviewed-by: Alice Ryhl Acked-by: Carlos Llamas Link: https://patch.msgid.link/20260403-lockhold-v1-1-c332b56cd8ae@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder/process.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder/process.rs b/drivers/android/binder/process.rs index 820cbd541435..96b8440ceac6 100644 --- a/drivers/android/binder/process.rs +++ b/drivers/android/binder/process.rs @@ -1402,7 +1402,12 @@ impl Process { // Clear delivered_deaths list. // // Scope ensures that MutexGuard is dropped while executing the body. - while let Some(delivered_death) = { self.inner.lock().delivered_deaths.pop_front() } { + while let Some(delivered_death) = { + // Explicitly bind to avoid tail expression lifetime extension of the lockguard + // Can be removed when the kernel moves to edition 2024 + let maybe_death = self.inner.lock().delivered_deaths.pop_front(); + maybe_death + } { drop(delivered_death); } -- cgit v1.2.3 From c2ff4764e03e7a8d758352f4aceb8fe1be6ac971 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Thu, 21 May 2026 15:30:11 +0800 Subject: arm64: tlb: Flush walk cache when unsharing PMD tables When huge_pmd_unshare() is called to unshare a PMD table, the tlb_unshare_pmd_ptdesc() function sets tlb->unshared_tables=true but the aarch64 tlb_flush() only checked tlb->freed_tables to determine whether to use TLBF_NONE (vae1is, invalidates walk cache) or TLBF_NOWALKCACHE (vale1is, leaf-only). This caused the stale PMD page table entry to remain in the walk cache after unshare, potentially leading to incorrect page table walks. Fix by including unshared_tables in the check, so that when unsharing tables, TLBF_NONE is used and the walk cache is properly invalidated. Here is the detailed distinction between vae1is and vale1is: | Instruction Combination | Actual Invalidation Scope | | ------------------------ | --------------------------------------------------| | `VAE1IS` + TTL=`0` | All entries at all levels (full invalidation) | | `VAE1IS` + TTL=`2` (L2) | Non-leaf at Level 0/1 + leaf at Level 2 | | `VALE1IS` + TTL=`0` | Leaf entries at all levels (non-leaf not cleared) | | `VALE1IS` + TTL=`2` (L2) | Leaf entry at Level 2 only | Signed-off-by: Zeng Heng Fixes: 8ce720d5bd91 ("mm/hugetlb: fix excessive IPI broadcasts when unsharing PMD tables using mmu_gather") Cc: Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlb.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 10869d7731b8..751bd57bc3ba 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -53,7 +53,8 @@ static inline int tlb_get_level(struct mmu_gather *tlb) static inline void tlb_flush(struct mmu_gather *tlb) { struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); - tlbf_t flags = tlb->freed_tables ? TLBF_NONE : TLBF_NOWALKCACHE; + tlbf_t flags = (tlb->freed_tables || tlb->unshared_tables) ? + TLBF_NONE : TLBF_NOWALKCACHE; unsigned long stride = tlb_get_unmap_size(tlb); int tlb_level = tlb_get_level(tlb); -- cgit v1.2.3 From f74c8696f14149d5e43cc28b015326a759c48f00 Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Tue, 5 May 2026 23:02:56 +0800 Subject: uio: uio_pci_generic_sva: fix double free of devm_kzalloc() memory uio_pci_sva allocates struct uio_pci_sva_dev with devm_kzalloc() in probe(), but then calls kfree(udev) both on the probe() error path (label out_free) and again in remove(). Because devm_kzalloc() allocations are devres-managed and are freed automatically when the device is detached (including after a failing probe() and during driver unbind), the explicit kfree() can lead to a double free. If probe() fails after devm_kzalloc(), the error path frees udev and devres cleanup will free it again when the core unwinds the partially bound device. On normal driver removal, remove() frees udev and devres will free it again when the device is detached. This issue was identified by a static analysis tool I developed and confirmed by manual review. Fix by removing the manual kfree() calls and dropping the now-unused label. Fixes: 3397c3cd859a2 ("uio: Add SVA support for PCI devices via uio_pci_generic_sva.c") Cc: stable Signed-off-by: Guangshuo Li Link: https://patch.msgid.link/20260505150256.614071-1-lgs201920130244@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio_pci_generic_sva.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/uio/uio_pci_generic_sva.c b/drivers/uio/uio_pci_generic_sva.c index 4a46acd994a8..d05ef77f7e32 100644 --- a/drivers/uio/uio_pci_generic_sva.c +++ b/drivers/uio/uio_pci_generic_sva.c @@ -129,15 +129,13 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *id) ret = devm_uio_register_device(&pdev->dev, &udev->info); if (ret) { dev_err(&pdev->dev, "Failed to register uio device\n"); - goto out_free; + goto out_disable; } pci_set_drvdata(pdev, udev); return 0; -out_free: - kfree(udev); out_disable: pci_disable_device(pdev); @@ -146,11 +144,8 @@ out_disable: static void remove(struct pci_dev *pdev) { - struct uio_pci_sva_dev *udev = pci_get_drvdata(pdev); - pci_release_regions(pdev); pci_disable_device(pdev); - kfree(udev); } static ssize_t pasid_show(struct device *dev, -- cgit v1.2.3 From ef15ccbb3e8640a723c42ad90eaf81d66ae02017 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 5 May 2026 20:45:12 +0200 Subject: parport: Fix race between port and client registration The parport subsystem registers port devices before they are fully initialised, resulting in a race condition where client drivers such as lp can attach to ports that are not completely initialised or even being torn down. When the port and client drivers are built as modules and loaded around the same time during boot, this occasionally results in a crash. I was able to make this happen reliably in a VM with a PC-style parallel port by patching parport_pc to fail probing: > --- a/drivers/parport/parport_pc.c > +++ b/drivers/parport/parport_pc.c > @@ -2069,7 +2069,7 @@ static struct parport *__parport_pc_probe_port(unsigned long int base, > if (!p) > goto out3; > > - base_res = request_region(base, 3, p->name); > + base_res = NULL; > if (!base_res) > goto out4; > and then running: while true; do modprobe lp & modprobe parport_pc wait rmmod lp parport_pc done for a few seconds. In the long term I think port registration should be changed to put the call to device_add() inside parport_announce_port(), but since the latter currently cannot fail this will require changing all port drivers. For now, add a flag to indicate whether a port has been "announced" and only try to attach client drivers to ports when the flag is set. Fixes: 6fa45a226897 ("parport: add device-model to parport subsystem") Closes: https://bugs.debian.org/1130365 Closes: https://lore.kernel.org/all/6ba903ad-9897-42bb-8c2d-337385cc3746@molgen.mpg.de/ Cc: stable Signed-off-by: Ben Hutchings Acked-by: Sudip Mukherjee Link: https://patch.msgid.link/afo6uBv68GDevbMD@decadent.org.uk Signed-off-by: Greg Kroah-Hartman --- drivers/parport/share.c | 11 +++++++++-- include/linux/parport.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/parport/share.c b/drivers/parport/share.c index ba5292828703..eb0977ca1605 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -214,10 +214,14 @@ static void get_lowlevel_driver(void) static int port_check(struct device *dev, void *dev_drv) { struct parport_driver *drv = dev_drv; + struct parport *port; /* only send ports, do not send other devices connected to bus */ - if (is_parport(dev)) - drv->match_port(to_parport_dev(dev)); + if (is_parport(dev)) { + port = to_parport_dev(dev); + if (test_bit(PARPORT_ANNOUNCED, &port->devflags)) + drv->match_port(port); + } return 0; } @@ -532,6 +536,7 @@ void parport_announce_port(struct parport *port) if (slave) attach_driver_chain(slave); } + set_bit(PARPORT_ANNOUNCED, &port->devflags); mutex_unlock(®istration_lock); } EXPORT_SYMBOL(parport_announce_port); @@ -561,6 +566,8 @@ void parport_remove_port(struct parport *port) mutex_lock(®istration_lock); + clear_bit(PARPORT_ANNOUNCED, &port->devflags); + /* Spread the word. */ detach_driver_chain(port); diff --git a/include/linux/parport.h b/include/linux/parport.h index 464c2ad28039..f64cb0676e3b 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -240,6 +240,7 @@ struct parport { unsigned long devflags; #define PARPORT_DEVPROC_REGISTERED 0 +#define PARPORT_ANNOUNCED 1 struct pardevice *proc_device; /* Currently register proc device */ struct list_head full_list; -- cgit v1.2.3 From 2eae90a457baa0048a96ed38ad93090ee38c8b2f Mon Sep 17 00:00:00 2001 From: Hongling Zeng Date: Mon, 18 May 2026 10:29:39 +0800 Subject: gpib: cb7210: Fix region leak when request_irq fails When request_irq() fails, the region allocated by request_region() is not released. Fix this by adding an error handling path with proper goto labels to release the region. Fixes: e9dc69956d4d ("staging: gpib: Add Computer Boards GPIB driver") Closes: https://lore.kernel.org/oe-kbuild-all/202605160620.ReBOadPX-lkp@intel.com/ Signed-off-by: Hongling Zeng Cc: stable Link: https://patch.msgid.link/20260518022939.16881-1-zenghongling@kylinos.cn Signed-off-by: Greg Kroah-Hartman --- drivers/gpib/cb7210/cb7210.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpib/cb7210/cb7210.c b/drivers/gpib/cb7210/cb7210.c index 6dd8637c5964..673b5bfe2e7d 100644 --- a/drivers/gpib/cb7210/cb7210.c +++ b/drivers/gpib/cb7210/cb7210.c @@ -1049,7 +1049,8 @@ static int cb_isa_attach(struct gpib_board *board, const struct gpib_board_confi if (!request_region(config->ibbase, cb7210_iosize, DRV_NAME)) { dev_err(board->gpib_dev, "ioports starting at 0x%x are already in use\n", config->ibbase); - return -EBUSY; + retval = -EBUSY; + goto err_release_region; } nec_priv->iobase = config->ibbase; cb_priv->fifo_iobase = nec7210_iobase(cb_priv); @@ -1062,11 +1063,16 @@ static int cb_isa_attach(struct gpib_board *board, const struct gpib_board_confi // install interrupt handler if (request_irq(config->ibirq, cb7210_interrupt, isr_flags, DRV_NAME, board)) { dev_err(board->gpib_dev, "failed to obtain IRQ %d\n", config->ibirq); - return -EBUSY; + retval = -EBUSY; + goto err_release_region; } cb_priv->irq = config->ibirq; return cb7210_init(cb_priv, board); + +err_release_region: + release_region(nec7210_iobase(cb_priv), cb7210_iosize); + return retval; } static void cb_isa_detach(struct gpib_board *board) -- cgit v1.2.3 From 36770417153644bc88281c7284730ef1d14d8d3c Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Mon, 18 May 2026 15:34:05 +0800 Subject: misc: rp1: Send IACK on IRQ activate to fix kdump/kexec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a kexec/kdump reboot, the macb Ethernet controller fails to receive any packets, causing DHCP to hang indefinitely and the network interface to be unusable despite link being up. The root cause is that RP1's level-triggered MSI-X interrupt sources (such as macb on hwirq 6) may have their internal state machines stuck in the "waiting for IACK" state. This happens because the previous kernel crashed before sending the acknowledgment for a pending level interrupt. In this stuck state, RP1 will not generate new MSI-X writes even though the interrupt source remains asserted. Since no new MSI-X is sent, the GIC never sees a new edge, the chained IRQ handler is never invoked, and the interrupt is permanently lost. Fix this by sending MSIX_CFG_IACK in rp1_irq_activate(). This unconditionally resets the MSI-X state machine back to idle when a child device requests its interrupt. If the interrupt source is still asserted, RP1 will immediately issue a new MSI-X with the freshly configured msg_addr/msg_data, and normal interrupt delivery resumes. Writing IACK when the state machine is already idle (i.e., on a normal cold boot) is harmless — it has no effect. Fixes: 49d63971f963 ("misc: rp1: RaspberryPi RP1 misc driver") Cc: stable Signed-off-by: Xiaolei Wang Link: https://patch.msgid.link/20260518073405.2115003-1-xiaolei.wang@windriver.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/rp1/rp1_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/rp1/rp1_pci.c b/drivers/misc/rp1/rp1_pci.c index d210da84c30a..81685e3f3296 100644 --- a/drivers/misc/rp1/rp1_pci.c +++ b/drivers/misc/rp1/rp1_pci.c @@ -143,6 +143,7 @@ static int rp1_irq_activate(struct irq_domain *d, struct irq_data *irqd, struct rp1_dev *rp1 = d->host_data; msix_cfg_set(rp1, (unsigned int)irqd->hwirq, MSIX_CFG_ENABLE); + msix_cfg_set(rp1, (unsigned int)irqd->hwirq, MSIX_CFG_IACK); return 0; } -- cgit v1.2.3 From 215c90ee656114f5e8c32408228d97082f8e0eef Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 6 May 2026 13:57:00 +0200 Subject: device property: set fwnode->secondary to NULL in fwnode_init() If a firmware node is allocated on the stack (for instance: temporary software node whose life-time we control) or on the heap - but using a non-zeroing allocation function - and initialized using fwnode_init(), its secondary pointer will contain uninitalized memory which likely will be neither NULL nor IS_ERR() and so may end up being dereferenced (for example: in dev_to_swnode()). Set fwnode->secondary to NULL on initialization. Cc: stable Fixes: 01bb86b380a3 ("driver core: Add fwnode_init()") Signed-off-by: Bartosz Golaszewski Reviewed-by: Rafael J. Wysocki (Intel) Reviewed-by: Andy Shevchenko Reviewed-by: Sakari Ailus Link: https://patch.msgid.link/20260506115701.23035-1-bartosz.golaszewski@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 80b38fbf2121..31df7608737e 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -208,6 +208,7 @@ struct fwnode_operations { static inline void fwnode_init(struct fwnode_handle *fwnode, const struct fwnode_operations *ops) { + fwnode->secondary = NULL; fwnode->ops = ops; INIT_LIST_HEAD(&fwnode->consumers); INIT_LIST_HEAD(&fwnode->suppliers); -- cgit v1.2.3 From bed6e04be8e6b9133d8b16d5a42d0e0ce674fa9a Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Mon, 11 May 2026 10:43:14 -0400 Subject: netfilter: conntrack: tcp: do not force CLOSE on invalid-seq RST without direction check An unintended behavior in the TCP conntrack state machine allows a connection to be forced into the CLOSE state using an RST packet with an invalid sequence number. Specifically, after a SYN packet is observed, an RST with an invalid SEQ can transition the conntrack entry to TCP_CONNTRACK_CLOSE, regardless of whether the RST corresponds to the expected reply direction. The relevant code path assumes the RST is a response to an outgoing SYN, but does not validate packet direction or ensure that a matching SYN was actually sent in the opposite direction. As a result, a crafted packet sequence consisting of a SYN followed by an invalid-sequence RST can prematurely terminate an active NAT entry. This makes connection teardown easier than intended. So, tighten the state transition logic to ensure that RST-triggered CLOSE transitions only occur when the RST is a valid response to a previously observed SYN in the correct direction. Cc: stable@vger.kernel.org Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.") Signed-off-by: Hamza Mahfooz Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_proto_tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b67426c2189b..e99ab1e88e9f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1221,7 +1221,8 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, new_state = old_state; } if (((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) - && ct->proto.tcp.last_index == TCP_SYN_SET) + && ct->proto.tcp.last_index == TCP_SYN_SET + && ct->proto.tcp.last_dir != dir) || (!test_bit(IPS_ASSURED_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { -- cgit v1.2.3 From 92170e6afe927ab2792a3f71902845789c8e31b1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 19 May 2026 12:36:14 -0700 Subject: netfilter: synproxy: refresh tcphdr after skb_ensure_writable synproxy_tstamp_adjust() rewrites the TCP timestamp option in place and then patches the TCP checksum via inet_proto_csum_replace4() on the caller-supplied tcphdr pointer. Both ipv4_synproxy_hook() and ipv6_synproxy_hook() obtain that pointer with skb_header_pointer() before calling in, so it may either alias skb->head directly or point at the caller's on-stack _tcph buffer. Between obtaining the pointer and using it, the function calls skb_ensure_writable(skb, optend), which on a cloned or non-linear skb invokes pskb_expand_head() and frees the old skb->head. After that point the cached th is stale: caller (ipv[46]_synproxy_hook) th = skb_header_pointer(skb, ..., &_tcph) synproxy_tstamp_adjust(skb, protoff, th, ...) skb_ensure_writable(skb, optend) pskb_expand_head() /* kfree(old skb->head) */ ... inet_proto_csum_replace4(&th->check, ...) /* writes into freed head, or into the caller's stack copy leaving the on-wire checksum stale */ The option bytes are written through skb->data and are fine; only the checksum update goes through th and so lands in the wrong place. The result is either a write into freed slab memory or a packet leaving with a checksum that does not match its payload. Fix by re-deriving th from skb->data + protoff immediately after skb_ensure_writable() succeeds, so the subsequent checksum update targets the linear, writable header. Fixes: 48b1de4c110a ("netfilter: add SYNPROXY core/target") Assisted-by: kres (claude-opus-4-7) Signed-off-by: Chris Mason Reviewed-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- net/netfilter/nf_synproxy_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 57f57e2fc80a..036c8586f49b 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -200,6 +200,8 @@ synproxy_tstamp_adjust(struct sk_buff *skb, unsigned int protoff, if (skb_ensure_writable(skb, optend)) return 0; + th = (struct tcphdr *)(skb->data + protoff); + while (optoff < optend) { unsigned char *op = skb->data + optoff; -- cgit v1.2.3 From 47980b6dbf83961eec1c1363ea986e9c06ff8054 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 14 May 2026 14:21:57 +0200 Subject: netfilter: nf_conntrack_gre: fix gre keymap list corruption Quoting reporter: A race between GRE keymap insertion and destruction can corrupt the kernel list or use a freed object. `nf_ct_gre_keymap_add()` publishes a new keymap pointer before the embedded `list_head` is linked, while `nf_ct_gre_keymap_destroy()` can concurrently delete and free that same object. An unprivileged user can reach this through the PPTP conntrack helper by racing PPTP control messages or helper teardown, leading to KASAN-detectable list corruption/UAF in kernel context. ## Root Cause Analysis `exp_gre()` installs GRE expectations for a PPTP control flow and then adds two GRE keymap entries [..] The add path publishes `ct_pptp_info->keymap[dir]` before linking the embedded list node [..] Concurrent teardown deletes that partially initialized object. Make add/destroy symmetric: install both, destroy both while under lock. Furthermore, we should refuse to publish a new mapping in case ct is going away, else we may leak the allocation. The "retrans" detection is strange: existing mapping is checked for key equality with the new mapping, then for "is on the list" via list walk. But I can't see how an existing keymap entry can be NOT on list. Change this to only check if we're asked to map same tuple again -- if so, skip re-install, else signal failure. Last, add a bug trap for the keymap list; it has to be empty when namespace is going away. Reported-by: Leo Lin Signed-off-by: Florian Westphal --- include/linux/netfilter/nf_conntrack_proto_gre.h | 7 +- net/netfilter/nf_conntrack_core.c | 8 ++ net/netfilter/nf_conntrack_pptp.c | 8 +- net/netfilter/nf_conntrack_proto_gre.c | 106 +++++++++++++++++------ 4 files changed, 95 insertions(+), 34 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 9ee7014400e8..ad5563f0f864 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -18,9 +18,10 @@ struct nf_ct_gre_keymap { struct rcu_head rcu; }; -/* add new tuple->key_reply pair to keymap */ -int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, - struct nf_conntrack_tuple *t); +/* add tuple->key_reply pairs to keymap */ +bool nf_ct_gre_keymap_add(struct nf_conn *ct, + const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl); /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8ba5b22a1eef..b521b5ebd664 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -568,6 +568,13 @@ static void destroy_gre_conntrack(struct nf_conn *ct) #endif } +static void warn_on_keymap_list_leak(const struct net *net) +{ +#ifdef CONFIG_NF_CT_PROTO_GRE + WARN_ON_ONCE(!list_empty(&net->ct.nf_ct_proto.gre.keymap_list)); +#endif +} + void nf_ct_destroy(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; @@ -2510,6 +2517,7 @@ i_see_dead_people: } list_for_each_entry(net, net_exit_list, exit_list) { + warn_on_keymap_list_leak(net); nf_conntrack_ecache_pernet_fini(net); nf_conntrack_expect_pernet_fini(net); free_percpu(net->ct.stat); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 4c679638df06..dc23e4181618 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -225,13 +225,9 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid) if (nf_ct_expect_related(exp_reply, 0) != 0) goto out_unexpect_orig; - /* Add GRE keymap entries */ - if (nf_ct_gre_keymap_add(ct, IP_CT_DIR_ORIGINAL, &exp_orig->tuple) != 0) + if (!nf_ct_gre_keymap_add(ct, &exp_orig->tuple, + &exp_reply->tuple)) goto out_unexpect_both; - if (nf_ct_gre_keymap_add(ct, IP_CT_DIR_REPLY, &exp_reply->tuple) != 0) { - nf_ct_gre_keymap_destroy(ct); - goto out_unexpect_both; - } ret = 0; out_put_both: diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 94c19bc4edc5..35e22082d65a 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -87,41 +87,97 @@ static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t) return key; } -/* add a single keymap entry, associate with specified master ct */ -int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, - struct nf_conntrack_tuple *t) +enum nf_ct_gre_km_act { + NF_CT_GRE_KM_NEW, + NF_CT_GRE_KM_BAD, + NF_CT_GRE_KM_DUP +}; + +static enum nf_ct_gre_km_act +nf_ct_gre_km_acceptable(const struct nf_ct_pptp_master *ct_pptp_info, + const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl) +{ + struct nf_ct_gre_keymap *km_orig, *km_repl; + + lockdep_assert_held(&keymap_lock); + + km_orig = ct_pptp_info->keymap[IP_CT_DIR_ORIGINAL]; + km_repl = ct_pptp_info->keymap[IP_CT_DIR_REPLY]; + + if (km_orig && km_repl) { + if (!gre_key_cmpfn(km_orig, orig)) + return NF_CT_GRE_KM_BAD; + + if (!gre_key_cmpfn(km_repl, repl)) + return NF_CT_GRE_KM_BAD; + + return NF_CT_GRE_KM_DUP; + } + + DEBUG_NET_WARN_ON_ONCE(km_orig); + DEBUG_NET_WARN_ON_ONCE(km_repl); + return NF_CT_GRE_KM_NEW; +} + +/* add keymap entries, associate with specified master ct */ +bool nf_ct_gre_keymap_add(struct nf_conn *ct, + const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl) { struct net *net = nf_ct_net(ct); struct nf_gre_net *net_gre = gre_pernet(net); struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); - struct nf_ct_gre_keymap **kmp, *km; - - kmp = &ct_pptp_info->keymap[dir]; - if (*kmp) { - /* check whether it's a retransmission */ - list_for_each_entry_rcu(km, &net_gre->keymap_list, list) { - if (gre_key_cmpfn(km, t) && km == *kmp) - return 0; - } - pr_debug("trying to override keymap_%s for ct %p\n", - dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); - return -EEXIST; - } + struct nf_ct_gre_keymap *km_orig, *km_repl; + bool ret = false; - km = kmalloc_obj(*km, GFP_ATOMIC); - if (!km) - return -ENOMEM; - memcpy(&km->tuple, t, sizeof(*t)); - *kmp = km; + km_orig = kmalloc_obj(*km_orig, GFP_ATOMIC); + if (!km_orig) + return false; + km_repl = kmalloc_obj(*km_repl, GFP_ATOMIC); + if (!km_repl) + goto km_free; - pr_debug("adding new entry %p: ", km); - nf_ct_dump_tuple(&km->tuple); + memcpy(&km_orig->tuple, orig, sizeof(*orig)); + memcpy(&km_repl->tuple, repl, sizeof(*repl)); spin_lock_bh(&keymap_lock); - list_add_tail(&km->list, &net_gre->keymap_list); + if (nf_ct_is_dying(ct)) + goto unlock_free; + + switch (nf_ct_gre_km_acceptable(ct_pptp_info, orig, repl)) { + case NF_CT_GRE_KM_NEW: + break; + case NF_CT_GRE_KM_DUP: + ret = true; + goto unlock_free; + case NF_CT_GRE_KM_BAD: + pr_debug("trying to override keymap for ct %p\n", ct); + goto unlock_free; + } + + if (ct_pptp_info->keymap[IP_CT_DIR_ORIGINAL] || + ct_pptp_info->keymap[IP_CT_DIR_REPLY]) + goto unlock_free; + + pr_debug("adding new entries %p,%p: ", km_orig, km_repl); + nf_ct_dump_tuple(&km_orig->tuple); + nf_ct_dump_tuple(&km_repl->tuple); + + list_add_tail_rcu(&km_orig->list, &net_gre->keymap_list); + list_add_tail_rcu(&km_repl->list, &net_gre->keymap_list); + ct_pptp_info->keymap[IP_CT_DIR_ORIGINAL] = km_orig; + ct_pptp_info->keymap[IP_CT_DIR_REPLY] = km_repl; spin_unlock_bh(&keymap_lock); - return 0; + return true; + +unlock_free: + spin_unlock_bh(&keymap_lock); +km_free: + kfree(km_orig); + kfree(km_repl); + return ret; } EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add); -- cgit v1.2.3 From c376f07e16c02239ed44cabb97145d03f65b4d15 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 19 May 2026 20:10:08 +0200 Subject: netfilter: xt_cpu: prefer raw_smp_processor_id With PREEMPT_RCU we get splat: BUG: using smp_processor_id() in preemptible [..] caller is cpu_mt+0x53/0xd0 net/netfilter/xt_cpu.c:37 CPU: 1 .. Comm: syz.3.1377 #0 PREEMPT(full) Call Trace: dump_stack_lvl+0xe8/0x150 lib/dump_stack.c:120 check_preemption_disabled+0xd3/0xe0 lib/smp_processor_id.c:47 cpu_mt+0x53/0xd0 net/netfilter/xt_cpu.c:37 [..] Just use raw version instead. This is similar to 14d14a5d2957 ("netfilter: nft_meta: use raw_smp_processor_id()"). Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Reported-by: syzbot+690d3e3ffa7335ac10eb@syzkaller.appspotmail.com Signed-off-by: Florian Westphal --- net/netfilter/xt_cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c index 3bdc302a0f91..9cb259902a58 100644 --- a/net/netfilter/xt_cpu.c +++ b/net/netfilter/xt_cpu.c @@ -34,7 +34,7 @@ static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_cpu_info *info = par->matchinfo; - return (info->cpu == smp_processor_id()) ^ info->invert; + return (info->cpu == raw_smp_processor_id()) ^ info->invert; } static struct xt_match cpu_mt_reg __read_mostly = { -- cgit v1.2.3 From 968cc2c96390f06e56ed6a43f935bfebdefed28f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 16 May 2026 23:23:21 +0800 Subject: netfilter: disable payload mangling in userns Several parts of network stack rely on iph->ihl validation done by network stack before PRE_ROUTING. Disable this feature for user namespaces for now. tcp option handling is likely safe even for LOCAL_IN, so this this leaves tcp option mangling via nft_exthdr.c as-is. I don't think these are the only means to alter packets, but these appear to be relatively prominent. This could be relaxed later. Example: - allow userns for ingress hook. - allow userns if base is transport header. Also, we should revalidate or restrict generally: - Don't allow linklayer writes to spill into network header - restrict ipv4 and ipv6 to 'known safe' writes, e.g. saddr/daddr/check/tos Reported-by: Qi Tang Reported-by: Tong Liu Tested-by: Qi Tang Link: https://lore.kernel.org/netfilter-devel/20260515100411.3141-1-fw@strlen.de/ Signed-off-by: Florian Westphal --- net/netfilter/nfnetlink_queue.c | 6 ++++-- net/netfilter/nft_payload.c | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 984a0eb9e149..60ab88d45096 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1141,6 +1141,9 @@ nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int di { struct sk_buff *nskb; + if (e->state.net->user_ns != &init_user_ns) + return -EPERM; + if (diff < 0) { unsigned int min_len = skb_transport_offset(e->skb); @@ -1537,8 +1540,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info, if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), payload_len, entry, diff) < 0) verdict = NF_DROP; - - if (ct && diff) + else if (ct && diff) nfnl_ct->seq_adjust(entry->skb, ct, ctinfo, diff); } diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 01e13e5255a9..484a5490832e 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -917,6 +917,9 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, struct nft_payload_set *priv = nft_expr_priv(expr); int err; + if (ctx->net->user_ns != &init_user_ns) + return -EPERM; + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); -- cgit v1.2.3 From f438d1786d657d57790c5d138d6db3fc9fdac392 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 19 May 2026 22:52:07 +0200 Subject: netfilter: ebtables: fix OOB read in compat_mtw_from_user Luxiao Xu says: The function compat_mtw_from_user() converts ebtables extensions from 32-bit user structures to kernel native structures. However, it lacks proper validation of the user-supplied match_size/target_size. When certain extensions are processed, the kernel-side translation logic may perform memory accesses based on the extension's expected size. If the user provides a size smaller than what the extension requires, it results in an out-of-bounds read as reported by KASAN. This fix introduces a check to ensure match_size is at least as large as the extension's required compatsize. This covers matches, watchers, and targets, while maintaining compatibility with standard targets. AFAIU this is relevant for matches that need to go though match->compat_from_user() call. Those that use plain memcpy with the user-provided size are ok because the caller checks that size vs the start of the next rule entry offset (which itself is checked vs. total size copied from userspace). The ->compat_from_user() callbacks assume they can read compatsize bytes, so they need this extra check. Based on an earlier patch from Luxiao Xu. Fixes: 81e675c227ec ("netfilter: ebtables: add CONFIG_COMPAT support") Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Signed-off-by: Luxiao Xu Signed-off-by: Ren Wei Reviewed-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- net/bridge/netfilter/ebtables.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index b9f4daac09af..8a6a069329d2 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1956,6 +1956,25 @@ enum compat_mwt { EBT_COMPAT_TARGET, }; +static bool match_size_ok(const struct xt_match *match, unsigned int match_size) +{ + u16 csize; + + if (match->matchsize == -1) /* cannot validate ebt_among */ + return true; + + csize = match->compatsize ? : match->matchsize; + + return match_size >= csize; +} + +static bool tgt_size_ok(const struct xt_target *tgt, unsigned int tgt_size) +{ + u16 csize = tgt->compatsize ? : tgt->targetsize; + + return tgt_size >= csize; +} + static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, enum compat_mwt compat_mwt, struct ebt_entries_buf_state *state, @@ -1981,6 +2000,11 @@ static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, if (IS_ERR(match)) return PTR_ERR(match); + if (!match_size_ok(match, match_size)) { + module_put(match->me); + return -EINVAL; + } + off = ebt_compat_match_offset(match, match_size); if (dst) { if (match->compat_from_user) @@ -2000,6 +2024,12 @@ static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, mwt->u.revision); if (IS_ERR(wt)) return PTR_ERR(wt); + + if (!tgt_size_ok(wt, match_size)) { + module_put(wt->me); + return -EINVAL; + } + off = xt_compat_target_offset(wt); if (dst) { -- cgit v1.2.3 From 1d001b0a6182b0d2f41a8d687f7522b6f1e94280 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 20 May 2026 10:34:09 +0800 Subject: netfilter: nft_fib_ipv6: walk fib6_siblings under RCU nft_fib6_info_nh_uses_dev() runs from nft_fib6_eval() in softirq under rcu_read_lock(). fib6_siblings is modified by writers that hold tb6_lock but do not wait for RCU readers, so the sibling walk should use list_for_each_entry_rcu(): it adds READ_ONCE() on the ->next pointer and lets CONFIG_PROVE_RCU_LIST validate the locking. No functional change for non-debug builds. Fixes: 1c32b24c234b ("netfilter: nft_fib_ipv6: switch to fib6_lookup") Signed-off-by: Jiayuan Chen Signed-off-by: Florian Westphal --- net/ipv6/netfilter/nft_fib_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 8b2dba88ee96..5e192a446ec8 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -170,7 +170,7 @@ static bool nft_fib6_info_nh_uses_dev(struct fib6_info *rt, if (nft_fib6_info_nh_dev_match(nh_dev, dev)) return true; - list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { + list_for_each_entry_rcu(iter, &rt->fib6_siblings, fib6_siblings) { nh_dev = fib6_info_nh_dev(iter); if (nft_fib6_info_nh_dev_match(nh_dev, dev)) -- cgit v1.2.3 From f81b0c2d281faa93e4c2b7247047922aaf3e4ba6 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 20 May 2026 10:34:10 +0800 Subject: netfilter: nft_fib_ipv6: handle routes via external nexthop fib6_info has a union: union { struct list_head fib6_siblings; struct list_head nh_list; }; Old-style multipath (ip -6 route add ... nexthop ... nexthop ...) uses fib6_siblings. External nexthop (ip -6 route add ... nhid N) uses nh_list, linked into &nh->f6i_list. nft_fib6_info_nh_uses_dev() blindly walks &rt->fib6_siblings, causing an OOB read past the struct nexthop slab when rt->nh is set: ================================================================== BUG: KASAN: slab-out-of-bounds in nft_fib6_eval+0x1362/0x16c0 Read of size 8 at addr ffff888103a099d0 by task ping/386 CPU: 2 UID: 0 PID: 386 Comm: ping Not tainted 7.1.0-rc3+ #251 PREEMPT Call Trace: dump_stack_lvl+0x76/0xa0 print_report+0xd1/0x5f0 kasan_report+0xe7/0x130 __asan_report_load8_noabort+0x14/0x30 nft_fib6_eval+0x1362/0x16c0 nft_do_chain+0x279/0x18c0 nft_do_chain_ipv6+0x1a8/0x230 nf_hook_slow+0xad/0x200 ipv6_rcv+0x152/0x380 __netif_receive_skb_one_core+0x118/0x1c0 ================================================================== Branch by route shape: when rt->nh is set, walk via nexthop_for_each_fib6_nh() (also covers nh groups, which the original code missed); otherwise walk fib6_siblings, guarded by READ_ONCE() of rt->fib6_nsiblings as required by commit 31d7d67ba127 ("ipv6: annotate data-races around rt->fib6_nsiblings"). Fixes: 1c32b24c234b ("netfilter: nft_fib_ipv6: switch to fib6_lookup") Signed-off-by: Jiayuan Chen Signed-off-by: Florian Westphal --- net/ipv6/netfilter/nft_fib_ipv6.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 5e192a446ec8..c0a0075e2590 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -160,16 +160,32 @@ static bool nft_fib6_info_nh_dev_match(const struct net_device *nh_dev, l3mdev_master_ifindex_rcu(nh_dev) == dev->ifindex; } +static int nft_fib6_nh_match_dev_cb(struct fib6_nh *nh, void *arg) +{ + const struct net_device *dev = arg; + + return nft_fib6_info_nh_dev_match(nh->fib_nh_dev, dev); +} + static bool nft_fib6_info_nh_uses_dev(struct fib6_info *rt, const struct net_device *dev) { const struct net_device *nh_dev; struct fib6_info *iter; + /* External nexthop: fib6_siblings slot aliases nh_list, walk via nh. */ + if (rt->nh) + return nexthop_for_each_fib6_nh(rt->nh, + nft_fib6_nh_match_dev_cb, + (void *)dev); + nh_dev = fib6_info_nh_dev(rt); if (nft_fib6_info_nh_dev_match(nh_dev, dev)) return true; + if (!READ_ONCE(rt->fib6_nsiblings)) + return false; + list_for_each_entry_rcu(iter, &rt->fib6_siblings, fib6_siblings) { nh_dev = fib6_info_nh_dev(iter); -- cgit v1.2.3 From a40aaaef2f8f5a17a779eeac7032f2f7d5322406 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 20 May 2026 10:34:11 +0800 Subject: selftests: netfilter: add nft_fib_nexthop test Functional coverage of nft_fib6_eval()'s nexthop enumeration over three route shapes: 1) single external nexthop (nhid) 2) external nexthop group (nhid -> group) 3) old-style multipath (nexthop ... nexthop ...) Each scenario places one nexthop on the input device (veth0). For (2) and (3) the matching nexthop is the second member, so the walk has to traverse beyond the primary nh. Two nft counters on prerouting verify the data path: one increments only when fib reports veth0 as the oif, the other counts "missing" results and must stay at zero. ./nft_fib_nexthop.sh PASS: single external nexthop (nhid -> veth0) PASS: nexthop group (dummy0 + veth0) PASS: old-style multipath (sibling on veth0) Suggested-by: Florian Westphal Signed-off-by: Jiayuan Chen Signed-off-by: Florian Westphal --- tools/testing/selftests/net/netfilter/Makefile | 1 + .../selftests/net/netfilter/nft_fib_nexthop.sh | 152 +++++++++++++++++++++ 2 files changed, 153 insertions(+) create mode 100755 tools/testing/selftests/net/netfilter/nft_fib_nexthop.sh diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index ee2d1a5254f8..d953ee218c0f 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -26,6 +26,7 @@ TEST_PROGS := \ nft_concat_range.sh \ nft_conntrack_helper.sh \ nft_fib.sh \ + nft_fib_nexthop.sh \ nft_flowtable.sh \ nft_interface_stress.sh \ nft_meta.sh \ diff --git a/tools/testing/selftests/net/netfilter/nft_fib_nexthop.sh b/tools/testing/selftests/net/netfilter/nft_fib_nexthop.sh new file mode 100755 index 000000000000..c4f203057382 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_fib_nexthop.sh @@ -0,0 +1,152 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2154 +# +# Exercise nft_fib6_eval()'s sibling/nh enumeration on three route shapes: +# 1) route via a single external nexthop (nhid) +# 2) route via an external nexthop group (nhid -> group, two members) +# 3) route via old-style multipath (nexthop ... nexthop ...) +# +# In each scenario the route's nexthop set contains veth0 (the iif of the +# test packet). nft_fib6_info_nh_uses_dev() must walk the set and report +# veth0 as a valid oif. For (2) and (3) the matching nexthop is the second +# member, so the walk has to traverse beyond the primary nh. +# +# After sending $PKTS ICMPv6 echo requests from ns1, check two counters on +# nsrouter: +# nf_ok -- `fib daddr . iif oif eq "veth0"` must equal $PKTS +# nf_bad -- `fib daddr . iif oif missing` must stay at 0 +# Both rules also match on iif veth0 and ip6 daddr dead:dead::/64 so that +# kernel-generated ND/MLD/RA traffic cannot pollute the counters. +# +# Topology similar to nft_fib.sh, without ns2; two dummy interfaces on +# nsrouter host extra nh devices: +# +# dead:1::99 dead:1::1 +# ns1 <----veth----> nsrouter --- dummy0 dead:2::1 +# \-- dummy1 dead:9::1 + +source lib.sh + +ret=0 +PKTS=3 + +checktool "nft --version" "run test without nft" +checktool "ip -V" "run test without iproute2" + +setup_ns nsrouter ns1 +trap cleanup_all_ns EXIT + +if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" \ + > /dev/null 2>&1; then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi + +ip -net "$ns1" link set lo up +ip -net "$ns1" link set eth0 up +ip -net "$ns1" -6 addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" -6 route add default via dead:1::1 + +ip -net "$nsrouter" link set lo up +ip -net "$nsrouter" link set veth0 up +ip -net "$nsrouter" -6 addr add dead:1::1/64 dev veth0 nodad + +if ! ip -net "$nsrouter" link add dummy0 type dummy 2>/dev/null; then + echo "SKIP: dummy netdev not available" + exit $ksft_skip +fi +ip -net "$nsrouter" link set dummy0 up +ip -net "$nsrouter" -6 addr add dead:2::1/64 dev dummy0 nodad + +ip -net "$nsrouter" link add dummy1 type dummy +ip -net "$nsrouter" link set dummy1 up +ip -net "$nsrouter" -6 addr add dead:9::1/64 dev dummy1 nodad + +ip netns exec "$nsrouter" sysctl -q net.ipv6.conf.all.forwarding=1 + +load_fib_rule() { + # filter on iif + daddr so the counters only see our test packets + ip netns exec "$nsrouter" nft -f /dev/stdin <&2 + ip netns exec "$nsrouter" nft list counter ip6 t "$counter" 1>&2 +} + +run_scenario() { + local what="$1"; shift + # counter output format is "packets PACKET_NUM bytes BYTES_NUM"; + # we only care about the packet count + local expect_ok="packets $PKTS bytes" + local expect_bad="packets 0 bytes" + local lret=0 + + # reset route + nexthop state between scenarios + ip -net "$nsrouter" -6 route del dead:dead::/64 > /dev/null 2>&1 || true + ip -net "$nsrouter" nexthop flush > /dev/null 2>&1 || true + + # run the scenario function passed by the caller + "$@" || echo "WARN ($what): scenario setup returned non-zero" + + load_fib_rule || { echo "FAIL ($what): nft load"; ret=1; return; } + + # ping a daddr inside dead:dead::/64 so fib has to walk the nh set + ip netns exec "$ns1" ping -6 -c "$PKTS" -i 0.1 -W 1 dead:dead::1 \ + > /dev/null 2>&1 || true + + # verify the packets went through the expected fib path + if ! ip netns exec "$nsrouter" nft list counter ip6 t nf_ok | grep -q "$expect_ok"; then + bad_counter nf_ok "$expect_ok" "$what" + lret=1 + fi + if ! ip netns exec "$nsrouter" nft list counter ip6 t nf_bad | grep -q "$expect_bad"; then + bad_counter nf_bad "$expect_bad" "$what" + lret=1 + fi + + if [ $lret -eq 0 ]; then + echo "PASS: $what" + else + ret=1 + fi +} + +scenario_single_nh() { + ip -net "$nsrouter" nexthop add id 1 via dead:1::99 dev veth0 + ip -net "$nsrouter" -6 route add dead:dead::/64 nhid 1 +} +run_scenario "single external nexthop (nhid -> veth0)" scenario_single_nh + +scenario_nh_group() { + ip -net "$nsrouter" nexthop add id 1 via dead:2::2 dev dummy0 + ip -net "$nsrouter" nexthop add id 2 via dead:1::99 dev veth0 + ip -net "$nsrouter" nexthop add id 100 group 1/2 + ip -net "$nsrouter" -6 route add dead:dead::/64 nhid 100 +} +run_scenario "nexthop group (dummy0 + veth0)" scenario_nh_group + +scenario_old_multipath() { + ip -net "$nsrouter" -6 route add dead:dead::/64 \ + nexthop via dead:2::2 dev dummy0 \ + nexthop via dead:1::99 dev veth0 +} +run_scenario "old-style multipath (sibling on veth0)" scenario_old_multipath + +exit $ret -- cgit v1.2.3 From 18014147d3ee7831dce53fe65d7fc8d428b02552 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Mon, 11 May 2026 16:37:56 +0200 Subject: netfilter: nf_tables: fix dst corruption in same register operation For lshift and rshift, the shift operations are performed in a loop over 32-bit words. The loop calculates the shifted value and write it to dst, and then immediately reads from src to calculate the carry for the next iteration. Because src and dst could point to the same memory location, the carry is incorrectly calculated using the newly modified dst value instead of the original src value. Adding a temporary local variable to cache the original value before writing to dst and using it for the carry calculation solves the problem. In addition, partial overlap is rejected from control plane for all kind of operations including byteorder. This was tested with the following bytecode: table test_table ip flags 0 use 1 handle 1 ip test_table test_chain use 3 type filter hook input prio 0 policy accept packets 0 bytes 0 flags 1 ip test_table test_chain 2 [ immediate reg 1 0x44332211 0x88776655 ] [ bitwise reg 1 = ( reg 1 << 0x08000000 ) ] [ cmp eq reg 1 0x66443322 0x00887766 ] [ counter pkts 0 bytes 0 ] ip test_table test_chain 4 3 [ immediate reg 1 0x44332211 0x88776655 ] [ bitwise reg 1 = ( reg 1 << 0x08000000 ) ] [ cmp eq reg 1 0x55443322 0x00887766 ] [ counter pkts 21794 bytes 1917798 ] Fixes: 567d746b55bc ("netfilter: bitwise: add support for shifts.") Acked-by: Jeremy Sowden Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 7 +++++++ net/netfilter/nft_bitwise.c | 18 ++++++++++++++---- net/netfilter/nft_byteorder.c | 13 ++++++++++--- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index cff7b773e972..9d844354c4d9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -180,6 +180,13 @@ static inline u64 nft_reg_load64(const u32 *sreg) return get_unaligned((u64 *)sreg); } +static inline bool nft_reg_overlap(u8 src, u8 dst, u32 len) +{ + unsigned int n = DIV_ROUND_UP(len, sizeof(u32)); + + return src != dst && src < dst + n && dst < src + n; +} + static inline void nft_data_copy(u32 *dst, const struct nft_data *src, unsigned int len) { diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 94dccdcfa06b..785b8e9731d1 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -43,8 +43,10 @@ static void nft_bitwise_eval_lshift(u32 *dst, const u32 *src, u32 carry = 0; for (i = DIV_ROUND_UP(priv->len, sizeof(u32)); i > 0; i--) { - dst[i - 1] = (src[i - 1] << shift) | carry; - carry = src[i - 1] >> (BITS_PER_TYPE(u32) - shift); + u32 tmp_src = src[i - 1]; + + dst[i - 1] = (tmp_src << shift) | carry; + carry = tmp_src >> (BITS_PER_TYPE(u32) - shift); } } @@ -56,8 +58,10 @@ static void nft_bitwise_eval_rshift(u32 *dst, const u32 *src, u32 carry = 0; for (i = 0; i < DIV_ROUND_UP(priv->len, sizeof(u32)); i++) { - dst[i] = carry | (src[i] >> shift); - carry = src[i] << (BITS_PER_TYPE(u32) - shift); + u32 tmp_src = src[i]; + + dst[i] = carry | (tmp_src >> shift); + carry = tmp_src << (BITS_PER_TYPE(u32) - shift); } } @@ -235,6 +239,9 @@ static int nft_bitwise_init_bool(const struct nft_ctx *ctx, &priv->sreg2, priv->len); if (err < 0) return err; + + if (nft_reg_overlap(priv->sreg2, priv->dreg, priv->len)) + return -EINVAL; } return 0; @@ -265,6 +272,9 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (nft_reg_overlap(priv->sreg, priv->dreg, priv->len)) + return -EINVAL; + if (tb[NFTA_BITWISE_OP]) { priv->op = ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])); switch (priv->op) { diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index e00dddfa2fc0..2316c77f4228 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -144,9 +144,16 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, if (err < 0) return err; - return nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG], - &priv->dreg, NULL, NFT_DATA_VALUE, - priv->len); + err = nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); + if (err < 0) + return err; + + if (nft_reg_overlap(priv->sreg, priv->dreg, priv->len)) + return -EINVAL; + + return 0; } static int nft_byteorder_dump(struct sk_buff *skb, -- cgit v1.2.3 From 654ddf855bebd8d45a6e707f5dc2344921f5e0cf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 May 2026 22:28:01 +0200 Subject: platform/x86: bitland-mifs-wmi: add CONFIG_LEDS_CLASS dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The newly added driver requires the LED classdev support and causes a link failure when that is disabled: x86_64-linux-ld: vmlinux.o: in function `bitland_mifs_wmi_probe': bitland-mifs-wmi.c:(.text+0xede02a): undefined reference to `devm_led_classdev_register_ext' Fixes: dc1ec4fa86b2 ("platform/x86: bitland-mifs-wmi: Add new Bitland MIFS WMI driver") Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20260519202804.1339581-1-arnd@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 2ffa4ecf65b0..7a4956088300 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -118,6 +118,7 @@ config BITLAND_MIFS_WMI depends on ACPI_WMI depends on HWMON depends on INPUT + depends on LEDS_CLASS depends on POWER_SUPPLY select ACPI_PLATFORM_PROFILE select INPUT_SPARSEKMAP -- cgit v1.2.3 From f6982769910ecddabdb5b8b9afdab0bb8b6668ac Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 22 May 2026 20:56:22 +0900 Subject: block: avoid use-after-free in disk_free_zone_resources() The function disk_update_zone_resources() may call disk_free_zone_resources() in case of error, and following this, blk_revalidate_disk_zones() will again calls disk_free_zone_resources() if disk_update_zone_resources() failed. If a zone worker thread is being used (which is the default for a rotational media zoned device), disk_free_zone_resources() will try to stop the zone worker thread twice because disk->zone_wplugs_worker is not reset to NULL when the worker thread is stopped the first time. In disk_free_zone_resources(), fix this by correctly clearing disk->zone_wplugs_worker to NULL when the worker thread is stopped. And while at it, since disk_free_zone_resources() is always called after a failed call to disk_update_zone_resources(), remove the unnecessary call to disk_free_zone_resources() in disk_update_zone_resources(). Fixes: 1365b6904fd0 ("block: allow submitting all zone writes from a single context") Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Link: https://patch.msgid.link/20260522115622.588535-1-dlemoal@kernel.org Signed-off-by: Jens Axboe --- block/blk-zoned.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 42ef830054dc..6a221c180889 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -2001,8 +2001,10 @@ static void disk_set_zones_cond_array(struct gendisk *disk, u8 *zones_cond) void disk_free_zone_resources(struct gendisk *disk) { - if (disk->zone_wplugs_worker) + if (disk->zone_wplugs_worker) { kthread_stop(disk->zone_wplugs_worker); + disk->zone_wplugs_worker = NULL; + } WARN_ON_ONCE(!list_empty(&disk->zone_wplugs_list)); if (disk->zone_wplugs_wq) { @@ -2135,9 +2137,6 @@ commit: ret = queue_limits_commit_update(q, &lim); unfreeze: - if (ret) - disk_free_zone_resources(disk); - blk_mq_unfreeze_queue(q, memflags); return ret; -- cgit v1.2.3 From e1a9d791fd66ab2431b9e6f6f835823809869047 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 22 May 2026 12:16:21 +0200 Subject: USB: serial: cypress_m8: fix memory corruption with small endpoint Make sure that the interrupt-out endpoint max packet size is at least eight bytes to avoid user-controlled slab corruption or NULL-pointer dereference should a malicious device report a smaller size. Fixes: 3416eaa1f8f8 ("USB: cypress_m8: Packet format is separate from characteristic size") Cc: stable@vger.kernel.org # 2.6.26 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/cypress_m8.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index afff1a0f4298..0b8a4e9d7bc5 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -445,6 +445,14 @@ static int cypress_generic_port_probe(struct usb_serial_port *port) return -ENODEV; } + /* + * The buffer must be large enough for the one or two-byte header (and + * following data), but assume anything smaller than eight bytes is + * broken. + */ + if (port->interrupt_out_size < 8) + return -EINVAL; + priv = kzalloc_obj(struct cypress_private); if (!priv) return -ENOMEM; -- cgit v1.2.3 From f4feb1e20058e407cb00f45aff47f5b7e19a6bbf Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Wed, 20 May 2026 09:00:21 -0700 Subject: tun: free page on short-frame rejection in tun_xdp_one() tun_xdp_one() returns -EINVAL on a frame shorter than ETH_HLEN without freeing the page that vhost_net_build_xdp() allocated for it. tun_sendmsg() discards that -EINVAL and still returns total_len, so vhost_tx_batch() takes the success path and never frees the page; each short frame in a batch leaks one page-frag chunk. A local process that can open /dev/net/tun and /dev/vhost-net can hit this path: it attaches a tun/tap device as the vhost-net backend and feeds TX descriptors whose length minus the virtio-net header is below ETH_HLEN. Each kick leaks the page-frag chunks for that batch, and a tight submission loop exhausts host memory and triggers an OOM panic. Free the page before returning -EINVAL, matching the XDP-program error path in the same function. Fixes: 049584807f1d ("tun: add missing verification for short frame") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Reviewed-by: Dongli Zhang Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260520160020.375349-2-bestswngs@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b183189f1853..f594360d66d6 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2394,8 +2394,10 @@ static int tun_xdp_one(struct tun_struct *tun, bool skb_xdp = false; struct page *page; - if (unlikely(datasize < ETH_HLEN)) + if (unlikely(datasize < ETH_HLEN)) { + put_page(virt_to_head_page(xdp->data)); return -EINVAL; + } xdp_prog = rcu_dereference(tun->xdp_prog); if (xdp_prog) { -- cgit v1.2.3 From dab48a7e74e6a394f3aa0461a2b1fb0c7b38fcb8 Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Fri, 22 May 2026 10:54:04 +0200 Subject: Input: ims-pcu - fix usb_free_coherent() size in ims_pcu_buffers_free() The input buffer size is pcu->max_in_size, but pcu->max_out_size is passed to usb_free_coherent(). Change size to match the allocation size. Fixes: 628329d52474 ("Input: add IMS Passenger Control Unit driver") Cc: stable@vger.kernel.org Signed-off-by: Thomas Fourier Link: https://patch.msgid.link/20260522085412.45430-2-fourier.thomas@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/ims-pcu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index 4c022a36dbe8..7a1cb9333f53 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -1624,7 +1624,7 @@ static void ims_pcu_buffers_free(struct ims_pcu *pcu) usb_kill_urb(pcu->urb_in); usb_free_urb(pcu->urb_in); - usb_free_coherent(pcu->udev, pcu->max_out_size, + usb_free_coherent(pcu->udev, pcu->max_in_size, pcu->urb_in_buf, pcu->read_dma); kfree(pcu->urb_out_buf); -- cgit v1.2.3 From 3bcf7aec6a9d16438f2cec29f5d7c8d5b8edf9b2 Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Thu, 21 May 2026 09:32:31 -0700 Subject: tap: free page on error paths in tap_get_user_xdp() tap_get_user_xdp() rejects a frame shorter than ETH_HLEN with -EINVAL, and returns -ENOMEM when build_skb() fails. Both paths jump to the err label without freeing the page that vhost_net_build_xdp() allocated for the frame. tap_sendmsg() discards the per-buffer return value and always returns 0, so vhost_tx_batch() takes the success path and never frees the page; each rejected frame in a batch leaks one page-frag chunk. Free the page on both error paths, before the skb is built. This is the tap counterpart of the same leak in tun_xdp_one(). Fixes: 0efac27791ee ("tap: accept an array of XDP buffs through sendmsg()") Fixes: ed7f2afdd0e0 ("tap: add missing verification for short frame") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Reviewed-by: Dongli Zhang Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260521163230.1478627-2-bestswngs@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/tap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index a590e07ce0a9..fae115915c8e 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1052,6 +1052,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) int err, depth; if (unlikely(xdp->data_end - xdp->data < ETH_HLEN)) { + put_page(virt_to_head_page(xdp->data)); err = -EINVAL; goto err; } @@ -1061,6 +1062,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) skb = build_skb(xdp->data_hard_start, buflen); if (!skb) { + put_page(virt_to_head_page(xdp->data)); err = -ENOMEM; goto err; } -- cgit v1.2.3 From aa8963fdce667a42fb7f0bdd2909fadcab02f9a8 Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Thu, 21 May 2026 09:33:13 -0700 Subject: tun: free page on build_skb failure in tun_xdp_one() When build_skb() fails in tun_xdp_one(), the function sets ret to -ENOMEM and jumps to the out label, which returns without freeing the page that vhost_net_build_xdp() allocated for the frame. As with the short-frame rejection path, tun_sendmsg() discards the per-buffer error and still returns total_len, so vhost_tx_batch() takes the success path and never frees the page. Each build_skb() failure in a batch leaks one page-frag chunk. Free the page before taking the error path, matching the put_page() the other error exits of tun_xdp_one() already perform. Fixes: 043d222f93ab ("tuntap: accept an array of XDP buffs through sendmsg()") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Reviewed-by: Dongli Zhang Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260521163312.1479805-2-bestswngs@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index f594360d66d6..9e7744eb57a3 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2439,6 +2439,7 @@ static int tun_xdp_one(struct tun_struct *tun, build: skb = build_skb(xdp->data_hard_start, buflen); if (!skb) { + put_page(virt_to_head_page(xdp->data)); ret = -ENOMEM; goto out; } -- cgit v1.2.3 From aae9d8a5528b8ee9ff8dc5d3558b8a9f852a724a Mon Sep 17 00:00:00 2001 From: Ziyu Zhang Date: Wed, 20 May 2026 00:56:36 +0800 Subject: vsock: keep poll shutdown state consistent vsock_poll() reads vsk->peer_shutdown before taking the socket lock to set EPOLLHUP and EPOLLRDHUP, then reads it again after taking the lock to report EOF readability. A shutdown packet can update peer_shutdown while poll is waiting for the lock, so one poll invocation can report EOF readability without the corresponding HUP/RDHUP bits. For connectible sockets, take one peer_shutdown snapshot after lock_sock() and use it for all peer-shutdown-derived poll bits. For datagram sockets, which do not take lock_sock() in poll(), take one lockless READ_ONCE() snapshot and pair it with WRITE_ONCE() on the writer side. This keeps the peer-shutdown-derived bits internally consistent for each poll pass. Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Signed-off-by: Ziyu Zhang Link: https://patch.msgid.link/20260519165636.62542-1-ziyuzhang201@gmail.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 49 +++++++++++++++++++++------------ net/vmw_vsock/hyperv_transport.c | 9 ++++-- net/vmw_vsock/virtio_transport_common.c | 14 ++++++---- net/vmw_vsock/vmci_transport.c | 8 ++++-- 4 files changed, 52 insertions(+), 28 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 44037b066a5f..2ce1063d4a67 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -642,7 +642,7 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) */ sock_reset_flag(sk, SOCK_DONE); sk->sk_state = TCP_CLOSE; - vsk->peer_shutdown = 0; + WRITE_ONCE(vsk->peer_shutdown, 0); } if (sk->sk_type == SOCK_SEQPACKET) { @@ -933,7 +933,7 @@ static struct sock *__vsock_create(struct net *net, vsk->rejected = false; vsk->sent_request = false; vsk->ignore_connecting_rst = false; - vsk->peer_shutdown = 0; + WRITE_ONCE(vsk->peer_shutdown, 0); INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout); INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work); @@ -1241,6 +1241,25 @@ out: return err; } +static __poll_t vsock_poll_shutdown(struct sock *sk, u32 peer_shutdown) +{ + __poll_t mask = 0; + + /* INET sockets treat local write shutdown and peer write shutdown as a + * case of EPOLLHUP set. + */ + if (sk->sk_shutdown == SHUTDOWN_MASK || + ((sk->sk_shutdown & SEND_SHUTDOWN) && + (peer_shutdown & SEND_SHUTDOWN))) + mask |= EPOLLHUP; + + if (sk->sk_shutdown & RCV_SHUTDOWN || + peer_shutdown & SEND_SHUTDOWN) + mask |= EPOLLRDHUP; + + return mask; +} + static __poll_t vsock_poll(struct file *file, struct socket *sock, poll_table *wait) { @@ -1258,24 +1277,17 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, /* Signify that there has been an error on this socket. */ mask |= EPOLLERR; - /* INET sockets treat local write shutdown and peer write shutdown as a - * case of EPOLLHUP set. - */ - if ((sk->sk_shutdown == SHUTDOWN_MASK) || - ((sk->sk_shutdown & SEND_SHUTDOWN) && - (vsk->peer_shutdown & SEND_SHUTDOWN))) { - mask |= EPOLLHUP; - } - - if (sk->sk_shutdown & RCV_SHUTDOWN || - vsk->peer_shutdown & SEND_SHUTDOWN) { - mask |= EPOLLRDHUP; - } - if (sk_is_readable(sk)) mask |= EPOLLIN | EPOLLRDNORM; if (sock->type == SOCK_DGRAM) { + u32 peer_shutdown = READ_ONCE(vsk->peer_shutdown); + + /* DGRAM sockets do not take lock_sock() in poll(), so use one + * lockless snapshot for all shutdown-derived mask bits. + */ + mask |= vsock_poll_shutdown(sk, peer_shutdown); + /* For datagram sockets we can read if there is something in * the queue and write as long as the socket isn't shutdown for * sending. @@ -1290,6 +1302,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, } else if (sock_type_connectible(sk->sk_type)) { const struct vsock_transport *transport; + u32 peer_shutdown; lock_sock(sk); @@ -1322,8 +1335,10 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, * terminated should also be considered read, and we check the * shutdown flag for that. */ + peer_shutdown = READ_ONCE(vsk->peer_shutdown); + mask |= vsock_poll_shutdown(sk, peer_shutdown); if (sk->sk_shutdown & RCV_SHUTDOWN || - vsk->peer_shutdown & SEND_SHUTDOWN) { + peer_shutdown & SEND_SHUTDOWN) { mask |= EPOLLIN | EPOLLRDNORM; } diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 7a8963595bf9..b3394946b2ed 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -264,7 +264,7 @@ static void hvs_do_close_lock_held(struct vsock_sock *vsk, struct sock *sk = sk_vsock(vsk); sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(vsk->peer_shutdown, SHUTDOWN_MASK); if (vsock_stream_has_data(vsk) <= 0) sk->sk_state = TCP_CLOSING; sk->sk_state_change(sk); @@ -593,7 +593,9 @@ static int hvs_update_recv_data(struct hvsock *hvs) return -EIO; if (payload_len == 0) - hvs->vsk->peer_shutdown |= SEND_SHUTDOWN; + WRITE_ONCE(hvs->vsk->peer_shutdown, + READ_ONCE(hvs->vsk->peer_shutdown) | + SEND_SHUTDOWN); hvs->recv_data_len = payload_len; hvs->recv_data_off = 0; @@ -736,7 +738,8 @@ static s64 hvs_stream_has_data(struct vsock_sock *vsk) return ret; return hvs->recv_data_len; case 0: - vsk->peer_shutdown |= SEND_SHUTDOWN; + WRITE_ONCE(vsk->peer_shutdown, + READ_ONCE(vsk->peer_shutdown) | SEND_SHUTDOWN); ret = 0; break; default: /* -1 */ diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index df3b418e0392..d4d26fba9e37 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1228,7 +1228,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk, struct sock *sk = sk_vsock(vsk); sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(vsk->peer_shutdown, SHUTDOWN_MASK); if (vsock_stream_has_data(vsk) <= 0) sk->sk_state = TCP_CLOSING; sk->sk_state_change(sk); @@ -1431,12 +1431,15 @@ virtio_transport_recv_connected(struct sock *sk, case VIRTIO_VSOCK_OP_CREDIT_UPDATE: sk->sk_write_space(sk); break; - case VIRTIO_VSOCK_OP_SHUTDOWN: + case VIRTIO_VSOCK_OP_SHUTDOWN: { + u32 peer_shutdown = READ_ONCE(vsk->peer_shutdown); + if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) - vsk->peer_shutdown |= RCV_SHUTDOWN; + peer_shutdown |= RCV_SHUTDOWN; if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) - vsk->peer_shutdown |= SEND_SHUTDOWN; - if (vsk->peer_shutdown == SHUTDOWN_MASK) { + peer_shutdown |= SEND_SHUTDOWN; + WRITE_ONCE(vsk->peer_shutdown, peer_shutdown); + if (peer_shutdown == SHUTDOWN_MASK) { if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) { (void)virtio_transport_reset(vsk, NULL); virtio_transport_do_close(vsk, true); @@ -1451,6 +1454,7 @@ virtio_transport_recv_connected(struct sock *sk, if (le32_to_cpu(virtio_vsock_hdr(skb)->flags)) sk->sk_state_change(sk); break; + } case VIRTIO_VSOCK_OP_RST: virtio_transport_do_close(vsk, true); break; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index d2579380f51e..5c1ecd5bfdbc 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -819,7 +819,7 @@ static void vmci_transport_handle_detach(struct sock *sk) /* On a detach the peer will not be sending or receiving * anymore. */ - vsk->peer_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(vsk->peer_shutdown, SHUTDOWN_MASK); /* We should not be sending anymore since the peer won't be * there to receive, but we can still receive if there is data @@ -1542,7 +1542,9 @@ static int vmci_transport_recv_connected(struct sock *sk, if (pkt->u.mode) { vsk = vsock_sk(sk); - vsk->peer_shutdown |= pkt->u.mode; + WRITE_ONCE(vsk->peer_shutdown, + READ_ONCE(vsk->peer_shutdown) | + pkt->u.mode); sk->sk_state_change(sk); } break; @@ -1559,7 +1561,7 @@ static int vmci_transport_recv_connected(struct sock *sk, * a clean shutdown. */ sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(vsk->peer_shutdown, SHUTDOWN_MASK); if (vsock_stream_has_data(vsk) <= 0) sk->sk_state = TCP_CLOSING; -- cgit v1.2.3 From 70f8592ee90585272018a725054b6eb2ab7e99ca Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 20 May 2026 19:22:35 +0200 Subject: net: netlink: fix sending unassigned nsid after assigned one If the current skb is not shared, it is re-used directly for all the sockets subscribed to the notification. If we have remote all-nsid socket receiving a message first, then the 'nsid_is_set' will be set to 'true'. If the nsid is NOT_ASSIGNED for the next socket in the list, the 'nsid_is_set' will remain 'true' and the negative value is be delivered to the user space. All subsequent nsid values will be delivered as well, since there is no code path that sets the flag back to 'false'. Fix that by always dropping the flag to 'false' first. Fixes: 7212462fa6fd ("netlink: don't send unknown nsid") Signed-off-by: Ilya Maximets Acked-by: Nicolas Dichtel Link: https://patch.msgid.link/20260520172317.175168-2-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2aeb0680807d..0742e97f256e 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1482,6 +1482,7 @@ static void do_one_broadcast(struct sock *sk, p->skb2 = NULL; goto out; } + NETLINK_CB(p->skb2).nsid_is_set = false; NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED) NETLINK_CB(p->skb2).nsid_is_set = true; -- cgit v1.2.3 From 88b126b39f9757e9debc322d4679239e9af089c7 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 20 May 2026 19:22:36 +0200 Subject: net: netlink: don't set nsid on local notifications In most cases, notifications on sockets with NETLINK_LISTEN_ALL_NSID do not contain NSID in their ancillary data in case the event is local to the listener. However, when a self-referential NSID is allocated for a namespace, every local notification starts sending this ID to the user space. This is problematic, because the listener cannot tell if those notifications are local or not anymore without making extra requests to figure out if the provided NSID is local or not. The listener can also not figure out the local NSID beforehand as it can be allocated at any point in time by other processes, changing the structure of the future notifications for everyone. The value is practically not useful, since it's the namespace's own ID that the application has to obtain from other sources in order to figure out if it's the same or not. So, for the application it's just an extra busy work with no benefits. Moreover, applications that do not know about this quirk may be mishandling notifications with NSID set as notifications from remote namespaces. This is the case for ovs-vswitchd and the iproute2's 'ip monitor' that stops printing 'current' and starts printing the nsid number mid-session. Lack of clear documentation for this behavior is also not helping. A search though open-source projects doesn't reveal any projects that use NETNSA_NSID_NOT_ASSIGNED and rely on metadata to contain self-referential NSIDs (expected, since the value is not useful). Quite the opposite, as already mentioned, there are few applications that rely on NSID to not be present in local events. Since the value is not useful and actively harmful in some cases, let's not report it for local events, making the notifications more consistent. Also adding some blank lines for readability. Fixes: 59324cf35aba ("netlink: allow to listen "all" netns") Reported-by: Matteo Perin Signed-off-by: Ilya Maximets Acked-by: Nicolas Dichtel Link: https://patch.msgid.link/20260520172317.175168-3-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0742e97f256e..7269e23b578d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1482,10 +1482,14 @@ static void do_one_broadcast(struct sock *sk, p->skb2 = NULL; goto out; } + NETLINK_CB(p->skb2).nsid_is_set = false; - NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); - if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED) - NETLINK_CB(p->skb2).nsid_is_set = true; + if (!net_eq(sock_net(sk), p->net)) { + NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); + if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED) + NETLINK_CB(p->skb2).nsid_is_set = true; + } + val = netlink_broadcast_deliver(sk, p->skb2); if (val < 0) { netlink_overrun(sk); -- cgit v1.2.3 From 2e43b64248909c617281921d6d9ba3bfc0159473 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 20 May 2026 19:22:38 +0200 Subject: selftests: net: add a test case for nsid in all nsid notifications The test subscribes to link events from all namespaces and makes sure that local events do not carry NSID in their ancillary data (even if there is a self-referential NSID allocated for the local namespace), and remote events do. Signed-off-by: Ilya Maximets Acked-by: Nicolas Dichtel Link: https://patch.msgid.link/20260520172317.175168-5-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/link_netns.py | 61 ++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/link_netns.py b/tools/testing/selftests/net/link_netns.py index aab043c59d69..6d1f863b6262 100755 --- a/tools/testing/selftests/net/link_netns.py +++ b/tools/testing/selftests/net/link_netns.py @@ -3,13 +3,14 @@ import time -from lib.py import ksft_run, ksft_exit, ksft_true +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_true from lib.py import ip from lib.py import NetNS, NetNSEnter from lib.py import RtnlFamily LINK_NETNSID = 100 +LINK_NETNSID2 = 200 def test_event() -> None: @@ -32,6 +33,57 @@ def test_event() -> None: "Received unexpected link notification") +def test_event_all_nsid() -> None: + """NETLINK_LISTEN_ALL_NSID notifications: local events must not + carry nsid even with a self-referential mapping. Remote events + must carry the correct nsid.""" + + with NetNS() as ns1, NetNS() as ns2: + net1, net2 = str(ns1), str(ns2) + + with NetNSEnter(net1): + rtnl = RtnlFamily() + rtnl.ntf_listen_all_nsid() + rtnl.ntf_subscribe("rtnlgrp-link") + + # Case 1: no nsid assigned, local event, no nsid expected. + ip("link add dummy-lo type dummy", ns=net1) + + # Case 2: self-referential nsid, local event, still no nsid. + ip(f"netns set {net1} {LINK_NETNSID}", ns=net1) + ip("link add dummy-sr type dummy", ns=net1) + + # Case 3: remote event, nsid present. + ip(f"netns set {net2} {LINK_NETNSID2}", ns=net1) + ip("link add dummy-re type dummy", ns=net2) + + # Collect the three newlink events, ignoring unrelated noise. + events = {} + for msg in rtnl.poll_ntf(duration=1): + if msg['name'] == 'getlink': + ifname = msg['msg'].get('ifname') + if ifname in ('dummy-lo', 'dummy-sr', 'dummy-re'): + events[ifname] = msg + if len(events) == 3: + break + + ksft_true('dummy-lo' in events, "missing local event") + ksft_true(events['dummy-lo'].get('nsid') is None, + "local event without nsid should not carry nsid") + + ksft_true('dummy-sr' in events, "missing self-ref event") + ksft_true(events['dummy-sr'].get('nsid') is None, + "local event with self-ref nsid should not carry nsid") + + ksft_true('dummy-re' in events, "missing remote event") + ksft_eq(events['dummy-re'].get('nsid'), LINK_NETNSID2, + "remote event should carry nsid") + + ip("link del dummy-lo", ns=net1) + ip("link del dummy-sr", ns=net1) + ip("link del dummy-re", ns=net2) + + def validate_link_netns(netns, ifname, link_netnsid) -> bool: link_info = ip(f"-d link show dev {ifname}", ns=netns, json=True) if not link_info: @@ -133,7 +185,12 @@ def test_peer_net() -> None: def main() -> None: - ksft_run([test_event, test_link_net, test_peer_net]) + ksft_run([ + test_event, + test_event_all_nsid, + test_link_net, + test_peer_net, + ]) ksft_exit() -- cgit v1.2.3 From 9e4389b0038781f19f97895186ed941ff8ac1678 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 21 May 2026 16:56:39 +0200 Subject: net/smc: Do not re-initialize smc hashtables INIT_HLIST_HEAD(&smc_v*_hashinfo.ht) are called after smc_nl_init(), proto_register() and sock_register(). This can lead to smc_v*_hashinfo.ht being reset even though hash entries already exist and are being used, possibly resulting in a corrupted list. Remove unnecessary and dangerous re-initialisation of smc_v*_hashinfo.ht in smc_init(); it is implicitly initialised to zero anyhow. Add HLIST_HEAD_INIT to the definitions for clarity. Fixes: f16a7dd5cf27 ("smc: netlink interface for SMC sockets") Suggested-by: Halil Pasic Signed-off-by: Alexandra Winter Acked-by: Halil Pasic Reviewed-by: Mahanta Jambigi Link: https://patch.msgid.link/20260521145639.10317-1-wintera@linux.ibm.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index dffbd529762d..b5db69073e20 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -188,10 +188,12 @@ static bool smc_hs_congested(const struct sock *sk) struct smc_hashinfo smc_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock), + .ht = HLIST_HEAD_INIT, }; struct smc_hashinfo smc_v6_hashinfo = { .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock), + .ht = HLIST_HEAD_INIT, }; int smc_hash_sk(struct sock *sk) @@ -3517,8 +3519,6 @@ static int __init smc_init(void) pr_err("%s: sock_register fails with %d\n", __func__, rc); goto out_proto6; } - INIT_HLIST_HEAD(&smc_v4_hashinfo.ht); - INIT_HLIST_HEAD(&smc_v6_hashinfo.ht); rc = smc_ib_register_client(); if (rc) { -- cgit v1.2.3 From 3589d20a666caf30ad100c960a2de7de390fce88 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 21 May 2026 07:11:45 -0700 Subject: net/iucv: fix locking in .getsockopt Mirror iucv_sock_setsockopt() and wrap the whole switch in lock_sock()/release_sock(). The pre-existing SO_MSGLIMIT-only lock becomes redundant and is removed. Any AF_IUCV HIPER user can potentially crash the kernel by racing recvmsg() with getsockopt(SO_MSGSIZE): the SO_MSGSIZE arm dereferences iucv->hs_dev->mtu after iucv_sock_close() (called from the racing recvmsg()) has set hs_dev to NULL, producing a NULL pointer dereference oops. Suggested-by: Stanislav Fomichev Fixes: 51363b8751a6 ("af_iucv: allow retrieval of maximum message size") Signed-off-by: Breno Leitao Reviewed-by: Alexandra Winter Tested-by: Alexandra Winter Link: https://patch.msgid.link/20260521-af_iucv_fix2-v1-1-f16b1c510aa9@debian.org Signed-off-by: Jakub Kicinski --- net/iucv/af_iucv.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 72dfccd4e3d5..c2dc3338670e 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1540,7 +1540,7 @@ static int iucv_sock_getsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); unsigned int val; - int len; + int len, rc; if (level != SOL_IUCV) return -ENOPROTOOPT; @@ -1553,26 +1553,34 @@ static int iucv_sock_getsockopt(struct socket *sock, int level, int optname, len = min_t(unsigned int, len, sizeof(int)); + rc = 0; + + lock_sock(sk); switch (optname) { case SO_IPRMDATA_MSG: val = (iucv->flags & IUCV_IPRMDATA) ? 1 : 0; break; case SO_MSGLIMIT: - lock_sock(sk); val = (iucv->path != NULL) ? iucv->path->msglim /* connected */ : iucv->msglimit; /* default */ - release_sock(sk); break; case SO_MSGSIZE: - if (sk->sk_state == IUCV_OPEN) - return -EBADFD; + if (sk->sk_state == IUCV_OPEN) { + rc = -EBADFD; + break; + } val = (iucv->hs_dev) ? iucv->hs_dev->mtu - sizeof(struct af_iucv_trans_hdr) - ETH_HLEN : 0x7fffffff; break; default: - return -ENOPROTOOPT; + rc = -ENOPROTOOPT; + break; } + release_sock(sk); + + if (rc) + return rc; if (put_user(len, optlen)) return -EFAULT; -- cgit v1.2.3 From 7205b58702273baf21d6ba7992e6ba15852325f7 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Fri, 15 May 2026 14:09:41 -0400 Subject: scsi: core: Run queues for all non-SDEV_DEL devices from scsi_run_host_queues While a SCSI host is in a recovery state, scsi_mq_requeue_cmd() will not set the requeue list for a requeued command to be kicked in the future. The expectation is a call to scsi_run_host_queues() will kick all SCSI devices once the recovery state is cleared. However, scsi_run_host_queues() uses shost_for_each_device() which uses scsi_device_get() and so will ignore devices in a partially removed state like SDEV_CANCEL. But these devices may also have requeued requests, leaving their requests stuck from not being kicked and causing the removal process of the device to hang. scsi_run_host_queues() needs to run against more devices than the macro shost_for_each_device() allows. Instead of using the too limiting scsi_device_get() state checks, only ignore devices in SDEV_DEL state or when unable to acquire a reference. Attempt to run the queues for all other devices when scsi_run_host_queues() is called. Fixes: 8b566edbdbfb ("scsi: core: Only kick the requeue list if necessary") Signed-off-by: David Jeffery Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20260515180941.9698-1-djeffery@redhat.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 6e8c7a42603e..85eef401925a 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -575,10 +575,33 @@ void scsi_requeue_run_queue(struct work_struct *work) void scsi_run_host_queues(struct Scsi_Host *shost) { - struct scsi_device *sdev; + struct scsi_device *sdev, *prev = NULL; + unsigned long flags; - shost_for_each_device(sdev, shost) + spin_lock_irqsave(shost->host_lock, flags); + __shost_for_each_device(sdev, shost) { + /* + * Only skip devices so deep into removal they will never need + * another kick to their queues. Thus scsi_device_get() cannot + * be used as it would skip devices in SDEV_CANCEL state which + * may need a queue kick. + */ + if (sdev->sdev_state == SDEV_DEL || + !get_device(&sdev->sdev_gendev)) + continue; + spin_unlock_irqrestore(shost->host_lock, flags); + + if (prev) + put_device(&prev->sdev_gendev); scsi_run_queue(sdev->request_queue); + + prev = sdev; + + spin_lock_irqsave(shost->host_lock, flags); + } + spin_unlock_irqrestore(shost->host_lock, flags); + if (prev) + put_device(&prev->sdev_gendev); } static void scsi_uninit_cmd(struct scsi_cmnd *cmd) -- cgit v1.2.3 From adda8a44e1e43aceba058839f56fa1c599f6f99b Mon Sep 17 00:00:00 2001 From: Alexander Perlis Date: Tue, 12 May 2026 18:12:54 -0500 Subject: scsi: devinfo: Add BLIST_NO_RSOC for Promise VTrak E310f The extremely slow boots reported July 2014 in bug 79901: https://bugzilla.kernel.org/show_bug.cgi?id=79901 for Promise VTrak E610f 3U 16-bay FC RAID enclosure occur also with the Promise VTrak E310f 2U 12-bay FC RAID enclosure. The 2014 patch: https://bugzilla.kernel.org/attachment.cgi?id=144101&action=diff added support for the BLIST_NO_RSOC flag and specified that flag for the Promise VTrak E610f. This current patch simply adds the E310f to that same list. One curiosity is the additional BLIST_SPARSELUN flag. This was also in the 2014 patch for the E610f, and was already in place for *all* Promise devices since 2007 due to commit e0b2e597d5dd ("[SCSI] stex: fix id mapping issue") which added the line: {"Promise", "", NULL, BLIST_SPARSELUN} The 2007 commit message talks of issues with SuperTrak EX (stex) but the added line did not limit itself to that particular device family. The current patch for E310F, like the 2014 patch for E610f, adds BLIST_NO_RSOC while preserving BLIST_SPARSELUN from 2007. Signed-off-by: Alexander Perlis Suggested-by: Nikkos Svoboda Link: https://patch.msgid.link/20260512231254.27530-1-aperlis@math.lsu.edu Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_devinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 68a992494b12..c6defe1c3152 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -218,6 +218,7 @@ static struct { {"PIONEER", "CD-ROM DRM-602X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"PIONEER", "CD-ROM DRM-604X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"PIONEER", "CD-ROM DRM-624X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, + {"Promise", "VTrak E310f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC}, {"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC}, {"Promise", "", NULL, BLIST_SPARSELUN}, {"QEMU", "QEMU CD-ROM", NULL, BLIST_SKIP_VPD_PAGES}, -- cgit v1.2.3 From a4719ae23fb5b1b6229120c7ea4b6143a501a62e Mon Sep 17 00:00:00 2001 From: "Milan P. Gandhi" Date: Thu, 14 May 2026 13:27:54 +0530 Subject: scsi: megaraid_sas: Fix NULL pointer dereference on firmware duplicate completion Add NULL check for scmd_local in the MPI2_FUNCTION_SCSI_IO_REQUEST case to handle firmware duplicate/stale completions. When firmware sends a duplicate completion for a command that was already processed and returned to the pool, the driver accesses NULL scmd pointer causing a crash. Timeline of the bug: 1. Command completes normally, megasas_return_cmd_fusion() called 2. This sets cmd->scmd = NULL and clears io_request with memset(..., 0, ...) 3. Firmware sends duplicate/stale completion for same SMID (firmware bug) 4. Driver processes reply descriptor again 5. Cleared io_request has Function = 0 (MPI2_FUNCTION_SCSI_IO_REQUEST) 6. Switch statement matches SCSI_IO_REQUEST case by accident 7. Accesses megasas_priv(NULL scmd)->status -> crash at offset 0x228 The offset 0x228 = sizeof(struct scsi_cmnd) 0x220 + offsetof(status) 0x8. This issue was observed on PERC H330 Mini running firmware 25.5.9.0001 after 3+ days of heavy I/O load. Crash signature: BUG: unable to handle kernel NULL pointer dereference at 0x228 RIP: complete_cmd_fusion+0x428 Function: megasas_priv(cmd_fusion->scmd)->status Add defensive check to skip processing when scmd_local is NULL. This handles duplicate completions from firmware and prevents accessing freed command structures. The check protects all scmd_local uses in both the SCSI_IO path and the fallthrough LDIO path. Signed-off-by: Milan P. Gandhi Link: https://patch.msgid.link/agWAgtk6rtHqNWb5@machine1 Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 2699e4e09b5b..056cbe50e19e 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -3612,6 +3612,15 @@ complete_cmd_fusion(struct megasas_instance *instance, u32 MSIxIndex, complete(&cmd_fusion->done); break; case MPI2_FUNCTION_SCSI_IO_REQUEST: /*Fast Path IO.*/ + /* + * Firmware can send stale/duplicate completions for + * commands already returned to the pool. scmd_local + * would be NULL for such cases. Skip processing to + * avoid NULL pointer access. + */ + if (!scmd_local) + break; + /* Update load balancing info */ if (fusion->load_balance_info && (megasas_priv(cmd_fusion->scmd)->status & -- cgit v1.2.3 From e4bb73bf3ac11b4a93634660345b9d764a4a80df Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Tue, 19 May 2026 16:53:56 -0400 Subject: scsi: scsi_debug: Add missing newline in scsi_debug_device_reset() A "\n" at the end of the sdev_printk() string appears to have been inadvertently removed. Add it back for correct log message formatting. Fixes: a743b120227a ("scsi: scsi_debug: Stop printing extra function name in debug logs") Assisted-by: Claude:claude-opus-4-6 Signed-off-by: Ewan D. Milne Reviewed-by: Bart Van Assche Reviewed-by: John Garry Link: https://patch.msgid.link/20260519205356.1040855-1-emilne@redhat.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 1515495fd9ea..040c5e1e713a 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -6953,7 +6953,7 @@ static int scsi_debug_device_reset(struct scsi_cmnd *SCpnt) ++num_dev_resets; if (SDEBUG_OPT_ALL_NOISE & sdebug_opts) - sdev_printk(KERN_INFO, sdp, "doing device reset"); + sdev_printk(KERN_INFO, sdp, "doing device reset\n"); scsi_debug_stop_all_queued(sdp); if (devip) { -- cgit v1.2.3 From a9a39233ec1fc9f97ea1340a4d09bb7ec2be5153 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Wed, 20 May 2026 09:30:15 -0400 Subject: scsi: scsi_transport_fc: Widen FPIN pname walker counter to u32 An adjacent Fibre Channel fabric actor that can deliver an FPIN ELS frame to an lpfc or qla2xxx Linux initiator can trigger a non-return in the generic FC transport. This is not a local userspace or IP network path; the attacker must be able to inject fabric traffic, for example as a compromised switch or fabric controller, or as a same-zone N_Port on a fabric that permits source spoofing. The Link-Integrity and Peer-Congestion FPIN walkers used a u8 loop counter against the 32-bit on-wire pname_count field, and did not bound pname_count by the descriptor body already validated by the TLV walker. A pname_count of 256 therefore wraps the counter and keeps the loop condition true indefinitely. Factor the shared pname_list[] walk into one helper, widen the counter to u32, and clamp pname_count against the entries that fit in the descriptor body before iterating. Fixes: 3dcfe0de5a97 ("scsi: fc: Parse FPIN packets and update statistics") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Reviewed-by: Christoph Hellwig Reviewed-by: John Garry Link: https://patch.msgid.link/20260520133015.1018937-1-michael.bommarito@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_fc.c | 77 +++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 36 deletions(-) diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index dce95e361daf..173ed6373f04 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -737,6 +737,37 @@ fc_cn_stats_update(u16 event_type, struct fc_fpin_stats *stats) } } +static void +fc_fpin_pname_stats_update(struct Scsi_Host *shost, + struct fc_rport *attach_rport, u16 event_type, + u32 desc_len, u32 fixed_len, u32 pname_count, + __be64 *pname_list, + void (*stats_update)(u16 event_type, + struct fc_fpin_stats *stats)) +{ + u32 i; + struct fc_rport *rport; + u64 wwpn; + + if (desc_len < fixed_len) + pname_count = 0; + else + pname_count = min(pname_count, (desc_len - fixed_len) / + sizeof(pname_list[0])); + + for (i = 0; i < pname_count; i++) { + wwpn = be64_to_cpu(pname_list[i]); + rport = fc_find_rport_by_wwpn(shost, wwpn); + if (rport && + (rport->roles & FC_PORT_ROLE_FCP_TARGET || + rport->roles & FC_PORT_ROLE_NVME_TARGET)) { + if (rport == attach_rport) + continue; + stats_update(event_type, &rport->fpin_stats); + } + } +} + /* * fc_fpin_li_stats_update - routine to update Link Integrity * event statistics. @@ -747,13 +778,11 @@ fc_cn_stats_update(u16 event_type, struct fc_fpin_stats *stats) static void fc_fpin_li_stats_update(struct Scsi_Host *shost, struct fc_tlv_desc *tlv) { - u8 i; struct fc_rport *rport = NULL; struct fc_rport *attach_rport = NULL; struct fc_host_attrs *fc_host = shost_to_fc_host(shost); struct fc_fn_li_desc *li_desc = (struct fc_fn_li_desc *)tlv; u16 event_type = be16_to_cpu(li_desc->event_type); - u64 wwpn; rport = fc_find_rport_by_wwpn(shost, be64_to_cpu(li_desc->attached_wwpn)); @@ -764,22 +793,11 @@ fc_fpin_li_stats_update(struct Scsi_Host *shost, struct fc_tlv_desc *tlv) fc_li_stats_update(event_type, &attach_rport->fpin_stats); } - if (be32_to_cpu(li_desc->pname_count) > 0) { - for (i = 0; - i < be32_to_cpu(li_desc->pname_count); - i++) { - wwpn = be64_to_cpu(li_desc->pname_list[i]); - rport = fc_find_rport_by_wwpn(shost, wwpn); - if (rport && - (rport->roles & FC_PORT_ROLE_FCP_TARGET || - rport->roles & FC_PORT_ROLE_NVME_TARGET)) { - if (rport == attach_rport) - continue; - fc_li_stats_update(event_type, - &rport->fpin_stats); - } - } - } + fc_fpin_pname_stats_update(shost, attach_rport, event_type, + be32_to_cpu(li_desc->desc_len), + FC_TLV_DESC_LENGTH_FROM_SZ(*li_desc), + be32_to_cpu(li_desc->pname_count), + li_desc->pname_list, fc_li_stats_update); if (fc_host->port_name == be64_to_cpu(li_desc->attached_wwpn)) fc_li_stats_update(event_type, &fc_host->fpin_stats); @@ -827,13 +845,11 @@ static void fc_fpin_peer_congn_stats_update(struct Scsi_Host *shost, struct fc_tlv_desc *tlv) { - u8 i; struct fc_rport *rport = NULL; struct fc_rport *attach_rport = NULL; struct fc_fn_peer_congn_desc *pc_desc = (struct fc_fn_peer_congn_desc *)tlv; u16 event_type = be16_to_cpu(pc_desc->event_type); - u64 wwpn; rport = fc_find_rport_by_wwpn(shost, be64_to_cpu(pc_desc->attached_wwpn)); @@ -844,22 +860,11 @@ fc_fpin_peer_congn_stats_update(struct Scsi_Host *shost, fc_cn_stats_update(event_type, &attach_rport->fpin_stats); } - if (be32_to_cpu(pc_desc->pname_count) > 0) { - for (i = 0; - i < be32_to_cpu(pc_desc->pname_count); - i++) { - wwpn = be64_to_cpu(pc_desc->pname_list[i]); - rport = fc_find_rport_by_wwpn(shost, wwpn); - if (rport && - (rport->roles & FC_PORT_ROLE_FCP_TARGET || - rport->roles & FC_PORT_ROLE_NVME_TARGET)) { - if (rport == attach_rport) - continue; - fc_cn_stats_update(event_type, - &rport->fpin_stats); - } - } - } + fc_fpin_pname_stats_update(shost, attach_rport, event_type, + be32_to_cpu(pc_desc->desc_len), + FC_TLV_DESC_LENGTH_FROM_SZ(*pc_desc), + be32_to_cpu(pc_desc->pname_count), + pc_desc->pname_list, fc_cn_stats_update); } /* -- cgit v1.2.3 From 9eed1bd59937e6828b00d2f2dfef631d964f3636 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Mon, 18 May 2026 10:43:07 -0400 Subject: scsi: fcoe: Reject FIP descriptors with zero fip_dlen in CVL walker drivers/scsi/fcoe/fcoe_ctlr.c::fcoe_ctlr_recv_clr_vlink() advanced the descriptor cursor by an attacker-supplied fip_dlen without ever requiring dlen >= sizeof(struct fip_desc) in the default branch. The named descriptor cases (FIP_DT_MAC, FIP_DT_NAME, FIP_DT_VN_ID) checked their per-type minimum lengths, but a FIP_DT_NON_CRITICAL descriptor (fip_dtype >= 128, which the standard requires receivers to silently ignore) skipped that check entirely. An unauthenticated L2 peer on the FCoE control VLAN could hang fcoe_ctlr_recv_work on an fcoe, qedf, or bnx2fc initiator indefinitely by emitting one FIP CVL frame whose single descriptor had fip_dtype == FIP_DT_NON_CRITICAL and fip_dlen == 0: the cursor advanced zero bytes per iteration and the loop condition rlen >= sizeof(*desc) stayed true forever, blocking every subsequent FIP frame on that controller. Tighten the outer dlen guard to also reject dlen < sizeof(struct fip_desc), so a malformed descriptor whose length cannot even cover the descriptor header is rejected before the switch. This is the same lower-bound the named cases already apply and is the minimum scope that closes the loop. Fixes: 97c8389d54b9 ("[SCSI] fcoe, libfcoe: Add support for FIP. FCoE discovery and keep-alive.") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20260518144307.2820961-1-michael.bommarito@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/fcoe/fcoe_ctlr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 02cd4410efca..496ddd45f74d 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -1385,7 +1385,7 @@ static void fcoe_ctlr_recv_clr_vlink(struct fcoe_ctlr *fip, while (rlen >= sizeof(*desc)) { dlen = desc->fip_dlen * FIP_BPW; - if (dlen > rlen) + if (dlen < sizeof(*desc) || dlen > rlen) goto err; /* Drop CVL if there are duplicate critical descriptors */ if ((desc->fip_dtype < 32) && -- cgit v1.2.3 From 4157501b9a8ff1bbe32ff5a7d8aece7ab18eff40 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 21 May 2026 14:47:32 +0200 Subject: vsock/virtio: fix skb overhead overflow on 32-bit builds On 32-bit architectures, both skb_queue_len() and SKB_TRUESIZE(0) evaluate to 32-bit values. The multiplication can overflow before being assigned to the u64 skb_overhead variable, making the skb overhead check ineffective. Cast skb_queue_len() to u64 so the multiplication is always performed in 64-bit arithmetic. This issue was reported by Sashiko while reviewing another patch. Fixes: 059b7dbd20a6 ("vsock/virtio: fix potential unbounded skb queue") Closes: https://sashiko.dev/#/patchset/20260518090656.134588-1-sgarzare%40redhat.com Cc: stable@vger.kernel.org Signed-off-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/20260521124732.125771-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index d4d26fba9e37..b143290a311d 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -417,7 +417,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, u32 len) { - u64 skb_overhead = (skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0); + u64 skb_overhead = ((u64)skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0); /* Allow at most buf_alloc * 2 total budget (payload + overhead), * similar to how SO_RCVBUF is doubled to reserve space for sk_buff -- cgit v1.2.3 From 87a1e0fe7776da7ab411be332b4be58ac8840d10 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 May 2026 12:21:47 +0000 Subject: ipv4: free net->ipv4.sysctl_local_reserved_ports after unregister_net_sysctl_table() ipv4_sysctl_exit_net() is currently freeing net->ipv4.sysctl_local_reserved_ports too soon. Only after unregister_net_sysctl_table() we can be sure no threads can possibly use the sysctls, including /proc/sys/net/ipv4/ip_local_reserved_ports. Fixes: 122ff243f5f1 ("ipv4: make ip_local_reserved_ports per netns") Reported-by: Ji'an Zhou Signed-off-by: Eric Dumazet Cc: Cong Wang Reviewed-by: Jason Xing Reviewed-by: Jiayuan Chen Link: https://patch.msgid.link/20260521122147.3584624-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/sysctl_net_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d8bdb1bdbff1..c0e85cc171ae 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1705,10 +1705,10 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net) { const struct ctl_table *table; - kfree(net->ipv4.sysctl_local_reserved_ports); table = net->ipv4.ipv4_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.ipv4_hdr); kfree(table); + kfree(net->ipv4.sysctl_local_reserved_ports); } static __net_initdata struct pernet_operations ipv4_sysctl_ops = { -- cgit v1.2.3 From 778c2ab142c625a8a8afa570e0f9b7873f445d99 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sat, 18 Apr 2026 11:49:27 -0400 Subject: scsi: target: iscsi: Fix CRC overread and double-free in iscsit_handle_text_cmd() Two latent bugs in the Text-phase handler, both present since the original LIO integration in commit e48354ce078c ("iscsi-target: Add iSCSI fabric support for target v4.1"): 1) DataDigest CRC buffer overread (4 bytes past text_in). text_in is kzalloc()'d at ALIGN(payload_length, 4). rx_size is then incremented by ISCSI_CRC_LEN to make room for the received DataDigest in the iovec, but the same (now-bumped) rx_size is passed as the buffer length to iscsit_crc_buf(): if (conn->conn_ops->DataDigest) { ... rx_size += ISCSI_CRC_LEN; } ... if (conn->conn_ops->DataDigest) { data_crc = iscsit_crc_buf(text_in, rx_size, 0, NULL); iscsit_crc_buf() walks rx_size bytes of text_in with crc32c(), so when DataDigest is negotiated it reads 4 bytes past the end of the text_in allocation. KASAN reproduces this directly on the unpatched mainline tree as slab-out-of-bounds in crc32c() called from the Text PDU path. The OOB bytes feed crc32c() and are then compared against the initiator-supplied checksum, so the value does not flow back to the attacker, but the kernel does read past the buffer on every Text PDU with DataDigest=CRC32C. Fix by passing the actual padded payload length (ALIGN(payload_length, 4)) that was used for the kzalloc(). 2) Stale cmd->text_in_ptr re-free (double-free) on ERL>0 bad DataDigest drop. On DataDigest mismatch with ErrorRecoveryLevel > 0 the handler silently drops the PDU and lets the initiator plug the CmdSN gap: kfree(text_in); return 0; cmd->text_in_ptr still points at the freed buffer. The next Text Request on the same ITT re-enters iscsit_setup_text_cmd(), which unconditionally does kfree(cmd->text_in_ptr); cmd->text_in_ptr = NULL; freeing the same pointer a second time. Session teardown via iscsit_release_cmd() has the same shape and hits the same double-free if the connection is dropped before a second Text Request arrives. On an unmodified mainline tree the bug-1 CRC overread fires first on the initial valid Text Request and perturbs the subsequent state, so #4 was isolated by building a kernel with only the bug-1 hunk of this patch applied plus temporary printk() observability around the three relevant kfree() sites. The observability prints are not part of this patch. On that build, a three-PDU Text Request sequence after login produces two back-to-back splats: BUG: KASAN: double-free in iscsit_setup_text_cmd+0x?? BUG: KASAN: double-free in iscsit_release_cmd+0x?? showing the same pointer freed in the ERL>0 drop path and again in iscsit_setup_text_cmd() (next Text Request on the same ITT) and once more in iscsit_release_cmd() (session teardown). On distro kernels with CONFIG_SLAB_FREELIST_HARDENED=y (default) the double-free becomes a remote kernel BUG(); on non-hardened kernels it corrupts the slab freelist. Fix by clearing cmd->text_in_ptr after the kfree() in the ERL>0 drop path. With both hunks applied #4 is directly observable on the stock tree without observability printks; fixing bug-1 alone would mask #4 less, not more, so the hunks are submitted together. Both fixes are one-liners. The Text PDU state machine is unchanged and the wire protocol is unaffected. Fixes: e48354ce078c ("iscsi-target: Add iSCSI fabric support for target v4.1") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Tested-by: John Garry Reviewed-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index cb832fd523af..62ada3a52210 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -2295,7 +2295,9 @@ iscsit_handle_text_cmd(struct iscsit_conn *conn, struct iscsit_cmd *cmd, goto reject; if (conn->conn_ops->DataDigest) { - data_crc = iscsit_crc_buf(text_in, rx_size, 0, NULL); + data_crc = iscsit_crc_buf(text_in, + ALIGN(payload_length, 4), + 0, NULL); if (checksum != data_crc) { pr_err("Text data CRC32C DataDigest" " 0x%08x does not match computed" @@ -2314,6 +2316,7 @@ iscsit_handle_text_cmd(struct iscsit_conn *conn, struct iscsit_cmd *cmd, " Command CmdSN: 0x%08x due to" " DataCRC error.\n", hdr->cmdsn); kfree(text_in); + cmd->text_in_ptr = NULL; return 0; } } else { -- cgit v1.2.3 From bf33e01f88388c43e285492a63e539df6ffed64c Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Mon, 11 May 2026 14:49:14 -0400 Subject: scsi: target: iscsi: Bound iscsi_encode_text_output() appends to rsp_buf iscsi_encode_text_output() concatenates "key=value\0" records into login->rsp_buf, an 8192-byte kzalloc(MAX_KEY_VALUE_PAIRS) buffer allocated in iscsit_alloc_login_setup_buffer(). The three sprintf() call sites in this function (lines 1398, 1411, 1424 in v7.1-rc2) never check the remaining buffer capacity: *length += sprintf(output_buf, "%s=%s", er->key, er->value); *length += 1; output_buf = textbuf + *length; The 8192-byte ceiling at iscsi_target_check_login_request() bounds the *input* Login PDU payload, but a single PDU can carry up to 2048 minimal four-byte "a=b\0" pairs, each unknown key expanding to a 16-byte "a=NotUnderstood\0" output record via iscsi_add_notunderstood_response(). 2048 * 16 = 32 KiB of output into an 8 KiB buffer, producing a ~24 KiB heap overrun in the kmalloc-8k slab. The fix introduces a static iscsi_encode_text_record() helper that uses snprintf() with a per-call bounds check against the remaining buffer, and threads a u32 textbuf_size parameter through iscsi_encode_text_output(). Both call sites in iscsi_target_handle_csg_zero() (PHASE_SECURITY) and iscsi_target_handle_csg_one() (PHASE_OPERATIONAL) pass MAX_KEY_VALUE_PAIRS. On overflow the encoder logs the condition, calls iscsi_release_extra_responses() to drop queued records, and returns -1; both caller sites now emit ISCSI_STATUS_CLS_INITIATOR_ERR / ISCSI_LOGIN_STATUS_INIT_ERR via iscsit_tx_login_rsp() before returning, so the initiator sees an explicit failed-login response rather than a silent connection drop. (Prior to this patch only the PHASE_OPERATIONAL caller did that; the PHASE_SECURITY caller is converted to the same shape.) Fixes: e48354ce078c ("iscsi-target: Add iSCSI fabric support for target v4.1") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Tested-by: John Garry Reviewed-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target_nego.c | 7 ++- drivers/target/iscsi/iscsi_target_parameters.c | 62 ++++++++++++++++++++------ drivers/target/iscsi/iscsi_target_parameters.h | 2 +- 3 files changed, 55 insertions(+), 16 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 832588f21f91..b03ed154ca34 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -899,10 +899,14 @@ static int iscsi_target_handle_csg_zero( SENDER_TARGET, login->rsp_buf, &login->rsp_length, + MAX_KEY_VALUE_PAIRS, conn->param_list, conn->tpg->tpg_attrib.login_keys_workaround); - if (ret < 0) + if (ret < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); return -1; + } if (!iscsi_check_negotiated_keys(conn->param_list)) { bool auth_required = iscsi_conn_auth_required(conn); @@ -986,6 +990,7 @@ static int iscsi_target_handle_csg_one(struct iscsit_conn *conn, struct iscsi_lo SENDER_TARGET, login->rsp_buf, &login->rsp_length, + MAX_KEY_VALUE_PAIRS, conn->param_list, conn->tpg->tpg_attrib.login_keys_workaround); if (ret < 0) { diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 4ed578c7b98d..2b318b13268e 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -1371,19 +1371,42 @@ free_buffer: return -1; } +/* + * Append "key=value" plus a trailing NUL into @textbuf at *@length. + * Returns 0 on success and advances *@length, or -EMSGSIZE if the + * record (including the NUL) would not fit in the remaining buffer. + */ +static int iscsi_encode_text_record(char *textbuf, u32 *length, + u32 textbuf_size, + const char *key, const char *value) +{ + int n; + u32 avail; + + if (*length >= textbuf_size) + return -EMSGSIZE; + + avail = textbuf_size - *length; + n = snprintf(textbuf + *length, avail, "%s=%s", key, value); + if (n < 0 || (u32)n + 1 > avail) + return -EMSGSIZE; + + *length += n + 1; + return 0; +} + int iscsi_encode_text_output( u8 phase, u8 sender, char *textbuf, u32 *length, + u32 textbuf_size, struct iscsi_param_list *param_list, bool keys_workaround) { - char *output_buf = NULL; struct iscsi_extra_response *er; struct iscsi_param *param; - - output_buf = textbuf + *length; + int ret; if (iscsi_enforce_integrity_rules(phase, param_list) < 0) return -1; @@ -1395,10 +1418,12 @@ int iscsi_encode_text_output( !IS_PSTATE_RESPONSE_SENT(param) && !IS_PSTATE_REPLY_OPTIONAL(param) && (param->phase & phase)) { - *length += sprintf(output_buf, "%s=%s", - param->name, param->value); - *length += 1; - output_buf = textbuf + *length; + ret = iscsi_encode_text_record(textbuf, length, + textbuf_size, + param->name, + param->value); + if (ret < 0) + goto err_overflow; SET_PSTATE_RESPONSE_SENT(param); pr_debug("Sending key: %s=%s\n", param->name, param->value); @@ -1408,10 +1433,12 @@ int iscsi_encode_text_output( !IS_PSTATE_ACCEPTOR(param) && !IS_PSTATE_PROPOSER(param) && (param->phase & phase)) { - *length += sprintf(output_buf, "%s=%s", - param->name, param->value); - *length += 1; - output_buf = textbuf + *length; + ret = iscsi_encode_text_record(textbuf, length, + textbuf_size, + param->name, + param->value); + if (ret < 0) + goto err_overflow; SET_PSTATE_PROPOSER(param); iscsi_check_proposer_for_optional_reply(param, keys_workaround); @@ -1421,14 +1448,21 @@ int iscsi_encode_text_output( } list_for_each_entry(er, ¶m_list->extra_response_list, er_list) { - *length += sprintf(output_buf, "%s=%s", er->key, er->value); - *length += 1; - output_buf = textbuf + *length; + ret = iscsi_encode_text_record(textbuf, length, textbuf_size, + er->key, er->value); + if (ret < 0) + goto err_overflow; pr_debug("Sending key: %s=%s\n", er->key, er->value); } iscsi_release_extra_responses(param_list); return 0; + +err_overflow: + pr_err("iSCSI login response buffer (%u bytes) exhausted, dropping login.\n", + textbuf_size); + iscsi_release_extra_responses(param_list); + return -1; } int iscsi_check_negotiated_keys(struct iscsi_param_list *param_list) diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h index c672a971fcb7..38d2238dfe08 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.h +++ b/drivers/target/iscsi/iscsi_target_parameters.h @@ -43,7 +43,7 @@ extern struct iscsi_param *iscsi_find_param_from_key(char *, struct iscsi_param_ extern int iscsi_extract_key_value(char *, char **, char **); extern int iscsi_update_param_value(struct iscsi_param *, char *); extern int iscsi_decode_text_input(u8, u8, char *, u32, struct iscsit_conn *); -extern int iscsi_encode_text_output(u8, u8, char *, u32 *, +extern int iscsi_encode_text_output(u8, u8, char *, u32 *, u32, struct iscsi_param_list *, bool); extern int iscsi_check_negotiated_keys(struct iscsi_param_list *); extern void iscsi_set_connection_parameters(struct iscsi_conn_ops *, -- cgit v1.2.3 From 85db7391310b1304d2dc8ae3b0b12105a9567147 Mon Sep 17 00:00:00 2001 From: Alexandru Hossu Date: Thu, 21 May 2026 17:11:21 +0200 Subject: scsi: target: iscsi: Validate CHAP_R length before base64 decode chap_server_compute_hash() allocates client_digest as kzalloc(chap->digest_size) and then, for BASE64-encoded responses, passes chap_r directly to chap_base64_decode() without checking whether the input length could produce more than digest_size bytes of output. chap_base64_decode() writes to the destination unconditionally as long as there is input to consume. With MAX_RESPONSE_LENGTH set to 128 and the "0b" prefix stripped by extract_param(), up to 127 base64 characters can reach the decoder. 127 characters decode to 95 bytes. For SHA-256 (digest_size=32) this overflows client_digest by 63 bytes; for MD5 (digest_size=16) the overflow is 79 bytes. The length check at line 344 fires after the write has already happened. The HEX branch in the same switch statement already validates the length up front. Apply the same approach to the BASE64 branch: strip trailing base64 padding characters, then reject any input whose data length exceeds DIV_ROUND_UP(digest_size * 4, 3) before calling the decoder. Stripping trailing '=' before the comparison handles both padded and unpadded encodings. chap_base64_decode() already returns early on '=', so the full original string is still passed to the decoder unchanged. The mutual CHAP path decodes CHAP_C into initiatorchg_binhex, which is kzalloc(CHAP_CHALLENGE_STR_LEN). extract_param() caps initiatorchg at CHAP_CHALLENGE_STR_LEN characters, so at most CHAP_CHALLENGE_STR_LEN-1 base64 characters reach the decoder. The maximum decoded size, DIV_ROUND_UP((CHAP_CHALLENGE_STR_LEN-1) * 3, 4), is less than CHAP_CHALLENGE_STR_LEN, so no overflow is possible there. A comment is added at the call site to document this. Fixes: 1e5733883421 ("scsi: target: iscsi: Support base64 in CHAP") Cc: stable@vger.kernel.org Signed-off-by: Alexandru Hossu Reviewed-by: David Disseldorp Link: https://patch.msgid.link/20260521151121.808477-1-hossu.alexandru@gmail.com Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target_auth.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index c46c69a28e97..a3ad2d244dbe 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -340,13 +340,22 @@ static int chap_server_compute_hash( goto out; } break; - case BASE64: + case BASE64: { + size_t r_len = strlen(chap_r); + + while (r_len > 0 && chap_r[r_len - 1] == '=') + r_len--; + if (r_len > DIV_ROUND_UP(chap->digest_size * 4, 3)) { + pr_err("Malformed CHAP_R: base64 payload too long\n"); + goto out; + } if (chap_base64_decode(client_digest, chap_r, strlen(chap_r)) != chap->digest_size) { pr_err("Malformed CHAP_R: invalid BASE64\n"); goto out; } break; + } default: pr_err("Could not find CHAP_R\n"); goto out; @@ -473,6 +482,14 @@ static int chap_server_compute_hash( } break; case BASE64: + /* + * No overflow check needed: initiatorchg_binhex is + * CHAP_CHALLENGE_STR_LEN bytes and extract_param() caps + * initiatorchg at CHAP_CHALLENGE_STR_LEN characters, so + * the decoded output is at most DIV_ROUND_UP( + * (CHAP_CHALLENGE_STR_LEN - 1) * 3, 4) bytes, which is + * less than CHAP_CHALLENGE_STR_LEN. + */ initiatorchg_len = chap_base64_decode(initiatorchg_binhex, initiatorchg, strlen(initiatorchg)); -- cgit v1.2.3 From 4085f0dbb1ce2251c9a5938d693de6593f0ab2bd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 22 May 2026 16:19:50 +0200 Subject: USB: serial: mxuport: fix memory corruption with small endpoint Make sure that the bulk-out endpoint max packet size is at least eight bytes to avoid user-controlled slab corruption should a malicious device report a smaller size. Fixes: ee467a1f2066 ("USB: serial: add Moxa UPORT 12XX/14XX/16XX driver") Cc: stable@vger.kernel.org # 3.14 Cc: Andrew Lunn Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/mxuport.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/mxuport.c b/drivers/usb/serial/mxuport.c index ad5fdf55a02e..c9b9928c473a 100644 --- a/drivers/usb/serial/mxuport.c +++ b/drivers/usb/serial/mxuport.c @@ -962,6 +962,14 @@ static int mxuport_calc_num_ports(struct usb_serial *serial, */ BUILD_BUG_ON(ARRAY_SIZE(epds->bulk_out) < 16); + /* + * The bulk-out buffers must be large enough for the four-byte header + * (and following data), but assume anything smaller than eight bytes + * is broken. + */ + if (usb_endpoint_maxp(epds->bulk_out[0]) < 8) + return -EINVAL; + for (i = 1; i < num_ports; ++i) epds->bulk_out[i] = epds->bulk_out[0]; -- cgit v1.2.3 From 60df93d30f9bdd27db17c4d80ed80ef718d7226b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 22 May 2026 16:20:58 +0200 Subject: USB: serial: omninet: fix memory corruption with small endpoint Make sure that the bulk-out buffers are at least as large as the hardcoded transfer size to avoid user-controlled slab corruption should a malicious device report a smaller endpoint max packet size than expected. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/omninet.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index aa1e9745f967..b59982ed8b25 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -30,6 +30,10 @@ /* This one seems to be a re-branded ZyXEL device */ #define BT_IGNITIONPRO_ID 0x2000 +#define OMNINET_HEADERLEN 4 +#define OMNINET_BULKOUTSIZE 64 +#define OMNINET_PAYLOADSIZE (OMNINET_BULKOUTSIZE - OMNINET_HEADERLEN) + /* function prototypes */ static void omninet_process_read_urb(struct urb *urb); static int omninet_prepare_write_buffer(struct usb_serial_port *port, @@ -54,6 +58,7 @@ static struct usb_serial_driver zyxel_omninet_device = { .description = "ZyXEL - omni.net usb", .id_table = id_table, .num_bulk_out = 2, + .bulk_out_size = OMNINET_BULKOUTSIZE, .calc_num_ports = omninet_calc_num_ports, .port_probe = omninet_port_probe, .port_remove = omninet_port_remove, @@ -130,10 +135,6 @@ static void omninet_port_remove(struct usb_serial_port *port) kfree(od); } -#define OMNINET_HEADERLEN 4 -#define OMNINET_BULKOUTSIZE 64 -#define OMNINET_PAYLOADSIZE (OMNINET_BULKOUTSIZE - OMNINET_HEADERLEN) - static void omninet_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; -- cgit v1.2.3 From 438061ed1ad85e6743e2dce826671772d81089ec Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 22 May 2026 16:22:18 +0200 Subject: USB: serial: safe_serial: fix memory corruption with small endpoint Make sure that the bulk-out buffer size is at least eight bytes to avoid user-controlled slab corruption in "safe" mode should a malicious device report a smaller size. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/safe_serial.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c index 238b54993446..d267a31dcccf 100644 --- a/drivers/usb/serial/safe_serial.c +++ b/drivers/usb/serial/safe_serial.c @@ -259,6 +259,7 @@ static int safe_prepare_write_buffer(struct usb_serial_port *port, static int safe_startup(struct usb_serial *serial) { struct usb_interface_descriptor *desc; + int bulk_out_size; if (serial->dev->descriptor.bDeviceClass != CDC_DEVICE_CLASS) return -ENODEV; @@ -279,6 +280,16 @@ static int safe_startup(struct usb_serial *serial) default: return -EINVAL; } + + /* + * The bulk-out buffer needs to be large enough for the two-byte + * trailer in safe mode, but assume anything smaller than eight bytes + * is broken. + */ + bulk_out_size = serial->port[0]->bulk_out_size; + if (bulk_out_size > 0 && bulk_out_size < 8) + return -EINVAL; + return 0; } -- cgit v1.2.3 From 9f9bfc80c67f35a275820da7e83a35dface08281 Mon Sep 17 00:00:00 2001 From: Zhang Cen Date: Fri, 22 May 2026 22:54:42 +0800 Subject: USB: serial: cypress_m8: validate interrupt packet headers cypress_read_int_callback() parses the interrupt-in buffer according to the selected Cypress packet format. Format 1 has a two-byte status/count header and format 2 has a one-byte combined status/count header. The usb-serial core sizes the interrupt-in buffer from the endpoint descriptor's wMaxPacketSize, and successful interrupt transfers can complete short when URB_SHORT_NOT_OK is not set. Check that the completed packet contains the selected header before reading it. Malformed short reports are ignored and the interrupt URB is resubmitted through the existing retry path, preventing out-of-bounds header-byte reads. KASAN report as below: KASAN slab-out-of-bounds in cypress_read_int_callback+0x240/0x7f0 Read of size 1 Call trace: cypress_read_int_callback() (drivers/usb/serial/cypress_m8.c:1009) __usb_hcd_giveback_urb() dummy_timer() Fixes: 3416eaa1f8f8 ("USB: cypress_m8: Packet format is separate from characteristic size") Assisted-by: Codex:gpt-5.5 Signed-off-by: Zhang Cen Fixes: 3416eaa1f8f8 ("USB: cypress_m8: Packet format is separate from characteristic size") Cc: stable@vger.kernel.org # 2.6.26 [ johan: use constants in header length sanity checks ] Signed-off-by: Johan Hovold --- drivers/usb/serial/cypress_m8.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index 0b8a4e9d7bc5..bcf302e88ca4 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -1025,8 +1025,8 @@ static void cypress_read_int_callback(struct urb *urb) char tty_flag = TTY_NORMAL; int bytes = 0; int result; - int i = 0; int status = urb->status; + int i; switch (status) { case 0: /* success */ @@ -1064,22 +1064,32 @@ static void cypress_read_int_callback(struct urb *urb) spin_lock_irqsave(&priv->lock, flags); result = urb->actual_length; + i = 0; switch (priv->pkt_fmt) { default: case packet_format_1: /* This is for the CY7C64013... */ + if (result < 2) + break; priv->current_status = data[0] & 0xF8; bytes = data[1] + 2; i = 2; break; case packet_format_2: /* This is for the CY7C63743... */ + if (result < 1) + break; priv->current_status = data[0] & 0xF8; bytes = (data[0] & 0x07) + 1; i = 1; break; } spin_unlock_irqrestore(&priv->lock, flags); + if (i == 0) { + dev_dbg(dev, "%s - short packet received: %d bytes\n", + __func__, result); + goto continue_read; + } if (result < bytes) { dev_dbg(dev, "%s - wrong packet size - received %d bytes but packet said %d bytes\n", -- cgit v1.2.3 From 2d42c7cf1a2dad694db0c518b0e004502859f11c Mon Sep 17 00:00:00 2001 From: Hisam Mehboob Date: Thu, 9 Apr 2026 21:40:22 +0500 Subject: KVM: selftests: elf: Include instead of is a glibc-internal header that explicitly states it should never be included directly: #error "Never use directly; include instead." Replace it with the correct public header which works on all C libraries including musl. Building KVM selftests with musl-gcc fails with: lib/elf.c:10:10: fatal error: bits/endian.h: No such file or directory Fixes: 6089ae0bd5e1 ("kvm: selftests: add sync_regs_test") Signed-off-by: Hisam Mehboob Message-ID: <20260409164020.1575176-4-hisamshar@gmail.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index b689c4df4a01..1924a9895834 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -7,7 +7,7 @@ #include "test_util.h" -#include +#include #include #include "kvm_util.h" -- cgit v1.2.3 From 86e2de10eb14446a49019791bd0674faa5fae088 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 22 May 2026 10:35:25 -0700 Subject: KVM: x86: Return the VM's configured APIC bus frequency when queried When KVM_CAP_X86_APIC_BUS_CYCLES_NS is queried on a specific VM, return the VM's configured APIC bus frequency, not KVM's default. Aside from the fact that returning the default frequency is blatantly wrong if userspace has changed the frequency, returning the configured frequency means userspace can blindly trust the result, e.g. when filling PV CPUID information that communicates the APIC bus frequency to the guest. Fixes: 6fef518594bc ("KVM: x86: Add a capability to configure bus frequency for APIC timer") Reported-by: David Woodhouse Closes: https://lore.kernel.org/all/ab84153e33fbe7c25667f595c56b310d4d5a93ef.camel@infradead.org Signed-off-by: Sean Christopherson Message-ID: <20260522173526.3539407-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0a1b63c63d1a..c1a72d749084 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4876,7 +4876,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = tdp_enabled; break; case KVM_CAP_X86_APIC_BUS_CYCLES_NS: - r = APIC_BUS_CYCLE_NS_DEFAULT; + r = kvm ? kvm->arch.apic_bus_cycle_ns : APIC_BUS_CYCLE_NS_DEFAULT; break; case KVM_CAP_EXIT_HYPERCALL: r = KVM_EXIT_HYPERCALL_VALID_MASK; -- cgit v1.2.3 From d9c41dc531b0e8feb046ee3d31ce37657101b137 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 22 May 2026 10:35:26 -0700 Subject: KVM: selftests: Verify that KVM returns the configured APIC cycle length Add checks in the APIC bus clock test to verify that querying KVM_CAP_X86_APIC_BUS_CYCLES_NS on the VM after changing the frequency returns the VM's actual APIC cycle length, not KVM's default. For giggles, verify that KVM still returns its default frequency for the system-scoped check. Signed-off-by: Sean Christopherson Message-ID: <20260522173526.3539407-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86/apic_bus_clock_test.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c index 404f0028e110..0c84c27ea584 100644 --- a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c +++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c @@ -137,6 +137,10 @@ static void run_apic_bus_clock_test(u64 apic_hz, u64 delay_ms, vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, NSEC_PER_SEC / apic_hz); + TEST_ASSERT_EQ(kvm_check_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS), 1); + TEST_ASSERT_EQ(vm_check_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS), + NSEC_PER_SEC / apic_hz); + vcpu = vm_vcpu_add(vm, 0, apic_guest_code); vcpu_args_set(vcpu, 2, apic_hz, delay_ms); -- cgit v1.2.3 From 9a12fa5213cfc391e0eed63902d3be98f0913765 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 22 May 2026 12:00:14 +0800 Subject: KVM: SVM: Disable AVIC IPI virtualization on Hygon Family 18h (erratum #1235) Hygon Family 18h CPUs are derived from AMD Family 17h (Zen1) silicon and share the same erratum #1235: hardware may read a stale IsRunning=1 bit during ICR write emulation and silently fail to generate an AVIC_IPI_FAILURE_TARGET_NOT_RUNNING VM-Exit on the sending vCPU. The absence of the VM-Exit causes KVM to miss the required wakeup of blocking target vCPUs, leading to hung vCPUs and unbounded delays in guest execution. Extend the existing AMD Family 17h erratum #1235 workaround to also cover Hygon Family 18h. With IPI virtualization disabled, KVM never sets IsRunning=1 in the Physical ID table, so every non-self IPI generates a VM-Exit and is correctly emulated. Fixes: 8de4a1c8164e ("KVM: SVM: Disable (x2)AVIC IPI virtualization if CPU has erratum #1235") Cc: Signed-off-by: Tina Zhang Message-ID: <20260522040014.3380201-1-zhang_wei@open-hieco.net> --- arch/x86/kvm/svm/avic.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index adf211860949..993b551180fe 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -1300,12 +1300,14 @@ bool __init avic_hardware_setup(void) } /* - * Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2) - * due to erratum 1235, which results in missed VM-Exits on the sender - * and thus missed wake events for blocking vCPUs due to the CPU - * failing to see a software update to clear IsRunning. + * Disable IPI virtualization for AMD Family 17h (Zen1 and Zen2) and + * Hygon Family 18h (derived from AMD Zen1) CPUs due to erratum 1235, + * which results in missed VM-Exits on the sender and thus missed wake + * events for blocking vCPUs due to the CPU failing to see a software + * update to clear IsRunning. */ - enable_ipiv = enable_ipiv && boot_cpu_data.x86 != 0x17; + if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) + enable_ipiv = false; amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); -- cgit v1.2.3 From 7dd62566e0d108d29034bcff8503b827f8763320 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 22 May 2026 23:53:36 +0200 Subject: libbpf: fix off-by-one in emit_signature_match jump offset The offset for the cleanup-label jump is computed before the MOV R7 instruction is emitted, but the JMP lands after it. Account for the extra insn in the offset calculation (-2 instead of -1). Drop the redundant self-loop in the else branch; gen->error = -ERANGE already marks the generation as failed. Fixes: fb2b0e290147 ("libbpf: Update light skeleton for signing") Signed-off-by: KP Singh Link: https://lore.kernel.org/r/20260522215337.662271-2-kpsingh@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/gen_loader.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index cd5c2543f54d..9478b8f78f26 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -592,13 +592,12 @@ static void emit_signature_match(struct bpf_gen *gen) gen->hash_insn_offset[i] = gen->insn_cur - gen->insn_start; emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_3, 0, 0, 0, 0, 0)); - off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 1; + off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 2; if (is_simm16(off)) { emit(gen, BPF_MOV64_IMM(BPF_REG_7, -EINVAL)); emit(gen, BPF_JMP_REG(BPF_JNE, BPF_REG_2, BPF_REG_3, off)); } else { gen->error = -ERANGE; - emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1)); } } } -- cgit v1.2.3 From 53676e4d44d6b38c8a0d9bff331f170ae2e41bbe Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 18 May 2026 15:17:14 -0700 Subject: drm/msm: Restore second parameter name in purge() and evict() After commit 3392291fc509 ("drm/msm: Fix shrinker deadlock"), all supported versions of clang warn (or error with CONFIG_WERROR=y): drivers/gpu/drm/msm/msm_gem_shrinker.c:105:58: error: omitting the parameter name in a function definition is a C23 extension [-Werror,-Wc23-extensions] 105 | purge(struct drm_gem_object *obj, struct ww_acquire_ctx *) | ^ drivers/gpu/drm/msm/msm_gem_shrinker.c:117:58: error: omitting the parameter name in a function definition is a C23 extension [-Werror,-Wc23-extensions] 117 | evict(struct drm_gem_object *obj, struct ww_acquire_ctx *) | ^ 2 errors generated. With older but supported versions of GCC, this is an unconditional hard error: drivers/gpu/drm/msm/msm_gem_shrinker.c: In function 'purge': drivers/gpu/drm/msm/msm_gem_shrinker.c:105:35: error: parameter name omitted purge(struct drm_gem_object *obj, struct ww_acquire_ctx *) ^~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/msm/msm_gem_shrinker.c: In function 'evict': drivers/gpu/drm/msm/msm_gem_shrinker.c:117:35: error: parameter name omitted evict(struct drm_gem_object *obj, struct ww_acquire_ctx *) ^~~~~~~~~~~~~~~~~~~~~~~ Restore the parameter name to clear up the warnings, renaming it "unused" to make it clear it is only needed to satisfy the prototype of drm_gem_lru_scan(). Cc: stable@vger.kernel.org Fixes: 3392291fc509 ("drm/msm: Fix shrinker deadlock") Signed-off-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- drivers/gpu/drm/msm/msm_gem_shrinker.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index c8dda2b68cff..9d2788f79ace 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -102,7 +102,7 @@ out_unlock: } static bool -purge(struct drm_gem_object *obj, struct ww_acquire_ctx *) +purge(struct drm_gem_object *obj, struct ww_acquire_ctx *unused) { if (!is_purgeable(to_msm_bo(obj))) return false; @@ -114,7 +114,7 @@ purge(struct drm_gem_object *obj, struct ww_acquire_ctx *) } static bool -evict(struct drm_gem_object *obj, struct ww_acquire_ctx *) +evict(struct drm_gem_object *obj, struct ww_acquire_ctx *unused) { if (is_unevictable(to_msm_bo(obj))) return false; -- cgit v1.2.3 From a88c02915d9c6160cfc7ab1b26ed64b2993e2b94 Mon Sep 17 00:00:00 2001 From: Lim HyeonJun Date: Sun, 24 May 2026 20:08:53 +0900 Subject: io_uring/tctx: set ->io_uring before publishing the tctx node io_register_iowq_max_workers() walks ctx->tctx_list under ctx->tctx_lock and dereferences each node's task->io_uring without a NULL check: list_for_each_entry(node, &ctx->tctx_list, ctx_node) { tctx = node->task->io_uring; if (WARN_ON_ONCE(!tctx->io_wq)) continue; ... } __io_uring_add_tctx_node() installs the node into ctx->tctx_list (via io_tctx_install_node(), which does the list_add() under tctx_lock) and only assigns current->io_uring = tctx afterwards. A task doing its first io_uring operation on a shared ring therefore has a window in which its node is already visible on ctx->tctx_list while node->task->io_uring is still NULL. A concurrent IORING_REGISTER_IOWQ_MAX_WORKERS on the same ring reads that NULL and dereferences tctx->io_wq: KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] RIP: io_register_iowq_max_workers io_uring/register.c:423 Publish current->io_uring = tctx before installing the node, so any node visible on ctx->tctx_list always has a valid task->io_uring. Fixes: 7880174e1e5e ("io_uring/tctx: clean up __io_uring_add_tctx_node() error handling") Signed-off-by: Lim HyeonJun Link: https://patch.msgid.link/20260524110853.115634-1-shja0831@gmail.com Signed-off-by: Jens Axboe --- io_uring/tctx.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/io_uring/tctx.c b/io_uring/tctx.c index 6af62ca9baba..42b219b34aa8 100644 --- a/io_uring/tctx.c +++ b/io_uring/tctx.c @@ -139,12 +139,14 @@ static int io_tctx_install_node(struct io_ring_ctx *ctx, int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) { struct io_uring_task *tctx = current->io_uring; + bool new_tctx = false; int ret; if (unlikely(!tctx)) { tctx = io_uring_alloc_task_context(current, ctx); if (IS_ERR(tctx)) return PTR_ERR(tctx); + new_tctx = true; if (data_race(ctx->int_flags) & IO_RING_F_IOWQ_LIMITS_SET) { unsigned int limits[2]; @@ -168,13 +170,15 @@ int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) if (tctx->io_wq) io_wq_set_exit_on_idle(tctx->io_wq, false); - ret = io_tctx_install_node(ctx, tctx); - if (!ret) { + if (new_tctx) current->io_uring = tctx; + + ret = io_tctx_install_node(ctx, tctx); + if (!ret) return 0; - } - if (!current->io_uring) { err_free: + if (new_tctx) { + current->io_uring = NULL; if (tctx->io_wq) { io_wq_exit_start(tctx->io_wq); io_wq_put_and_exit(tctx->io_wq); -- cgit v1.2.3 From e7ae89a0c97ce2b68b0983cd01eda67cf373517d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 May 2026 13:48:06 -0700 Subject: Linux 7.1-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9f59598d3a08..f056c921ea9c 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 7 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* -- cgit v1.2.3 From 98fb1c1bb11e29eb609b7200a25e136e05aa4498 Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Thu, 21 May 2026 08:01:23 -0300 Subject: ALSA: firewire-motu: Protect register DSP event queue positions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The register DSP event queue is updated under parser->lock, but snd_motu_register_dsp_message_parser_count_event() reads pull_pos and push_pos without the lock. snd_motu_register_dsp_message_parser_copy_event() also reads both queue positions before taking the lock. Protect these accesses with parser->lock as well. This keeps the hwdep poll/read path consistent with the producer side and with the cached meter/parameter accessors. Fixes: 634ec0b2906e ("ALSA: firewire-motu: notify event for parameter change in register DSP model") Cc: stable@vger.kernel.org Signed-off-by: Cássio Gabriel Reviewed-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260521-alsa-firewire-motu-event-locking-v1-1-708e1c2b5e56@gmail.com --- sound/firewire/motu/motu-register-dsp-message-parser.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sound/firewire/motu/motu-register-dsp-message-parser.c b/sound/firewire/motu/motu-register-dsp-message-parser.c index a8053e3ef065..4ec23e6880d9 100644 --- a/sound/firewire/motu/motu-register-dsp-message-parser.c +++ b/sound/firewire/motu/motu-register-dsp-message-parser.c @@ -386,6 +386,8 @@ unsigned int snd_motu_register_dsp_message_parser_count_event(struct snd_motu *m { struct msg_parser *parser = motu->message_parser; + guard(spinlock_irqsave)(&parser->lock); + if (parser->pull_pos > parser->push_pos) return EVENT_QUEUE_SIZE - parser->pull_pos + parser->push_pos; else @@ -395,13 +397,14 @@ unsigned int snd_motu_register_dsp_message_parser_count_event(struct snd_motu *m bool snd_motu_register_dsp_message_parser_copy_event(struct snd_motu *motu, u32 *event) { struct msg_parser *parser = motu->message_parser; - unsigned int pos = parser->pull_pos; - - if (pos == parser->push_pos) - return false; + unsigned int pos; guard(spinlock_irqsave)(&parser->lock); + if (parser->pull_pos == parser->push_pos) + return false; + + pos = parser->pull_pos; *event = parser->event_queue[pos]; ++pos; -- cgit v1.2.3 From 4e273bca232ec71bbf072f10f7d395a28e85e4e1 Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Fri, 22 May 2026 14:07:42 +0800 Subject: ALSA: hda/realtek: Fix incorrect comment for ALC299_FIXUP_PREDATOR_SPK The comment for the pin configuration 0x21 in the fixup ALC299_FIXUP_PREDATOR_SPK states "use as headset mic, without its own jack detect", but the fixup name and the actual usage indicate that the pin is meant to be used as internal speaker. Correct the comment to avoid confusion. Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260522060742.1384390-1-zhangheng@kylinos.cn Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index f180d6a72021..53585e20c494 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -5458,7 +5458,7 @@ static const struct hda_fixup alc269_fixups[] = { [ALC299_FIXUP_PREDATOR_SPK] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { - { 0x21, 0x90170150 }, /* use as headset mic, without its own jack detect */ + { 0x21, 0x90170150 }, /* use as internal speaker */ { } } }, -- cgit v1.2.3 From f7b1f71566ff9d5344a516fa745118cee924c8d0 Mon Sep 17 00:00:00 2001 From: Kris Kater Date: Fri, 22 May 2026 08:09:02 +0200 Subject: ALSA: hda/realtek: Add HDA_CODEC_QUIRK for Lenovo Yoga Slim 7 14AGP11 The BIOS on the Lenovo Yoga Slim 7 14AGP11 (AMD Ryzen AI / Kraken Point chassis; board LNVNB161216, product 83QS) programs the PCI subsystem ID of the HDA function as 17aa:0000. As a result no entry in alc269_fixup_tbl[] matches via SND_PCI_QUIRK, the fixup falls back to the generic auto-routing path, and the bass speaker pin is left mis-routed. Laptop speakers sound noticeably thin. The codec's own internal subsystem ID register reports 0x17aa394c correctly, so an HDA_CODEC_QUIRK entry (which matches on the codec SSID rather than on the PCI SSID) binds the chassis to the existing ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN fixup. This mirrors the same workaround already in place for the closely-related Yoga 7 2-in-1 14AKP10 and 16AKP10 entries earlier in the table. With this change the kernel log goes from ALC287: picked fixup for PCI SSID 17aa:0000 to ALC287: picked fixup alc287-yoga9-bass-spk-pin and speaker routing matches what the firmware intended. Verified by the reporter against the equivalent modprobe override (model=,alc287-yoga9-bass-spk-pin). Link: https://bugzilla.kernel.org/show_bug.cgi?id=221438 Signed-off-by: Kris Kater Link: https://patch.msgid.link/20260522060902.9423-1-kris@kater.nu Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 53585e20c494..9cc4463af05e 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7778,6 +7778,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3920, "Yoga S990-16 pro Quad VECO Quad", ALC287_FIXUP_TXNW2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3929, "Thinkbook 13x Gen 5", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x392b, "Thinkbook 13x Gen 5", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + HDA_CODEC_QUIRK(0x17aa, 0x394c, "Lenovo Yoga Slim 7 14AGP11", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), -- cgit v1.2.3 From a0d9e8df2ebca290c2efff70abc05426e5a476b0 Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Fri, 22 May 2026 09:49:30 -0300 Subject: ALSA: hda: cs35l56: Fix system name string leaks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cs35l56_hda_read_acpi() gets an allocated ACPI _SUB string from acpi_get_subsystem_id(). On success, that string is used to create the firmware system name. Several error paths after the _SUB lookup can return without releasing the allocated string. This includes speaker ID lookup errors other than -ENOENT, and errors after a firmware system name has been allocated. Use scoped cleanup for the temporary _SUB string and make cs35l56->system_name device-managed. This releases the temporary _SUB string on every error path and lets devres release the firmware system name on probe failure and device removal. Fixes: 6f03b446cbae ("ALSA: hda: cs35l56: Add support for speaker id") Fixes: 40b1c2f9b299 ("ALSA: hda/cs35l56: Workaround bad dev-index on Lenovo Yoga Book 9i GenX") Signed-off-by: Cássio Gabriel Reviewed-by: Richard Fitzgerald Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20260522-alsa-cs35l56-system-name-leak-v4-1-a6154dd09cd9@gmail.com --- sound/hda/codecs/side-codecs/cs35l56_hda.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/sound/hda/codecs/side-codecs/cs35l56_hda.c b/sound/hda/codecs/side-codecs/cs35l56_hda.c index cdbc576569ef..a0ea08eb96a9 100644 --- a/sound/hda/codecs/side-codecs/cs35l56_hda.c +++ b/sound/hda/codecs/side-codecs/cs35l56_hda.c @@ -1025,7 +1025,7 @@ static int cs35l56_hda_read_acpi(struct cs35l56_hda *cs35l56, int hid, int id) u32 values[HDA_MAX_COMPONENTS]; char hid_string[8]; struct acpi_device *adev; - const char *property, *sub; + const char *property; int i, ret; /* @@ -1047,7 +1047,8 @@ static int cs35l56_hda_read_acpi(struct cs35l56_hda *cs35l56, int hid, int id) /* Initialize things that could be overwritten by a fixup */ cs35l56->index = -1; - sub = acpi_get_subsystem_id(ACPI_HANDLE(cs35l56->base.dev)); + const char *sub __free(kfree) = acpi_get_subsystem_id(ACPI_HANDLE(cs35l56->base.dev)); + ret = cs35l56_hda_apply_platform_fixups(cs35l56, sub, &id); if (ret) return ret; @@ -1095,15 +1096,16 @@ static int cs35l56_hda_read_acpi(struct cs35l56_hda *cs35l56, int hid, int id) ret = cirrus_scodec_get_speaker_id(cs35l56->base.dev, cs35l56->index, cs35l56->num_amps, -1); if (ret == -ENOENT) { - cs35l56->system_name = sub; + cs35l56->system_name = devm_kstrdup(cs35l56->base.dev, sub, GFP_KERNEL); } else if (ret >= 0) { - cs35l56->system_name = kasprintf(GFP_KERNEL, "%s-spkid%d", sub, ret); - kfree(sub); - if (!cs35l56->system_name) - return -ENOMEM; + cs35l56->system_name = devm_kasprintf(cs35l56->base.dev, GFP_KERNEL, + "%s-spkid%d", sub, ret); } else { return ret; } + + if (!cs35l56->system_name) + return -ENOMEM; } cs35l56->base.reset_gpio = devm_gpiod_get_index_optional(cs35l56->base.dev, @@ -1254,7 +1256,6 @@ void cs35l56_hda_remove(struct device *dev) cs_dsp_remove(&cs35l56->cs_dsp); - kfree(cs35l56->system_name); pm_runtime_put_noidle(cs35l56->base.dev); gpiod_set_value_cansleep(cs35l56->base.reset_gpio, 0); -- cgit v1.2.3 From 4cc54bdd54b337e77115be5b55577d1c58608eae Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Fri, 22 May 2026 22:09:40 -0300 Subject: ALSA: pcm: oss: Fix setup list UAF on proc write error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit snd_pcm_oss_proc_write() links a newly allocated setup entry into the OSS setup list before duplicating the task name. If the task-name allocation fails, the error path frees the already linked entry and leaves setup_list pointing at freed memory. A later OSS device open can then walk the stale list entry in snd_pcm_oss_look_for_setup() and dereference freed memory. Allocate the task name and initialize the setup entry before publishing the entry on setup_list. Also fetch the initial proc read iterator only after taking setup_mutex, so all setup_list traversal follows the same list lifetime rules. Reported-by: syzbot+8e498074a794999eb41c@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/6a1062b7.170a0220.35b2b7.0003.GAE@google.com Closes: https://syzkaller.appspot.com/bug?extid=8e498074a794999eb41c Fixes: 060d77b9c04a ("[ALSA] Fix / clean up PCM-OSS setup hooks") Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260522-alsa-pcm-oss-setup-uaf-v1-1-40bdcc4d17e8@gmail.com Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_oss.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 33fd34f0d615..746eaf93e1a5 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -2974,8 +2974,10 @@ static void snd_pcm_oss_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_str *pstr = entry->private_data; - struct snd_pcm_oss_setup *setup = pstr->oss.setup_list; + struct snd_pcm_oss_setup *setup; + guard(mutex)(&pstr->oss.setup_mutex); + setup = pstr->oss.setup_list; while (setup) { snd_iprintf(buffer, "%s %u %u%s%s%s%s%s%s\n", setup->task_name, @@ -3060,6 +3062,13 @@ static void snd_pcm_oss_proc_write(struct snd_info_entry *entry, buffer->error = -ENOMEM; return; } + template.task_name = kstrdup(task_name, GFP_KERNEL); + if (!template.task_name) { + kfree(setup); + buffer->error = -ENOMEM; + return; + } + *setup = template; if (pstr->oss.setup_list == NULL) pstr->oss.setup_list = setup; else { @@ -3067,12 +3076,7 @@ static void snd_pcm_oss_proc_write(struct snd_info_entry *entry, setup1->next; setup1 = setup1->next); setup1->next = setup; } - template.task_name = kstrdup(task_name, GFP_KERNEL); - if (! template.task_name) { - kfree(setup); - buffer->error = -ENOMEM; - return; - } + continue; } *setup = template; } -- cgit v1.2.3 From db37cf47b67e38ade40de5cd74a4d4d772ff1416 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Sun, 24 May 2026 06:34:14 +0930 Subject: ALSA: scarlett2: Fix 2i2 Gen 4 direct monitor gain on firmware 2417 Firmware 2417 for the Scarlett 4th Gen 2i2 moved the direct monitor gain parameter by 4 bytes, from offset 0x2a0 to 0x2a4, breaking the "Direct Monitor X Mix Y" controls. Special-case the offset in the get/set config helpers when the running firmware is 2417 or later. Fixes: 4e809a299677 ("ALSA: scarlett2: Add support for Solo, 2i2, and 4i4 Gen 4") Cc: Signed-off-by: Geoffrey D. Bennett Link: https://patch.msgid.link/ahIWTueUlWA5xiV+@m.b4.vu Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett2.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/sound/usb/mixer_scarlett2.c b/sound/usb/mixer_scarlett2.c index 8e80a7165faf..a4fac4652201 100644 --- a/sound/usb/mixer_scarlett2.c +++ b/sound/usb/mixer_scarlett2.c @@ -2504,6 +2504,27 @@ static int scarlett2_has_config_item( return !!private->config_set->items[config_item_num].offset; } +/* Return the configuration item's offset, applying any per-firmware + * overrides. + * + * Firmware 2417 for the 2i2 Gen 4 moved DIRECT_MONITOR_GAIN by 4 + * bytes. Apply that shift here so that the rest of the driver can + * keep using the single config set. This override can be removed + * once the multi-config-set framework lands. + */ +static int scarlett2_config_item_offset( + struct scarlett2_data *private, int config_item_num) +{ + int offset = private->config_set->items[config_item_num].offset; + + if (config_item_num == SCARLETT2_CONFIG_DIRECT_MONITOR_GAIN && + private->info == &s2i2_gen4_info && + private->firmware_version >= 2417) + offset = 0x2a4; + + return offset; +} + /* Send a USB message to get configuration parameters; result placed in *buf */ static int scarlett2_usb_get_config( struct usb_mixer_interface *mixer, @@ -2513,6 +2534,7 @@ static int scarlett2_usb_get_config( const struct scarlett2_config *config_item = &private->config_set->items[config_item_num]; int size, err, i; + int item_offset; u8 *buf_8; u8 value; @@ -2522,13 +2544,15 @@ static int scarlett2_usb_get_config( if (!config_item->offset) return -EFAULT; + item_offset = scarlett2_config_item_offset(private, config_item_num); + /* Writes to the parameter buffer are always 1 byte */ size = config_item->size ? config_item->size : 8; /* For byte-sized parameters, retrieve directly into buf */ if (size >= 8) { size = size / 8 * count; - err = scarlett2_usb_get(mixer, config_item->offset, buf, size); + err = scarlett2_usb_get(mixer, item_offset, buf, size); if (err < 0) return err; if (config_item->size == 16) { @@ -2546,7 +2570,7 @@ static int scarlett2_usb_get_config( } /* For bit-sized parameters, retrieve into value */ - err = scarlett2_usb_get(mixer, config_item->offset, &value, 1); + err = scarlett2_usb_get(mixer, item_offset, &value, 1); if (err < 0) return err; @@ -2696,7 +2720,8 @@ static int scarlett2_usb_set_config( */ if (config_item->size >= 8) { size = config_item->size / 8; - offset = config_item->offset + index * size; + offset = scarlett2_config_item_offset(private, config_item_num) + + index * size; /* If updating a bit, retrieve the old value, set/clear the * bit as needed, and update value @@ -2705,7 +2730,7 @@ static int scarlett2_usb_set_config( u8 tmp; size = 1; - offset = config_item->offset; + offset = scarlett2_config_item_offset(private, config_item_num); err = scarlett2_usb_get(mixer, offset, &tmp, 1); if (err < 0) -- cgit v1.2.3 From 7740c6cedb2599d8e4534574a0a72042ba63c1c7 Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Sun, 24 May 2026 15:53:24 -0300 Subject: ALSA: hda/realtek: Limit mic boost on Positivo DN140 The internal mic boost on the Positivo DN140 is too high. Fix this by applying the ALC269_FIXUP_LIMIT_INT_MIC_BOOST fixup to the machine to limit the gain. Signed-off-by: Edson Juliano Drosdeck Link: https://patch.msgid.link/20260524185324.28959-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 9cc4463af05e..b4c9a5584a04 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7846,6 +7846,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1e39, 0xca14, "MEDION NM14LNL", ALC233_FIXUP_MEDION_MTL_SPK), SND_PCI_QUIRK(0x1e50, 0x7007, "Positivo DN50E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x1e50, 0x7038, "Positivo DN140", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1ee7, 0x2078, "HONOR BRB-X M1010", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1ee7, 0x2081, "HONOR MRB-XXX M1020", ALC256_FIXUP_HONOR_MRB_XXX_M1020_AUDIO), SND_PCI_QUIRK(0x1f4c, 0xe001, "Minisforum V3 (SE)", ALC245_FIXUP_BASS_HP_DAC), -- cgit v1.2.3 From fe80251152fed5b185f795ef2cd9f7fe9c3162e0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 22 May 2026 16:49:44 +0200 Subject: ACPI: button: Fix ACPI GPE handler leak during removal Commit a7e23ec17fee ("ACPI: button: Install notifier for system events as well") changed the ACPI notify handler type for ACPI buttons to ACPI_ALL_NOTIFY, but it forgot to update acpi_button_remove() to reflect that change. This leads to leaking the notify handler past driver removal, which may cause a kernel crash to occur if ACPI notify on the given device is triggered after removing the driver, and causes a subsequent probe of the given device with the same driver to fail. Address this by updating the acpi_remove_notify_handler() call in acpi_button_remove() as appropriate. Fixes: a7e23ec17fee ("ACPI: button: Install notifier for system events as well") Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello (AMD) Cc: 6.15+ # 6.15+ Link: https://patch.msgid.link/7954431.EvYhyI6sBW@rafael.j.wysocki --- drivers/acpi/button.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index b47301ee4c8a..7c2e1a422ba0 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -689,7 +689,7 @@ static void acpi_button_remove(struct platform_device *pdev) acpi_button_event); break; default: - acpi_remove_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY, + acpi_remove_notify_handler(adev->handle, ACPI_ALL_NOTIFY, button->type == ACPI_BUTTON_TYPE_LID ? acpi_lid_notify : acpi_button_notify); -- cgit v1.2.3 From a004b8f0d3bc5d82d3f2c91ff93f4b4b7ccb8f76 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 22 May 2026 16:52:10 +0200 Subject: ACPI: button: Enable wakeup GPEs for ACPI buttons at probe time Prior to commit 57c31e6d620f ("ACPI: scan: Use acpi_setup_gpe_for_wake() for buttons"), ACPI button wakeup GPEs having handler methods remained enabled after acpi_wakeup_gpe_init(), but currently they are not enabled because acpi_setup_gpe_for_wake() disables them. That causes function keys to stop working on some systems [1] and there may be other related issues elsewhere. To address that, make the ACPI button driver enable wakeup GPEs for ACPI buttons so long as they have handler methods. While this does not restore the old behavior exactly (the ACPI button driver needs to be bound to the button devices for the GPEs to be enabled), it should be sufficient to restore the missing functionality. For this purpose, introduce acpi_enable_gpe_cond() that enables a GPE if its dispatch type matches the supplied one and modify acpi_button_probe() to use that function for enabling the GPEs in question. Fixes: 57c31e6d620f ("ACPI: scan: Use acpi_setup_gpe_for_wake() for buttons") Reported-by: Nick Closes: https://lore.kernel.org/linux-acpi/E2OXET.4X5GTP37VTNC3@kousu.ca/ [1] Signed-off-by: Rafael J. Wysocki Tested-by: Nick Cc: 7.0+ # 7.0+ Link: https://patch.msgid.link/9629117.CDJkKcVGEf@rafael.j.wysocki --- drivers/acpi/acpica/evxfgpe.c | 50 +++++++++++++++++++++++++++++++++++-------- drivers/acpi/button.c | 22 +++++++++++++++++++ include/acpi/acpixf.h | 5 +++++ 3 files changed, 68 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c index 60dacec1b121..4074b5908db3 100644 --- a/drivers/acpi/acpica/evxfgpe.c +++ b/drivers/acpi/acpica/evxfgpe.c @@ -78,18 +78,22 @@ ACPI_EXPORT_SYMBOL(acpi_update_all_gpes) /******************************************************************************* * - * FUNCTION: acpi_enable_gpe + * FUNCTION: acpi_enable_gpe_cond * * PARAMETERS: gpe_device - Parent GPE Device. NULL for GPE0/GPE1 * gpe_number - GPE level within the GPE block + * dispatch_type - GPE dispatch type to match * * RETURN: Status * - * DESCRIPTION: Add a reference to a GPE. On the first reference, the GPE is - * hardware-enabled. + * DESCRIPTION: Add a reference to a GPE so long as its dispatch type matches + * the supplied one, or it is different from ACPI_GPE_DISPATCH_NONE + * if the supplied one is ACPI_GPE_DISPATCH_MASK. On the first + * reference, the GPE is hardware-enabled. * ******************************************************************************/ -acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number) +acpi_status acpi_enable_gpe_cond(acpi_handle gpe_device, u32 gpe_number, + u8 dispatch_type) { acpi_status status = AE_BAD_PARAMETER; struct acpi_gpe_event_info *gpe_event_info; @@ -100,14 +104,18 @@ acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number) flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock); /* - * Ensure that we have a valid GPE number and that there is some way - * of handling the GPE (handler or a GPE method). In other words, we - * won't allow a valid GPE to be enabled if there is no way to handle it. + * Ensure that we have a valid GPE number and that the dispatch type of + * the GPE matches the supplied one (or it is not ACPI_GPE_DISPATCH_NONE + * if the supplied one is ACPI_GPE_DISPATCH_MASK). */ gpe_event_info = acpi_ev_get_gpe_event_info(gpe_device, gpe_number); if (gpe_event_info) { - if (ACPI_GPE_DISPATCH_TYPE(gpe_event_info->flags) != - ACPI_GPE_DISPATCH_NONE) { + if (dispatch_type == ACPI_GPE_DISPATCH_MASK) + dispatch_type = ACPI_GPE_DISPATCH_TYPE(gpe_event_info->flags); + else if (dispatch_type != ACPI_GPE_DISPATCH_TYPE(gpe_event_info->flags)) + dispatch_type = ACPI_GPE_DISPATCH_NONE; + + if (dispatch_type != ACPI_GPE_DISPATCH_NONE) { status = acpi_ev_add_gpe_reference(gpe_event_info, TRUE); if (ACPI_SUCCESS(status) && ACPI_GPE_IS_POLLING_NEEDED(gpe_event_info)) { @@ -128,6 +136,30 @@ acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number) acpi_os_release_lock(acpi_gbl_gpe_lock, flags); return_ACPI_STATUS(status); } +ACPI_EXPORT_SYMBOL(acpi_enable_gpe_cond) + +/******************************************************************************* + * + * FUNCTION: acpi_enable_gpe + * + * PARAMETERS: gpe_device - Parent GPE Device. NULL for GPE0/GPE1 + * gpe_number - GPE level within the GPE block + * + * RETURN: Status + * + * DESCRIPTION: Add a reference to a GPE. On the first reference, the GPE is + * hardware-enabled. + * + ******************************************************************************/ +acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number) +{ + /* + * Ensure that there is some way of handling the GPE (handler or a GPE + * method). In other words, we won't allow a valid GPE to be enabled if + * there is no way to handle it. + */ + return acpi_enable_gpe_cond(gpe_device, gpe_number, ACPI_GPE_DISPATCH_MASK); +} ACPI_EXPORT_SYMBOL(acpi_enable_gpe) /******************************************************************************* diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 7c2e1a422ba0..e8dd306e17ed 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -179,6 +179,7 @@ struct acpi_button { ktime_t last_time; bool suspended; bool lid_state_initialized; + bool gpe_enabled; }; static struct acpi_device *lid_device; @@ -646,6 +647,21 @@ static int acpi_button_probe(struct platform_device *pdev) status = acpi_install_notify_handler(device->handle, ACPI_ALL_NOTIFY, handler, button); + if (ACPI_SUCCESS(status) && device->wakeup.flags.valid) { + acpi_status st; + + /* + * If the wakeup GPE has a handler method, enable it in + * case it is also used for signaling runtime events. + */ + st = acpi_enable_gpe_cond(device->wakeup.gpe_device, + device->wakeup.gpe_number, + ACPI_GPE_DISPATCH_METHOD); + button->gpe_enabled = ACPI_SUCCESS(st); + if (button->gpe_enabled) + dev_dbg(button->dev, "Enabled ACPI GPE%02llx\n", + device->wakeup.gpe_number); + } break; } if (ACPI_FAILURE(status)) { @@ -689,6 +705,12 @@ static void acpi_button_remove(struct platform_device *pdev) acpi_button_event); break; default: + if (button->gpe_enabled) { + dev_dbg(button->dev, "Disabling ACPI GPE%02llx\n", + adev->wakeup.gpe_number); + acpi_disable_gpe(adev->wakeup.gpe_device, + adev->wakeup.gpe_number); + } acpi_remove_notify_handler(adev->handle, ACPI_ALL_NOTIFY, button->type == ACPI_BUTTON_TYPE_LID ? acpi_lid_notify : diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 49d1749f30bb..a4b562700151 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -725,6 +725,11 @@ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status */ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_update_all_gpes(void)) +ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status + acpi_enable_gpe_cond(acpi_handle gpe_device, + u32 gpe_number, + u8 dispatch_type)) + ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number)) -- cgit v1.2.3 From 3109f9f38800841e46769e95e1ba11f1f8c7b230 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 22 May 2026 16:53:48 +0200 Subject: ACPI: button: Add missing device class clearing on probe failures Commit e18947038bf4 ("ACPI: driver: Do not set acpi_device_class() unnecessarily") modified acpi_button_remove() to clear the device class field in struct acpi_device on driver removal, but it should also have updated the rollback path in acpi_button_probe(), which it didn't do, so do it now. Fixes: e18947038bf4 ("ACPI: driver: Do not set acpi_device_class() unnecessarily") Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello (AMD) Link: https://patch.msgid.link/6167713.MhkbZ0Pkbq@rafael.j.wysocki --- drivers/acpi/button.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index e8dd306e17ed..d80276368b81 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -687,6 +687,7 @@ err_remove_fs: acpi_button_remove_fs(button); err_free_button: kfree(button); + memset(acpi_device_class(device), 0, sizeof(acpi_device_class)); return error; } -- cgit v1.2.3 From 13d33b9ef67066c77c84273fac5a1d3fde3533d1 Mon Sep 17 00:00:00 2001 From: Berkant Koc Date: Tue, 19 May 2026 22:08:17 +0200 Subject: drm/hyperv: validate resolution_count and fix WIN8 fallback A SYNTHVID_RESOLUTION_RESPONSE with resolution_count > 64 walks past the supported_resolution[SYNTHVID_MAX_RESOLUTION_COUNT] array in the parse loop. Bound resolution_count against the array size, folded into the existing zero-check. When the WIN10 resolution probe fails, the caller in hyperv_connect_vsp() left hv->screen_*_max / preferred_* unpopulated, which sets mode_config.max_width / max_height to 0 and makes drm_internal_framebuffer_create() reject every userspace framebuffer with -EINVAL. The pre-WIN10 branch had the same gap for preferred_width / preferred_height. Use a single post-probe fallback guarded by screen_width_max == 0 so both paths converge on the WIN8 defaults. Signed-off-by: Berkant Koc Assisted-by: Claude:claude-opus-4-7 berkoc-pipeline Fixes: 76c56a5affeb ("drm/hyperv: Add DRM driver for hyperv synthetic video device") Cc: stable@vger.kernel.org # 5.14+ Reviewed-by: Michael Kelley Tested-by: Michael Kelley Signed-off-by: Hamza Mahfooz Link: https://patch.msgid.link/6945b22419c7d404b4954a113de2ac9c900dba93.1779542874.git.me@berkoc.com --- drivers/gpu/drm/hyperv/hyperv_drm_proto.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_proto.c b/drivers/gpu/drm/hyperv/hyperv_drm_proto.c index 051ecc526832..c3d0ff229e3d 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_proto.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_proto.c @@ -391,8 +391,11 @@ static int hyperv_get_supported_resolution(struct hv_device *hdev) return -ETIMEDOUT; } - if (msg->resolution_resp.resolution_count == 0) { - drm_err(dev, "No supported resolutions\n"); + if (msg->resolution_resp.resolution_count == 0 || + msg->resolution_resp.resolution_count > + SYNTHVID_MAX_RESOLUTION_COUNT) { + drm_err(dev, "Invalid resolution count: %d\n", + msg->resolution_resp.resolution_count); return -ENODEV; } @@ -508,9 +511,13 @@ int hyperv_connect_vsp(struct hv_device *hdev) ret = hyperv_get_supported_resolution(hdev); if (ret) drm_err(dev, "Failed to get supported resolution from host, use default\n"); - } else { + } + + if (!hv->screen_width_max) { hv->screen_width_max = SYNTHVID_WIDTH_WIN8; hv->screen_height_max = SYNTHVID_HEIGHT_WIN8; + hv->preferred_width = SYNTHVID_WIDTH_WIN8; + hv->preferred_height = SYNTHVID_HEIGHT_WIN8; } hv->mmio_megabytes = hdev->channel->offermsg.offer.mmio_megabytes; -- cgit v1.2.3 From 7f87763f47a3c22fb50265a00619ef10f2394b18 Mon Sep 17 00:00:00 2001 From: Berkant Koc Date: Sat, 23 May 2026 15:27:47 +0200 Subject: drm/hyperv: validate VMBus packet size in receive callback hyperv_receive_sub() reads msg->vid_hdr.type and dispatches into one of four message-type branches without knowing how many bytes the host wrote into hv->recv_buf. The completion path then runs memcpy(hv->init_buf, msg, VMBUS_MAX_PACKET_SIZE), so the consumer that wakes on wait_for_completion_timeout() can read up to 16 KiB of residue from a prior message as if it were the response payload. Pass bytes_recvd into hyperv_receive_sub() and reject any packet that does not cover the pipe + synthvid header. A single switch on msg->vid_hdr.type then computes the type-specific payload size: the three completion-driving types (SYNTHVID_VERSION_RESPONSE, SYNTHVID_RESOLUTION_RESPONSE, SYNTHVID_VRAM_LOCATION_ACK) fall through to a shared exit that requires that size before memcpy/complete, while SYNTHVID_FEATURE_CHANGE validates its own payload and returns before reading is_dirt_needed. Unknown types are dropped. SYNTHVID_RESOLUTION_RESPONSE is variable length: the host fills resolution_count entries, not the full SYNTHVID_MAX_RESOLUTION_COUNT array. Validate the fixed prefix first so resolution_count can be read, bound it against the array, then require only the count-sized array, so the shorter responses the host actually sends are accepted. Only run the sub-handler when vmbus_recvpacket() returned success. The memcpy length is bytes_recvd, which is bounded by VMBUS_MAX_PACKET_SIZE only on a successful receive; on -ENOBUFS vmbus_recvpacket() instead reports the required length, which can exceed hv->recv_buf, so copying bytes_recvd would read and write past the 16 KiB buffers. Gating on the success return keeps the copy bounded. The nonzero-return path is itself a malformed-message case and is now logged rather than silently skipped; channel recovery is not attempted. Rejected packets are reported via drm_err_ratelimited() rather than silently dropped, matching the CoCo-hardened pattern in hv_kvp_onchannelcallback(). Fixes: 76c56a5affeb ("drm/hyperv: Add DRM driver for hyperv synthetic video device") Cc: stable@vger.kernel.org # 5.14+ Signed-off-by: Berkant Koc Assisted-by: Claude:claude-opus-4-7 berkoc-pipeline Reviewed-by: Michael Kelley Tested-by: Michael Kelley Signed-off-by: Hamza Mahfooz Link: https://patch.msgid.link/8200dbc199c7a9b75ac7e8af6c748d2189b5ebd5.1779542874.git.me@berkoc.com --- drivers/gpu/drm/hyperv/hyperv_drm_proto.c | 100 ++++++++++++++++++++++++++---- 1 file changed, 87 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_proto.c b/drivers/gpu/drm/hyperv/hyperv_drm_proto.c index c3d0ff229e3d..4e6f703a1b33 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_proto.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_proto.c @@ -420,30 +420,92 @@ static int hyperv_get_supported_resolution(struct hv_device *hdev) return 0; } -static void hyperv_receive_sub(struct hv_device *hdev) +static void hyperv_receive_sub(struct hv_device *hdev, u32 bytes_recvd) { struct hyperv_drm_device *hv = hv_get_drvdata(hdev); struct synthvid_msg *msg; + size_t hdr_size; + size_t need; if (!hv) return; - msg = (struct synthvid_msg *)hv->recv_buf; - - /* Complete the wait event */ - if (msg->vid_hdr.type == SYNTHVID_VERSION_RESPONSE || - msg->vid_hdr.type == SYNTHVID_RESOLUTION_RESPONSE || - msg->vid_hdr.type == SYNTHVID_VRAM_LOCATION_ACK) { - memcpy(hv->init_buf, msg, VMBUS_MAX_PACKET_SIZE); - complete(&hv->wait); + hdr_size = sizeof(struct pipe_msg_hdr) + + sizeof(struct synthvid_msg_hdr); + if (bytes_recvd < hdr_size) { + drm_err_ratelimited(&hv->dev, + "synthvid packet too small for header: %u\n", + bytes_recvd); return; } - if (msg->vid_hdr.type == SYNTHVID_FEATURE_CHANGE) { + msg = (struct synthvid_msg *)hv->recv_buf; + need = hdr_size; + + switch (msg->vid_hdr.type) { + case SYNTHVID_VERSION_RESPONSE: + need += sizeof(struct synthvid_version_resp); + break; + case SYNTHVID_RESOLUTION_RESPONSE: + /* + * The resolution response is variable length: the host + * fills resolution_count entries, not the full + * SYNTHVID_MAX_RESOLUTION_COUNT array. Require the fixed + * prefix first so resolution_count can be read, then + * demand exactly the count-sized array. + */ + need += offsetof(struct synthvid_supported_resolution_resp, + supported_resolution); + if (bytes_recvd < need) + break; + if (msg->resolution_resp.resolution_count > + SYNTHVID_MAX_RESOLUTION_COUNT) { + drm_err_ratelimited(&hv->dev, + "synthvid resolution count too large: %u\n", + msg->resolution_resp.resolution_count); + return; + } + need += msg->resolution_resp.resolution_count * + sizeof(struct hvd_screen_info); + break; + case SYNTHVID_VRAM_LOCATION_ACK: + need += sizeof(struct synthvid_vram_location_ack); + break; + case SYNTHVID_FEATURE_CHANGE: + /* + * Not a completion-driving message: validate its own payload + * and consume it here rather than falling through to the + * memcpy/complete shared by the wait-event responses. + */ + if (bytes_recvd < need + + sizeof(struct synthvid_feature_change)) { + drm_err_ratelimited(&hv->dev, + "synthvid feature change packet too small: %u\n", + bytes_recvd); + return; + } hv->dirt_needed = msg->feature_chg.is_dirt_needed; if (hv->dirt_needed) hyperv_hide_hw_ptr(hv->hdev); + return; + default: + return; + } + + /* + * Shared completion path for the wait-event responses + * (VERSION_RESPONSE, RESOLUTION_RESPONSE, VRAM_LOCATION_ACK): + * require the type-specific payload before handing the buffer to + * the waiter. + */ + if (bytes_recvd < need) { + drm_err_ratelimited(&hv->dev, + "synthvid packet too small for type %u: %u < %zu\n", + msg->vid_hdr.type, bytes_recvd, need); + return; } + memcpy(hv->init_buf, msg, bytes_recvd); + complete(&hv->wait); } static void hyperv_receive(void *ctx) @@ -464,9 +526,21 @@ static void hyperv_receive(void *ctx) ret = vmbus_recvpacket(hdev->channel, recv_buf, VMBUS_MAX_PACKET_SIZE, &bytes_recvd, &req_id); - if (bytes_recvd > 0 && - recv_buf->pipe_hdr.type == PIPE_MSG_DATA) - hyperv_receive_sub(hdev); + if (ret) { + /* + * A nonzero return (e.g. -ENOBUFS for an oversized + * packet) is itself a malformed message: bytes_recvd + * then reports the required length rather than a copied + * payload, so it must not be forwarded to the + * sub-handler. Channel recovery is not attempted. + */ + drm_err_ratelimited(&hv->dev, + "vmbus_recvpacket failed: %d (need %u)\n", + ret, bytes_recvd); + } else if (bytes_recvd > 0 && + recv_buf->pipe_hdr.type == PIPE_MSG_DATA) { + hyperv_receive_sub(hdev, bytes_recvd); + } } while (bytes_recvd > 0 && ret == 0); } -- cgit v1.2.3 From afb2a3a9d8369d18122a0d7cd294eba9a98259c6 Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Tue, 19 May 2026 13:51:47 -0300 Subject: ASoC: Intel: bytcht_es8316: Fix MCLK leak on init errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit byt_cht_es8316_init() enables MCLK before configuring the codec sysclk and creating the headset jack. If either of those later steps fails, the function returns without disabling MCLK, leaving the clock enabled after card registration fails. Track whether this driver enabled MCLK and disable it on the init error paths. Add the matching DAI link exit callback so the same clock enable is also balanced when ASoC cleans up a successfully initialized link. Fixes: a03bdaa565cb ("ASoC: Intel: add machine driver for BYT/CHT + ES8316") Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260519-asoc-bytcht-es8316-mclk-leak-v1-1-b4a11cdc2afd@gmail.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcht_es8316.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index 192e2a394ff3..ea387dc74273 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -40,6 +40,7 @@ struct byt_cht_es8316_private { struct gpio_desc *speaker_en_gpio; struct device *codec_dev; bool speaker_en; + bool mclk_enabled; }; enum { @@ -170,6 +171,15 @@ static struct snd_soc_jack_pin byt_cht_es8316_jack_pins[] = { }, }; +static void byt_cht_es8316_disable_mclk(struct byt_cht_es8316_private *priv) +{ + if (!priv->mclk_enabled) + return; + + clk_disable_unprepare(priv->mclk); + priv->mclk_enabled = false; +} + static int byt_cht_es8316_init(struct snd_soc_pcm_runtime *runtime) { struct snd_soc_component *codec = snd_soc_rtd_to_codec(runtime, 0)->component; @@ -227,12 +237,14 @@ static int byt_cht_es8316_init(struct snd_soc_pcm_runtime *runtime) ret = clk_prepare_enable(priv->mclk); if (ret) dev_err(card->dev, "unable to enable MCLK\n"); + else + priv->mclk_enabled = true; ret = snd_soc_dai_set_sysclk(snd_soc_rtd_to_codec(runtime, 0), 0, 19200000, SND_SOC_CLOCK_IN); if (ret < 0) { dev_err(card->dev, "can't set codec clock %d\n", ret); - return ret; + goto err_disable_mclk; } ret = snd_soc_card_jack_new_pins(card, "Headset", @@ -241,13 +253,25 @@ static int byt_cht_es8316_init(struct snd_soc_pcm_runtime *runtime) ARRAY_SIZE(byt_cht_es8316_jack_pins)); if (ret) { dev_err(card->dev, "jack creation failed %d\n", ret); - return ret; + goto err_disable_mclk; } snd_jack_set_key(priv->jack.jack, SND_JACK_BTN_0, KEY_PLAYPAUSE); snd_soc_component_set_jack(codec, &priv->jack, NULL); return 0; + +err_disable_mclk: + byt_cht_es8316_disable_mclk(priv); + return ret; +} + +static void byt_cht_es8316_exit(struct snd_soc_pcm_runtime *runtime) +{ + struct snd_soc_card *card = runtime->card; + struct byt_cht_es8316_private *priv = snd_soc_card_get_drvdata(card); + + byt_cht_es8316_disable_mclk(priv); } static int byt_cht_es8316_codec_fixup(struct snd_soc_pcm_runtime *rtd, @@ -353,6 +377,7 @@ static struct snd_soc_dai_link byt_cht_es8316_dais[] = { | SND_SOC_DAIFMT_CBC_CFC, .be_hw_params_fixup = byt_cht_es8316_codec_fixup, .init = byt_cht_es8316_init, + .exit = byt_cht_es8316_exit, SND_SOC_DAILINK_REG(ssp2_port, ssp2_codec, platform), }, }; -- cgit v1.2.3 From cee3e63e7106c3c81b2053371fdf14240bfba2fc Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 18 May 2026 09:23:43 +0000 Subject: ASoC: qcom: q6asm-dai: do not set stream state in event and trigger callbacks The q6asm-dai stream state is used by prepare() to decide whether an existing stream setup needs to be closed before opening/configuring a new one. Updating the state from trigger or asynchronous DSP callbacks can make that state stale or incorrect relative to the actual setup lifetime. In particular, setting Q6ASM_STREAM_STOPPED on STOP or EOS completion can make prepare() believe there is no active setup to close, which can result in opening/configuring the same stream more than once. Keep stream state updates tied to prepare(), where the stream is actually closed and reopened, and stop changing it from trigger and EOS callbacks. Fixes: bfbb12dfa144 ("ASoC: qcom: q6asm-dai: perform correct state check before closing") Cc: Stable@vger.kernel.org Closes: https://lore.kernel.org/all/afS7rTHdc9TyIeLx@rdacayan/ Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20260518092347.3446946-2-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6asm-dai.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c index 4f8f7db6c3d3..56f0d8913904 100644 --- a/sound/soc/qcom/qdsp6/q6asm-dai.c +++ b/sound/soc/qcom/qdsp6/q6asm-dai.c @@ -186,7 +186,6 @@ static void event_handler(uint32_t opcode, uint32_t token, case ASM_CLIENT_EVENT_CMD_RUN_DONE: break; case ASM_CLIENT_EVENT_CMD_EOS_DONE: - prtd->state = Q6ASM_STREAM_STOPPED; break; case ASM_CLIENT_EVENT_DATA_WRITE_DONE: { snd_pcm_period_elapsed(substream); @@ -341,7 +340,6 @@ static int q6asm_dai_trigger(struct snd_soc_component *component, 0, 0, 0); break; case SNDRV_PCM_TRIGGER_STOP: - prtd->state = Q6ASM_STREAM_STOPPED; ret = q6asm_cmd_nowait(prtd->audio_client, prtd->stream_id, CMD_EOS); break; @@ -555,8 +553,6 @@ static void compress_event_handler(uint32_t opcode, uint32_t token, snd_compr_drain_notify(prtd->cstream); prtd->notify_on_drain = false; - } else { - prtd->state = Q6ASM_STREAM_STOPPED; } break; @@ -1014,7 +1010,6 @@ static int q6asm_dai_compr_trigger(struct snd_soc_component *component, 0, 0, 0); break; case SNDRV_PCM_TRIGGER_STOP: - prtd->state = Q6ASM_STREAM_STOPPED; ret = q6asm_cmd_nowait(prtd->audio_client, prtd->stream_id, CMD_EOS); break; -- cgit v1.2.3 From 048c540ee76ded666bda74f9dae1ca3254e0633c Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 18 May 2026 09:23:44 +0000 Subject: ASoC: qcom: q6asm-dai: close stream only when running q6asm_dai_close() and q6asm_dai_compr_free() currently issue CMD_CLOSE whenever prtd->state is non-zero. After prepare() closes an existing stream, the state is updated to Q6ASM_STREAM_STOPPED. Since this state is also non-zero, the close and free paths can send CMD_CLOSE again for a stream that has already been closed. Restrict CMD_CLOSE to the Q6ASM_STREAM_RUNNING state so the command is sent only when the ASM stream is still active. Fixes: 2a9e92d371db ("ASoC: qdsp6: q6asm: Add q6asm dai driver") Cc: Stable@vger.kernel.org Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20260518092347.3446946-3-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6asm-dai.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c index 56f0d8913904..ef86b5b9a951 100644 --- a/sound/soc/qcom/qdsp6/q6asm-dai.c +++ b/sound/soc/qcom/qdsp6/q6asm-dai.c @@ -455,12 +455,12 @@ static int q6asm_dai_close(struct snd_soc_component *component, struct q6asm_dai_rtd *prtd = runtime->private_data; if (prtd->audio_client) { - if (prtd->state) + if (prtd->state == Q6ASM_STREAM_RUNNING) { q6asm_cmd(prtd->audio_client, prtd->stream_id, CMD_CLOSE); - - q6asm_unmap_memory_regions(substream->stream, + q6asm_unmap_memory_regions(substream->stream, prtd->audio_client); + } q6asm_audio_client_free(prtd->audio_client); prtd->audio_client = NULL; } @@ -670,7 +670,7 @@ static int q6asm_dai_compr_free(struct snd_soc_component *component, struct snd_soc_pcm_runtime *rtd = stream->private_data; if (prtd->audio_client) { - if (prtd->state) { + if (prtd->state == Q6ASM_STREAM_RUNNING) { q6asm_cmd(prtd->audio_client, prtd->stream_id, CMD_CLOSE); if (prtd->next_track_stream_id) { @@ -678,11 +678,11 @@ static int q6asm_dai_compr_free(struct snd_soc_component *component, prtd->next_track_stream_id, CMD_CLOSE); } - } - snd_dma_free_pages(&prtd->dma_buffer); - q6asm_unmap_memory_regions(stream->direction, + q6asm_unmap_memory_regions(stream->direction, prtd->audio_client); + } + snd_dma_free_pages(&prtd->dma_buffer); q6asm_audio_client_free(prtd->audio_client); prtd->audio_client = NULL; } -- cgit v1.2.3 From 4b4db09f283df65d780bc7cee66cb4a7e9bf4770 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 18 May 2026 09:23:45 +0000 Subject: ASoC: qcom: q6asm-dai: fix error handling in prepare and set_params Fix error handling in q6asm_dai_compr_set_params() and q6asm_dai_prepare() for both CMD_CLOSE and q6asm_unmap_memory_regions(). In both the functions, we are doing q6asm_audio_client_free in failure cases, which means if prepare or set_params fail, we can never recover. Now open and close are done in respective dai_open/close functions. Fixes: 2a9e92d371db ("ASoC: qdsp6: q6asm: Add q6asm dai driver") Cc: Stable@vger.kernel.org Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20260518092347.3446946-4-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6asm-dai.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c index ef86b5b9a951..fd691004a657 100644 --- a/sound/soc/qcom/qdsp6/q6asm-dai.c +++ b/sound/soc/qcom/qdsp6/q6asm-dai.c @@ -226,9 +226,19 @@ static int q6asm_dai_prepare(struct snd_soc_component *component, /* rate and channels are sent to audio driver */ if (prtd->state == Q6ASM_STREAM_RUNNING) { /* clear the previous setup if any */ - q6asm_cmd(prtd->audio_client, prtd->stream_id, CMD_CLOSE); - q6asm_unmap_memory_regions(substream->stream, - prtd->audio_client); + ret = q6asm_cmd(prtd->audio_client, prtd->stream_id, CMD_CLOSE); + if (ret < 0) { + dev_err(dev, "Failed to close q6asm stream %d\n", prtd->stream_id); + return ret; + } + + ret = q6asm_unmap_memory_regions(substream->stream, prtd->audio_client); + if (ret < 0) { + dev_err(dev, "Failed to unmap memory regions for q6asm stream %d\n", + prtd->stream_id); + return ret; + } + q6routing_stream_close(soc_prtd->dai_link->id, substream->stream); prtd->state = Q6ASM_STREAM_STOPPED; @@ -296,8 +306,6 @@ routing_err: q6asm_cmd(prtd->audio_client, prtd->stream_id, CMD_CLOSE); open_err: q6asm_unmap_memory_regions(substream->stream, prtd->audio_client); - q6asm_audio_client_free(prtd->audio_client); - prtd->audio_client = NULL; return ret; } @@ -912,7 +920,7 @@ static int q6asm_dai_compr_set_params(struct snd_soc_component *component, prtd->session_id, dir); if (ret) { dev_err(dev, "Stream reg failed ret:%d\n", ret); - goto q6_err; + goto routing_err; } ret = __q6asm_dai_compr_set_codec_params(component, stream, @@ -938,11 +946,11 @@ static int q6asm_dai_compr_set_params(struct snd_soc_component *component, return 0; q6_err: + q6routing_stream_close(rtd->dai_link->id, dir); +routing_err: q6asm_cmd(prtd->audio_client, prtd->stream_id, CMD_CLOSE); open_err: - q6asm_audio_client_free(prtd->audio_client); - prtd->audio_client = NULL; return ret; } -- cgit v1.2.3 From c92d880cde739a3fb6346f42fc5feb2f093c063c Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 18 May 2026 09:23:46 +0000 Subject: ASoC: qcom: q6asm-dai: remove unnecessary braces The ASM_CLIENT_EVENT_DATA_WRITE_DONE case does not declare any local variables or require a separate scope, so drop the unnecessary braces. Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20260518092347.3446946-5-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6asm-dai.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c index fd691004a657..5774c2611197 100644 --- a/sound/soc/qcom/qdsp6/q6asm-dai.c +++ b/sound/soc/qcom/qdsp6/q6asm-dai.c @@ -187,10 +187,9 @@ static void event_handler(uint32_t opcode, uint32_t token, break; case ASM_CLIENT_EVENT_CMD_EOS_DONE: break; - case ASM_CLIENT_EVENT_DATA_WRITE_DONE: { + case ASM_CLIENT_EVENT_DATA_WRITE_DONE: snd_pcm_period_elapsed(substream); break; - } case ASM_CLIENT_EVENT_DATA_READ_DONE: snd_pcm_period_elapsed(substream); if (prtd->state == Q6ASM_STREAM_RUNNING) -- cgit v1.2.3 From 909595c288af2304d0902488698e91c8aeee36a3 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 18 May 2026 09:23:47 +0000 Subject: ASoC: qcom: q6asm-dai: use pointer type with kzalloc_obj() Use kzalloc_obj(*prtd) instead of explicitly naming the structure type. Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20260518092347.3446946-6-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6asm-dai.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c index 5774c2611197..4f09fdd40905 100644 --- a/sound/soc/qcom/qdsp6/q6asm-dai.c +++ b/sound/soc/qcom/qdsp6/q6asm-dai.c @@ -383,7 +383,7 @@ static int q6asm_dai_open(struct snd_soc_component *component, return -EINVAL; } - prtd = kzalloc_obj(struct q6asm_dai_rtd); + prtd = kzalloc_obj(*prtd); if (prtd == NULL) return -ENOMEM; -- cgit v1.2.3 From 974820a59efde7c1a7e1260bcfe9bb81f833cc9f Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 25 May 2026 14:48:58 +0200 Subject: hpfs: fix a crash if hpfs_map_dnode_bitmap fails If hpfs_map_dnode_bitmap fails, the code would call hpfs_brelse4 on uninitialized quad buffer head, causing a crash. Signed-off-by: Mikulas Patocka Reported-by: Farhad Alemi Cc: stable@vger.kernel.org --- fs/hpfs/alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c index 66617b1557c6..f5150372618e 100644 --- a/fs/hpfs/alloc.c +++ b/fs/hpfs/alloc.c @@ -372,8 +372,8 @@ int hpfs_check_free_dnodes(struct super_block *s, int n) return 0; } } + hpfs_brelse4(&qbh); } - hpfs_brelse4(&qbh); i = 0; if (hpfs_sb(s)->sb_c_bitmap != -1) { bmp = hpfs_map_bitmap(s, b, &qbh, "chkdn1"); -- cgit v1.2.3 From 86f1d0f063e423a5c1982db1e5e7a8eac511e603 Mon Sep 17 00:00:00 2001 From: Prathamesh Deshpande Date: Wed, 6 May 2026 01:00:31 +0100 Subject: net/mlx5: HWS: Reject unsupported remove-header action mlx5_cmd_hws_packet_reformat_alloc() handles MLX5_REFORMAT_TYPE_REMOVE_HDR by looking up a matching HWS remove-header action. If mlx5_fs_get_action_remove_header_vlan() returns NULL, the code only logs an error and continues. The function then returns success with a NULL HWS action stored in the packet-reformat object. Return an error when no matching remove-header action is available. Fixes: aecd9d1020e3 ("net/mlx5: fs, add HWS packet reformat API function") Signed-off-by: Prathamesh Deshpande Reviewed-by: Simon Horman Reviewed-by: Yevgeny Kliteynik Acked-by: Tariq Toukan Link: https://patch.msgid.link/20260506000054.51797-1-prathameshdeshpande7@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index aca77853abb8..5a172c572a68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -1320,8 +1320,10 @@ mlx5_cmd_hws_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, break; case MLX5_REFORMAT_TYPE_REMOVE_HDR: hws_action = mlx5_fs_get_action_remove_header_vlan(fs_ctx, params); - if (!hws_action) + if (!hws_action) { mlx5_core_err(dev, "Only vlan remove header supported\n"); + return -EOPNOTSUPP; + } break; default: mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n", -- cgit v1.2.3 From 7c2eee9c136734825ff524dd8b2146438a4f8250 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Wed, 13 May 2026 13:51:22 +0300 Subject: memblock: don't touch memblock arrays when memblock_free() is called late When memblock_free() is called after memblock_discard() on architectures that don't select ARCH_KEEP_MEMBLOCK, it tries to update memblock.reserved that was already discarded and it causes use-after-free, for example [ 8.514775] BUG: KASAN: use-after-free in memblock_isolate_range+0x4ac/0x650 [ 8.514775] Read of size 8 at addr ffff88a07fe6a000 by task swapper/0/1 [ 8.514775] Call Trace: [ 8.514775] [ 8.514775] kasan_report+0xb2/0x1b0 [ 8.514775] memblock_isolate_range+0x4ac/0x650 [ 8.514775] memblock_phys_free+0xc4/0x190 [ 8.514775] housekeeping_late_init+0x257/0x280 [ 8.514775] do_one_initcall+0xaa/0x470 [ 8.514775] do_initcalls+0x1b4/0x1f0 [ 8.514775] kernel_init_freeable+0x4b5/0x550 [ 8.514775] kernel_init+0x1c/0x150 [ 8.514775] ret_from_fork+0x5dc/0x8e0 [ 8.514775] ret_from_fork_asm+0x1a/0x30 [ 8.514775] Make sure memblock_free() updates memblock.reserved only when called early enough or when ARCH_KEEP_MEMBLOCK is enabled. Reported-by: Waiman Long Reported-by: Breno Leitao Closes: https://lore.kernel.org/all/20260505051821.1107133-1-longman@redhat.com Tested-by: Waiman Long Tested-by: Breno Leitao Fixes: 87ce9e83ab8b ("memblock, treewide: make memblock_free() handle late freeing") Link: https://patch.msgid.link/20260513105122.502506-1-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) --- mm/memblock.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/memblock.c b/mm/memblock.c index a6a1c91e276d..ccd43f3abb82 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -989,13 +989,15 @@ void __init_memblock memblock_free(void *ptr, size_t size) int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size) { phys_addr_t end = base + size - 1; - int ret; + int ret = 0; memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, &base, &end, (void *)_RET_IP_); kmemleak_free_part_phys(base, size); - ret = memblock_remove_range(&memblock.reserved, base, size); + + if (!slab_is_available() || IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) + ret = memblock_remove_range(&memblock.reserved, base, size); if (slab_is_available()) __free_reserved_area(base, base + size, -1); -- cgit v1.2.3 From f7b52afe3592eae66e160586b45a3f2242972c63 Mon Sep 17 00:00:00 2001 From: Zhengchuan Liang Date: Fri, 22 May 2026 17:42:26 +0800 Subject: ipv6: exthdrs: refresh nh after handling HAO option ip6_parse_tlv() caches skb_network_header(skb) in nh while walking IPv6 TLVs. ipv6_dest_hao() may call pskb_expand_head() for a cloned skb, which can move the skb head and invalidate the cached network header pointer. Refresh nh after ipv6_dest_hao() returns so any trailing padding or TLVs are parsed from the current skb head. This matches the existing pattern used in ip6_parse_tlv() after helpers that can modify skb header storage. Fixes: a831f5bbc89a ("[IPV6] MIP6: Add inbound interface of home address option.") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Xin Liu Co-developed-by: Luxing Yin Signed-off-by: Luxing Yin Signed-off-by: Zhengchuan Liang Signed-off-by: Ren Wei Reviewed-by: Justin Iurman Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/7aba1debc2196189172499e5769802b026f8caf8.1779247873.git.zcliangcn@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/exthdrs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index cf90f933ca1a..6d92c02d0e3d 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -201,6 +201,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_HAO: if (!ipv6_dest_hao(skb, off)) return false; + + nh = skb_network_header(skb); break; #endif default: -- cgit v1.2.3 From d47548a36639095939f4747d4c43f2271366f565 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Fri, 22 May 2026 13:20:13 +0200 Subject: ipv6: exthdrs: refresh nh pointer after ipv6_hop_jumbo() ipv6_hop_jumbo() calls pskb_trim_rcsum(), which can change skb pointers. Let's recompute nh pointer to make sure any change won't mess things up. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Justin Iurman Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260522112013.12342-1-justin.iurman@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/exthdrs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 6d92c02d0e3d..aca2a2abd2df 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -184,6 +184,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_JUMBO: if (!ipv6_hop_jumbo(skb, off)) return false; + + nh = skb_network_header(skb); break; case IPV6_TLV_CALIPSO: if (!ipv6_hop_calipso(skb, off)) -- cgit v1.2.3 From e68842b3356471ba56c882209f324613dac47f64 Mon Sep 17 00:00:00 2001 From: Junrui Luo Date: Wed, 20 May 2026 11:47:55 +0800 Subject: macsec: fix replay protection at XPN lower-PN wrap In macsec_post_decrypt(), when pn is U32_MAX, pn + 1 overflows u32 to 0 and the first branch never fires. If next_pn_halves.lower is also in the upper half, pn_same_half(pn, lower) is true and the XPN else-if does not fire either, leaving next_pn_halves unchanged. An attacker that captures the legitimate frame carrying pn == 0xFFFFFFFF on an XPN association can then replay it indefinitely, since lowest_pn never rises above the captured pn and macsec_decrypt() reconstructs the same IV. Extend the XPN else-if to also fire when pn + 1 wraps to 0, so receipt of pn == U32_MAX advances next_pn_halves to (upper + 1, 0). Fixes: a21ecf0e0338 ("macsec: Support XPN frame handling - IEEE 802.1AEbw") Reported-by: Yuhao Jiang Cc: stable@vger.kernel.org Signed-off-by: Junrui Luo Link: https://patch.msgid.link/SYBPR01MB78813FD49E58F253B989F197AF012@SYBPR01MB7881.ausprd01.prod.outlook.com Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index f904f4d16b45..fb009120a924 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -808,7 +808,8 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u if (pn + 1 > rx_sa->next_pn_halves.lower) { rx_sa->next_pn_halves.lower = pn + 1; } else if (secy->xpn && - !pn_same_half(pn, rx_sa->next_pn_halves.lower)) { + (pn + 1 == 0 || + !pn_same_half(pn, rx_sa->next_pn_halves.lower))) { rx_sa->next_pn_halves.upper++; rx_sa->next_pn_halves.lower = pn + 1; } -- cgit v1.2.3 From 2156a29aecfffa2eb7c558255690084efbe9f3b0 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Wed, 20 May 2026 11:41:57 -0400 Subject: octeontx2-af: validate body pcifunc in rvu_mbox_handler_rep_event_notify rvu_mbox_handler_rep_event_notify() in drivers/net/ethernet/marvell/ octeontx2/af/rvu_rep.c queues a sender-controlled REP_EVENT_NOTIFY request body verbatim, and rvu_rep_up_notify() then forwards event->pcifunc (the nested body field, distinct from the AF-normalised header pcifunc) into rvu_get_pfvf(), rvu_get_pf() and the AF->PF mailbox device index without any bounds check. A VF attached to a PF that has been put into switchdev representor mode reaches this path: the VF mailbox handler otx2_pfvf_mbox_handler() forwards every message id including MBOX_MSG_REP_EVENT_NOTIFY to AF without an allowlist, and the AF dispatcher rewrites only msg->pcifunc, leaving struct rep_event::pcifunc attacker-controlled. The sibling rvu_mbox_handler_esw_cfg() refuses requests whose header pcifunc is not rvu->rep_pcifunc; this handler has no equivalent gate. An out-of-range body pcifunc selects an &rvu->pf[]/&rvu->hwvf[] element past the allocated array and, for RVU_EVENT_MAC_ADDR_CHANGE, turns into a six-byte attacker-chosen OOB ether_addr_copy() target inside the queued worker; KASAN reports a slab-out-of-bounds write in rvu_rep_wq_handler. Reject malformed requests at the handler entry by gating on is_pf_func_valid(), which is already the canonical PF/VF range check in this driver; expose it via rvu.h so callers in rvu_rep.c can use it instead of open-coding the same range arithmetic. Fixes: b8fea84a0468 ("octeontx2-pf: Add support to sync link state between representor and VFs") Cc: stable@vger.kernel.org Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260520154157.1439319-1-michael.bommarito@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 2 +- drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 1 + drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c | 8 ++++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index e40b79076358..3cf131508ecf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -436,7 +436,7 @@ struct rvu_pfvf *rvu_get_pfvf(struct rvu *rvu, int pcifunc) return &rvu->pf[rvu_get_pf(rvu->pdev, pcifunc)]; } -static bool is_pf_func_valid(struct rvu *rvu, u16 pcifunc) +bool is_pf_func_valid(struct rvu *rvu, u16 pcifunc) { int pf, vf, nvfs; u64 cfg; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index a466181cf908..de3fbd3d15d6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -917,6 +917,7 @@ u16 rvu_get_rsrc_mapcount(struct rvu_pfvf *pfvf, int blkaddr); struct rvu_pfvf *rvu_get_pfvf(struct rvu *rvu, int pcifunc); void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf); bool is_block_implemented(struct rvu_hwinfo *hw, int blkaddr); +bool is_pf_func_valid(struct rvu *rvu, u16 pcifunc); bool is_pffunc_map_valid(struct rvu *rvu, u16 pcifunc, int blktype); int rvu_get_lf(struct rvu *rvu, struct rvu_block *block, u16 pcifunc, u16 slot); int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c index 901f6fd40fd4..a2781e0f504e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c @@ -97,6 +97,14 @@ int rvu_mbox_handler_rep_event_notify(struct rvu *rvu, struct rep_event *req, { struct rep_evtq_ent *qentry; + /* The mailbox dispatcher normalises only the header pcifunc; the + * nested struct rep_event::pcifunc body field is sender-controlled + * and is later used by rvu_rep_up_notify() to index rvu->pf[] / + * rvu->hwvf[]. Reject out-of-range body selectors before queueing. + */ + if (!is_pf_func_valid(rvu, req->pcifunc)) + return -EINVAL; + qentry = kmalloc_obj(*qentry, GFP_ATOMIC); if (!qentry) return -ENOMEM; -- cgit v1.2.3 From f229426072fc865654a60978bb7fda790a051ff3 Mon Sep 17 00:00:00 2001 From: Luka Gejak Date: Sat, 23 May 2026 15:03:30 +0200 Subject: net: hsr: fix potential OOB access in supervision frame handling Ensure the entire TLV header is linearized before access by adding sizeof(struct hsr_sup_tlv) to the pskb_may_pull() calls. Without this, a truncated frame could cause an out-of-bounds access. Fixes: eafaa88b3eb7 ("net: hsr: Add support for redbox supervision frames") Signed-off-by: Luka Gejak Reviewed-by: Fernando Fernandez Mancera Link: https://patch.msgid.link/20260523130330.61880-1-luka.gejak@linux.dev Signed-off-by: Jakub Kicinski --- net/hsr/hsr_forward.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 0aca859c88cb..f669a226d728 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -84,7 +84,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) /* Get next tlv */ total_length += hsr_sup_tag->tlv.HSR_TLV_length; - if (!pskb_may_pull(skb, total_length)) + if (!pskb_may_pull(skb, total_length + sizeof(struct hsr_sup_tlv))) return false; skb_pull(skb, total_length); hsr_sup_tlv = (struct hsr_sup_tlv *)skb->data; @@ -100,7 +100,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) /* make sure another tlv follows */ total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tlv->HSR_TLV_length; - if (!pskb_may_pull(skb, total_length)) + if (!pskb_may_pull(skb, total_length + sizeof(struct hsr_sup_tlv))) return false; /* get next tlv */ -- cgit v1.2.3 From 80501dff814eeccebf44a59340c3fe3a205eb120 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 20 May 2026 13:25:07 -0700 Subject: Documentation/arch/x86: Hide clearcpuid= This option was never meant to be used in production because it solely clears the X86_FEATURE kernel-internal representation of what CPUID bits it has detected and doesn't do any *proper* feature disablement like clearing CR4.CET in the user shadow stack case, for example. So remove its documentation so that it doesn't get used in production and people get silly ideas. It is meant strictly for debugging; and if a chicken bit for properly disabling a feature is warranted, then that would need proper enablement. No functional changes. Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Cc: Mathias Krause Cc: Linus Torvalds Link: https://patch.msgid.link/20260520202508.160112-1-bp@kernel.org --- Documentation/admin-guide/kernel-parameters.txt | 18 ------------------ Documentation/arch/x86/cpuinfo.rst | 4 ++++ 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4d0f545fb3ec..97007f4f69d4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -789,24 +789,6 @@ Kernel parameters cio_ignore= [S390] See Documentation/arch/s390/common_io.rst for details. - clearcpuid=X[,X...] [X86] - Disable CPUID feature X for the kernel. See - arch/x86/include/asm/cpufeatures.h for the valid bit - numbers X. Note the Linux-specific bits are not necessarily - stable over kernel options, but the vendor-specific - ones should be. - X can also be a string as appearing in the flags: line - in /proc/cpuinfo which does not have the above - instability issue. However, not all features have names - in /proc/cpuinfo. - Note that using this option will taint your kernel. - Also note that user programs calling CPUID directly - or using the feature without checking anything - will still see it. This just prevents it from - being used by the kernel or shown in /proc/cpuinfo. - Also note the kernel might malfunction if you disable - some critical bits. - clk_ignore_unused [CLK] Prevents the clock framework from automatically gating diff --git a/Documentation/arch/x86/cpuinfo.rst b/Documentation/arch/x86/cpuinfo.rst index 9f2e47c4b1c8..17fce95367e6 100644 --- a/Documentation/arch/x86/cpuinfo.rst +++ b/Documentation/arch/x86/cpuinfo.rst @@ -187,6 +187,10 @@ to disable features using the feature number as defined in Protection can be disabled using clearcpuid=514. The number 514 is calculated from #define X86_FEATURE_UMIP (16*32 + 2). +DO NOT USE this cmdline option in production - it is meant to be used only as +a quick'n'dirty debugging aid to rule out a feature-enabling code is the +culprit. If you use it, it'll taint the kernel. + In addition, there exists a variety of custom command-line parameters that disable specific features. The list of parameters includes, but is not limited to, nofsgsbase, nosgx, noxsave, etc. 5-level paging can also be disabled using -- cgit v1.2.3 From 20587302f8d700f26ee2c8a60ffb0a69ae0edf16 Mon Sep 17 00:00:00 2001 From: Zhang Heng Date: Tue, 26 May 2026 09:36:11 +0800 Subject: ALSA: hda/realtek: Fix speaker output on ASUS ROG Strix G615LP Add quirk for ALC294 codec on ASUS ROG Strix G615LP (SSID 1043:1214) using ALC287_FIXUP_TXNW2781_I2C_ASUS to fix speaker output. Link: https://bugzilla.kernel.org/show_bug.cgi?id=221173 Cc: Signed-off-by: Zhang Heng Link: https://patch.msgid.link/20260526013611.1954949-1-zhangheng@kylinos.cn Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index b4c9a5584a04..212adce0f8e6 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7324,6 +7324,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), HDA_CODEC_QUIRK(0x1043, 0x1204, "ASUS Strix G16 G615JMR", ALC287_FIXUP_TXNW2781_I2C_ASUS), SND_PCI_QUIRK(0x1043, 0x1204, "ASUS Strix G615JHR_JMR_JPR", ALC287_FIXUP_TAS2781_I2C), + HDA_CODEC_QUIRK(0x1043, 0x1214, "ASUS ROG Strix G615LP", ALC287_FIXUP_TXNW2781_I2C_ASUS), SND_PCI_QUIRK(0x1043, 0x1214, "ASUS Strix G615LH_LM_LP", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1271, "ASUS X430UN", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), -- cgit v1.2.3 From 44e151be23deb788d9f6124de93823faf6e04e99 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 25 May 2026 10:14:42 +0300 Subject: accel/ivpu: prevent uninitialized data bug in debugfs The simple_write_to_buffer() will only initialize data starting from the *pos offset so if it's non-zero then the first part of the buffer uninitialized. Really, if *pos is non-zero then this code won't work so just check for that at the start of the function. Fixes: 320323d2e545 ("accel/ivpu: Add debugfs interface for setting HWS priority bands") Signed-off-by: Dan Carpenter Reviewed-by: Karol Wachowski Signed-off-by: Karol Wachowski Link: https://patch.msgid.link/ahP24m6Mii9EDL7Q@stanley.mountain --- drivers/accel/ivpu/ivpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index 189dbe94cf14..dc20bc73c6ed 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -450,7 +450,7 @@ priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t u32 band; int ret; - if (size >= sizeof(buf)) + if (*pos != 0 || size >= sizeof(buf)) return -EINVAL; ret = simple_write_to_buffer(buf, sizeof(buf) - 1, pos, user_buf, size); -- cgit v1.2.3 From d64b0372760e09de6d18a0616d7bc652c8c6891d Mon Sep 17 00:00:00 2001 From: George Guo Date: Sat, 9 May 2026 10:44:15 +0800 Subject: kho: fix KHO_TREE_MAX_DEPTH for non-4KB page sizes KHO_TREE_MAX_DEPTH is calculated as: DIV_ROUND_UP(KHO_ORDER_0_LOG2 - KHO_BITMAP_SIZE_LOG2, KHO_TABLE_SIZE_LOG2) + 1 For systems with 16KB pages (e.g. arm64 with CONFIG_ARM64_16K_PAGES=y or LoongArch), this gives a depth of 4. Since levels are 0 based, with depth = 4 the effective top level is 3 and the top-level shift at bit 39. PAGE_SHIFT = 14 KHO_BITMAP_SIZE_LOG2 = PAGE_SHIFT + 3 = 17 KHO_TABLE_SIZE_LOG2 = log(2; (1 << PAGE_SHIFT) / 8) = 11 shift = ((3 - 1) * KHO_TABLE_SIZE_LOG2) + KHO_BITMAP_SIZE_LOG2 = 39 The order-0 bit sits at bit 50 (KHO_ORDER_0_LOG2 = 64 - PAGE_SHIFT = 50). When inserting or reading a key, the index extracted at the top level is: (1 << 50) >> 39 = 2048 2048 is exactly the table size (PAGE_SIZE / sizeof(phys_addr_t) = 2048 for 16KB pages), so it wraps to 0, aliasing the order bit to index 0 and losing it silently. On the second kernel, kho_radix_decode_key() sees a key without the order bit, calls fls64() on the wrong bit, computes a wrong order and thus a garbage physical address. phys_to_page() of that address faults in kho_preserved_memory_reserve(), causing a kernel panic early in boot. Fix by adding +1 to the DIV_ROUND_UP numerator so the formula accounts for the order bit itself, giving depth 5 for 16KB pages. The top-level shift becomes 50, and (1 << 50) >> 50 = 1, which is nonzero and unambiguous. For 4KB and 64KB page sizes the depth is unchanged. Link: https://patch.msgid.link/20260509024415.33190-1-dongtai.guo@linux.dev Fixes: 3f2ad90060f6 ("kho: adopt radix tree for preserved memory tracking") Tested-by: Kexin Liu Signed-off-by: George Guo Reviewed-by: Pasha Tatashin [rppt: added actual math to the changelog] Signed-off-by: Mike Rapoport (Microsoft) --- include/linux/kho/abi/kexec_handover.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kho/abi/kexec_handover.h b/include/linux/kho/abi/kexec_handover.h index 7e847a2339b0..db9bda6dd310 100644 --- a/include/linux/kho/abi/kexec_handover.h +++ b/include/linux/kho/abi/kexec_handover.h @@ -274,7 +274,7 @@ enum kho_radix_consts { * and 1 bitmap level. */ KHO_TREE_MAX_DEPTH = - DIV_ROUND_UP(KHO_ORDER_0_LOG2 - KHO_BITMAP_SIZE_LOG2, + DIV_ROUND_UP(KHO_ORDER_0_LOG2 - KHO_BITMAP_SIZE_LOG2 + 1, KHO_TABLE_SIZE_LOG2) + 1, }; -- cgit v1.2.3 From 8fd2f26fa2a33cfe8ac043f976137ecf5b567f03 Mon Sep 17 00:00:00 2001 From: "Pratyush Yadav (Google)" Date: Tue, 19 May 2026 15:33:30 +0200 Subject: kho: fix order calculation for kho_unpreserve_pages() Commit 91e74fa8b1bc ("kho: make sure preservations do not span multiple NUMA nodes") made sure preservations from kho_preserve_pages() do not span multiple NUMA nodes. If they do, the order is reduced and tried again. The same logic was not implemented for kho_unpreserve_pages(). This can result in unpreserve calculating a different order than preserve, and thus not actually unpreserving the pages. Fix this by moving the order calculation logic to __kho_preserve_pages_order() and use it from both preserve and unpreserve paths. Move __kho_unpreserve() down to avoid having a forward declaration. Its users are further down in the file anyway. Also, it results in grouping for all the page-level preservation and unpreservation functions. This unfortunately makes the diff hard to read, but the main change in __kho_unpreserve() is to call __kho_preserve_pages_order() instead of open-coding the order calculation. Fixes: 91e74fa8b1bc ("kho: make sure preservations do not span multiple NUMA nodes") Cc: stable@vger.kernel.org Signed-off-by: Pratyush Yadav (Google) Reviewed-by: Samiullah Khawaja Reviewed-by: Pasha Tatashin Link: https://patch.msgid.link/20260519133332.2498092-1-pratyush@kernel.org Signed-off-by: Mike Rapoport (Microsoft) --- kernel/liveupdate/kexec_handover.c | 56 ++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index 2592f7ca16e2..1b592d86dc48 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -357,20 +357,6 @@ int kho_radix_walk_tree(struct kho_radix_tree *tree, } EXPORT_SYMBOL_GPL(kho_radix_walk_tree); -static void __kho_unpreserve(struct kho_radix_tree *tree, - unsigned long pfn, unsigned long end_pfn) -{ - unsigned int order; - - while (pfn < end_pfn) { - order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn)); - - kho_radix_del_page(tree, pfn, order); - - pfn += 1 << order; - } -} - /* For physically contiguous 0-order pages. */ static void kho_init_pages(struct page *page, unsigned long nr_pages) { @@ -860,6 +846,37 @@ void kho_unpreserve_folio(struct folio *folio) } EXPORT_SYMBOL_GPL(kho_unpreserve_folio); +static unsigned int __kho_preserve_pages_order(unsigned long start_pfn, + unsigned long end_pfn) +{ + unsigned int order = min(count_trailing_zeros(start_pfn), + ilog2(end_pfn - start_pfn)); + + /* + * Make sure all the pages in a single preservation are in the same NUMA + * node. The restore machinery can not cope with a preservation spanning + * multiple NUMA nodes. + */ + while (pfn_to_nid(start_pfn) != pfn_to_nid(start_pfn + (1UL << order) - 1)) + order--; + + return order; +} + +static void __kho_unpreserve(struct kho_radix_tree *tree, + unsigned long pfn, unsigned long end_pfn) +{ + unsigned int order; + + while (pfn < end_pfn) { + order = __kho_preserve_pages_order(pfn, end_pfn); + + kho_radix_del_page(tree, pfn, order); + + pfn += 1 << order; + } +} + /** * kho_preserve_pages - preserve contiguous pages across kexec * @page: first page in the list. @@ -885,16 +902,7 @@ int kho_preserve_pages(struct page *page, unsigned long nr_pages) } while (pfn < end_pfn) { - unsigned int order = - min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn)); - - /* - * Make sure all the pages in a single preservation are in the - * same NUMA node. The restore machinery can not cope with a - * preservation spanning multiple NUMA nodes. - */ - while (pfn_to_nid(pfn) != pfn_to_nid(pfn + (1UL << order) - 1)) - order--; + unsigned int order = __kho_preserve_pages_order(pfn, end_pfn); err = kho_radix_add_page(tree, pfn, order); if (err) { -- cgit v1.2.3 From dac917ed5aead741004db8d0d5151dd577802df8 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 26 May 2026 08:35:01 +0200 Subject: gpio: mxc: fix irq_high handling If port->irq_high is -1 (fsl,imx21-gpio compatible) and gpio_idx is >= 16 enable_irq_wake() is called with -1 which is wrong. Fixes: 5f6d1998adeb ("gpio: mxc: release the parent IRQ in runtime suspend") Signed-off-by: Alexander Stein Reviewed-by: Frank Li Link: https://patch.msgid.link/20260526063504.25916-1-alexander.stein@ew.tq-group.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mxc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c index 647b6f4861b7..12f11a6c9665 100644 --- a/drivers/gpio/gpio-mxc.c +++ b/drivers/gpio/gpio-mxc.c @@ -469,7 +469,7 @@ static int mxc_gpio_probe(struct platform_device *pdev) * the handler is needed only once, but doing it for every port * is more robust and easier. */ - port->irq_high = -1; + port->irq_high = 0; port->mx_irq_handler = mx2_gpio_irq_handler; } else port->mx_irq_handler = mx3_gpio_irq_handler; -- cgit v1.2.3 From 5c4063c87a619e4df954c179d24628636f5db15f Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Fri, 8 May 2026 14:23:51 +0200 Subject: drm/i915: Fix potential UAF in TTM object purge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TLDR: The bo->ttm object might be changed by calling ttm_bo_validate(), move casting it to an i915_tt object later to actually get the right pointer. A user reported hitting the following bug under heavy use on DG2: [26620.095550] Oops: general protection fault, probably for non-canonical address 0xa56b6b6b6b6b6b8b: 0000 1 SMP NOPTI [26620.095556] CPU: 2 UID: 0 PID: 631 Comm: Xorg Not tainted 6.18.8 #1 PREEMPT(lazy) [26620.095558] Hardware name: ASRock B850M Steel Legend WiFi/B850M Steel Legend WiFi, BIOS 3.50 09/18/2025 [26620.095559] RIP: 0010:i915_ttm_purge+0x84/0x100 [i915] [26620.095604] Code: 00 00 00 48 8d 54 24 10 48 89 e6 48 89 fb e8 83 aa ae ff 85 c0 75 6f 48 83 bb a8 01 00 00 00 74 2c 48 8b 45 78 48 85 c0 74 23 <48> 8b 78 20 48 c7 c2 ff ff ff ff 31 f6 e8 7a 73 e3 e0 48 8b 7d 78 [26620.095605] RSP: 0018:ffffc90005fd7430 EFLAGS: 00010282 [26620.095607] RAX: a56b6b6b6b6b6b6b RBX: ffff8881f46c3dc0 RCX: 0000000000000000 [26620.095608] RDX: 0000000000000000 RSI: 0000000000000246 RDI: 00000000ffffffff [26620.095609] RBP: ffff888289610f00 R08: 0000000000000001 R09: ffff88823b022000 [26620.095609] R10: ffff888103029b28 R11: ffff8881fc7f3800 R12: ffff88810b6150d0 [26620.095609] R13: ffff888289610f00 R14: 0000000000000000 R15: ffff8881f46c3dc0 [26620.095610] FS: 00007f1004d86900(0000) GS:ffff88901c858000(0000) knlGS:0000000000000000 [26620.095611] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [26620.095611] CR2: 00007f0fdf489000 CR3: 000000035b0c1000 CR4: 0000000000750ef0 [26620.095612] PKRU: 55555554 [26620.095612] Call Trace: [26620.095615] [26620.095615] i915_ttm_move+0x2b9/0x420 [i915] [26620.095642] ? ttm_tt_init+0x65/0x80 [ttm] [26620.095644] ? i915_ttm_tt_create+0xc6/0x150 [i915] [26620.095667] ttm_bo_handle_move_mem+0xb6/0x160 [ttm] [26620.095669] ttm_bo_evict+0x100/0x150 [ttm] [26620.095671] ? preempt_count_add+0x64/0xa0 [26620.095673] ? _raw_spin_lock+0xe/0x30 [26620.095675] ? _raw_spin_unlock+0xd/0x30 [26620.095675] ? i915_gem_object_evictable+0xb7/0xd0 [i915] [26620.095704] ttm_bo_evict_cb+0x6e/0xd0 [ttm] [26620.095705] ttm_lru_walk_for_evict+0xa6/0x200 [ttm] [26620.095708] ttm_bo_alloc_resource+0x185/0x4f0 [ttm] [26620.095709] ? init_object+0x62/0xd0 [26620.095712] ttm_bo_validate+0x7a/0x180 [ttm] [26620.095713] ? _raw_spin_unlock_irqrestore+0x16/0x30 [26620.095714] __i915_ttm_get_pages+0xb0/0x170 [i915] [26620.095737] i915_ttm_get_pages+0x9f/0x150 [i915] [26620.095759] ? i915_gem_do_execbuffer+0xedc/0x2b40 [i915] [26620.095786] ? alloc_debug_processing+0xd0/0x100 [26620.095787] ? _raw_spin_unlock_irqrestore+0x16/0x30 [26620.095788] ? i915_vma_instance+0xa0/0x4e0 [i915] [26620.095822] __i915_gem_object_get_pages+0x2f/0x40 [i915] [26620.095848] i915_vma_pin_ww+0x706/0x980 [i915] [26620.095875] ? i915_gem_do_execbuffer+0xedc/0x2b40 [i915] [26620.095904] eb_validate_vmas+0x170/0xa00 [i915] [26620.095930] i915_gem_do_execbuffer+0x1201/0x2b40 [i915] [26620.095953] ? alloc_debug_processing+0xd0/0x100 [26620.095954] ? _raw_spin_unlock_irqrestore+0x16/0x30 [26620.095955] ? i915_gem_execbuffer2_ioctl+0xc9/0x240 [i915] [26620.095977] ? __wake_up_sync_key+0x32/0x50 [26620.095979] ? i915_gem_execbuffer2_ioctl+0xc9/0x240 [i915] [26620.096001] ? __slab_alloc.isra.0+0x67/0xc0 [26620.096003] i915_gem_execbuffer2_ioctl+0x11a/0x240 [i915] Results from decode_stacktrace.sh pointed to dereference of a file pointer field of a i915 TTM page vector container associated with an object being purged on eviction. That path is taken when the object is marked as no longer needed. Code analysis revealed a possibility of the i915 TTM page vector container being replaced with a new instance inside a function that purges content of the object, should it be still busy. That function is called, indirectly via a more general function that changes the object's placement and caching policy, before the problematic dereference, but still after a pointer to the container is captured, rendering the pointer no longer valid. Fix the issue by capturing the pointer to the container only after its potential replacement. v2: Move the container_of() inside the if block (Sebastian), - a simplified version of the commit description that explains briefly why the change is necessary (Christian). Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/work_items/14882 Fixes: 7ae034590ceae ("drm/i915/ttm: add tt shmem backend") Signed-off-by: Janusz Krzysztofik Cc: stable@vger.kernel.org # v5.17+ Cc: Matthew Auld Cc: Thomas Hellström Cc: Sebastian Brzezinka Cc: Christian König Reviewed-by: Andi Shyti Reviewed-by: Christian König Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20260508122612.469227-2-janusz.krzysztofik@linux.intel.com (cherry picked from commit 4462966a93eb185849b7f174f0d0de53476d00a4) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index de70517b4ef2..df3fcc2b1248 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -419,8 +419,6 @@ void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj) int i915_ttm_purge(struct drm_i915_gem_object *obj) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); - struct i915_ttm_tt *i915_tt = - container_of(bo->ttm, typeof(*i915_tt), ttm); struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, @@ -435,16 +433,22 @@ int i915_ttm_purge(struct drm_i915_gem_object *obj) if (ret) return ret; - if (bo->ttm && i915_tt->filp) { - /* - * The below fput(which eventually calls shmem_truncate) might - * be delayed by worker, so when directly called to purge the - * pages(like by the shrinker) we should try to be more - * aggressive and release the pages immediately. - */ - shmem_truncate_range(file_inode(i915_tt->filp), - 0, (loff_t)-1); - fput(fetch_and_zero(&i915_tt->filp)); + if (bo->ttm) { + struct i915_ttm_tt *i915_tt = + container_of(bo->ttm, typeof(*i915_tt), ttm); + + if (i915_tt->filp) { + /* + * The below fput(which eventually calls shmem_truncate) + * might be delayed by worker, so when directly called + * to purge the pages(like by the shrinker) we should + * try to be more aggressive and release the pages + * immediately. + */ + shmem_truncate_range(file_inode(i915_tt->filp), + 0, (loff_t)-1); + fput(fetch_and_zero(&i915_tt->filp)); + } } obj->write_domain = 0; -- cgit v1.2.3 From 202e77cf2e839e1adc804433322dc5c9ee511c9f Mon Sep 17 00:00:00 2001 From: Michał Grzelak Date: Thu, 16 Apr 2026 18:37:44 +0200 Subject: drm/i915/aux: use polling when irqs are unavailable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PTL with physically disconnected display was observed to have 40s longer execution time when testing xe_fault_injection@xe_guc_mmio_send_recv. The issue has not been seen when reverting commit 40a9f77a28fa ("Revert "drm/i915/dp: change aux_ctl reg read to polling read""). Apparently the configuration suffers from not having AUX enabled when using interrupts. One probable cause can be xe enabling interrupts too late: interrupts need memory allocations which currently can't be done before the display FB takeover is done. As for now, use polling for AUX in case interrupts are unavailable. Fixes: 40a9f77a28fa ("Revert "drm/i915/dp: change aux_ctl reg read to polling read"") Suggested-by: Ville Syrjälä Signed-off-by: Michał Grzelak Signed-off-by: Ville Syrjälä Link: https://patch.msgid.link/20260416163744.288107-1-michal.grzelak@intel.com (cherry picked from commit 05e0550b65cd1604bd515fbc65f522bce4c10a87) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_dp_aux.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index b20ec3e589fa..9c9b6410366d 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -12,6 +12,7 @@ #include "intel_dp.h" #include "intel_dp_aux.h" #include "intel_dp_aux_regs.h" +#include "intel_parent.h" #include "intel_pps.h" #include "intel_quirks.h" #include "intel_tc.h" @@ -60,18 +61,29 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp) struct intel_display *display = to_intel_display(intel_dp); i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp); const unsigned int timeout_ms = 10; + bool done = true; u32 status; - bool done; + int ret; + if (intel_parent_irq_enabled(display)) { #define C (((status = intel_de_read_notrace(display, ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0) - done = wait_event_timeout(display->gmbus.wait_queue, C, - msecs_to_jiffies_timeout(timeout_ms)); + done = wait_event_timeout(display->gmbus.wait_queue, C, + msecs_to_jiffies_timeout(timeout_ms)); + +#undef C + } else { + ret = intel_de_wait_ms(display, ch_ctl, + DP_AUX_CH_CTL_SEND_BUSY, 0, + timeout_ms, &status); + + if (ret == -ETIMEDOUT) + done = false; + } if (!done) drm_err(display->drm, "%s: did not complete or timeout within %ums (status 0x%08x)\n", intel_dp->aux.name, timeout_ms, status); -#undef C return status; } -- cgit v1.2.3 From d196136a988051173f68f91de0b5a1bd32122dd7 Mon Sep 17 00:00:00 2001 From: Pranay Samala Date: Tue, 19 May 2026 13:23:08 +0530 Subject: drm/i915/color: Fix HDR pre-CSC LUT programming loop The integer lut programming loop never executes completely due to incorrect condition (i++ > 130). Fix to properly program 129th+ entries for values > 1.0. Cc: #v6.19 Fixes: 82caa1c8813f ("drm/i915/color: Program Pre-CSC registers") Signed-off-by: Pranay Samala Signed-off-by: Chaitanya Kumar Borah Reviewed-by: Uma Shankar Signed-off-by: Suraj Kandpal Link: https://patch.msgid.link/20260519075308.383877-1-pranay.samala@intel.com (cherry picked from commit f33862ec3e8849ad7c0a3dd46719083b13ade248) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_color.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index e7950655434b..6d1cffc6d2be 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -3976,7 +3976,7 @@ xelpd_program_plane_pre_csc_lut(struct intel_dsb *dsb, intel_de_write_dsb(display, dsb, PLANE_PRE_CSC_GAMC_DATA_ENH(pipe, plane, 0), (1 << 24)); - } while (i++ > 130); + } while (i++ < 130); } else { for (i = 0; i < lut_size; i++) { u32 v = (i * ((1 << 24) - 1)) / (lut_size - 1); -- cgit v1.2.3 From 8bb9093df555f9e89fdbe1405118b11384c03e04 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Wed, 20 May 2026 13:49:43 +0300 Subject: drm/i915/psr: Block DC states on vblank enable when Panel Replay supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we are blocking DC states only when Panel Replay is enabled on vblank enable. It may happen that Panel Replay is getting enabled when vblank is already enabled. Fix this by blocking DC states always if Panel Replay is supported. While at it take care of possible dual eDP case by looping all encoders supporting PSR. Fixes: 0c427ac78a1d ("drm/i915/psr: Add interface to notify PSR of vblank enable/disable") Cc: # v6.16+ Signed-off-by: Jouni Högander Reviewed-by: Michał Grzelak Link: https://patch.msgid.link/20260520104944.239797-1-jouni.hogander@intel.com (cherry picked from commit eb5911f990554f7ce947dd53df00c114362e4465) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_psr.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 29904a037575..bd5a8c6ac6ef 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -4151,32 +4151,33 @@ void intel_psr_notify_vblank_enable_disable(struct intel_display *display, bool enable) { struct intel_encoder *encoder; + bool block_dc_states = false; for_each_intel_encoder_with_psr(display->drm, encoder) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); mutex_lock(&intel_dp->psr.lock); - if (intel_dp->psr.panel_replay_enabled) { - mutex_unlock(&intel_dp->psr.lock); - break; - } + if (CAN_PANEL_REPLAY(intel_dp)) + block_dc_states = true; - if (intel_dp->psr.enabled && intel_dp->psr.pkg_c_latency_used) + if (intel_dp->psr.enabled && !intel_dp->psr.panel_replay_enabled && + intel_dp->psr.pkg_c_latency_used) intel_psr_apply_underrun_on_idle_wa_locked(intel_dp); mutex_unlock(&intel_dp->psr.lock); - return; } /* * NOTE: intel_display_power_set_target_dc_state is used - * only by PSR * code for DC3CO handling. DC3CO target + * only by PSR code for DC3CO handling. DC3CO target * state is currently disabled in * PSR code. If DC3CO * is taken into use we need take that into account here * as well. */ - intel_display_power_set_target_dc_state(display, enable ? DC_STATE_DISABLE : - DC_STATE_EN_UPTO_DC6); + if (block_dc_states) + intel_display_power_set_target_dc_state(display, enable ? + DC_STATE_DISABLE : + DC_STATE_EN_UPTO_DC6); } static void -- cgit v1.2.3 From 3549a9649dc7c5fc586ab12f675279283cdcb2a7 Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Wed, 20 May 2026 13:49:44 +0300 Subject: drm/i915/psr: Use DC_OFF wake reference to block DC6 on vblank enable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are observing following warnings: *ERROR* power well DC_off state mismatch (refcount 0/enabled 1) gen9_dc_off_power_well_enabled is considering target state DC_STATE_DISABLE as DC_OFF power well being enabled. Fix this by using wakeref for the purpose. To achieve this we need to modify notification code as well. Currently it is possible that PSR gets notified vblank enable/disable twice on same status. This is currently not a problem as it is just triggering call to intel_display_power_set_target_dc_state with same target state as a parameter. When using wakeref this becomes a problem due to reference counting. Fix this storing vbank status on last notification and use that to ensure there are no more than one notification with same vblank status. v2: ensure there is no subsequent notifications with same status Fixes: aa451abcffb5 ("drm/i915/display: Prevent DC6 while vblank is enabled for Panel Replay") Cc: # v6.13+ Signed-off-by: Jouni Högander Reviewed-by: Michał Grzelak Link: https://patch.msgid.link/20260520104944.239797-2-jouni.hogander@intel.com (cherry picked from commit 35485ac56d878192a3829a58cb26503125ec7104) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_display_core.h | 1 + drivers/gpu/drm/i915/display/intel_display_irq.c | 8 ++++++-- drivers/gpu/drm/i915/display/intel_display_types.h | 2 ++ drivers/gpu/drm/i915/display/intel_psr.c | 24 ++++++++-------------- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index d9baca2d5aaf..78afcd42f44c 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -497,6 +497,7 @@ struct intel_display { u8 vblank_enabled; int vblank_enable_count; + bool vblank_status_last_notified; struct work_struct vblank_notify_work; diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index 70c1bba7c0a8..aedf3928a089 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -1773,8 +1773,12 @@ static void intel_display_vblank_notify_work(struct work_struct *work) struct intel_display *display = container_of(work, typeof(*display), irq.vblank_notify_work); int vblank_enable_count = READ_ONCE(display->irq.vblank_enable_count); + bool vblank_status = !!vblank_enable_count; - intel_psr_notify_vblank_enable_disable(display, vblank_enable_count); + if (display->irq.vblank_status_last_notified != vblank_status) { + intel_psr_notify_vblank_enable_disable(display, vblank_status); + display->irq.vblank_status_last_notified = vblank_status; + } } int bdw_enable_vblank(struct drm_crtc *_crtc) @@ -1787,10 +1791,10 @@ int bdw_enable_vblank(struct drm_crtc *_crtc) if (gen11_dsi_configure_te(crtc, true)) return 0; + spin_lock_irqsave(&display->irq.lock, irqflags); if (crtc->vblank_psr_notify && display->irq.vblank_enable_count++ == 0) schedule_work(&display->irq.vblank_notify_work); - spin_lock_irqsave(&display->irq.lock, irqflags); bdw_enable_pipe_irq(display, pipe, GEN8_PIPE_VBLANK); spin_unlock_irqrestore(&display->irq.lock, irqflags); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 9c7c357afb09..2e6a85708555 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1790,6 +1790,8 @@ struct intel_psr { u8 active_non_psr_pipes; const char *no_psr_reason; + + struct ref_tracker *vblank_wakeref; }; struct intel_dp { diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index bd5a8c6ac6ef..598fe769a402 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -4151,14 +4151,20 @@ void intel_psr_notify_vblank_enable_disable(struct intel_display *display, bool enable) { struct intel_encoder *encoder; - bool block_dc_states = false; for_each_intel_encoder_with_psr(display->drm, encoder) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); mutex_lock(&intel_dp->psr.lock); - if (CAN_PANEL_REPLAY(intel_dp)) - block_dc_states = true; + if (CAN_PANEL_REPLAY(intel_dp)) { + if (enable) + intel_dp->psr.vblank_wakeref = + intel_display_power_get(display, + POWER_DOMAIN_DC_OFF); + else + intel_display_power_put(display, POWER_DOMAIN_DC_OFF, + intel_dp->psr.vblank_wakeref); + } if (intel_dp->psr.enabled && !intel_dp->psr.panel_replay_enabled && intel_dp->psr.pkg_c_latency_used) @@ -4166,18 +4172,6 @@ void intel_psr_notify_vblank_enable_disable(struct intel_display *display, mutex_unlock(&intel_dp->psr.lock); } - - /* - * NOTE: intel_display_power_set_target_dc_state is used - * only by PSR code for DC3CO handling. DC3CO target - * state is currently disabled in * PSR code. If DC3CO - * is taken into use we need take that into account here - * as well. - */ - if (block_dc_states) - intel_display_power_set_target_dc_state(display, enable ? - DC_STATE_DISABLE : - DC_STATE_EN_UPTO_DC6); } static void -- cgit v1.2.3 From 7f83d174073234839aea176f265e517e0d50a1d2 Mon Sep 17 00:00:00 2001 From: Shaomin Chen Date: Thu, 21 May 2026 02:07:23 +0800 Subject: xfrm: iptfs: reset runtime state when cloning SAs iptfs_clone_state() clones the IPTFS mode data with kmemdup(). This copies runtime objects which must not be shared with the original SA, including the embedded sk_buff_head, hrtimers, spinlock, and in-flight reassembly/reorder state. If xfrm_state_migrate() fails after clone_state() but before the later init_state() call has reinitialized those fields, the cloned state can be destroyed by xfrm_state_gc_task() with list and timer state copied from the original SA. With queued packets this lets the clone splice and free skbs owned by the original IPTFS queue, leading to use-after-free and double-free reports in iptfs_destroy_state() and skb release paths. Reinitialize the clone's runtime state before publishing it through x->mode_data. Because clone_state() now publishes a destroyable mode_data object before init_state(), take the mode callback module reference there. Avoid taking it again from __iptfs_init_state() for the same object. Fixes: 0e4fbf013fa5 ("xfrm: iptfs: add user packet (tunnel ingress) handling") Cc: stable@vger.kernel.org Signed-off-by: Shaomin Chen Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_iptfs.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 97bc979e55ba..6c6bbc040517 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -2650,7 +2650,8 @@ static void __iptfs_init_state(struct xfrm_state *x, x->props.enc_hdr_len = sizeof(struct ip_iptfs_hdr); /* Always keep a module reference when x->mode_data is set */ - __module_get(x->mode_cbs->owner); + if (x->mode_data != xtfs) + __module_get(x->mode_cbs->owner); x->mode_data = xtfs; xtfs->x = x; @@ -2658,22 +2659,39 @@ static void __iptfs_init_state(struct xfrm_state *x, static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) { + struct skb_wseq *w_saved = NULL; struct xfrm_iptfs_data *xtfs; xtfs = kmemdup(orig->mode_data, sizeof(*xtfs), GFP_KERNEL); if (!xtfs) return -ENOMEM; - xtfs->ra_newskb = NULL; if (xtfs->cfg.reorder_win_size) { - xtfs->w_saved = kzalloc_objs(*xtfs->w_saved, - xtfs->cfg.reorder_win_size); - if (!xtfs->w_saved) { + w_saved = kzalloc_objs(*w_saved, xtfs->cfg.reorder_win_size); + if (!w_saved) { kfree_sensitive(xtfs); return -ENOMEM; } } + xtfs->w_saved = w_saved; + + __skb_queue_head_init(&xtfs->queue); + xtfs->queue_size = 0; + hrtimer_setup(&xtfs->iptfs_timer, iptfs_delay_timer, CLOCK_MONOTONIC, + IPTFS_HRTIMER_MODE); + + spin_lock_init(&xtfs->drop_lock); + hrtimer_setup(&xtfs->drop_timer, iptfs_drop_timer, CLOCK_MONOTONIC, + IPTFS_HRTIMER_MODE); + xtfs->w_seq_set = false; + xtfs->w_wantseq = 0; + xtfs->w_savedlen = 0; + xtfs->ra_newskb = NULL; + xtfs->ra_wantseq = 0; + xtfs->ra_runtlen = 0; + + __module_get(x->mode_cbs->owner); x->mode_data = xtfs; xtfs->x = x; -- cgit v1.2.3 From 3e52417318473782012b236d0325bf7d2266a597 Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Thu, 21 May 2026 03:29:26 -0700 Subject: xfrm: move policy_bydst RCU sync from per-netns .exit to .pre_exit The struct pernet_operations docstring in include/net/net_namespace.h explicitly warns against blocking RCU primitives in .exit handlers: Exit methods using blocking RCU primitives, such as synchronize_rcu(), should be implemented via exit_batch. [...] Please, avoid synchronize_rcu() at all, where it's possible. Note that a combination of pre_exit() and exit() can be used, since a synchronize_rcu() is guaranteed between the calls. xfrm_policy_fini() violates this: it calls synchronize_rcu() before freeing the policy_bydst hash tables (so no RCU reader is mid- traversal at free time), but runs from xfrm_net_ops.exit -- once per namespace -- so a cleanup_net() of N namespaces pays N full RCU grace periods serially. Use the documented pre_exit/exit split. Move the policy flush (and the workqueue drains it depends on) into a new .pre_exit handler; xfrm_policy_fini() then runs in .exit and frees the hash tables after the synchronize_rcu_expedited() that cleanup_net() guarantees between the two phases. Providing O(1) RCU grace periods per batch instead of O(N). Observed on Linux 6.18 with a workload doing unshare(CLONE_NEWNET) at ~13/sec sustained: cleanup_net() and the netns_wq rescuer kthread both stuck in xfrm_policy_fini()'s synchronize_rcu(), >300k struct net accumulated in the cleanup queue, Percpu in /proc/meminfo climbed to 130+ GB on 256-CPU hosts, and memcg OOMs followed. setup_net and __put_net counts were balanced, ruling out a refcount leak. Fixes: 069daad4f2ae ("xfrm: Wait for RCU readers during policy netns exit") Signed-off-by: Usama Arif Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 59968dcbafe1..dd09d2063da2 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4276,21 +4276,21 @@ out_byidx: return -ENOMEM; } -static void xfrm_policy_fini(struct net *net) +static void __net_exit xfrm_net_pre_exit(struct net *net) { - struct xfrm_pol_inexact_bin *b, *t; - unsigned int sz; - int dir; - disable_work_sync(&net->xfrm.policy_hthresh.work); - flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); #endif xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false); +} - synchronize_rcu(); +static void xfrm_policy_fini(struct net *net) +{ + struct xfrm_pol_inexact_bin *b, *t; + unsigned int sz; + int dir; WARN_ON(!list_empty(&net->xfrm.policy_all)); @@ -4368,6 +4368,7 @@ static void __net_exit xfrm_net_exit(struct net *net) static struct pernet_operations __net_initdata xfrm_net_ops = { .init = xfrm_net_init, + .pre_exit = xfrm_net_pre_exit, .exit = xfrm_net_exit, }; -- cgit v1.2.3 From c16f74dc1d75d0e2e7670076d5375deda110ebeb Mon Sep 17 00:00:00 2001 From: Zhengchuan Liang Date: Fri, 22 May 2026 17:31:55 +0800 Subject: xfrm: input: hold netns during deferred transport reinjection Transport-mode reinjection stores a struct net pointer in skb->cb and uses it later from xfrm_trans_reinject(). That pointer must stay valid until the deferred callback runs. Take a netns reference when queueing deferred reinjection work and drop it after the callback completes. Use maybe_get_net() so the queueing path does not revive a namespace that is already being torn down. This keeps the existing workqueue design and fixes the netns lifetime handling in one place for all users of xfrm_trans_queue_net(). Fixes: 7b3801927e52 ("xfrm: introduce xfrm_trans_queue_net") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Xin Liu Co-developed-by: Luxing Yin Signed-off-by: Luxing Yin Signed-off-by: Zhengchuan Liang Signed-off-by: Ren Wei Assisted-by: Codex:gpt-5.4 Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index f65291eba1f6..e4c2cd24936d 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -797,9 +797,12 @@ static void xfrm_trans_reinject(struct work_struct *work) spin_unlock_bh(&trans->queue_lock); local_bh_disable(); - while ((skb = __skb_dequeue(&queue))) - XFRM_TRANS_SKB_CB(skb)->finish(XFRM_TRANS_SKB_CB(skb)->net, - NULL, skb); + while ((skb = __skb_dequeue(&queue))) { + struct net *net = XFRM_TRANS_SKB_CB(skb)->net; + + XFRM_TRANS_SKB_CB(skb)->finish(net, NULL, skb); + put_net(net); + } local_bh_enable(); } @@ -808,6 +811,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, struct sk_buff *)) { struct xfrm_trans_tasklet *trans; + struct net *hold_net; trans = this_cpu_ptr(&xfrm_trans_tasklet); @@ -816,8 +820,12 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb)); + hold_net = maybe_get_net(net); + if (!hold_net) + return -ENODEV; + XFRM_TRANS_SKB_CB(skb)->finish = finish; - XFRM_TRANS_SKB_CB(skb)->net = net; + XFRM_TRANS_SKB_CB(skb)->net = hold_net; spin_lock_bh(&trans->queue_lock); __skb_queue_tail(&trans->queue, skb); spin_unlock_bh(&trans->queue_lock); -- cgit v1.2.3 From 25fe708bbc59289d3d1ea4b126fbc1b460a072a5 Mon Sep 17 00:00:00 2001 From: Weiming Shi Date: Thu, 21 May 2026 01:12:01 -0700 Subject: net: team: fix NULL pointer dereference in team_xmit during mode change __team_change_mode() clears team->ops with memset() before restoring safe dummy handlers via team_adjust_ops(). A concurrent team_xmit() running under RCU on another CPU can read team->ops.transmit during this window and call a NULL function pointer, crashing the kernel. The race requires a mode change (CAP_NET_ADMIN) concurrent with transmit on the team device. BUG: kernel NULL pointer dereference, address: 0000000000000000 Oops: 0010 [#1] SMP KASAN NOPTI RIP: 0010:0x0 Call Trace: team_xmit (drivers/net/team/team_core.c:1853) dev_hard_start_xmit (net/core/dev.c:3904) __dev_queue_xmit (net/core/dev.c:4871) packet_sendmsg (net/packet/af_packet.c:3109) __sys_sendto (net/socket.c:2265) The original code assumed that no ports means no traffic, so mode changes could freely memset()/memcpy() the ops. AF_PACKET with forced carrier breaks that assumption. Prevent the race instead of making it safe: replace memset()/memcpy() with per-field updates that never touch transmit or receive. Those two handlers are managed solely by team_adjust_ops(), which already installs dummies when tx_en_port_count == 0 (always true during mode change since no ports are present). WRITE_ONCE/READ_ONCE prevent store/load tearing on the handler pointers. synchronize_net() before exit_op() drains in-flight readers that may still reference old mode state from before port removal switched the handlers to dummies. Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Reported-by: Xiang Mei Signed-off-by: Weiming Shi Reviewed-by: Jiayuan Chen Link: https://patch.msgid.link/20260521081159.1491563-3-bestswngs@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/team/team_core.c | 45 +++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index 0c87f9972457..f51388d50307 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -534,21 +534,23 @@ static void team_adjust_ops(struct team *team) if (!team->tx_en_port_count || !team_is_mode_set(team) || !team->mode->ops->transmit) - team->ops.transmit = team_dummy_transmit; + WRITE_ONCE(team->ops.transmit, team_dummy_transmit); else - team->ops.transmit = team->mode->ops->transmit; + WRITE_ONCE(team->ops.transmit, team->mode->ops->transmit); if (!team->rx_en_port_count || !team_is_mode_set(team) || !team->mode->ops->receive) - team->ops.receive = team_dummy_receive; + WRITE_ONCE(team->ops.receive, team_dummy_receive); else - team->ops.receive = team->mode->ops->receive; + WRITE_ONCE(team->ops.receive, team->mode->ops->receive); } /* - * We can benefit from the fact that it's ensured no port is present - * at the time of mode change. Therefore no packets are in fly so there's no - * need to set mode operations in any special way. + * team_change_mode() ensures no ports are present during mode change, + * but lockless readers can still reach team_xmit(). Avoid touching + * transmit/receive -- they are already set to dummies by + * team_adjust_ops() since no ports are enabled. synchronize_net() + * drains in-flight readers before destroying old mode state. */ static int __team_change_mode(struct team *team, const struct team_mode *new_mode) @@ -557,9 +559,21 @@ static int __team_change_mode(struct team *team, if (team_is_mode_set(team)) { void (*exit_op)(struct team *team) = team->ops.exit; - /* Clear ops area so no callback is called any longer */ - memset(&team->ops, 0, sizeof(struct team_mode_ops)); - team_adjust_ops(team); + /* Clear cold-path ops used only under RTNL. transmit and + * receive are already dummies (no ports) so leave them + * alone -- overwriting them is the source of the race. + */ + team->ops.init = NULL; + team->ops.exit = NULL; + team->ops.port_enter = NULL; + team->ops.port_leave = NULL; + team->ops.port_change_dev_addr = NULL; + team->ops.port_tx_disabled = NULL; + + /* Wait for in-flight readers before tearing down mode + * state they may reference. + */ + synchronize_net(); if (exit_op) exit_op(team); @@ -582,7 +596,12 @@ static int __team_change_mode(struct team *team, } team->mode = new_mode; - memcpy(&team->ops, new_mode->ops, sizeof(struct team_mode_ops)); + team->ops.init = new_mode->ops->init; + team->ops.exit = new_mode->ops->exit; + team->ops.port_enter = new_mode->ops->port_enter; + team->ops.port_leave = new_mode->ops->port_leave; + team->ops.port_change_dev_addr = new_mode->ops->port_change_dev_addr; + team->ops.port_tx_disabled = new_mode->ops->port_tx_disabled; team_adjust_ops(team); return 0; @@ -743,7 +762,7 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb) /* allow exact match delivery for disabled ports */ res = RX_HANDLER_EXACT; } else { - res = team->ops.receive(team, port, skb); + res = READ_ONCE(team->ops.receive)(team, port, skb); } if (res == RX_HANDLER_ANOTHER) { struct team_pcpu_stats *pcpu_stats; @@ -1845,7 +1864,7 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev) tx_success = team_queue_override_transmit(team, skb); if (!tx_success) - tx_success = team->ops.transmit(team, skb); + tx_success = READ_ONCE(team->ops.transmit)(team, skb); if (tx_success) { struct team_pcpu_stats *pcpu_stats; -- cgit v1.2.3 From 11b326fb0a374f4654f9be22d0f0f7abd9f7d3fe Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 21 May 2026 21:05:54 +0800 Subject: ip6: vti: Use ip6_tnl.net in vti6_changelink(). ip netns add ns1 ip netns add ns2 ip -n ns1 link add vti6_test type vti6 remote ::1 local ::2 key 7 ip -n ns1 link set vti6_test netns ns2 ip -n ns2 link set vti6_test type vti6 remote ::3 local ::4 key 9 ip netns del ns2 ip netns del ns1 [ 132.495484] ------------[ cut here ]------------ [ 132.497609] kernel BUG at net/core/dev.c:12376! Commit 61220ab34948 ("vti6: Enable namespace changing") dropped NETIF_F_NETNS_LOCAL from vti6 devices. A vti6 tunnel can then move through IFLA_NET_NS_FD. After the move dev_net(dev) points at the new netns while t->net stays at the creation netns. vti6_changelink() and vti6_update() still use dev_net(dev) and dev_net(t->dev). They unlink from one per netns hash and relink into another. The creation netns is left with a stale entry. cleanup_net() of that netns later walks freed memory. Reachable from an unprivileged user namespace (unshare --user --map-root-user --net). Cross tenant scope on container hosts. Fixes: 61220ab34948 ("vti6: Enable namespace changing") Reported-by: Maoyi Xie Reviewed-by: Eric Dumazet Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260521130555.3421684-2-maoyixie.tju@gmail.com Signed-off-by: Paolo Abeni --- net/ipv6/ip6_vti.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ad5290be4dd6..dcb257411d6e 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -722,10 +722,11 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p, static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p, bool keep_mtu) { - struct net *net = dev_net(t->dev); - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct net *net = t->net; + struct vti6_net *ip6n; int err; + ip6n = net_generic(net, vti6_net_id); vti6_tnl_unlink(ip6n, t); synchronize_net(); err = vti6_tnl_change(t, p, keep_mtu); @@ -1031,11 +1032,12 @@ static int vti6_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip6_tnl *t; + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; struct __ip6_tnl_parm p; - struct net *net = dev_net(dev); - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct vti6_net *ip6n; + ip6n = net_generic(net, vti6_net_id); if (dev == ip6n->fb_tnl_dev) return -EINVAL; -- cgit v1.2.3 From 8b484efd5cb4eeef9021a661e198edc5349dacf6 Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Thu, 21 May 2026 21:05:55 +0800 Subject: ip6: vti: Use ip6_tnl.net in vti6_siocdevprivate(). After patch 1/2 in this series, vti6_update() unlinks and relinks the tunnel through t->net. vti6_siocdevprivate() still uses dev_net(dev) for the collision lookup. For a tunnel moved through IFLA_NET_NS_FD, dev_net(dev) is the new netns, not t->net. SIOCCHGTUNNEL on a migrated tunnel then runs: net = dev_net(dev) /* migrated netns */ t = vti6_locate(net, &p1, false) /* misses target in t->net */ ... t = netdev_priv(dev) vti6_update(t, &p1, false) /* mutates t->net's hash */ A caller in the migrated netns picks params that match a tunnel in the creation netns. The lookup in dev_net(dev) finds nothing. vti6_update() prepends the migrated tunnel at the head of the creation netns hash bucket for those params. Later lookups in the creation netns resolve to the migrated device. xfrm receive delivers the matched packets through a device the caller controls. Reachable from an unprivileged user namespace (unshare --user --map-root-user --net). Cross tenant scope on container hosts. Switch the SIOCCHGTUNNEL path on a non fallback device to use t->net for the lookup. The lookup now matches the netns vti6_update() operates on. Also add ns_capable(self->net->user_ns, CAP_NET_ADMIN) before the lookup. The check at the top of the case is against dev_net(dev)->user_ns, which after migration is the attacker's netns. A caller there can pick params absent from self->net, the lookup returns NULL, t becomes self, and vti6_update() inserts the device into the creation netns hash. The new check requires CAP_NET_ADMIN in the creation netns user_ns too. SIOCADDTUNNEL and SIOCCHGTUNNEL on the fallback device keep dev_net(dev), which equals init_net there. Fixes: 61220ab34948 ("vti6: Enable namespace changing") Suggested-by: Jakub Kicinski Suggested-by: Xiao Liang Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Maoyi Xie Link: https://patch.msgid.link/20260521130555.3421684-3-maoyixie.tju@gmail.com Signed-off-by: Paolo Abeni --- net/ipv6/ip6_vti.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index dcb257411d6e..df793c8bfffb 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -835,17 +835,24 @@ vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data if (p.proto != IPPROTO_IPV6 && p.proto != 0) break; vti6_parm_from_user(&p1, &p); - t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { + struct ip6_tnl *self = netdev_priv(dev); + + err = -EPERM; + if (!ns_capable(self->net->user_ns, CAP_NET_ADMIN)) + break; + t = vti6_locate(self->net, &p1, false); if (t) { if (t->dev != dev) { err = -EEXIST; break; } } else - t = netdev_priv(dev); + t = self; err = vti6_update(t, &p1, false); + } else { + t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL); } if (t) { err = 0; -- cgit v1.2.3 From df488cac6140aa04ae52af9b4507d8f99a3762be Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Sat, 23 May 2026 05:55:03 +0000 Subject: cpufreq/amd-pstate-ut: Disable dynamic_epp after the mode switch Dan reported a possible NULL pointer dereference in amd-pstate-ut.c from static analysis and sure enough, running amd-pstate-ut in active mode with amd_dynamic_epp=enable results in a crash as a reult of the policy reference being set to NULL early, before disabling dynamic EPP. Kalpana also reported seeing amd-pstate-ut error out with -EBUSY for "amd_pstate_ut_epp" test when starting from the passive mode and amd_dynamic_epp=enable in the command line. The reason for the failure is that the command line enables dynamic_epp by default after the mode switch and the modifications to EPP values are blocked when running in dynamic EPP mode. Solution to both problems is to toggle off dynamic_epp *after* the mode switch when the driver grabs the policy reference again since the unit test is in full control of the policy after that point. The final restoration step will reset the dynamic_epp state via mode switch based on the initial conditions of the system. Reported-by: Kalpana Shetty Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-pm/ahEq0CvdBX0T7_cO@stanley.mountain/ Fixes: f9f16835d4dc ("cpufreq/amd-pstate-ut: Drop policy reference before driver switch") Signed-off-by: K Prateek Nayak Link: https://patch.msgid.link/20260523055503.7651-1-kprateek.nayak@amd.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate-ut.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index 13a23dac477d..735b29f76438 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -302,12 +302,6 @@ static int amd_pstate_ut_epp(u32 index) cpufreq_cpu_put(policy); policy = NULL; - /* disable dynamic EPP before running test */ - if (cpudata->dynamic_epp) { - pr_debug("Dynamic EPP is enabled, disabling it\n"); - amd_pstate_clear_dynamic_epp(policy); - } - buf = (char *)__get_free_page(GFP_KERNEL); if (!buf) return -ENOMEM; @@ -327,6 +321,16 @@ static int amd_pstate_ut_epp(u32 index) orig_policy = cpudata->policy; cpudata->policy = CPUFREQ_POLICY_POWERSAVE; + /* + * Disable dynamic EPP before running test. If "orig_dynamic_epp" is + * true, the driver will do a redundant switch at the end and there + * is no need for enabling it again at the end of the test. + */ + if (cpudata->dynamic_epp) { + pr_debug("Dynamic EPP is enabled, disabling it\n"); + amd_pstate_clear_dynamic_epp(policy); + } + for (epp = 0; epp <= U8_MAX; epp++) { u8 val; -- cgit v1.2.3 From 2e357f002c61fd76fd8f12468744a06a5ec48eaa Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 22 May 2026 14:06:40 +0200 Subject: net: Avoid checksumming unreadable skb tail on trim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pskb_trim_rcsum_slow() keeps CHECKSUM_COMPLETE valid by subtracting the checksum of the bytes removed from the skb tail. That assumes the removed bytes can be read. io_uring zcrx skbs may contain unreadable net_iov frags. With fbnic header/data split, small TCP/IPv4 packets can carry Ethernet padding in such a frag. ip_rcv_core() trims the skb to iph->tot_len before TCP sees it, and the CHECKSUM_COMPLETE adjustment then calls skb_checksum() on the padding. This is exposed by IPv4 because small TCP/IPv4 frames can be shorter than the Ethernet minimum payload. TCP/IPv6 frames are large enough in the normal zcrx path, so they do not hit the same padding trim. Keep the existing checksum adjustment for readable skbs. If the remaining packet is fully linear, drop CHECKSUM_COMPLETE and let the stack validate the packet after trimming. If unreadable payload would remain, fail the trim; the checksum cannot be adjusted without reading the trimmed tail. Also clear skb->unreadable when trimming removes all frags. Fixes: 65249feb6b3d ("net: add support for skbs with unreadable frags") Signed-off-by: Björn Töpel Reviewed-by: Breno Leitao Link: https://patch.msgid.link/20260522120643.242974-1-bjorn@kernel.org Signed-off-by: Paolo Abeni --- net/core/skbuff.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 44ac121cfccb..d247acd447e4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2787,6 +2787,8 @@ done: skb->data_len = 0; skb_set_tail_pointer(skb, len); } + if (!skb_shinfo(skb)->nr_frags && !skb_has_frag_list(skb)) + skb->unreadable = 0; if (!skb->sk || skb->destructor == sock_edemux) skb_condense(skb); @@ -2794,16 +2796,37 @@ done: } EXPORT_SYMBOL(___pskb_trim); +static int pskb_trim_rcsum_complete(struct sk_buff *skb, unsigned int len) +{ + int delta = skb->len - len; + + if (skb_frags_readable(skb)) { + skb->csum = csum_block_sub(skb->csum, + skb_checksum(skb, len, delta, 0), + len); + return 0; + } + + if (len > skb_headlen(skb)) + return -EFAULT; + + /* The trimmed bytes are unreadable, but the remaining packet can be + * checksummed by software after trimming. + */ + skb->ip_summed = CHECKSUM_NONE; + return 0; +} + /* Note : use pskb_trim_rcsum() instead of calling this directly */ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) { - int delta = skb->len - len; + int err; - skb->csum = csum_block_sub(skb->csum, - skb_checksum(skb, len, delta, 0), - len); + err = pskb_trim_rcsum_complete(skb, len); + if (err) + return err; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { int hdlen = (len > skb_headlen(skb)) ? skb_headlen(skb) : len; int offset = skb_checksum_start_offset(skb) + skb->csum_offset; -- cgit v1.2.3 From c75b6f6eaacd0b74b832414cc3b9289c3686e941 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:06:42 -0700 Subject: ethtool: rss: avoid modifying the RSS context response Gemini says that we're modifying the RSS_CREATE response skb. I think it's right, the comment says that unicast() should unshare the skb but I'm not entirely sure what I meant there. netlink_trim() does a copy but only if skb is not well sized (it's at least 2x larger than necessary for the payload). Fixes: a166ab7816c5 ("ethtool: rss: support creating contexts via Netlink") Link: https://patch.msgid.link/20260522230647.1705600-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/rss.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 353110b862ab..8ffec9785efa 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -981,11 +981,17 @@ ethnl_rss_create_validate(struct net_device *dev, struct genl_info *info) } static void -ethnl_rss_create_send_ntf(struct sk_buff *rsp, struct net_device *dev) +ethnl_rss_create_send_ntf(const struct sk_buff *rsp, struct net_device *dev) { - struct nlmsghdr *nlh = (void *)rsp->data; struct genlmsghdr *genl_hdr; + struct nlmsghdr *nlh; + struct sk_buff *ntf; + + ntf = skb_copy_expand(rsp, 0, 0, GFP_KERNEL); + if (!ntf) + return; + nlh = nlmsg_hdr(ntf); /* Convert the reply into a notification */ nlh->nlmsg_pid = 0; nlh->nlmsg_seq = ethnl_bcast_seq_next(); @@ -993,7 +999,7 @@ ethnl_rss_create_send_ntf(struct sk_buff *rsp, struct net_device *dev) genl_hdr = nlmsg_data(nlh); genl_hdr->cmd = ETHTOOL_MSG_RSS_CREATE_NTF; - ethnl_multicast(rsp, dev); + ethnl_multicast(ntf, dev); } int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info) @@ -1104,12 +1110,8 @@ int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info) genlmsg_end(rsp, hdr); - /* Use the same skb for the response and the notification, - * genlmsg_reply() will copy the skb if it has elevated user count. - */ - skb_get(rsp); - ret = genlmsg_reply(rsp, info); ethnl_rss_create_send_ntf(rsp, dev); + ret = genlmsg_reply(rsp, info); rsp = NULL; exit_unlock: -- cgit v1.2.3 From 3e6c6e9782ff8a8d8ded774b07ad4590cd61d04c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:06:43 -0700 Subject: ethtool: rss: add missing errno on RSS context delete Remember to set ret before jumping out if someone tries to delete a context on a device which doesn't support contexts. Fixes: fbe09277fa63 ("ethtool: rss: support removing contexts via Netlink") Link: https://patch.msgid.link/20260522230647.1705600-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/rss.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 8ffec9785efa..a16ee1e8e640 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -1170,8 +1170,10 @@ int ethnl_rss_delete_doit(struct sk_buff *skb, struct genl_info *info) dev = req.dev; ops = dev->ethtool_ops; - if (!ops->create_rxfh_context) + if (!ops->create_rxfh_context) { + ret = -EOPNOTSUPP; goto exit_free_dev; + } rtnl_lock(); netdev_lock_ops(dev); -- cgit v1.2.3 From 8d60141a32875248ef71d49c9920fa5e2aa40b29 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:06:44 -0700 Subject: ethtool: rss: fix falsely ignoring indir table updates rss_set_prep_indir() compares the new indirection table against the current one to determine whether any update is needed. The memcmp call passes data->indir_size as the length argument, but indir_size is the number of u32 entries, not the byte count. Fixes: c0ae03588bbb ("ethtool: rss: initial RSS_SET (indirection table handling)") Link: https://patch.msgid.link/20260522230647.1705600-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/rss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index a16ee1e8e640..458a4a7907e4 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -686,7 +686,7 @@ rss_set_prep_indir(struct net_device *dev, struct genl_info *info, ethtool_rxfh_indir_default(i, num_rx_rings); } - *mod |= memcmp(rxfh->indir, data->indir_table, data->indir_size); + *mod |= memcmp(rxfh->indir, data->indir_table, alloc_size); return user_size; -- cgit v1.2.3 From 266297692f97008ca48bc311775c087c59bd7fe3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:06:45 -0700 Subject: ethtool: rss: fix indir_table and hkey leak on get_rxfh failure rss_prepare_get() allocates the indirection table and hash key buffer via rss_get_data_alloc(), then calls ops->get_rxfh() to populate them. If get_rxfh() fails, the function returns an error without freeing the allocation. Fixes: 4f038a6a02d2 ("net: ethtool: Don't call .cleanup_data when prepare_data fails") Link: https://patch.msgid.link/20260522230647.1705600-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/rss.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 458a4a7907e4..9fb675d29232 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -170,8 +170,10 @@ rss_prepare_get(const struct rss_req_info *request, struct net_device *dev, rxfh.key = data->hkey; ret = ops->get_rxfh(dev, &rxfh); - if (ret) + if (ret) { + rss_get_data_free(data); goto out_unlock; + } data->hfunc = rxfh.hfunc; data->input_xfrm = rxfh.input_xfrm; -- cgit v1.2.3 From 78ccf1a70c6378e1f5073a8c2209b5129067b925 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:06:46 -0700 Subject: ethtool: rss: fix hkey leak when indir_size is 0 rss_get_data_alloc() allocates a single buffer that backs both the indirection table and the hash key, but only assigned data->indir_table when indir_size was nonzero. The expectation was that no driver implements RSS without supporting indirection table but apparently enic does just that (it's the only such in-tree driver). enic has get_rxfh_key_size but no get_rxfh_indir_size. data->indir_table stays as NULL, hkey gets set but rss_get_data_free() kfree(data->indir_table) is a nop and the allocation leaks. Always store the allocation base in data->indir_table so the free path is unambiguous. No caller treats indir_table as a sentinel; everything keys off indir_size. Fixes: 7112a04664bf ("ethtool: add netlink based get rss support") Link: https://patch.msgid.link/20260522230647.1705600-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/rss.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 9fb675d29232..f5cf214f8f85 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -134,8 +134,7 @@ rss_get_data_alloc(struct net_device *dev, struct rss_reply_data *data) if (!rss_config) return -ENOMEM; - if (data->indir_size) - data->indir_table = (u32 *)rss_config; + data->indir_table = (u32 *)rss_config; if (data->hkey_size) data->hkey = rss_config + indir_bytes; -- cgit v1.2.3 From 32a9ecde62731c9f7412507709192c84dafc38d1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:06:47 -0700 Subject: ethtool: rss: avoid device context leak on reply-build failure We wait with filling the reply for new RSS context creation until after the driver ->create_rxfh_context call. The driver needs to fill some of the defaults in the context. The failure of rss_fill_reply() is somewhat theoretical, but doesn't take much effort to handle it properly. Call ->remove_rxfh_context(). If the driver's remove callback fails (some implementations like sfc can return real command errors from firmware RPCs) - skip the xa_erase and kfree, leaving the context in the xarray. This matches how ethnl_rss_delete_doit() behaves. Fixes: a166ab7816c5 ("ethtool: rss: support creating contexts via Netlink") Link: https://patch.msgid.link/20260522230647.1705600-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/rss.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index f5cf214f8f85..53792f53f922 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -1106,7 +1106,7 @@ int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info) ntf_fail |= rss_fill_reply(rsp, &req.base, &data.base); if (WARN_ON(!hdr || ntf_fail)) { ret = -EMSGSIZE; - goto exit_unlock; + goto err_remove_ctx; } genlmsg_end(rsp, hdr); @@ -1134,6 +1134,10 @@ exit_free_rsp: nlmsg_free(rsp); return ret; +err_remove_ctx: + if (ops->remove_rxfh_context(dev, ctx, req.rss_context, NULL)) + /* leave the context on failure, like ethnl_rss_delete_doit() */ + goto exit_unlock; err_ctx_id_free: xa_erase(&dev->ethtool->rss_ctx, req.rss_context); err_unlock_free_ctx: -- cgit v1.2.3 From 84371fb58423f997939aacdcbc02d128d76a54e5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:13:04 -0700 Subject: ethtool: module: call ethnl_ops_complete() on module flash errors When validate() fails we are skipping over ethnl_ops_complete() even tho we already called ethnl_ops_begin(). Fixes: 32b4c8b53ee7 ("ethtool: Add ability to flash transceiver modules' firmware") Reviewed-by: Maxime Chevallier Reviewed-by: Danielle Ratson Link: https://patch.msgid.link/20260522231312.1710836-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ethtool/module.c b/net/ethtool/module.c index cad2eb25b5a4..741f6fb25d45 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -427,10 +427,11 @@ int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info) ret = ethnl_module_fw_flash_validate(dev, info->extack); if (ret < 0) - goto out_unlock; + goto out_complete; ret = module_flash_fw(dev, tb, skb, info); +out_complete: ethnl_ops_complete(dev); out_unlock: -- cgit v1.2.3 From fb7f511d62692661846c47f199e0afe25c2982db Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:13:05 -0700 Subject: ethtool: module: avoid leaking a netdev ref on module flash errors module_flash_fw_schedule() is missing undo for setting the "in_progress" flag and taking the netdev reference. Delay taking these, the device can't disappear while we are holding rtnl_lock. Fixes: 32b4c8b53ee7 ("ethtool: Add ability to flash transceiver modules' firmware") Reviewed-by: Maxime Chevallier Reviewed-by: Danielle Ratson Link: https://patch.msgid.link/20260522231312.1710836-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/module.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ethtool/module.c b/net/ethtool/module.c index 741f6fb25d45..392c03935e5e 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -319,8 +319,6 @@ module_flash_fw_schedule(struct net_device *dev, const char *file_name, if (err < 0) goto err_release_firmware; - dev->ethtool->module_fw_flash_in_progress = true; - netdev_hold(dev, &module_fw->dev_tracker, GFP_KERNEL); fw_update->dev = dev; fw_update->ntf_params.portid = info->snd_portid; fw_update->ntf_params.seq = info->snd_seq; @@ -335,6 +333,9 @@ module_flash_fw_schedule(struct net_device *dev, const char *file_name, if (err < 0) goto err_release_firmware; + dev->ethtool->module_fw_flash_in_progress = true; + netdev_hold(dev, &module_fw->dev_tracker, GFP_KERNEL); + schedule_work(&module_fw->work); return 0; -- cgit v1.2.3 From 7a84b965ffc12030af63cd10a8f3a1123ff39b7a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:13:06 -0700 Subject: ethtool: module: avoid racy updates to dev->ethtool bitfield When reviewing other changes Gemini points out that we currently update module_fw_flash_in_progress without holding any locks. Since module_fw_flash_in_progress is part of a bitfield this is not great, updates to other fields may be lost. We could use a bool and sprinkle some READ_ONCE/WRITE_ONCE here but seems like the issue is rather than the work is an unusual writer. The other writers already hold the right locks. So just very briefly take these locks when the work completes. Note that nothing ever cancels the FW update work, so there's no concern with deadlocks vs cancel. Fixes: 32b4c8b53ee7 ("ethtool: Add ability to flash transceiver modules' firmware") Reviewed-by: Maxime Chevallier Reviewed-by: Danielle Ratson Link: https://patch.msgid.link/20260522231312.1710836-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/module.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ethtool/module.c b/net/ethtool/module.c index 392c03935e5e..cdb85e19a23b 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -221,14 +221,22 @@ static void module_flash_fw_work_list_del(struct list_head *list) static void module_flash_fw_work(struct work_struct *work) { struct ethtool_module_fw_flash *module_fw; + struct net_device *dev; module_fw = container_of(work, struct ethtool_module_fw_flash, work); + dev = module_fw->fw_update.dev; ethtool_cmis_fw_update(&module_fw->fw_update); module_flash_fw_work_list_del(&module_fw->list); - module_fw->fw_update.dev->ethtool->module_fw_flash_in_progress = false; - netdev_put(module_fw->fw_update.dev, &module_fw->dev_tracker); + + rtnl_lock(); + netdev_lock_ops(dev); + dev->ethtool->module_fw_flash_in_progress = false; + netdev_unlock_ops(dev); + rtnl_unlock(); + + netdev_put(dev, &module_fw->dev_tracker); release_firmware(module_fw->fw_update.fw); kfree(module_fw); } -- cgit v1.2.3 From 504eaefa44c8dec50f7499edcb36d24f3aefab2a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:13:07 -0700 Subject: ethtool: module: check fw_flash_in_progress under rtnl_lock ethnl_set_module_validate() inspects module_fw_flash_in_progress but validate is meant for _input_ validation, not state validation. rtnl_lock is not held, yet. Move the check into ethnl_set_module(). Fixes: 32b4c8b53ee7 ("ethtool: Add ability to flash transceiver modules' firmware") Reviewed-by: Maxime Chevallier Reviewed-by: Danielle Ratson Link: https://patch.msgid.link/20260522231312.1710836-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/module.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ethtool/module.c b/net/ethtool/module.c index cdb85e19a23b..5b49004ddf60 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -120,12 +120,6 @@ ethnl_set_module_validate(struct ethnl_req_info *req_info, if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]) return 0; - if (req_info->dev->ethtool->module_fw_flash_in_progress) { - NL_SET_ERR_MSG(info->extack, - "Module firmware flashing is in progress"); - return -EBUSY; - } - if (!ops->get_module_power_mode || !ops->set_module_power_mode) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY], @@ -148,6 +142,12 @@ ethnl_set_module(struct ethnl_req_info *req_info, struct genl_info *info) ops = dev->ethtool_ops; + if (dev->ethtool->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(info->extack, + "Module firmware flashing is in progress"); + return -EBUSY; + } + power_new.policy = nla_get_u8(tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]); ret = ops->get_module_power_mode(dev, &power, info->extack); if (ret < 0) -- cgit v1.2.3 From 760d04ebad5c4304f22c0d2251c9623b87a117c8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:13:08 -0700 Subject: ethtool: module: fix cleanup if socket used for flashing multiple devices When a single Netlink socket issues MODULE_FW_FLASH_ACT against multiple devices, ethnl_sock_priv_set() overwrites sk_priv->dev on each call, retaining only the last one. The socket priv is used on socket close, to walk the global work list and mark the uncompleted flashing work as "orphaned". Otherwise if another socket reuses the PID it will unexpectedly receive the flashing notifications. Don't record the device, record net pointer instead. The purpose of the dev is to scope the work to a netns, anyway. If we store netns the overrides are safe/a nop since all flashed devices must be in the same netns as the socket. Fixes: 32b4c8b53ee7 ("ethtool: Add ability to flash transceiver modules' firmware") Reviewed-by: Danielle Ratson Link: https://patch.msgid.link/20260522231312.1710836-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/module.c | 9 ++++----- net/ethtool/netlink.c | 4 ++-- net/ethtool/netlink.h | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/net/ethtool/module.c b/net/ethtool/module.c index 5b49004ddf60..ea4fb2a76650 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -291,11 +291,9 @@ void ethnl_module_fw_flash_sock_destroy(struct ethnl_sock_priv *sk_priv) spin_lock(&module_fw_flash_work_list_lock); list_for_each_entry(work, &module_fw_flash_work_list, list) { - if (work->fw_update.dev == sk_priv->dev && - work->fw_update.ntf_params.portid == sk_priv->portid) { + if (work->fw_update.ntf_params.portid == sk_priv->portid && + dev_net(work->fw_update.dev) == sk_priv->net) work->fw_update.ntf_params.closed_sock = true; - break; - } } spin_unlock(&module_fw_flash_work_list_lock); } @@ -332,7 +330,8 @@ module_flash_fw_schedule(struct net_device *dev, const char *file_name, fw_update->ntf_params.seq = info->snd_seq; fw_update->ntf_params.closed_sock = false; - err = ethnl_sock_priv_set(skb, dev, fw_update->ntf_params.portid, + err = ethnl_sock_priv_set(skb, dev_net(dev), + fw_update->ntf_params.portid, ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH); if (err < 0) goto err_release_firmware; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 5046023a30b1..7d45f9a884e5 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -53,7 +53,7 @@ const struct nla_policy ethnl_header_policy_phy_stats[] = { [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1), }; -int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid, +int ethnl_sock_priv_set(struct sk_buff *skb, struct net *net, u32 portid, enum ethnl_sock_type type) { struct ethnl_sock_priv *sk_priv; @@ -62,7 +62,7 @@ int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid, if (IS_ERR(sk_priv)) return PTR_ERR(sk_priv); - sk_priv->dev = dev; + sk_priv->net = net; sk_priv->portid = portid; sk_priv->type = type; diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index aaf6f2468768..fd2198e45d2b 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -318,12 +318,12 @@ enum ethnl_sock_type { }; struct ethnl_sock_priv { - struct net_device *dev; + struct net *net; u32 portid; enum ethnl_sock_type type; }; -int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid, +int ethnl_sock_priv_set(struct sk_buff *skb, struct net *net, u32 portid, enum ethnl_sock_type type); /** -- cgit v1.2.3 From 6c3f999a9d1338c6c89a9ff4549eafe72bc2e7b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:13:09 -0700 Subject: ethtool: cmis: require exact CDB reply length Malicious SFP module could respond with rpl_len longer than what cmis_cdb_process_reply() expected, leading to OOB writes. Malicious HW is a bit theoretical but some modules may just be buggy and/or the reads may occasionally get corrupted, so let's protect the kernel. The existing check protects from short replies. We need to protect from long ones, too. All callers that pass a non-zero rpl_exp_len cast the reply payload to a fixed-layout struct and read fields at fixed offsets, with no version negotiation or short-reply handling: - cmis_cdb_validate_password() - cmis_cdb_module_features_get() - cmis_fw_update_fw_mng_features_get() so let's assume that responses longer than expected do not have to be handled gracefully here. Add a warning message to make the debug easier in case my understanding is wrong... Note that page_data->length (argument of kmalloc) comes from last arg to ethtool_cmis_page_init() which is rpl_exp_len. Note2 that AIs also like to point out overflows in args->req.payload itself (which is a fixed-size 120 B buffer, on the stack), but callers should be reading structs defined by the standard, so protecting from requests for more data than max seem like defensive programming. Fixes: a39c84d79625 ("ethtool: cmis_cdb: Add a layer for supporting CDB commands") Reviewed-by: Danielle Ratson Link: https://patch.msgid.link/20260522231312.1710836-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/cmis_cdb.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c index 3670ca42dd40..f3a53a984460 100644 --- a/net/ethtool/cmis_cdb.c +++ b/net/ethtool/cmis_cdb.c @@ -513,8 +513,13 @@ static int cmis_cdb_process_reply(struct net_device *dev, } rpl = (struct ethtool_cmis_cdb_rpl *)page_data->data; - if ((args->rpl_exp_len > rpl->hdr.rpl_len + rpl_hdr_len) || - !rpl->hdr.rpl_chk_code) { + if (rpl->hdr.rpl_len != args->rpl_exp_len) { + netdev_warn(dev, "CDB reply length mismatch, expected %u got %u\n", + args->rpl_exp_len, rpl->hdr.rpl_len); + err = -EIO; + goto out; + } + if (!rpl->hdr.rpl_chk_code) { err = -EIO; goto out; } -- cgit v1.2.3 From 3e8c3d464c36bb342fe377b026577c7ec27fdbb4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:13:10 -0700 Subject: ethtool: cmis: fix u16-to-u8 truncation of msleep_pre_rpl ethtool_cmis_cdb_compose_args() accepts msleep_pre_rpl as u16 but stores it into the u8 field ethtool_cmis_cdb_cmd_args::msleep_pre_rpl, silently truncating values >= 256. Seven of the nine call sites pass 1000 ms (it's the third argument from the end). Fixes: a39c84d79625 ("ethtool: cmis_cdb: Add a layer for supporting CDB commands") Reviewed-by: Maxime Chevallier Reviewed-by: Danielle Ratson Link: https://patch.msgid.link/20260522231312.1710836-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/cmis.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h index 4a9a946cabf0..778783a0f23c 100644 --- a/net/ethtool/cmis.h +++ b/net/ethtool/cmis.h @@ -63,9 +63,9 @@ struct ethtool_cmis_cdb_request { * struct ethtool_cmis_cdb_cmd_args - CDB commands execution arguments * @req: CDB command fields as described in the CMIS standard. * @max_duration: Maximum duration time for command completion in msec. + * @msleep_pre_rpl: Waiting time before checking reply in msec. * @read_write_len_ext: Allowable additional number of byte octets to the LPL * in a READ or a WRITE commands. - * @msleep_pre_rpl: Waiting time before checking reply in msec. * @rpl_exp_len: Expected reply length in bytes. * @flags: Validation flags for CDB commands. * @err_msg: Error message to be sent to user space. @@ -73,8 +73,8 @@ struct ethtool_cmis_cdb_request { struct ethtool_cmis_cdb_cmd_args { struct ethtool_cmis_cdb_request req; u16 max_duration; + u16 msleep_pre_rpl; u8 read_write_len_ext; - u8 msleep_pre_rpl; u8 rpl_exp_len; u8 flags; char *err_msg; -- cgit v1.2.3 From 12c2496a71f82f63617971ca9b730dffa05cf58b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:13:11 -0700 Subject: ethtool: cmis: validate start_cmd_payload_size from module The CMIS firmware update code reads start_cmd_payload_size from the module's FW Management Features CDB reply and uses it directly as the byte count for memcpy. The destination buffer is 112 bytes (ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH - 8). So a malicious module (or corrupted response) can cause a OOB write later on in cmis_fw_update_start_download(). Let's error out. If modules that expect longer LPL writes actually exist we should revisit. struct cmis_cdb_start_fw_download_pl's definition has to move, no change there. Fixes: c4f78134d45c ("ethtool: cmis_fw_update: add a layer for supporting firmware update using CDB") Reviewed-by: Maxime Chevallier Reviewed-by: Danielle Ratson Link: https://patch.msgid.link/20260522231312.1710836-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/cmis_fw_update.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c index df5f344209c4..16190c97e1f7 100644 --- a/net/ethtool/cmis_fw_update.c +++ b/net/ethtool/cmis_fw_update.c @@ -44,6 +44,20 @@ enum cmis_cdb_fw_write_mechanism { CMIS_CDB_FW_WRITE_MECHANISM_BOTH = 0x11, }; +/* See section 9.7.2 "CMD 0101h: Start Firmware Download" in CMIS standard + * revision 5.2. + * struct cmis_cdb_start_fw_download_pl is a structured layout of the + * flat array, ethtool_cmis_cdb_request::payload. + */ +struct cmis_cdb_start_fw_download_pl { + __struct_group(cmis_cdb_start_fw_download_pl_h, head, /* no attrs */, + __be32 image_size; + __be32 resv1; + ); + u8 vendor_data[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH - + sizeof(struct cmis_cdb_start_fw_download_pl_h)]; +}; + static int cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, struct net_device *dev, @@ -86,6 +100,14 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, */ cdb->read_write_len_ext = rpl->read_write_len_ext; fw_mng->start_cmd_payload_size = rpl->start_cmd_payload_size; + if (fw_mng->start_cmd_payload_size > + sizeof_field(struct cmis_cdb_start_fw_download_pl, vendor_data)) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Start cmd payload size exceeds max LPL payload", + NULL); + return -EINVAL; + } + fw_mng->write_mechanism = rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL ? CMIS_CDB_FW_WRITE_MECHANISM_LPL : @@ -97,20 +119,6 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, return 0; } -/* See section 9.7.2 "CMD 0101h: Start Firmware Download" in CMIS standard - * revision 5.2. - * struct cmis_cdb_start_fw_download_pl is a structured layout of the - * flat array, ethtool_cmis_cdb_request::payload. - */ -struct cmis_cdb_start_fw_download_pl { - __struct_group(cmis_cdb_start_fw_download_pl_h, head, /* no attrs */, - __be32 image_size; - __be32 resv1; - ); - u8 vendor_data[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH - - sizeof(struct cmis_cdb_start_fw_download_pl_h)]; -}; - static int cmis_fw_update_start_download(struct ethtool_cmis_cdb *cdb, struct ethtool_cmis_fw_update_params *fw_update, -- cgit v1.2.3 From d5551f4c1800dc714cec86647bdd651ae0de923e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 May 2026 16:13:12 -0700 Subject: ethtool: cmis: validate fw->size against start_cmd_payload_size cmis_fw_update_start_download() copies start_cmd_payload_size bytes from the firmware blob into the CDB LPL vendor_data[] payload without validating that the FW has enough data. Since the start_cmd_payload_size can only be ~120B an image too short is most likely corrupted, so reject it. Fixes: c4f78134d45c ("ethtool: cmis_fw_update: add a layer for supporting firmware update using CDB") Reviewed-by: Maxime Chevallier Reviewed-by: Danielle Ratson Link: https://patch.msgid.link/20260522231312.1710836-10-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/cmis_fw_update.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c index 16190c97e1f7..291d04d2776a 100644 --- a/net/ethtool/cmis_fw_update.c +++ b/net/ethtool/cmis_fw_update.c @@ -130,6 +130,14 @@ cmis_fw_update_start_download(struct ethtool_cmis_cdb *cdb, u8 lpl_len; int err; + if (fw_update->fw->size < vendor_data_size) { + ethnl_module_fw_flash_ntf_err(fw_update->dev, + &fw_update->ntf_params, + "Firmware image too small for module's start payload", + NULL); + return -EINVAL; + } + pl.image_size = cpu_to_be32(fw_update->fw->size); memcpy(pl.vendor_data, fw_update->fw->data, vendor_data_size); -- cgit v1.2.3 From 60474437af5fe12abd20607766c809e0ad4e7245 Mon Sep 17 00:00:00 2001 From: Varadarajan Narayanan Date: Thu, 14 May 2026 12:15:31 +0530 Subject: spi: dt-bindings: spi-qpic-snand: Add ipq5210 compatible Since the QPIC-SPI-NAND flash controller present in ipq5210 is the same as the one found in ipq9574, document the ipq5210 compatible and with ipq9574 as the fallback. Signed-off-by: Varadarajan Narayanan Link: https://patch.msgid.link/20260514-ipq5210-nand-v1-1-cbdd7492e826@oss.qualcomm.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/qcom,spi-qpic-snand.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/qcom,spi-qpic-snand.yaml b/Documentation/devicetree/bindings/spi/qcom,spi-qpic-snand.yaml index 7d0571feb46d..829da22537d4 100644 --- a/Documentation/devicetree/bindings/spi/qcom,spi-qpic-snand.yaml +++ b/Documentation/devicetree/bindings/spi/qcom,spi-qpic-snand.yaml @@ -25,6 +25,7 @@ properties: - items: - enum: - qcom,ipq5018-snand + - qcom,ipq5210-snand - qcom,ipq5332-snand - qcom,ipq5424-snand - const: qcom,ipq9574-snand -- cgit v1.2.3 From cda64169bade79427f264e43d0f422eaed9dc116 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 13 May 2026 22:06:01 +0200 Subject: x86/microcode: Do not access MSR_IA32_PLATFORM_ID when running as a guest Patch in Fixes: causes the usual: unchecked MSR access error: RDMSR from 0x17 at ... (intel_get_platform_id) Call Trace: early_init_intel early_cpu_init setup_arch _printk start_kernel x86_64_start_reservations x86_64_start_kernel common_startup_64 because the kernel is booted in a guest. In order to avoid it, this MSR access needs to be prevented when running virtualized. That is usually done by checking X86_FEATURE_HYPERVISOR but for this particular case it is too early yet. The platform ID needs to be read as early as when microcode is loaded on the BSP: load_ucode_bsp ... -> get_microcode_blob ... -> intel_find_matching_signature and by that time, CPUID leafs haven't been parsed yet. The microcode loader already has logic to check early whether the kernel is running virtualized so make that globally available to arch/x86/. The query whether running virtualized is getting more and more prominent in recent times so might as well make it an arch-global var which the rest of the code can use. Fixes: d8630b67ca1ed ("x86/cpu: Add platform ID to CPU info structure") Reported-by: Vishal Verma Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Tested-by: Binbin Wu Link: https://lore.kernel.org/all/20260430020953.1405535-1-binbin.wu@linux.intel.com --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/microcode/amd.c | 4 ++-- arch/x86/kernel/cpu/microcode/core.c | 22 ++++++++++------------ arch/x86/kernel/cpu/microcode/intel.c | 3 +++ arch/x86/kernel/cpu/microcode/internal.h | 1 - 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 10b5355b323e..67dd932305db 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -733,6 +733,7 @@ bool xen_set_default_idle(void); #endif void __noreturn stop_this_cpu(void *dummy); +extern bool x86_hypervisor_present; void microcode_check(struct cpuinfo_x86 *prev_info); void store_cpu_caps(struct cpuinfo_x86 *info); diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index e533881284a1..5c0afae75e9f 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -322,7 +322,7 @@ static u32 get_patch_level(void) { u32 rev, dummy __always_unused; - if (IS_ENABLED(CONFIG_MICROCODE_DBG) && hypervisor_present) { + if (IS_ENABLED(CONFIG_MICROCODE_DBG) && x86_hypervisor_present) { int cpu = smp_processor_id(); if (!microcode_rev[cpu]) { @@ -714,7 +714,7 @@ static bool __apply_microcode_amd(struct microcode_amd *mc, u32 *cur_rev, invlpg(p_addr_end); } - if (IS_ENABLED(CONFIG_MICROCODE_DBG) && hypervisor_present) + if (IS_ENABLED(CONFIG_MICROCODE_DBG) && x86_hypervisor_present) microcode_rev[smp_processor_id()] = mc->hdr.patch_id; /* verify patch application was successful */ diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 651202e6fefb..45ca406a8112 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -57,7 +57,7 @@ bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV); u32 base_rev; u32 microcode_rev[NR_CPUS] = {}; -bool hypervisor_present; +bool __ro_after_init x86_hypervisor_present; /* * Synchronization. @@ -118,14 +118,9 @@ bool __init microcode_loader_disabled(void) /* * Disable when: * - * 1) The CPU does not support CPUID. - */ - if (!cpuid_feature()) { - dis_ucode_ldr = true; - return dis_ucode_ldr; - } - - /* + * 1) The CPU does not support CPUID, detected below in + * load_ucode_bsp(). + * * 2) Bit 31 in CPUID[1]:ECX is clear * The bit is reserved for hypervisor use. This is still not * completely accurate as XEN PV guests don't see that CPUID bit @@ -135,9 +130,7 @@ bool __init microcode_loader_disabled(void) * 3) Certain AMD patch levels are not allowed to be * overwritten. */ - hypervisor_present = native_cpuid_ecx(1) & BIT(31); - - if ((hypervisor_present && !IS_ENABLED(CONFIG_MICROCODE_DBG)) || + if ((x86_hypervisor_present && !IS_ENABLED(CONFIG_MICROCODE_DBG)) || amd_check_current_patch_level()) dis_ucode_ldr = true; @@ -179,6 +172,11 @@ void __init load_ucode_bsp(void) early_parse_cmdline(); + if (!cpuid_feature()) + dis_ucode_ldr = true; + else + x86_hypervisor_present = native_cpuid_ecx(1) & BIT(31); + if (microcode_loader_disabled()) return; diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 37ac4afe0972..a4c0a0cf928b 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -138,6 +138,9 @@ u32 intel_get_platform_id(void) { unsigned int val[2]; + if (x86_hypervisor_present) + return 0; + /* * This can be called early. Use CPUID directly instead of * relying on cpuinfo_x86 which may not be fully initialized. diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index 3b93c0676b4f..a10b547eda1e 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -48,7 +48,6 @@ extern struct early_load_data early_data; extern struct ucode_cpu_info ucode_cpu_info[]; extern u32 microcode_rev[NR_CPUS]; extern u32 base_rev; -extern bool hypervisor_present; struct cpio_data find_microcode_in_initrd(const char *path); -- cgit v1.2.3 From d90f236f8b9e354848bd226f581db27755ab901d Mon Sep 17 00:00:00 2001 From: Li Ming Date: Wed, 20 May 2026 20:14:57 +0800 Subject: cxl/test: Update mock dev array before calling platform_device_add() CXL test environment hits the following error sometimes. cxl_mem mem9: endpoint7 failed probe All mock memdevs are platform firmware devices added by cxl_test module, and cxl_test module also provides a platform device driver for them to create a memdev device to CXL subsystem. cxl_test module uses cxl_rcd/mem_single/mem arrays to store different types of mock memdevs. CXL drivers calls registered mock functions for a mock memdev by checking if a given memdev is in these arrays. When cxl_test module adds these mock memdevs, it always calls platform_device_add() before adding them to a suitable mock memdev array. However, there is a small window where CXL drivers calls mock function for a added memdev before it added to a mock memdev array. In above case, cxl endpoint driver considers a added memdev was not a mock memdev, then calling devm_cxl_endpoint_decoders_setup() for it rather than mock_endpoint_decoders_setup(). An appropriate solution is that adding a new mock device to a mock device array before calling platform_device_add() for it. It can guarantee the new mock device is visible to CXL subsystem. This patch introduces a new helped called cxl_mock_platform_device_add() to handle the issue, and uses the function for all mock devices addition. Fixes: 3a2b97b3210b ("cxl/test: Improve init-order fidelity relative to real-world systems") Signed-off-by: Li Ming Tested-by: Alison Schofield Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20260520121457.234404-1-ming.li@zohomail.com Signed-off-by: Dave Jiang --- tools/testing/cxl/test/cxl.c | 105 ++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 62 deletions(-) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 418669927fb0..296516eecfd6 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -1523,6 +1523,23 @@ static void mock_companion(struct acpi_device *adev, struct device *dev) #define SZ_64G (SZ_32G * 2) #endif +static int cxl_mock_platform_device_add(struct platform_device *pdev, + struct platform_device **ppdev) +{ + int rc; + + if (ppdev) + *ppdev = pdev; + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + if (ppdev) + *ppdev = NULL; + } + + return rc; +} + static __init int cxl_rch_topo_init(void) { int rc, i; @@ -1537,13 +1554,10 @@ static __init int cxl_rch_topo_init(void) goto err_bridge; mock_companion(adev, &pdev->dev); - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_rch[i]); + if (rc) goto err_bridge; - } - cxl_rch[i] = pdev; mock_pci_bus[idx].bridge = &pdev->dev; rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, "firmware_node"); @@ -1595,13 +1609,10 @@ static __init int cxl_single_topo_init(void) goto err_bridge; mock_companion(adev, &pdev->dev); - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_hb_single[i]); + if (rc) goto err_bridge; - } - cxl_hb_single[i] = pdev; mock_pci_bus[i + NR_CXL_HOST_BRIDGES].bridge = &pdev->dev; rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, "physical_node"); @@ -1620,12 +1631,9 @@ static __init int cxl_single_topo_init(void) goto err_port; pdev->dev.parent = &bridge->dev; - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_root_single[i]); + if (rc) goto err_port; - } - cxl_root_single[i] = pdev; } for (i = 0; i < ARRAY_SIZE(cxl_swu_single); i++) { @@ -1638,12 +1646,9 @@ static __init int cxl_single_topo_init(void) goto err_uport; pdev->dev.parent = &root_port->dev; - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_swu_single[i]); + if (rc) goto err_uport; - } - cxl_swu_single[i] = pdev; } for (i = 0; i < ARRAY_SIZE(cxl_swd_single); i++) { @@ -1657,12 +1662,9 @@ static __init int cxl_single_topo_init(void) goto err_dport; pdev->dev.parent = &uport->dev; - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_swd_single[i]); + if (rc) goto err_dport; - } - cxl_swd_single[i] = pdev; } return 0; @@ -1735,12 +1737,9 @@ static int cxl_mem_init(void) pdev->dev.parent = &dport->dev; set_dev_node(&pdev->dev, i % 2); - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_mem[i]); + if (rc) goto err_mem; - } - cxl_mem[i] = pdev; } for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) { @@ -1753,12 +1752,9 @@ static int cxl_mem_init(void) pdev->dev.parent = &dport->dev; set_dev_node(&pdev->dev, i % 2); - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_mem_single[i]); + if (rc) goto err_single; - } - cxl_mem_single[i] = pdev; } for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++) { @@ -1772,12 +1768,9 @@ static int cxl_mem_init(void) pdev->dev.parent = &rch->dev; set_dev_node(&pdev->dev, i % 2); - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_rcd[i]); + if (rc) goto err_rcd; - } - cxl_rcd[i] = pdev; } return 0; @@ -1869,13 +1862,10 @@ static __init int cxl_test_init(void) goto err_bridge; mock_companion(adev, &pdev->dev); - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_host_bridge[i]); + if (rc) goto err_bridge; - } - cxl_host_bridge[i] = pdev; mock_pci_bus[i].bridge = &pdev->dev; rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, "physical_node"); @@ -1893,12 +1883,9 @@ static __init int cxl_test_init(void) goto err_port; pdev->dev.parent = &bridge->dev; - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_root_port[i]); + if (rc) goto err_port; - } - cxl_root_port[i] = pdev; } BUILD_BUG_ON(ARRAY_SIZE(cxl_switch_uport) != ARRAY_SIZE(cxl_root_port)); @@ -1911,12 +1898,9 @@ static __init int cxl_test_init(void) goto err_uport; pdev->dev.parent = &root_port->dev; - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_switch_uport[i]); + if (rc) goto err_uport; - } - cxl_switch_uport[i] = pdev; } for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) { @@ -1929,12 +1913,9 @@ static __init int cxl_test_init(void) goto err_dport; pdev->dev.parent = &uport->dev; - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); + rc = cxl_mock_platform_device_add(pdev, &cxl_switch_dport[i]); + if (rc) goto err_dport; - } - cxl_switch_dport[i] = pdev; } rc = cxl_single_topo_init(); @@ -1953,9 +1934,9 @@ static __init int cxl_test_init(void) acpi0017_mock.dev.bus = &platform_bus_type; cxl_acpi->dev.groups = cxl_acpi_groups; - rc = platform_device_add(cxl_acpi); + rc = cxl_mock_platform_device_add(cxl_acpi, NULL); if (rc) - goto err_root; + goto err_rch; rc = cxl_mem_init(); if (rc) -- cgit v1.2.3 From b051bb6bf0a231117036aa607cadf55be8e63910 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 26 May 2026 08:35:31 -0700 Subject: blk-mq: reinsert cached request to the list A previous commit removed an optimization out of caution for a scenario that turns out not to be real: all the "queue_exit" goto's are safe to reinsert the request into the cached_rq's plug list as they are either from a non-blocking path, or a successful merge that already holds the queue reference. This optimization is most needed for small sequential workloads that successfully merge into larger requests. Fixes: dc278e9bf2b9 ("blk-mq: pop cached request if it is usable") Suggested-by: Ming Lei Suggested-by: Christoph Hellwig Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Link: https://patch.msgid.link/20260526153531.2365935-1-kbusch@meta.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 28c2d931e75e..a24175441380 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3246,7 +3246,7 @@ queue_exit: if (!rq) blk_queue_exit(q); else - blk_mq_free_request(rq); + rq_list_add_head(&plug->cached_rqs, rq); } #ifdef CONFIG_BLK_MQ_STACKING -- cgit v1.2.3 From 030675aa54cf757769b3db65642433d626b3ed7c Mon Sep 17 00:00:00 2001 From: Chaitanya Sabnis Date: Tue, 26 May 2026 15:52:40 +0530 Subject: i2c: davinci: fix division by zero on missing clock-frequency When the 'clock-frequency' property is missing from the device tree, the driver falls back to DAVINCI_I2C_DEFAULT_BUS_FREQ. However, this macro was defined in kHz (100), whereas the device tree property is expected in Hz. The probe function divided the fallback value by 1000, causing integer truncation that resulted in dev->bus_freq = 0. This triggered a deterministic division-by-zero kernel panic when calculating clock dividers later in the probe sequence. Fix this by redefining DAVINCI_I2C_DEFAULT_BUS_FREQ in Hz (100000) to match the expected device tree property unit, allowing the existing division logic to work correctly for both cases. Fixes: b04ce6385979 ("i2c: davinci: kill platform data") Reported-by: Sashiko Closes: https://lore.kernel.org/all/20260514044726.57297C2BCB7@smtp.kernel.org/ Signed-off-by: Chaitanya Sabnis Cc: # v6.14+ Reviewed-by: Bartosz Golaszewski Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20260526102240.4949-1-chaitanya.msabnis@gmail.com --- drivers/i2c/busses/i2c-davinci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index a773ba082321..66c23535656b 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -117,7 +117,7 @@ /* timeout for pm runtime autosuspend */ #define DAVINCI_I2C_PM_TIMEOUT 1000 /* ms */ -#define DAVINCI_I2C_DEFAULT_BUS_FREQ 100 +#define DAVINCI_I2C_DEFAULT_BUS_FREQ 100000 struct davinci_i2c_dev { struct device *dev; -- cgit v1.2.3 From 05f95729ca844704d15e49ce14868af4b403b32b Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Fri, 22 May 2026 22:34:23 -0400 Subject: l2tp: use refcount_inc_not_zero in l2tp_session_get_by_ifname A reader in l2tp_session_get_by_ifname() can return a pointer to a session whose refcount has reached zero. The getter takes its reference with plain refcount_inc(), but every other session getter in the same file (l2tp_v2_session_get, l2tp_v3_session_get, and the corresponding _get_next variants) uses refcount_inc_not_zero() because the IDR/RCU lookup can race with refcount_dec_and_test() -> l2tp_session_free() -> kfree_rcu(). The ifname getter is the only outlier; the inconsistency was raised on-list after 979c017803c4 ("l2tp: use list_del_rcu in l2tp_session_unhash"). A reader inside rcu_read_lock_bh() that matches session->ifname can be preempted between the strcmp() and the refcount_inc(). If the last reference drops on another CPU in that window, the reader's refcount_inc() runs on a counter that has reached zero. refcount_t catches the addition-on-zero, prints "refcount_t: addition on 0; use-after-free", saturates the counter, and returns the saturated pointer to the caller. Session memory is held live by the in-flight RCU read section, but the kfree_rcu() callback queued from l2tp_session_free() will free it once the grace period closes; a caller that dereferences the returned session past that point hits a slab-use-after-free. On PREEMPT_RT local_bh_disable() is a per-CPU sleeping lock and the preemption window is real; on stock PREEMPT kernels local_bh_disable() is a preempt_count increment that closes the cross-CPU race in practice (see below). Use refcount_inc_not_zero() and continue the list walk on failure, matching the other session getters in the file. The ifname getter is the only session getter in net/l2tp/ that still uses the bare refcount_inc() pattern; this change restores file-internal consistency. The success path is unchanged. Fixes: abe7a1a7d0b6 ("l2tp: improve tunnel/session refcount helpers") Cc: stable@vger.kernel.org Signed-off-by: Michael Bommarito Reviewed-by: James Chapman Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260523023423.2568972-1-michael.bommarito@gmail.com Signed-off-by: Jakub Kicinski --- net/l2tp/l2tp_core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 1455f67e01dd..9419c8555d22 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -441,12 +441,13 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { if (tunnel) { list_for_each_entry_rcu(session, &tunnel->session_list, list) { - if (!strcmp(session->ifname, ifname)) { - refcount_inc(&session->ref_count); - rcu_read_unlock_bh(); + if (strcmp(session->ifname, ifname)) + continue; + if (!refcount_inc_not_zero(&session->ref_count)) + continue; + rcu_read_unlock_bh(); - return session; - } + return session; } } } -- cgit v1.2.3 From d895767c337814cf4b97d5ad5375e5ed7e12018d Mon Sep 17 00:00:00 2001 From: "Lucien.Jheng" Date: Sun, 24 May 2026 14:39:15 +0800 Subject: net: phy: air_en8811h: add AN8811HB MCU assert/deassert support AN8811HB needs a MCU soft-reset cycle before firmware loading begins. Assert the MCU (hold it in reset) and immediately deassert (release) via a dedicated PBUS register pair (0x5cf9f8 / 0x5cf9fc), accessed through a registered mdio_device at PHY-addr+8. Add __air_pbus_reg_write() as a low-level helper taking a struct mdio_device *, create and register the PBUS mdio_device in an8811hb_probe() and store it in priv->pbusdev, then implement an8811hb_mcu_assert() / _deassert() on top of it. Add an8811hb_remove() to unregister the PBUS device on teardown. Wire both calls into an8811hb_load_firmware() and en8811h_restart_mcu() so every firmware load or MCU restart on AN8811HB correctly sequences the reset control registers. Fixes: 5afda1d734ed ("net: phy: air_en8811h: add Airoha AN8811HB support") Signed-off-by: Lucien Jheng Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260524063915.47961-1-lucienzx159@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/air_en8811h.c | 153 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 149 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/air_en8811h.c b/drivers/net/phy/air_en8811h.c index 29ae73e65caa..a86129ce693c 100644 --- a/drivers/net/phy/air_en8811h.c +++ b/drivers/net/phy/air_en8811h.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -170,9 +171,23 @@ #define AN8811HB_CLK_DRV_CKO_LDPWD BIT(13) #define AN8811HB_CLK_DRV_CKO_LPPWD BIT(14) +#define AN8811HB_MCU_SW_RST 0x5cf9f8 +#define AN8811HB_MCU_SW_RST_HOLD BIT(16) +#define AN8811HB_MCU_SW_RST_RUN (BIT(16) | BIT(0)) +#define AN8811HB_MCU_SW_START 0x5cf9fc +#define AN8811HB_MCU_SW_START_EN BIT(16) + +/* MII register constants for PBUS access (PHY addr + 8) */ +#define AIR_PBUS_ADDR_HIGH 0x1c +#define AIR_PBUS_DATA_HIGH 0x10 +#define AIR_PBUS_REG_ADDR_HIGH_MASK GENMASK(15, 6) +#define AIR_PBUS_REG_ADDR_LOW_MASK GENMASK(5, 2) + /* Led definitions */ #define EN8811H_LED_COUNT 3 +#define EN8811H_PBUS_ADDR_OFFS 8 + /* Default LED setup: * GPIO5 <-> LED0 On: Link detected, blink Rx/Tx * GPIO4 <-> LED1 On: Link detected at 2500 or 1000 Mbps @@ -201,6 +216,7 @@ struct en8811h_priv { struct clk_hw hw; struct phy_device *phydev; unsigned int cko_is_enabled; + struct mdio_device *pbusdev; }; enum { @@ -254,6 +270,31 @@ static int air_phy_write_page(struct phy_device *phydev, int page) return __phy_write(phydev, AIR_EXT_PAGE_ACCESS, page); } +static int __air_pbus_reg_write(struct mdio_device *mdiodev, + u32 pbus_reg, u32 pbus_data) +{ + int ret; + + ret = __mdiobus_write(mdiodev->bus, mdiodev->addr, AIR_EXT_PAGE_ACCESS, + upper_16_bits(pbus_reg)); + if (ret < 0) + return ret; + + ret = __mdiobus_write(mdiodev->bus, mdiodev->addr, AIR_PBUS_ADDR_HIGH, + FIELD_GET(AIR_PBUS_REG_ADDR_HIGH_MASK, pbus_reg)); + if (ret < 0) + return ret; + + ret = __mdiobus_write(mdiodev->bus, mdiodev->addr, + FIELD_GET(AIR_PBUS_REG_ADDR_LOW_MASK, pbus_reg), + lower_16_bits(pbus_data)); + if (ret < 0) + return ret; + + return __mdiobus_write(mdiodev->bus, mdiodev->addr, AIR_PBUS_DATA_HIGH, + upper_16_bits(pbus_data)); +} + static int __air_buckpbus_reg_write(struct phy_device *phydev, u32 pbus_address, u32 pbus_data) { @@ -570,10 +611,67 @@ static int an8811hb_load_file(struct phy_device *phydev, const char *name, return ret; } +static int an8811hb_mcu_assert(struct phy_device *phydev) +{ + struct en8811h_priv *priv = phydev->priv; + int ret; + + phy_lock_mdio_bus(phydev); + + ret = __air_pbus_reg_write(priv->pbusdev, AN8811HB_MCU_SW_RST, + AN8811HB_MCU_SW_RST_HOLD); + if (ret < 0) + goto unlock; + + ret = __air_pbus_reg_write(priv->pbusdev, AN8811HB_MCU_SW_START, 0); + if (ret < 0) + goto unlock; + + msleep(50); + phydev_dbg(phydev, "MCU asserted\n"); + +unlock: + phy_unlock_mdio_bus(phydev); + return ret; +} + +static int an8811hb_mcu_deassert(struct phy_device *phydev) +{ + struct en8811h_priv *priv = phydev->priv; + int ret; + + phy_lock_mdio_bus(phydev); + + ret = __air_pbus_reg_write(priv->pbusdev, AN8811HB_MCU_SW_START, + AN8811HB_MCU_SW_START_EN); + if (ret < 0) + goto unlock; + + ret = __air_pbus_reg_write(priv->pbusdev, AN8811HB_MCU_SW_RST, + AN8811HB_MCU_SW_RST_RUN); + if (ret < 0) + goto unlock; + + msleep(50); + phydev_dbg(phydev, "MCU deasserted\n"); + +unlock: + phy_unlock_mdio_bus(phydev); + return ret; +} + static int an8811hb_load_firmware(struct phy_device *phydev) { int ret; + ret = an8811hb_mcu_assert(phydev); + if (ret < 0) + return ret; + + ret = an8811hb_mcu_deassert(phydev); + if (ret < 0) + return ret; + ret = air_buckpbus_reg_write(phydev, EN8811H_FW_CTRL_1, EN8811H_FW_CTRL_1_START); if (ret < 0) @@ -662,6 +760,16 @@ static int en8811h_restart_mcu(struct phy_device *phydev) { int ret; + if (phy_id_compare_model(phydev->phy_id, AN8811HB_PHY_ID)) { + ret = an8811hb_mcu_assert(phydev); + if (ret < 0) + return ret; + + ret = an8811hb_mcu_deassert(phydev); + if (ret < 0) + return ret; + } + ret = air_buckpbus_reg_write(phydev, EN8811H_FW_CTRL_1, EN8811H_FW_CTRL_1_START); if (ret < 0) @@ -1166,6 +1274,7 @@ static int en8811h_leds_setup(struct phy_device *phydev) static int an8811hb_probe(struct phy_device *phydev) { + struct mdio_device *mdiodev; struct en8811h_priv *priv; int ret; @@ -1175,10 +1284,28 @@ static int an8811hb_probe(struct phy_device *phydev) return -ENOMEM; phydev->priv = priv; + /* + * The AN8811HB PHY address is restricted to 8-15 (decimal), + * depending on the board hardware strapping. + * This means the PBUS address is only in the range 16-21 (decimal), + * so we do not need to handle the case + * where the PBUS address exceeds 31 (decimal). + */ + mdiodev = mdio_device_create(phydev->mdio.bus, + phydev->mdio.addr + EN8811H_PBUS_ADDR_OFFS); + if (IS_ERR(mdiodev)) + return PTR_ERR(mdiodev); + + ret = mdio_device_register(mdiodev); + if (ret) + goto err_dev_free; + + priv->pbusdev = mdiodev; + ret = an8811hb_load_firmware(phydev); if (ret < 0) { phydev_err(phydev, "Load firmware failed: %d\n", ret); - return ret; + goto err_dev_create; } en8811h_print_fw_version(phydev); @@ -1191,22 +1318,29 @@ static int an8811hb_probe(struct phy_device *phydev) ret = en8811h_leds_setup(phydev); if (ret < 0) - return ret; + goto err_dev_create; priv->phydev = phydev; /* Co-Clock Output */ ret = an8811hb_clk_provider_setup(&phydev->mdio.dev, &priv->hw); if (ret) - return ret; + goto err_dev_create; /* Configure led gpio pins as output */ ret = air_buckpbus_reg_modify(phydev, AN8811HB_GPIO_OUTPUT, AN8811HB_GPIO_OUTPUT_345, AN8811HB_GPIO_OUTPUT_345); if (ret < 0) - return ret; + goto err_dev_create; return 0; + +err_dev_create: + mdio_device_remove(mdiodev); + +err_dev_free: + mdio_device_free(mdiodev); + return ret; } static int en8811h_probe(struct phy_device *phydev) @@ -1561,6 +1695,16 @@ static int en8811h_suspend(struct phy_device *phydev) return genphy_suspend(phydev); } +static void an8811hb_remove(struct phy_device *phydev) +{ + struct en8811h_priv *priv = phydev->priv; + + if (priv->pbusdev) { + mdio_device_remove(priv->pbusdev); + mdio_device_free(priv->pbusdev); + } +} + static struct phy_driver en8811h_driver[] = { { PHY_ID_MATCH_MODEL(EN8811H_PHY_ID), @@ -1587,6 +1731,7 @@ static struct phy_driver en8811h_driver[] = { PHY_ID_MATCH_MODEL(AN8811HB_PHY_ID), .name = "Airoha AN8811HB", .probe = an8811hb_probe, + .remove = an8811hb_remove, .get_features = en8811h_get_features, .config_init = an8811hb_config_init, .get_rate_matching = en8811h_get_rate_matching, -- cgit v1.2.3 From b4bc94353050b1fa7b702bd4c6600710dd926cff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 May 2026 20:13:35 +0000 Subject: tunnels: load network headers after skb_cow() in iptunnel_pmtud_build_icmp[v6]() Sashiko found that iptunnel_pmtud_build_icmp() and iptunnel_pmtud_build_icmpv6() were caching ip_hdr() and ipv6_hdr() before an skb_cow() call which can reallocate skb->head. Fix this possible UAF by initializing the local variables after the skb_cow() call. Remove skb_reset_network_header() calls which were not needed. Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") Signed-off-by: Eric Dumazet Reviewed-by: Stefano Brivio Link: https://patch.msgid.link/20260525201335.2361845-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ip_tunnel_core.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 2667f53482bd..c77a4c3fbe75 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -212,7 +212,7 @@ EXPORT_SYMBOL_GPL(iptunnel_handle_offloads); */ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) { - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; struct icmphdr *icmph; struct iphdr *niph; struct ethhdr eh; @@ -226,7 +226,6 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN); pskb_pull(skb, ETH_HLEN); - skb_reset_network_header(skb); err = pskb_trim(skb, 576 - sizeof(*niph) - sizeof(*icmph)); if (err) @@ -236,7 +235,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) err = skb_cow(skb, sizeof(*niph) + sizeof(*icmph) + ETH_HLEN); if (err) return err; - + iph = ip_hdr(skb); icmph = skb_push(skb, sizeof(*icmph)); *icmph = (struct icmphdr) { .type = ICMP_DEST_UNREACH, @@ -308,7 +307,7 @@ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) */ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); + const struct ipv6hdr *ip6h; struct icmp6hdr *icmp6h; struct ipv6hdr *nip6h; struct ethhdr eh; @@ -323,7 +322,6 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN); pskb_pull(skb, ETH_HLEN); - skb_reset_network_header(skb); err = pskb_trim(skb, IPV6_MIN_MTU - sizeof(*nip6h) - sizeof(*icmp6h)); if (err) @@ -334,6 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) if (err) return err; + ip6h = ipv6_hdr(skb); icmp6h = skb_push(skb, sizeof(*icmp6h)); *icmp6h = (struct icmp6hdr) { .icmp6_type = ICMPV6_PKT_TOOBIG, -- cgit v1.2.3 From 7d9ef0cb271555d8cf39fefe6c981e1493b25ecf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 May 2026 20:36:42 +0000 Subject: vxlan: do not reuse cached ip_hdr() value after skb_tunnel_check_pmtu() skb_tunnel_check_pmtu() can change skb->head. Reusing old_iph afer skb_tunnel_check_pmtu() can cause an UAF. Use instead ip_hdr(skb) as done in drivers/net/bareudp.c and drivers/net/geneve.c. Found by Sashiko. Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") Signed-off-by: Eric Dumazet Reviewed-by: Stefano Brivio Link: https://patch.msgid.link/20260525203642.2389723-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index e88798497503..b5b1253ac08b 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2531,7 +2531,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto out_unlock; } - tos = ip_tunnel_ecn_encap(tos, old_iph, skb); + tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr), vni, md, flags, udp_sum); @@ -2605,7 +2605,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto out_unlock; } - tos = ip_tunnel_ecn_encap(tos, old_iph, skb); + tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb); ttl = ttl ? : ip6_dst_hoplimit(ndst); skb_scrub_packet(skb, xnet); err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr), -- cgit v1.2.3 From 509323077ef79a26ba0c60bb556e45c12c398b2d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 May 2026 11:55:12 +0000 Subject: tunnels: do not assume transport header in iptunnel_pmtud_check_icmp() In some cases, iptunnel_pmtud_check_icmp() can be called while skb transport header is not set. This triggers an out-of-bound access, because (typeof(skb->transport_header))~0U is 65535. Access the icmp header based on IPv4 network header, after making sure icmp->type is present in skb linear part. Note that iptunnel_pmtud_check_icmpv6()) is fine. Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") Reported-by: Damiano Melotti Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260522115512.1519110-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ip_tunnel_core.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index c77a4c3fbe75..d3c677e9bff2 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -280,7 +280,6 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) */ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) { - const struct icmphdr *icmph = icmp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); if (mtu < 576 || iph->frag_off != htons(IP_DF)) @@ -291,9 +290,17 @@ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) ipv4_is_lbcast(iph->saddr) || ipv4_is_multicast(iph->saddr)) return 0; - if (iph->protocol == IPPROTO_ICMP && icmp_is_err(icmph->type)) - return 0; + if (iph->protocol == IPPROTO_ICMP) { + const struct icmphdr *icmph; + if (!pskb_network_may_pull(skb, iph->ihl * 4 + + offsetofend(struct icmphdr, type))) + return 0; + iph = ip_hdr(skb); + icmph = (void *)iph + iph->ihl * 4; + if (icmp_is_err(icmph->type)) + return 0; + } return iptunnel_pmtud_build_icmp(skb, mtu); } -- cgit v1.2.3 From 0e60dafe97eca61721f3db456f97d97a80c6c8ae Mon Sep 17 00:00:00 2001 From: Ali Ganiyev Date: Mon, 25 May 2026 10:23:47 +0900 Subject: ksmbd: OOB read regression in smb_check_perm_dacl() ACE-walk loops Commit d07b26f39246 ("ksmbd: require minimum ACE size in smb_check_perm_dacl()") introduced a transposed bounds check: if (offsetof(struct smb_ace, sid) + aces_size < CIFS_SID_BASE_SIZE) Since offsetof(..sid) is 8 and CIFS_SID_BASE_SIZE is 8, this evaluates to `aces_size < 0`. Because `aces_size` is always non-negative, this check becomes dead code and never breaks the loop. Worse, that commit removed the old 4-byte guard, meaning the loop now reads `ace->size` (offset 2) even when `aces_size` is 0-3 bytes. This re-opens a 2-byte heap out-of-bounds (OOB) read past the pntsd allocation during subsequent SMB2_CREATE operations. Fix this by properly transposing the comparison to require at least 16 bytes (8-byte offset + 8-byte SID base), matching the correct form used in smb_inherit_dacl(). Fixes: d07b26f39246 ("ksmbd: require minimum ACE size in smb_check_perm_dacl()") Cc: stable@vger.kernel.org Signed-off-by: Ali Ganiyev Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smbacl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index c2d9be52a311..664b1b4a3233 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1446,8 +1446,8 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); aces_size = acl_size - sizeof(struct smb_acl); for (i = 0; i < le16_to_cpu(pdacl->num_aces); i++) { - if (offsetof(struct smb_ace, sid) + - aces_size < CIFS_SID_BASE_SIZE) + if (aces_size < offsetof(struct smb_ace, sid) + + CIFS_SID_BASE_SIZE) break; ace_size = le16_to_cpu(ace->size); if (ace_size > aces_size || @@ -1467,8 +1467,8 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); aces_size = acl_size - sizeof(struct smb_acl); for (i = 0; i < le16_to_cpu(pdacl->num_aces); i++) { - if (offsetof(struct smb_ace, sid) + - aces_size < CIFS_SID_BASE_SIZE) + if (aces_size < offsetof(struct smb_ace, sid) + + CIFS_SID_BASE_SIZE) break; ace_size = le16_to_cpu(ace->size); if (ace_size > aces_size || -- cgit v1.2.3 From 2f15dcd0d4b502c704a52f5c7de128b163677978 Mon Sep 17 00:00:00 2001 From: Aleksandr Golovnya Date: Tue, 26 May 2026 01:50:18 +0700 Subject: ksmbd: release ksmbd_inode ref via ksmbd_inode_put on lookup paths ksmbd_query_inode_status() and ksmbd_lookup_fd_inode() both take a reference on a ksmbd_inode via __ksmbd_inode_lookup() (which performs atomic_inc_not_zero()) and later release it using a bare atomic_dec(&ci->m_count). Unlike ksmbd_inode_put(), a bare atomic_dec() does not check whether the reference count has reached zero, so if the caller happens to drop the last reference, the ksmbd_inode is leaked: it stays in the global inode hash table with m_count == 0, future __ksmbd_inode_lookup() calls reject it via atomic_inc_not_zero(), and ksmbd_inode_free() is never invoked. The race is: T1: __ksmbd_inode_lookup() -> atomic_inc_not_zero(): m_count = 2 T2: ksmbd_inode_put() -> atomic_dec_and_test(): m_count = 1 (not freed) T1: atomic_dec(&ci->m_count) -> m_count = 0 return (LEAK) In ksmbd_lookup_fd_inode() the matched-fp path (which now also uses ksmbd_inode_put()) cannot currently reach m_count == 0 because the matched ksmbd_file holds its own reference on ci, but converting it to the proper API keeps the three call sites consistent and avoids future regressions if the locking changes. Because ksmbd_inode_put() may free the ksmbd_inode if this drops the last reference, the call must happen after up_read(&ci->m_lock) on the two affected paths in ksmbd_lookup_fd_inode(). On the no-match path this is a pure reordering; on the matched path ksmbd_fp_get() is moved above the unlock so that the returned ksmbd_file is pinned before the inode reference is released. Signed-off-by: Aleksandr Golovnya Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs_cache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 5a232d94f567..4d2d33df6231 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -217,7 +217,7 @@ int ksmbd_query_inode_status(struct dentry *dentry) ret = KSMBD_INODE_STATUS_OK; up_read(&ci->m_lock); - atomic_dec(&ci->m_count); + ksmbd_inode_put(ci); return ret; } @@ -719,14 +719,14 @@ struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry) down_read(&ci->m_lock); list_for_each_entry(lfp, &ci->m_fp_list, node) { if (inode == file_inode(lfp->filp)) { - atomic_dec(&ci->m_count); lfp = ksmbd_fp_get(lfp); up_read(&ci->m_lock); + ksmbd_inode_put(ci); return lfp; } } - atomic_dec(&ci->m_count); up_read(&ci->m_lock); + ksmbd_inode_put(ci); return NULL; } -- cgit v1.2.3 From cc57232cae23c0df91b4a59d0f519141ce9b5b02 Mon Sep 17 00:00:00 2001 From: Sean Shen Date: Tue, 26 May 2026 22:07:16 +0900 Subject: ksmbd: fix FSCTL permission bypass by adding a permission check for FSCTL_SET_SPARSE FSCTL_SET_SPARSE in fsctl_set_sparse() modifies the file's sparse attribute and saves it through xattr without any permission checks. This exposes two issues: 1) A client on a read-only share can change the sparse attribute on files it opened, even though the share is read-only. Other FSCTL write operations already check test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE), but FSCTL_SET_SPARSE does not. 2) Even on writable shares, clients without FILE_WRITE_DATA or FILE_WRITE_ATTRIBUTES access should not modify the sparse attribute. Similar handle-level checks exist in other functions but are missing here. Add both share-level writable check and per-handle access check. Use goto out on error to avoid leaking file references. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: Namjae Jeon Cc: Sergey Senozhatsky Cc: Steve French Signed-off-by: Sean Shen Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 5128a693aca6..620bcfbbfd92 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -8202,9 +8202,20 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id, int ret = 0; __le32 old_fattr; + if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) { + ksmbd_debug(SMB, "User does not have write permission\n"); + return -EACCES; + } + fp = ksmbd_lookup_fd_fast(work, id); if (!fp) return -ENOENT; + + if (!(fp->daccess & (FILE_WRITE_DATA_LE | FILE_WRITE_ATTRIBUTES_LE))) { + ret = -EACCES; + goto out; + } + idmap = file_mnt_idmap(fp->filp); old_fattr = fp->f_ci->m_fattr; -- cgit v1.2.3 From dd433671fef381fdaf7b530c631e6b782d66e224 Mon Sep 17 00:00:00 2001 From: Qi Tang Date: Sat, 23 May 2026 22:32:45 +0800 Subject: ipv6: validate extension header length before copying to cmsg ip6_datagram_recv_specific_ctl() builds IPV6_{HOPOPTS,DSTOPTS,RTHDR} cmsgs (and their IPV6_2292* legacy counterparts) by trusting the on-wire hdrlen byte (ptr[1]) when computing the put_cmsg() length. The length was validated only at parse time (ipv6_parse_hopopts(), etc.). An nftables payload-write expression can rewrite hdrlen after parsing and before the skb reaches recvmsg; the write itself is in-bounds but put_cmsg() then reads up to ((hdrlen+1) << 3) = 2040 bytes from an 8-byte header. nftables is reachable from an unprivileged user namespace, so this is an unprivileged slab-out-of-bounds read: BUG: KASAN: slab-out-of-bounds in put_cmsg+0x3ac/0x540 put_cmsg+0x3ac/0x540 udpv6_recvmsg+0xca0/0x1250 sock_recvmsg+0xdf/0x190 ____sys_recvmsg+0x1b1/0x620 Add ipv6_get_exthdr_len() which validates that at least two bytes are accessible before reading the hdrlen field, then checks the computed length against skb_tail_pointer(skb), returning 0 on failure. Extension headers are kept in the linear skb area by pskb_may_pull() during input, so skb_tail_pointer() is the correct bound. Use ipv6_get_exthdr_len() at all non-AH call sites: the five standalone cmsg blocks (HbH, 2292HbH, 2292DSTOPTS x2, 2292RTHDR) and the three standard cases in the extension-header walk loop (DSTOPTS, ROUTING, default). AH retains an inline bounds check because its length formula differs ((ptr[1]+2)<<2). The walk loop also gets a pre-read bounds check at the top to validate ptr before any case accesses ptr[0] or ptr[1]. When the walk loop detects a corrupted header, return from the function instead of continuing to process later socket options. Cc: stable@vger.kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Qi Tang Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260523143245.2281415-1-tpluszz77@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/datagram.c | 54 +++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index ca3605acb433..38d7b4845281 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -617,6 +617,18 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, } } +static u16 ipv6_get_exthdr_len(const struct sk_buff *skb, const u8 *ptr) +{ + u16 len; + + if (ptr + 2 > skb_tail_pointer(skb)) + return 0; + + len = (ptr[1] + 1) << 3; + + return (len <= skb_tail_pointer(skb) - ptr) ? len : 0; +} + void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) { @@ -643,7 +655,10 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, /* HbH is allowed only once */ if (np->rxopt.bits.hopopts && (opt->flags & IP6SKB_HOPBYHOP)) { u8 *ptr = nh + sizeof(struct ipv6hdr); - put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr); + u16 len = ipv6_get_exthdr_len(skb, ptr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, len, ptr); } if (opt->lastopt && @@ -664,26 +679,37 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, unsigned int len; u8 *ptr = nh + off; + if (ptr + 2 > skb_tail_pointer(skb)) + return; + switch (nexthdr) { case IPPROTO_DSTOPTS: nexthdr = ptr[0]; - len = (ptr[1] + 1) << 3; + len = ipv6_get_exthdr_len(skb, ptr); + if (!len) + return; if (np->rxopt.bits.dstopts) put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, len, ptr); break; case IPPROTO_ROUTING: nexthdr = ptr[0]; - len = (ptr[1] + 1) << 3; + len = ipv6_get_exthdr_len(skb, ptr); + if (!len) + return; if (np->rxopt.bits.srcrt) put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, len, ptr); break; case IPPROTO_AH: nexthdr = ptr[0]; len = (ptr[1] + 2) << 2; + if (ptr + len > skb_tail_pointer(skb)) + return; break; default: nexthdr = ptr[0]; - len = (ptr[1] + 1) << 3; + len = ipv6_get_exthdr_len(skb, ptr); + if (!len) + return; break; } @@ -705,19 +731,31 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, } if (np->rxopt.bits.ohopopts && (opt->flags & IP6SKB_HOPBYHOP)) { u8 *ptr = nh + sizeof(struct ipv6hdr); - put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr); + u16 len = ipv6_get_exthdr_len(skb, ptr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, len, ptr); } if (np->rxopt.bits.odstopts && opt->dst0) { u8 *ptr = nh + opt->dst0; - put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); + u16 len = ipv6_get_exthdr_len(skb, ptr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, len, ptr); } if (np->rxopt.bits.osrcrt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(nh + opt->srcrt); - put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr); + u16 len = ipv6_get_exthdr_len(skb, (u8 *)rthdr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, len, rthdr); } if (np->rxopt.bits.odstopts && opt->dst1) { u8 *ptr = nh + opt->dst1; - put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); + u16 len = ipv6_get_exthdr_len(skb, ptr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, len, ptr); } if (np->rxopt.bits.rxorigdstaddr) { struct sockaddr_in6 sin6; -- cgit v1.2.3 From 0a10faad5ca58332ad70f7663ba82611f4daf736 Mon Sep 17 00:00:00 2001 From: Fabian Lippold Date: Tue, 26 May 2026 17:41:01 +0200 Subject: ALSA: hda/realtek: add quirk for HP Dragonfly Folio G3 2-in-1 Add PCI quirk for HP Dragonfly Folio G3 (PCI ID 103c:8a06) to select the CS35L41 SPI4 & GPIO LED fixup variant. Signed-off-by: Fabian Lippold Link: https://patch.msgid.link/20260526154418.1850568-3-fabianlippold1184@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 212adce0f8e6..dcbc669842e0 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7070,6 +7070,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x89d3, "HP EliteBook 645 G9 (MB 89D2)", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x89da, "HP Spectre x360 14t-ea100", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), SND_PCI_QUIRK(0x103c, 0x89e7, "HP Elite x2 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8a06, "HP Dragonfly Folio G3 2-in-1", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8a0f, "HP Pavilion 14-ec1xxx", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8a1f, "HP Laptop 14s-dr5xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8a20, "HP Laptop 15s-fq5xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), -- cgit v1.2.3 From 4db42e5fb9327c27b41f26bb9427ba0b97ecec30 Mon Sep 17 00:00:00 2001 From: Jakub Pisarczyk Date: Tue, 26 May 2026 22:18:30 +0200 Subject: ALSA: hda/cs420x: Add CS4208 fixup for iMac16,1 The 21.5" Retina 4K iMac (Late 2015, DMI product name "iMac16,1") ships with a Cirrus Logic CS4208 codec wired to an external speaker amplifier enabled through codec GPIO0 -- the same arrangement as the late-2013 MacBookPro 11,x. Without a matching entry in cs4208_mac_fixup_tbl[] the fixup picker logs: snd_hda_codec_cs420x hdaudioC1D0: CS4208: picked fixup for codec SSID 106b:0000 i.e. an empty fixup name, GPIO0 stays low, the external amp is never powered up, and the internal speakers are silent on a stock kernel. The codec SSID reported by hardware is 0x106b:0x7f00. Reusing CS4208_MBP11 (GPIO0 + SPDIF switch fixup) makes the internal speakers and S/PDIF output work out of the box, removing the need for users to set `options snd_hda_intel model=mbp11` via /etc/modprobe.d/. Tested on iMac16,1 (kernel 6.17.0): four internal drivers (Left tweeter, Left woofer, Right tweeter, Right woofer, exposed as the 4 channels of the analog-surround-40 ALSA profile) produce audio after the fixup is applied. Signed-off-by: Jakub Pisarczyk Link: https://patch.msgid.link/20260526201830.34097-1-pisarz77@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/cirrus/cs420x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/cirrus/cs420x.c b/sound/hda/codecs/cirrus/cs420x.c index 42559edbba05..85c2ecf46d38 100644 --- a/sound/hda/codecs/cirrus/cs420x.c +++ b/sound/hda/codecs/cirrus/cs420x.c @@ -582,6 +582,7 @@ static const struct hda_quirk cs4208_mac_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x7200, "MacBookAir 6,2", CS4208_MBA6), SND_PCI_QUIRK(0x106b, 0x7800, "MacPro 6,1", CS4208_MACMINI), SND_PCI_QUIRK(0x106b, 0x7b00, "MacBookPro 12,1", CS4208_MBP11), + SND_PCI_QUIRK(0x106b, 0x7f00, "iMac 16,1", CS4208_MBP11), {} /* terminator */ }; -- cgit v1.2.3 From 14912d497188283f5a0aa5daaa161e52f79c7f34 Mon Sep 17 00:00:00 2001 From: Lianqin Hu Date: Wed, 27 May 2026 03:33:08 +0000 Subject: ALSA: usb-audio: Add iface reset and delay quirk for TAE1160 USB Audio Setting up the interface when suspended/resumeing fail on this card. Adding a reset and delay quirk will eliminate this problem. usb 1-1: new full-speed USB device number 2 using xhci-hcd usb 1-1: New USB device found, idVendor=25aa, idProduct=600b usb 1-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 1-1: Product: TAE1159 usb 1-1: Manufacturer: Generic usb 1-1: SerialNumber: 20210726905926 Signed-off-by: Lianqin Hu Link: https://patch.msgid.link/TYUPR06MB621736D7C85D43200E54E740D2082@TYUPR06MB6217.apcprd06.prod.outlook.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 31cbe383ae65..3d1b3523b020 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2449,6 +2449,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_DSD_RAW), DEVICE_FLG(0x2522, 0x0007, /* LH Labs Geek Out HD Audio 1V5 */ QUIRK_FLAG_SET_IFACE_FIRST), + DEVICE_FLG(0x25aa, 0x600b, /* TAE1159 */ + QUIRK_FLAG_FORCE_IFACE_RESET | QUIRK_FLAG_IFACE_DELAY), DEVICE_FLG(0x262a, 0x9302, /* ddHiFi TC44C */ QUIRK_FLAG_DSD_RAW), DEVICE_FLG(0x2708, 0x0002, /* Audient iD14 */ -- cgit v1.2.3 From 1750ad1388e03fb27068cd1f22c9c8b4590fe936 Mon Sep 17 00:00:00 2001 From: Qiang Ma Date: Tue, 26 May 2026 15:46:40 +0800 Subject: KVM: arm64: PMU: Preserve AArch32 counter low bits AArch32 writes to PMU event counters cannot update the top 32 bits, even when PMUv3p5 makes the counters 64-bit. KVM therefore needs to preserve the existing high half and only update the low half written by the guest, unless the caller explicitly forces a full reset through PMCR.P. The current code masks @val down to the old high half before taking lower_32_bits(val), which means the low half is always zero. As a result, AArch32 writes to event counters discard the guest-provided low 32 bits instead of storing them. Build the new value from the old high 32 bits and the low 32 bits of the value supplied by the guest. Fixes: 26d2d0594d70 ("KVM: arm64: PMU: Do not let AArch32 change the counters' top 32 bits") Signed-off-by: Qiang Ma Signed-off-by: Marc Zyngier Link: https://patch.msgid.link/20260526074640.791991-1-maqianga@uniontech.com Cc: stable@vger.kernel.org --- arch/arm64/kvm/pmu-emul.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index e1860acae641..c816db5d6761 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -174,8 +174,8 @@ static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force) * action is to use PMCR.P, which will reset them to * 0 (the only use of the 'force' parameter). */ - val = __vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32); - val |= lower_32_bits(val); + val = (__vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32)) | + lower_32_bits(val); } __vcpu_assign_sys_reg(vcpu, reg, val); -- cgit v1.2.3 From f63ad68e18d774a5d15cd7e405ead63f6b322679 Mon Sep 17 00:00:00 2001 From: Cássio Gabriel Date: Wed, 27 May 2026 09:24:00 -0300 Subject: ASoC: codecs: simple-mux: Fix enum control bounds check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit simple_mux_control_put() rejects values greater than e->items, but enum control values are zero based. For the two-entry mux used by this driver, valid values are 0 and 1, so value 2 must be rejected as well. Accepting e->items can store an invalid mux state, pass it to the GPIO setter, and pass it on to the DAPM mux update path where it is used as an index into the enum text array. Use the same >= e->items check used by the ASoC enum helpers. Fixes: 342fbb7578d1 ("ASoC: add simple-mux") Signed-off-by: Cássio Gabriel Link: https://patch.msgid.link/20260527-asoc-simple-mux-enum-bounds-v1-1-3f805b9fc671@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/simple-mux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/simple-mux.c b/sound/soc/codecs/simple-mux.c index 069555f35f73..c2f906a3f074 100644 --- a/sound/soc/codecs/simple-mux.c +++ b/sound/soc/codecs/simple-mux.c @@ -51,7 +51,7 @@ static int simple_mux_control_put(struct snd_kcontrol *kcontrol, struct snd_soc_component *c = snd_soc_dapm_to_component(dapm); struct simple_mux *priv = snd_soc_component_get_drvdata(c); - if (ucontrol->value.enumerated.item[0] > e->items) + if (ucontrol->value.enumerated.item[0] >= e->items) return -EINVAL; if (priv->mux == ucontrol->value.enumerated.item[0]) -- cgit v1.2.3 From 0f7abb6eaa3c3965f925e231c18409dac4f5a0c1 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Thu, 21 May 2026 13:46:11 +0100 Subject: KVM: arm64: Fix meta-page unsharing in pKVM hyp tracing As the hyp_trace_buffer_unshare_hyp() function name suggests we should unshare all the previously shared pages, otherwise we leak hyp-shared pages which won't be reusable for hyp memory. Fix the typo by calling __unshare_page() on the meta-page, ensuring all previously shared pages are correctly unshared. Fixes: 3aed038aac8d ("KVM: arm64: Add trace remote for the nVHE/pKVM hyp") Signed-off-by: Vincent Donnefort Link: https://patch.msgid.link/20260521124613.911067-2-vdonnefort@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp_trace.c b/arch/arm64/kvm/hyp_trace.c index 8b7f2bf2fba8..06805b426101 100644 --- a/arch/arm64/kvm/hyp_trace.c +++ b/arch/arm64/kvm/hyp_trace.c @@ -189,7 +189,7 @@ static void hyp_trace_buffer_unshare_hyp(struct hyp_trace_buffer *trace_buffer, if (cpu > last_cpu) break; - __share_page(rb_desc->meta_va); + __unshare_page(rb_desc->meta_va); for (p = 0; p < rb_desc->nr_page_va; p++) __unshare_page(rb_desc->page_va[p]); } -- cgit v1.2.3 From a23780ea9db3f3cadbb52ff6151384bff89d95d2 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Thu, 21 May 2026 13:46:12 +0100 Subject: KVM: arm64: Fix rollback in hyp_trace_buffer_share_hyp() When sharing the trace buffer with the hypervisor, if sharing a page fails, the rollback path in hyp_trace_buffer_share_hyp() misses unsharing the metadata page (meta_va) which was successfully shared before entering the page sharing loop. Additionally, if a failure occurs, the cleanup calls hyp_trace_buffer_unshare_hyp() with an incorrect CPU index. Since that CPU's pages were already rolled back locally in the loop, this leads to duplicate unsharing attempts. Fix both issues affecting the rollback. Fixes: 3aed038aac8d ("KVM: arm64: Add trace remote for the nVHE/pKVM hyp") Reported-by: Sashiko Signed-off-by: Vincent Donnefort Link: https://patch.msgid.link/20260521124613.911067-3-vdonnefort@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp_trace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp_trace.c b/arch/arm64/kvm/hyp_trace.c index 06805b426101..8595f9bdb3dc 100644 --- a/arch/arm64/kvm/hyp_trace.c +++ b/arch/arm64/kvm/hyp_trace.c @@ -212,14 +212,15 @@ static int hyp_trace_buffer_share_hyp(struct hyp_trace_buffer *trace_buffer) } if (ret) { - for (p--; p >= 0; p--) + while (--p >= 0) __unshare_page(rb_desc->page_va[p]); + __unshare_page(rb_desc->meta_va); break; } } if (ret) - hyp_trace_buffer_unshare_hyp(trace_buffer, cpu--); + hyp_trace_buffer_unshare_hyp(trace_buffer, --cpu); return ret; } -- cgit v1.2.3 From adae9996c04fea3b1791099b6d79e1df76d50849 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Thu, 21 May 2026 13:46:13 +0100 Subject: KVM: arm64: Fix memory leak in hyp_trace_unload() During trace remote loading, hyp_trace_load() allocates the descriptor pages but fails to store the allocated size in trace_buffer->desc_size. As a result, when unloading the trace buffer, hyp_trace_unload() calls free_pages_exact() with a size of 0 which fails to free the memory. Fix this by updating the descriptor size in trace_buffer->desc_size. Fixes: 3aed038aac8d ("KVM: arm64: Add trace remote for the nVHE/pKVM hyp") Reported-by: Sashiko Signed-off-by: Vincent Donnefort Link: https://patch.msgid.link/20260521124613.911067-4-vdonnefort@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp_trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kvm/hyp_trace.c b/arch/arm64/kvm/hyp_trace.c index 8595f9bdb3dc..c4b3ee552131 100644 --- a/arch/arm64/kvm/hyp_trace.c +++ b/arch/arm64/kvm/hyp_trace.c @@ -249,6 +249,7 @@ static struct trace_buffer_desc *hyp_trace_load(unsigned long size, void *priv) goto err_free_desc; trace_buffer->desc = desc; + trace_buffer->desc_size = desc_size; ret = hyp_trace_buffer_alloc_bpages_backing(trace_buffer, size); if (ret) @@ -298,6 +299,7 @@ static void hyp_trace_unload(struct trace_buffer_desc *desc, void *priv) hyp_trace_buffer_free_bpages_backing(trace_buffer); free_pages_exact(trace_buffer->desc, trace_buffer->desc_size); trace_buffer->desc = NULL; + trace_buffer->desc_size = 0; } static int hyp_trace_enable_tracing(bool enable, void *priv) -- cgit v1.2.3 From f657a6a3ba4c20bc01f5be3752d53498ee1bfe35 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Fri, 22 May 2026 22:05:32 +0530 Subject: drm/xe: Restore IDLEDLY regiter on engine reset Wa_16023105232 programs the register IDLEDLY. The register is reset whenever the engine is reset. Therefore it should be added to the GuC save-restore register list for it to be restored after reset. Fixes: 7c53ff050ba8 ("drm/xe: Apply Wa_16023105232") Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260522163531.1365540-2-balasubramani.vivekanandan@intel.com Signed-off-by: Balasubramani Vivekanandan (cherry picked from commit df1cfe24743a93b71eab27687e148ab8ae9b69e3) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ads.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 2b835d48b565..5760251cb685 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -767,6 +767,11 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, } } + if (XE_GT_WA(hwe->gt, 16023105232)) + guc_mmio_regset_write_one(ads, regset_map, + RING_IDLEDLY(hwe->mmio_base), + count++); + return count; } -- cgit v1.2.3 From 2e7f55eb408c3f72ee1957a0d0ad11d8648a6379 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 17 May 2026 09:17:42 -0400 Subject: drm/amdgpu: fix lock leak on ENOMEM in AMDGPU_GEM_OP_GET_MAPPING_INFO The AMDGPU_GEM_OP_GET_MAPPING_INFO branch of amdgpu_gem_op_ioctl() holds three cleanup-tracked resources before calling kvcalloc(): the drm_gem_object reference from drm_gem_object_lookup(), the drm_exec lock on the looked-up GEM via drm_exec_lock_obj(), and the drm_exec lock on the per-process VM root page directory via amdgpu_vm_lock_pd(). All three are released by the out_exec label that every other error path in this function jumps to. The kvcalloc() failure path returns -ENOMEM directly, skipping out_exec and leaking all three. The leaked per-process VM root PD dma_resv lock is the load-bearing leak: any subsequent operation on the same VM (further GEM ops, command-submission, eviction, TTM shrinker callbacks) blocks on the held lock. DRM_IOCTL_AMDGPU_GEM_OP is DRM_AUTH | DRM_RENDER_ALLOW, so this is an unprivileged-local denial of service against the caller's GPU context, reachable by any process with /dev/dri/renderD* access. Route the failure through out_exec so drm_exec_fini() and drm_gem_object_put() run. Reproduced on stock 7.0.0-10, Ryzen 7 5700U / Radeon Vega (Lucienne): the failing ioctl returns -ENOMEM and a second GET_MAPPING_INFO on the same fd then blocks in drm_exec_lock_obj() on the leaked dma_resv. SIGKILL on the caller does not reap the task; the fd-release path during process exit goes through amdgpu_gem_object_close() -> drm_exec_prepare_obj() on the same lock, leaving the task in D state until the box is rebooted. The patched kernel was not rebuilt and re-tested on this hardware; the fix is mechanical. Tested on a single Lucienne / Vega box only. Ziyi Guo posted an independent INT_MAX-bound check for args->num_entries in the same branch [1]; the two patches are complementary and can land in either order. Fixes: 4d82724f7f2b ("drm/amdgpu: Add mapping info option for GEM_OP ioctl") Link: https://lore.kernel.org/all/20260208000255.4073363-1-n7l8m4@u.northwestern.edu/ # [1] Signed-off-by: Michael Bommarito Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Alex Deucher (cherry picked from commit b69d3256d79de15f54c322986ff4da68f1d65b0a) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 123d4a09114d..06dd2e8a5b47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -1094,8 +1094,10 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, * be retried. */ vm_entries = kvcalloc(args->num_entries, sizeof(*vm_entries), GFP_KERNEL); - if (!vm_entries) - return -ENOMEM; + if (!vm_entries) { + r = -ENOMEM; + goto out_exec; + } amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) { if (num_mappings < args->num_entries) { -- cgit v1.2.3 From a1ba4594232c87c3b8defd6f89a2e40f8b08395d Mon Sep 17 00:00:00 2001 From: Ziyi Guo Date: Sun, 8 Feb 2026 00:02:55 +0000 Subject: drm/amdgpu: check num_entries in GEM_OP GET_MAPPING_INFO kvcalloc(args->num_entries, sizeof(*vm_entries), GFP_KERNEL) at amdgpu_gem.c:1050 uses the user-supplied num_entries directly without any upper bounds check. Since num_entries is a __u32 and sizeof(drm_amdgpu_gem_vm_entry) is 32 bytes, a large num_entries produces an allocation exceeding INT_MAX, triggering WARNING in __kvmalloc_node_noprof(), causing a kernel WARNING, TAINT_WARN, and panic on CONFIG_PANIC_ON_WARN=y systems. Add a size bounds check before we invoke the kvzalloc() to reject oversized num_entries early with -EINVAL. Fixes: 4d82724f7f2b ("drm/amdgpu: Add mapping info option for GEM_OP ioctl") Signed-off-by: Ziyi Guo Signed-off-by: Alex Deucher (cherry picked from commit 1fe7bf5457f6efd7be60b17e23163ba54341d73d) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 06dd2e8a5b47..fe6d988e7f24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -1093,6 +1093,11 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, * If that number is larger than the size of the array, the ioctl must * be retried. */ + if (args->num_entries > INT_MAX / sizeof(*vm_entries)) { + r = -EINVAL; + goto out_exec; + } + vm_entries = kvcalloc(args->num_entries, sizeof(*vm_entries), GFP_KERNEL); if (!vm_entries) { r = -ENOMEM; -- cgit v1.2.3 From 4a03d23ce6ad474cb15862563bc9132e16e3e31e Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 19 May 2026 15:02:00 +0530 Subject: drm/amdgpu/userq: Fix doorbell object cleanup of queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unpin and unref the door bell obj if queue creation fails before initialization is complete. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 8c7506f7ba945f21e5abe7f8eac0a3acca6b5330) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index f070ea37d918..2301a44a03b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -787,7 +787,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) r = uq_funcs->mqd_create(queue, &args->in); if (r) { drm_file_err(uq_mgr->file, "Failed to create Queue\n"); - goto clean_mapping; + goto clean_doorbell_bo; } /* Update VM owner at userq submit-time for page-fault attribution. */ @@ -808,7 +808,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) if (r) { drm_file_err(uq_mgr->file, "Failed to map Queue\n"); mutex_unlock(&uq_mgr->userq_mutex); - goto clean_doorbell; + goto erase_doorbell; } } @@ -831,10 +831,15 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) args->out.queue_id = qid; return 0; -clean_doorbell: +erase_doorbell: xa_erase_irq(&adev->userq_doorbell_xa, index); clean_mqd: uq_funcs->mqd_destroy(queue); +clean_doorbell_bo: + amdgpu_bo_reserve(queue->db_obj.obj, true); + amdgpu_bo_unpin(queue->db_obj.obj); + amdgpu_bo_unreserve(queue->db_obj.obj); + amdgpu_bo_unref(&queue->db_obj.obj); clean_mapping: amdgpu_bo_reserve(fpriv->vm.root.bo, true); amdgpu_userq_buffer_vas_list_cleanup(adev, queue); -- cgit v1.2.3 From ba4c0ff47ee098c8e17d25f9dc050e6276bf9979 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 19 May 2026 15:12:42 +0530 Subject: drm/amdgpu/userq: Fix the mutex_init cleanup for fence_drv_lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mutex fence_drv_lock is destroyed in amdgpu_userq_fence_driver_free also in one of the jump condition mutex_destroy is also called leading to double mutex_destroy. So rearranging the code so amdgpu_userq_fence_driver_free takes care of the clean up along with mutex_destroy. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 384dbef269d101e5b671fc7b942c56734cd1d186) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 2301a44a03b1..d6390cc5a798 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -748,12 +748,12 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) INIT_DELAYED_WORK(&queue->hang_detect_work, amdgpu_userq_hang_detect_work); - mutex_init(&queue->fence_drv_lock); - xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv); if (r) goto free_queue; + xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); + mutex_init(&queue->fence_drv_lock); /* Make sure the queue can actually run with those virtual addresses. */ r = amdgpu_bo_reserve(fpriv->vm.root.bo, false); if (r) @@ -844,7 +844,6 @@ clean_mapping: amdgpu_bo_reserve(fpriv->vm.root.bo, true); amdgpu_userq_buffer_vas_list_cleanup(adev, queue); amdgpu_bo_unreserve(fpriv->vm.root.bo); - mutex_destroy(&queue->fence_drv_lock); free_fence_drv: amdgpu_userq_fence_driver_free(queue); free_queue: -- cgit v1.2.3 From ec78a85d95e9c37b6ca16d6ed1639fa64d5dd6dc Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Thu, 14 May 2026 11:53:50 -0400 Subject: drm/amd/display: Write REFCLK to 48MHz on DCN21 [Why&How] dccg21_init() calls dccg2_init() which hardcodes 100MHz refclk values for MICROSECOND_TIME_BASE_DIV and MILLISECOND_TIME_BASE_DIV. DCN21 uses 48MHz refclk, so the wrong values corrupt DCCG timing and cause eDP link training failure on cold boot. Write the correct 48MHz values directly instead of calling dccg2_init(). v2: Fixed typo Fixes: e6e2b956fc81 ("drm/amd/display: Add missing DCCG register entries for DCN20-DCN316") Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/5272 Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/5311 Reported-by: Max Chernoff Tested-by: Max Chernoff Signed-off-by: Ivan Lipski Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 08236c3ef284cd2d110e5e3d51fc9615e551f9dc) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c index c4d4eea140f3..1f23dfccf07a 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c @@ -105,15 +105,26 @@ static void dccg21_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppcl * dccg2_init() unconditionally overwrites MICROSECOND_TIME_BASE_DIV to * 0x00120264, destroying the marker before it can be read. * - * Guard the call: if the S0i3 marker is present, skip dccg2_init() so the + * Guard the call: if the S0i3 marker is present, skip init so the * WA can function correctly. bios_golden_init() will handle init in that case. + * + * DCN21 uses 48MHz refclk, not 100MHz, so we must explicitly set the correct + * values (48MHz is taken from rn_clk_mgr_construct()). */ static void dccg21_init(struct dccg *dccg) { + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + if (dccg2_is_s0i3_golden_init_wa_done(dccg)) return; - dccg2_init(dccg); + /* 48MHz refclk from rn_clk_mgr_construct() */ + REG_WRITE(MICROSECOND_TIME_BASE_DIV, 0x00120230); + REG_WRITE(MILLISECOND_TIME_BASE_DIV, 0x0010bb80); + REG_WRITE(DISPCLK_FREQ_CHANGE_CNTL, 0x0e01003c); + + if (REG(REFCLK_CNTL)) + REG_WRITE(REFCLK_CNTL, 0); } static const struct dccg_funcs dccg21_funcs = { -- cgit v1.2.3 From e984d61d92e702096058f0f828f4b2b8563b88ce Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Thu, 7 May 2026 15:51:49 -0400 Subject: drm/amdkfd: fix NULL pointer bug in svm_range_set_attr The process_info could be NULL if user doesn't call kfd_ioctl_acquire_vm before calling kfd_ioctl_svm. Signed-off-by: Eric Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 83a26c812e0529eb040d31a76f73e33e637243d4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 35ec67d9739b..3841943da5ec 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -3732,6 +3732,9 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, svms = &p->svms; + if (!process_info) + return -EINVAL; + mutex_lock(&process_info->lock); svm_range_list_lock_and_flush_work(svms, mm); -- cgit v1.2.3 From d8d9c820405eb1fcbde959de8898ad7d716a2d7b Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 18 May 2026 17:42:15 +0530 Subject: drm/amdgpu: simplify return value in amdgpu_userq_get_doorbell_index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu_userq_get_doorbell_index returns a uint64 type index as well as a int type failure values. Simplifying this and using a int type return value and getting the index in input pointer of type uint64 type. Also since it's used at once place making it static would be better. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit e947ec9d0529d5f93dbdb33cd197347f6a7b2922) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 21 +++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 4 ---- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index d6390cc5a798..34c0d9ee94f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -536,12 +536,13 @@ void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, amdgpu_bo_unref(&userq_obj->obj); } -uint64_t +static int amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_db_info *db_info, - struct drm_file *filp) + struct drm_file *filp, + u64 *index) { - uint64_t index; + u64 doorbell_index; struct drm_gem_object *gobj; struct amdgpu_userq_obj *db_obj = db_info->db_obj; int r, db_size; @@ -588,12 +589,13 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, goto unpin_bo; } - index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, - db_info->doorbell_offset, db_size); + doorbell_index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, + db_info->doorbell_offset, db_size); drm_dbg_driver(adev_to_drm(uq_mgr->adev), - "[Usermode queues] doorbell index=%lld\n", index); + "[Usermode queues] doorbell index=%lld\n", doorbell_index); amdgpu_bo_unreserve(db_obj->obj); - return index; + *index = doorbell_index; + return 0; unpin_bo: amdgpu_bo_unpin(db_obj->obj); @@ -776,10 +778,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) db_info.doorbell_handle = queue->doorbell_handle; db_info.db_obj = &queue->db_obj; db_info.doorbell_offset = args->in.doorbell_offset; - index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp); - if (index == (uint64_t)-EINVAL) { + r = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp, &index); + if (r) { drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); - r = -EINVAL; goto clean_mapping; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 49b33e2d6932..033b8a0de6b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -163,10 +163,6 @@ void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr); void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_eviction_fence_mgr *evf_mgr); -uint64_t amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_db_info *db_info, - struct drm_file *filp); - u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev); bool amdgpu_userq_enabled(struct drm_device *dev); -- cgit v1.2.3 From cedee93d43f893ce67e39b57c67240965c7c5a69 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 18 May 2026 18:33:00 +0530 Subject: drm/amdgpu/userq: add amdgpu_bo_unpin when amdgpu_ttm_alloc_gart fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unpin the wptr_obj->obj when amdgpu_ttm_alloc_gart fails. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit d8145c437ccdc2d91c579787290f82788172bea0) --- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 5b4121ddc78c..026940fad524 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -81,7 +81,7 @@ mes_userq_create_wptr_mapping(struct amdgpu_device *adev, ret = amdgpu_ttm_alloc_gart(&wptr_obj->obj->tbo); if (ret) { DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); - goto fail_map; + goto fail_alloc_gart; } queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj); @@ -89,6 +89,8 @@ mes_userq_create_wptr_mapping(struct amdgpu_device *adev, drm_exec_fini(&exec); return 0; +fail_alloc_gart: + amdgpu_bo_unpin(wptr_obj->obj); fail_map: amdgpu_bo_unref(&wptr_obj->obj); fail_lock: -- cgit v1.2.3 From a00caed2302c604c19a5cab781e34d7ba4fa7558 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 18 May 2026 18:55:25 +0530 Subject: drm/amdgpu/userq: reserve root bo without interruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the code to make it an uninterruptible reservation for root bo. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit d409ab4e387d94b2e593d558b54b7bfd315e0e75) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 34c0d9ee94f2..5da107dffcce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -620,11 +620,7 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que /* Cancel any pending hang detection work and cleanup */ cancel_delayed_work_sync(&queue->hang_detect_work); - r = amdgpu_bo_reserve(vm->root.bo, false); - if (r) { - drm_file_err(uq_mgr->file, "Failed to reserve root bo during userqueue destroy\n"); - return r; - } + amdgpu_bo_reserve(vm->root.bo, true); amdgpu_userq_buffer_vas_list_cleanup(adev, queue); amdgpu_bo_unreserve(vm->root.bo); -- cgit v1.2.3 From cf4aafdccefccc7f8236fed028d06725246e289e Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Mon, 18 May 2026 19:58:08 +0530 Subject: drm/amdgpu/userq: make sure queue is valid in the hang_detect_work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thread 1: Running amdgpu_userq_destroy which eventually remove the queue from door bell and set userq_mgr = NULL. Thread2: An interrupt might have scheduled the hang_detect_work which still need userq_mgr to be valid but could get an NULL ptrs. To fix that make sure we cancel the hang_detect_work again before setting userq_mgr to NULL. Along with that we also need all the queue va to remain valid till we could be running anything on the queue and hence moving the userq_va post hang_detect handler is cancelled. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 1a66ceb98b137d18d303b9889f0e7d8c4db73943) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 5da107dffcce..8d7dad1d30eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -427,8 +427,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue) xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index); amdgpu_userq_fence_driver_free(queue); queue->fence_drv = NULL; - queue->userq_mgr = NULL; - list_del(&queue->userq_va_list); up_read(&adev->reset_domain->sem); } @@ -619,11 +617,6 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que /* Cancel any pending hang detection work and cleanup */ cancel_delayed_work_sync(&queue->hang_detect_work); - - amdgpu_bo_reserve(vm->root.bo, true); - amdgpu_userq_buffer_vas_list_cleanup(adev, queue); - amdgpu_bo_unreserve(vm->root.bo); - mutex_lock(&uq_mgr->userq_mutex); amdgpu_userq_wait_for_last_fence(queue); @@ -635,6 +628,13 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que amdgpu_userq_cleanup(queue); mutex_unlock(&uq_mgr->userq_mutex); + cancel_delayed_work_sync(&queue->hang_detect_work); + amdgpu_bo_reserve(vm->root.bo, true); + amdgpu_userq_buffer_vas_list_cleanup(adev, queue); + amdgpu_bo_unreserve(vm->root.bo); + list_del(&queue->userq_va_list); + queue->userq_mgr = NULL; + amdgpu_bo_reserve(queue->db_obj.obj, true); amdgpu_bo_unpin(queue->db_obj.obj); amdgpu_bo_unreserve(queue->db_obj.obj); -- cgit v1.2.3 From 0fb4a8e64a9db74eeda8da7d0b78985392ae483b Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Mon, 11 May 2026 16:49:19 +0800 Subject: drm/amdgpu: fix potential overflow in fs_info.debugfs_name Use snprintf() with sizeof(fs_info.debugfs_name) so a long RAS block name plus the "_err_inject" suffix cannot overflow the 32-byte buffer. Signed-off-by: Stanley.Yang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher (cherry picked from commit 1a58070fda26857a8f6acc0ab05428e60d5c6844) --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 6c644cfe6695..fc9f3adf9912 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2280,7 +2280,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) list_for_each_entry(obj, &con->head, node) { if (amdgpu_ras_is_supported(adev, obj->head.block) && (obj->attr_inuse == 1)) { - sprintf(fs_info.debugfs_name, "%s_err_inject", + snprintf(fs_info.debugfs_name, sizeof(fs_info.debugfs_name), + "%s_err_inject", get_ras_block_str(&obj->head)); fs_info.head = obj->head; amdgpu_ras_debugfs_create(adev, &fs_info, dir); -- cgit v1.2.3 From 6842b6a4b72da9b2906ffc5ca9d846ace2c54c14 Mon Sep 17 00:00:00 2001 From: David Francis Date: Thu, 14 May 2026 10:31:20 -0400 Subject: drm/amdkfd: Check for pdd drm file first in CRIU restore path CRIU restore ioctls are meant to be called by CRIU with no existing drm file. There's an error path for if the drm file unexpectedly exists. It was positioned so it was missing a fput(drm_file). Do that check earlier, as soon as we have the pdd. Signed-off-by: David Francis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 2bab781dac78916c5cc8de76345a4102449267d7) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 03b266b26738..8785f7810157 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2300,6 +2300,11 @@ static int criu_restore_devices(struct kfd_process *p, ret = -EINVAL; goto exit; } + + if (pdd->drm_file) { + ret = -EINVAL; + goto exit; + } pdd->user_gpu_id = device_buckets[i].user_gpu_id; drm_file = fget(device_buckets[i].drm_fd); @@ -2310,11 +2315,6 @@ static int criu_restore_devices(struct kfd_process *p, goto exit; } - if (pdd->drm_file) { - ret = -EINVAL; - goto exit; - } - /* create the vm using render nodes for kfd pdd */ if (kfd_process_device_init_vm(pdd, drm_file)) { pr_err("could not init vm for given pdd\n"); -- cgit v1.2.3 From dd4f3ee535b3b0ac027f75dbf9dc5fc88733c765 Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Tue, 19 May 2026 10:41:54 +0200 Subject: drm/amd/pm/si: Disregard vblank time when no displays are connected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When no displays are connected, there is no vblank happening so the power management code shouldn't worry about it. This fixes a regression that caused the memory clock to be stuck at maximum when there were no displays connected to a SI GPU. Fixes: 9003a0746864 ("drm/amd/pm: Treat zero vblank time as too short in si_dpm (v3)") Fixes: 9d73b107a61b ("drm/amd/pm: Use pm_display_cfg in legacy DPM (v2)") Reviewed-by: Alex Deucher Tested-by: Jeremy Klarenbeek Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher (cherry picked from commit 6d87e0199f7b83735b56e422d59f170a201897a8) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index 36942467d4ad..c3aff5d0c53d 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -3076,6 +3076,10 @@ static bool si_dpm_vblank_too_short(void *handle) /* we never hit the non-gddr5 limit so disable it */ u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0; + /* Disregard vblank time when there are no displays connected */ + if (!adev->pm.pm_display_cfg.num_display) + return false; + /* Consider zero vblank time too short and disable MCLK switching. * Note that the vblank time is set to maximum when no displays are attached, * so we'll still enable MCLK switching in that case. -- cgit v1.2.3 From ca8e7a119a2e4045324cffb8f9f58bedcc3dc928 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 20 May 2026 16:13:09 +0530 Subject: drm/amdgpu/userq: remove amdgpu_userq_create/destroy_object wrapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the amdgpu_userq_create/destroy_object wrappers and use directly the kernel bo allocation function which does all the things which are done in wrapper. Signed-off-by: Sunil Khatri Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit deb02080ca5d3f015cf71e56067a39ef2f141998) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 67 ------------------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 7 ---- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 29 +++++++++---- 3 files changed, 21 insertions(+), 82 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 8d7dad1d30eb..a93f5c238e55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -465,74 +465,7 @@ retry: dma_fence_put(ev_fence); } -int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_userq_obj *userq_obj, - int size) -{ - struct amdgpu_device *adev = uq_mgr->adev; - struct amdgpu_bo_param bp; - int r; - - memset(&bp, 0, sizeof(bp)); - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_GTT; - bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - bp.type = ttm_bo_type_kernel; - bp.size = size; - bp.resv = NULL; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - - r = amdgpu_bo_create(adev, &bp, &userq_obj->obj); - if (r) { - drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", r); - return r; - } - r = amdgpu_bo_reserve(userq_obj->obj, true); - if (r) { - drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", r); - goto free_obj; - } - - r = amdgpu_bo_pin(userq_obj->obj, AMDGPU_GEM_DOMAIN_GTT); - if (r) - goto unresv; - - r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo); - if (r) { - drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r); - goto unpin_bo; - } - - r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr); - if (r) { - drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r); - goto unpin_bo; - } - - userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj); - amdgpu_bo_unreserve(userq_obj->obj); - memset(userq_obj->cpu_ptr, 0, size); - return 0; - -unpin_bo: - amdgpu_bo_unpin(userq_obj->obj); -unresv: - amdgpu_bo_unreserve(userq_obj->obj); -free_obj: - amdgpu_bo_unref(&userq_obj->obj); - - return r; -} - -void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_userq_obj *userq_obj) -{ - amdgpu_bo_kunmap(userq_obj->obj); - amdgpu_bo_unpin(userq_obj->obj); - amdgpu_bo_unref(&userq_obj->obj); -} static int amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 033b8a0de6b1..76ef5cfab52e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -151,13 +151,6 @@ void amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev); void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr); void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr); -int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_userq_obj *userq_obj, - int size); - -void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_userq_obj *userq_obj); - void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr); void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 026940fad524..71251370c8b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -192,12 +192,16 @@ static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, * for the same. */ size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ; - r = amdgpu_userq_create_object(uq_mgr, ctx, size); + r = amdgpu_bo_create_kernel(uq_mgr->adev, size, 0, + AMDGPU_GEM_DOMAIN_GTT, + &ctx->obj, &ctx->gpu_addr, + &ctx->cpu_ptr); if (r) { DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r); return r; } + memset(ctx->cpu_ptr, 0, size); return 0; } @@ -270,13 +274,19 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, return -ENOMEM; } - r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, - AMDGPU_MQD_SIZE_ALIGN(mqd_hw_default->mqd_size)); + r = amdgpu_bo_create_kernel(adev, + AMDGPU_MQD_SIZE_ALIGN(mqd_hw_default->mqd_size), + 0, AMDGPU_GEM_DOMAIN_GTT, + &queue->mqd.obj, &queue->mqd.gpu_addr, + &queue->mqd.cpu_ptr); if (r) { DRM_ERROR("Failed to create MQD object for userqueue\n"); goto free_props; } + memset(queue->mqd.cpu_ptr, 0, + AMDGPU_MQD_SIZE_ALIGN(mqd_hw_default->mqd_size)); + /* Initialize the MQD BO with user given values */ userq_props->wptr_gpu_addr = mqd_user->wptr_va; userq_props->rptr_gpu_addr = mqd_user->rptr_va; @@ -432,10 +442,12 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, return 0; free_ctx: - amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj); + amdgpu_bo_free_kernel(&queue->fw_obj.obj, &queue->fw_obj.gpu_addr, + &queue->fw_obj.cpu_ptr); free_mqd: - amdgpu_userq_destroy_object(uq_mgr, &queue->mqd); + amdgpu_bo_free_kernel(&queue->mqd.obj, &queue->mqd.gpu_addr, + &queue->mqd.cpu_ptr); free_props: kfree(userq_props); @@ -445,11 +457,12 @@ free_props: static void mes_userq_mqd_destroy(struct amdgpu_usermode_queue *queue) { - struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; - amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj); + amdgpu_bo_free_kernel(&queue->fw_obj.obj, &queue->fw_obj.gpu_addr, + &queue->fw_obj.cpu_ptr); kfree(queue->userq_prop); - amdgpu_userq_destroy_object(uq_mgr, &queue->mqd); + amdgpu_bo_free_kernel(&queue->mqd.obj, &queue->mqd.gpu_addr, + &queue->mqd.cpu_ptr); } static int mes_userq_preempt(struct amdgpu_usermode_queue *queue) -- cgit v1.2.3 From 93f5534b35a05ef8a0109c1eefa800062fee810a Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Tue, 12 May 2026 10:19:52 -0400 Subject: drm/amdkfd: fix a vulnerability of integer overflow in kfd debugger get_queue_ids() computes array_size = num_queues * sizeof(uint32_t), which could overflow on 32-bit size_t build. using array_size() instead, it saturates to SIZE_MAX on overflow. Signed-off-by: Eric Huang Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 2d57a0475f085c08b49312dfd8edcb461845f285) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index e0a31e11f0ff..0d7296c739ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -3308,12 +3308,14 @@ static void copy_context_work_handler(struct work_struct *work) static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array) { - size_t array_size = num_queues * sizeof(uint32_t); - if (!usr_queue_id_array) return NULL; - return memdup_user(usr_queue_id_array, array_size); + if (num_queues > KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) + return ERR_PTR(-EINVAL); + + return memdup_user(usr_queue_id_array, + array_size(num_queues, sizeof(uint32_t))); } int resume_queues(struct kfd_process *p, -- cgit v1.2.3 From 0b8a1600ab50f331aeeba47c777a1b34cba606bf Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 20 May 2026 16:25:50 +0530 Subject: drm/amdgpu/userq: move mqd_destroy to later stage to keep core obj valid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mqd_destroy cleans up queue core objects like mqd and fw_object which are needed for any pending fence to signal properly. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 4ad65d610096498c8e265615aba42b3c47441bb5) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index a93f5c238e55..28a1849e7dcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -417,12 +417,10 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue) { struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; - const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; /* Wait for mode-1 reset to complete */ down_read(&adev->reset_domain->sem); - uq_funcs->mqd_destroy(queue); /* Use interrupt-safe locking since IRQ handlers may access these XArrays */ xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index); amdgpu_userq_fence_driver_free(queue); @@ -541,15 +539,15 @@ static int amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { struct amdgpu_device *adev = uq_mgr->adev; - struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); - struct amdgpu_vm *vm = &fpriv->vm; - + const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; + struct amdgpu_vm *vm = queue->vm; int r = 0; cancel_delayed_work_sync(&uq_mgr->resume_work); /* Cancel any pending hang detection work and cleanup */ cancel_delayed_work_sync(&queue->hang_detect_work); + mutex_lock(&uq_mgr->userq_mutex); amdgpu_userq_wait_for_last_fence(queue); @@ -566,6 +564,7 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que amdgpu_userq_buffer_vas_list_cleanup(adev, queue); amdgpu_bo_unreserve(vm->root.bo); list_del(&queue->userq_va_list); + uq_funcs->mqd_destroy(queue); queue->userq_mgr = NULL; amdgpu_bo_reserve(queue->db_obj.obj, true); -- cgit v1.2.3 From 181307acf8ea597ad63fd574b44d0f98a329a61b Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 20 May 2026 16:39:49 +0530 Subject: drm/amdgpu/userq: use array instead of list for userq_vas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use arrays instead of list for userq_vas since we have fixed no of bos. Also, we dont have to worry to free that memory later since this array would be free along with queue only. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit ef7dc711a664b0c548ecfdf13a00436b7446b8e7) --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 88 ++++++++---------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 20 ++++--- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 14 +++-- 3 files changed, 45 insertions(+), 77 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 28a1849e7dcd..cf192500800f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -215,33 +215,15 @@ void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell) xa_unlock_irqrestore(xa, flags); } -static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue, - struct amdgpu_bo_va_mapping *va_map, u64 addr) -{ - struct amdgpu_userq_va_cursor *va_cursor; - struct userq_va_list; - - va_cursor = kzalloc_obj(*va_cursor); - if (!va_cursor) - return -ENOMEM; - - INIT_LIST_HEAD(&va_cursor->list); - va_cursor->gpu_addr = addr; - va_map->bo_va->userq_va_mapped = true; - list_add(&va_cursor->list, &queue->userq_va_list); - - return 0; -} - int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue, - u64 addr, u64 expected_size) + u64 addr, u64 expected_size, + u64 *va_out) { struct amdgpu_bo_va_mapping *va_map; struct amdgpu_vm *vm = queue->vm; u64 user_addr; u64 size; - int r = 0; /* Caller must hold vm->root.bo reservation */ dma_resv_assert_held(queue->vm->root.bo->tbo.base.resv); @@ -250,20 +232,18 @@ int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, size = expected_size >> AMDGPU_GPU_PAGE_SHIFT; va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr); - if (!va_map) { - r = -EINVAL; - goto out_err; - } + if (!va_map) + return -EINVAL; + /* Only validate the userq whether resident in the VM mapping range */ if (user_addr >= va_map->start && va_map->last - user_addr + 1 >= size) { - amdgpu_userq_buffer_va_list_add(queue, va_map, user_addr); + va_map->bo_va->userq_va_mapped = true; + *va_out = user_addr; return 0; } - r = -EINVAL; -out_err: - return r; + return -EINVAL; } static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr) @@ -284,14 +264,16 @@ static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr) static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue) { - struct amdgpu_userq_va_cursor *va_cursor, *tmp; - int r = 0; + int i, r = 0; - list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) { - r += amdgpu_userq_buffer_va_mapped(queue->vm, va_cursor->gpu_addr); + for (i = 0; i < ARRAY_SIZE(queue->userq_vas.va_array); i++) { + if (!queue->userq_vas.va_array[i]) + continue; + r += amdgpu_userq_buffer_va_mapped(queue->vm, + queue->userq_vas.va_array[i]); dev_dbg(queue->userq_mgr->adev->dev, "validate the userq mapping:%p va:%llx r:%d\n", - queue, va_cursor->gpu_addr, r); + queue, queue->userq_vas.va_array[i], r); } if (r != 0) @@ -300,24 +282,7 @@ static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue) return false; } -static void amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, - struct amdgpu_usermode_queue *queue) -{ - struct amdgpu_userq_va_cursor *va_cursor, *tmp; - struct amdgpu_bo_va_mapping *mapping; - /* Caller must hold vm->root.bo reservation */ - dma_resv_assert_held(queue->vm->root.bo->tbo.base.resv); - - list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) { - mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, va_cursor->gpu_addr); - if (mapping) - dev_dbg(adev->dev, "delete the userq:%p va:%llx\n", - queue, va_cursor->gpu_addr); - list_del(&va_cursor->list); - kfree(va_cursor); - } -} static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue) { @@ -540,7 +505,6 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que { struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; - struct amdgpu_vm *vm = queue->vm; int r = 0; cancel_delayed_work_sync(&uq_mgr->resume_work); @@ -560,10 +524,6 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que mutex_unlock(&uq_mgr->userq_mutex); cancel_delayed_work_sync(&queue->hang_detect_work); - amdgpu_bo_reserve(vm->root.bo, true); - amdgpu_userq_buffer_vas_list_cleanup(adev, queue); - amdgpu_bo_unreserve(vm->root.bo); - list_del(&queue->userq_va_list); uq_funcs->mqd_destroy(queue); queue->userq_mgr = NULL; @@ -669,7 +629,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) } kref_init(&queue->refcount); - INIT_LIST_HEAD(&queue->userq_va_list); queue->doorbell_handle = args->in.doorbell_handle; queue->queue_type = args->in.ip_type; queue->vm = &fpriv->vm; @@ -690,14 +649,17 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto free_fence_drv; if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, - args->in.queue_size) || + args->in.queue_size, + &queue->userq_vas.va.queue_rb) || amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, - AMDGPU_GPU_PAGE_SIZE) || + AMDGPU_GPU_PAGE_SIZE, + &queue->userq_vas.va.rptr) || amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, - AMDGPU_GPU_PAGE_SIZE)) { + AMDGPU_GPU_PAGE_SIZE, + &queue->userq_vas.va.wptr)) { r = -EINVAL; amdgpu_bo_unreserve(fpriv->vm.root.bo); - goto clean_mapping; + goto free_fence_drv; } amdgpu_bo_unreserve(fpriv->vm.root.bo); @@ -709,7 +671,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) r = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp, &index); if (r) { drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); - goto clean_mapping; + goto free_fence_drv; } queue->doorbell_index = index; @@ -769,10 +731,6 @@ clean_doorbell_bo: amdgpu_bo_unpin(queue->db_obj.obj); amdgpu_bo_unreserve(queue->db_obj.obj); amdgpu_bo_unref(&queue->db_obj.obj); -clean_mapping: - amdgpu_bo_reserve(fpriv->vm.root.bo, true); - amdgpu_userq_buffer_vas_list_cleanup(adev, queue); - amdgpu_bo_unreserve(fpriv->vm.root.bo); free_fence_drv: amdgpu_userq_fence_driver_free(queue); free_queue: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 76ef5cfab52e..28cfc6682333 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -48,11 +48,6 @@ struct amdgpu_userq_obj { struct amdgpu_bo *obj; }; -struct amdgpu_userq_va_cursor { - u64 gpu_addr; - struct list_head list; -}; - struct amdgpu_usermode_queue { int queue_type; enum amdgpu_userq_state state; @@ -93,7 +88,17 @@ struct amdgpu_usermode_queue { struct delayed_work hang_detect_work; struct kref refcount; - struct list_head userq_va_list; + union { + struct { + u64 queue_rb; + u64 wptr; + u64 rptr; + u64 eop; + u64 shadow; + u64 csa; + } va; + u64 va_array[6]; + } userq_vas; }; struct amdgpu_userq_funcs { @@ -174,7 +179,8 @@ void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell); int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue, - u64 addr, u64 expected_size); + u64 addr, u64 expected_size, u64 *va_out); + void amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping, uint64_t saddr); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 71251370c8b3..98aa00eeb2f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -318,8 +318,9 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, kfree(compute_mqd); goto free_mqd; } - r = amdgpu_userq_input_va_validate(adev, queue, compute_mqd->eop_va, - 2048); + r = amdgpu_userq_input_va_validate(adev, queue, + compute_mqd->eop_va, 2048, + &queue->userq_vas.va.eop); amdgpu_bo_unreserve(queue->vm->root.bo); if (r) { kfree(compute_mqd); @@ -368,7 +369,8 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, goto free_mqd; } r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->shadow_va, - shadow_info.shadow_size); + shadow_info.shadow_size, + &queue->userq_vas.va.shadow); if (r) { amdgpu_bo_unreserve(queue->vm->root.bo); kfree(mqd_gfx_v11); @@ -376,7 +378,8 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, } r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->csa_va, - shadow_info.csa_size); + shadow_info.csa_size, + &queue->userq_vas.va.csa); amdgpu_bo_unreserve(queue->vm->root.bo); if (r) { kfree(mqd_gfx_v11); @@ -406,7 +409,8 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, goto free_mqd; } r = amdgpu_userq_input_va_validate(adev, queue, mqd_sdma_v11->csa_va, - 32); + 32, + &queue->userq_vas.va.csa); amdgpu_bo_unreserve(queue->vm->root.bo); if (r) { kfree(mqd_sdma_v11); -- cgit v1.2.3 From 962d684b5dc0741dcd93485d41b450de402d5592 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 18 Feb 2026 12:53:27 +0100 Subject: drm/amdgpu: fix amdgpu_hmm_range_get_pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The notifier sequence must only be read once or otherwise we could work with invalid pages. While at it also fix the coding style, e.g. drop the pre-initialized return value and use the common define for 2G range. Signed-off-by: Christian König Reviewed-by: Vitaly Prosyak Tested-by: Vitaly Prosyak Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit c08972f555945cda57b0adb72272a37910153390) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index f72990ac046e..0a9582da3a33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -51,8 +51,6 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_hmm.h" -#define MAX_WALK_BYTE (2UL << 30) - /** * amdgpu_hmm_invalidate_gfx - callback to notify about mm change * @@ -170,11 +168,13 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, void *owner, struct amdgpu_hmm_range *range) { - unsigned long end; + const u64 max_bytes = SZ_2G; + + struct hmm_range *hmm_range = &range->hmm_range; unsigned long timeout; unsigned long *pfns; - int r = 0; - struct hmm_range *hmm_range = &range->hmm_range; + unsigned long end; + int r; pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); if (unlikely(!pfns)) { @@ -191,8 +191,9 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, end = start + npages * PAGE_SIZE; hmm_range->dev_private_owner = owner; + hmm_range->notifier_seq = mmu_interval_read_begin(notifier); do { - hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end); + hmm_range->end = min(hmm_range->start + max_bytes, end); pr_debug("hmm range: start = 0x%lx, end = 0x%lx", hmm_range->start, hmm_range->end); @@ -200,7 +201,6 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); retry: - hmm_range->notifier_seq = mmu_interval_read_begin(notifier); r = hmm_range_fault(hmm_range); if (unlikely(r)) { if (r == -EBUSY && !time_after(jiffies, timeout)) @@ -210,7 +210,7 @@ retry: if (hmm_range->end == end) break; - hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT; + hmm_range->hmm_pfns += max_bytes >> PAGE_SHIFT; hmm_range->start = hmm_range->end; } while (hmm_range->end < end); -- cgit v1.2.3 From 1c824497d8acd3187d585d6187cedc1897dcc871 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 18 Feb 2026 12:31:29 +0100 Subject: drm/amdgpu: fix calling VM invalidation in amdgpu_hmm_invalidate_gfx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we don't invalidate page tables on next CS. Signed-off-by: Christian König Reviewed-by: Vitaly Prosyak Tested-by: Vitaly Prosyak Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit b6444d1bcbc34f6f2a31a3aab3059be082f3683e) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index 0a9582da3a33..5bfa5a84b09c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -76,6 +76,7 @@ static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); + amdgpu_vm_bo_invalidate(bo, false); r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index fccd758b6699..c9f88ecce1a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1631,6 +1631,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, { struct amdgpu_bo_va *bo_va; struct dma_resv *resv; + struct amdgpu_bo *bo; bool clear, unlock; int r; @@ -1650,11 +1651,13 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, while (!list_empty(&vm->invalidated)) { bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, base.vm_status); - resv = bo_va->base.bo->tbo.base.resv; + bo = bo_va->base.bo; + resv = bo->tbo.base.resv; spin_unlock(&vm->status_lock); /* Try to reserve the BO to avoid clearing its ptes */ - if (!adev->debug_vm && dma_resv_trylock(resv)) { + if (!adev->debug_vm && !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && + dma_resv_trylock(resv)) { clear = false; unlock = true; /* The caller is already holding the reservation lock */ -- cgit v1.2.3 From bfa9d28960ed677d556bdf097073bc3129686229 Mon Sep 17 00:00:00 2001 From: Pavitra Jha Date: Thu, 21 May 2026 04:04:14 -0400 Subject: Bluetooth: hci_conn: Fix memory leak in hci_le_big_terminate() hci_le_big_terminate() allocates iso_list_data via kzalloc_obj but returns 0 without freeing it when neither pa_sync_term nor big_sync_term flags are set after evaluating the PA and BIG sync connection state. This early-return path was introduced when hci_le_big_terminate() was refactored to take struct hci_conn instead of raw u8 parameters, adding PA/BIG flag evaluation logic. The existing kfree() on hci_cmd_sync_queue failure does not cover this path. Fixes: a7bcffc673de ("Bluetooth: Add PA_LINK to distinguish BIG sync and PA sync connections") Cc: stable@vger.kernel.org Signed-off-by: Pavitra Jha Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 17b46ad6a349..54eabaa46960 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -870,8 +870,10 @@ static int hci_le_big_terminate(struct hci_dev *hdev, struct hci_conn *conn) d->big_sync_term = true; } - if (!d->pa_sync_term && !d->big_sync_term) + if (!d->pa_sync_term && !d->big_sync_term) { + kfree(d); return 0; + } ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d, terminate_big_destroy); -- cgit v1.2.3 From 9dbd84990394c51f5cee1e8871bb5ff8af5ed939 Mon Sep 17 00:00:00 2001 From: Siwei Zhang Date: Wed, 20 May 2026 22:30:36 -0400 Subject: Bluetooth: L2CAP: fix chan ref leak in l2cap_chan_timeout() on !conn __set_chan_timer() takes a l2cap_chan reference via l2cap_chan_hold() before scheduling the delayed work. The normal path in l2cap_chan_timeout() drops this reference with l2cap_chan_put() at the end, but the early return when chan->conn is NULL skips the put, leaking the reference. Add the missing l2cap_chan_put() before the early return. Fixes: adf0398cee86 ("Bluetooth: l2cap: fix null-ptr-deref in l2cap_chan_timeout") Cc: stable@vger.kernel.org Signed-off-by: Siwei Zhang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index fdccd62ccca8..5668c92b3f58 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -411,8 +411,10 @@ static void l2cap_chan_timeout(struct work_struct *work) BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); - if (!conn) + if (!conn) { + l2cap_chan_put(chan); return; + } mutex_lock(&conn->lock); /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling -- cgit v1.2.3 From 8c8e620467a7b51562dbcefbd1f09f288d7d710d Mon Sep 17 00:00:00 2001 From: Siwei Zhang Date: Wed, 20 May 2026 22:12:20 -0400 Subject: Bluetooth: L2CAP: use chan timer to close channels in cleanup_listen() l2cap_chan_close() removes the channel from conn->chan_l, which must be done under conn->lock. cleanup_listen() runs under the parent sk_lock, so acquiring conn->lock would invert the established conn->lock -> chan->lock -> sk_lock order. Instead of calling l2cap_chan_close() directly, schedule l2cap_chan_timeout with delay 0 to close the channel asynchronously. The timeout handler already acquires conn->lock and chan->lock in the correct order. The timer is only armed when chan->conn is still set: if it is already NULL, l2cap_conn_del() has already processed this channel (l2cap_chan_del + l2cap_sock_teardown_cb + l2cap_sock_close_cb), so there is nothing left to do. If l2cap_conn_del() races in after the timer is armed, __clear_chan_timer() inside l2cap_chan_del() cancels it; if the timer has already fired, the handler returns harmlessly because chan->conn was cleared. Fixes: 3df91ea20e74 ("Bluetooth: Revert to mutexes from RCU list") Cc: # 0b58004: Bluetooth: fix UAF in l2cap_sock_cleanup_listen() vs l2cap_conn_del() Signed-off-by: Siwei Zhang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_sock.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index b34e7da8d906..c138aa4ae266 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1499,6 +1499,10 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) * pin it (hold_unless_zero() additionally skips a chan already past * its last reference). We then drop the sk lock before taking * chan->lock, so sk and chan locks are never held together. + * + * Since we cannot call l2cap_chan_close() without conn->lock, + * schedule l2cap_chan_timeout to close the channel; it already + * acquires conn->lock -> chan->lock in the correct order. */ while ((sk = bt_accept_dequeue(parent, NULL))) { struct l2cap_chan *chan; @@ -1516,14 +1520,12 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) state_to_string(chan->state)); l2cap_chan_lock(chan); - __clear_chan_timer(chan); - l2cap_chan_close(chan, ECONNRESET); - /* l2cap_conn_del() may already have killed this socket - * (it sets SOCK_DEAD); skip the duplicate to avoid a - * double sock_put()/l2cap_chan_put(). + /* Since we cannot call l2cap_chan_close() without + * conn->lock, schedule its timer to trigger the close + * and cleanup of this channel. */ - if (!sock_flag(sk, SOCK_DEAD)) - l2cap_sock_kill(sk); + if (chan->conn) + __set_chan_timer(chan, 0); l2cap_chan_unlock(chan); l2cap_chan_put(chan); -- cgit v1.2.3 From 2a3ac9ee11dbb9845f3947cef4a79dba658cf6f6 Mon Sep 17 00:00:00 2001 From: Muhammad Bilal Date: Wed, 20 May 2026 18:56:43 -0400 Subject: Bluetooth: HIDP: fix missing length checks in hidp_input_report() hidp_input_report() reads keyboard and mouse payload data from an skb without first verifying that skb->len contains enough data. hidp_recv_intr_frame() pulls the 1-byte HIDP header before dispatching to hidp_input_report(). If a paired device sends a truncated packet, the handler reads beyond the valid skb data, resulting in an out-of-bounds read of skb data. The OOB bytes may be interpreted as phantom key presses or spurious mouse movement. Replace the open-coded length tracking and pointer arithmetic with skb_pull_data() calls. skb_pull_data() returns NULL if the requested bytes are not present, eliminating the need for a manual size variable and the separate skb->len guard. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Muhammad Bilal Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hidp/core.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 976f91eeb745..70344bd3248a 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -179,12 +179,21 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) { struct input_dev *dev = session->input; unsigned char *keys = session->keys; - unsigned char *udata = skb->data + 1; - signed char *sdata = skb->data + 1; - int i, size = skb->len - 1; + unsigned char *udata; + signed char *sdata; + u8 *hdr; + int i; + + hdr = skb_pull_data(skb, 1); + if (!hdr) + return; - switch (skb->data[0]) { + switch (*hdr) { case 0x01: /* Keyboard report */ + udata = skb_pull_data(skb, 8); + if (!udata) + break; + for (i = 0; i < 8; i++) input_report_key(dev, hidp_keycode[i + 224], (udata[0] >> i) & 1); @@ -213,6 +222,10 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) break; case 0x02: /* Mouse report */ + sdata = skb_pull_data(skb, 3); + if (!sdata) + break; + input_report_key(dev, BTN_LEFT, sdata[0] & 0x01); input_report_key(dev, BTN_RIGHT, sdata[0] & 0x02); input_report_key(dev, BTN_MIDDLE, sdata[0] & 0x04); @@ -222,7 +235,7 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) input_report_rel(dev, REL_X, sdata[1]); input_report_rel(dev, REL_Y, sdata[2]); - if (size > 3) + if (skb->len > 0) input_report_rel(dev, REL_WHEEL, sdata[3]); break; } -- cgit v1.2.3 From 82855073c1081732656734b74d7d1d5e4cfd0da7 Mon Sep 17 00:00:00 2001 From: Shuai Zhang Date: Thu, 21 May 2026 13:25:47 +0800 Subject: Bluetooth: btusb: Allow firmware re-download when version matches The Bluetooth host decides whether to download firmware by reading the controller firmware download completion flag and firmware version information. If a USB error occurs during the firmware download process (for example due to a USB disconnect), the download is aborted immediately. An incomplete firmware transfer does not cause the controller to set the download completion flag, but the firmware version information may be updated at an early stage of the download process. In this case, after USB reconnection, the host attempts to re-download the firmware because the download completion flag is not set. However, since the controller reports the same firmware version as the target firmware, the download is skipped. This ultimately results in the firmware not being properly updated on the controller. This change removes the restriction that skips firmware download when the versions are equal. It covers scenarios where the USB connection can be disconnected at any time and ensures that firmware download can be retriggered after USB reconnection, allowing the Bluetooth firmware to be correctly and completely updated. Fixes: 3267c884cefa ("Bluetooth: btusb: Add support for QCA ROME chipset family") Cc: stable@vger.kernel.org Signed-off-by: Shuai Zhang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 7f5fce93d984..830fefb342c6 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3540,7 +3540,13 @@ static int btusb_setup_qca_load_rampatch(struct hci_dev *hdev, "firmware rome 0x%x build 0x%x", rver_rom, rver_patch, ver_rom, ver_patch); - if (rver_rom != ver_rom || rver_patch <= ver_patch) { + /* Allow rampatch when the patch version equals the firmware version. + * A firmware download may be aborted by a transient USB error (e.g. + * disconnect) after the controller updates version info but before + * completion. + * Allowing equal versions enables re-flashing during recovery. + */ + if (rver_rom != ver_rom || rver_patch < ver_patch) { bt_dev_err(hdev, "rampatch file version did not match with firmware"); err = -EINVAL; goto done; -- cgit v1.2.3 From 3c40d381ce04f9575a5d8b542898183c3b4b38dc Mon Sep 17 00:00:00 2001 From: Zhao Dongdong Date: Tue, 26 May 2026 11:21:39 +0800 Subject: Bluetooth: 6lowpan: check skb_clone() return value in send_mcast_pkt() The skb_clone() function can return NULL if memory allocation fails. send_mcast_pkt() calls skb_clone() without checking the return value, which can lead to a NULL pointer dereference in send_pkt() when it dereferences skb->data. Add a NULL check after skb_clone() and skip the peer if the clone fails. Fixes: 18722c247023 ("Bluetooth: Enable 6LoWPAN support for BT LE devices") Signed-off-by: Zhao Dongdong Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/6lowpan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 2f03b780b40d..960a19b3e26d 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -486,6 +486,8 @@ static int send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev) int ret; local_skb = skb_clone(skb, GFP_ATOMIC); + if (!local_skb) + continue; BT_DBG("xmit %s to %pMR type %u IP %pI6c chan %p", netdev->name, -- cgit v1.2.3 From bfea6091e0fffb270c20e74384b660910277eb6c Mon Sep 17 00:00:00 2001 From: Doruk Tan Ozturk Date: Mon, 25 May 2026 18:24:38 +0200 Subject: Bluetooth: hci_sync: fix UAF in hci_le_create_cis_sync hci_le_create_cis_sync() dereferences conn->conn_timeout after releasing both rcu_read_lock() and hci_dev_lock(hdev). The conn pointer was obtained from an RCU-protected iteration over hdev->conn_hash.list and is not valid once these locks are dropped. A concurrent disconnect can free the hci_conn between the unlock and the dereference, causing a use-after-free read. The cancellation mechanism in hci_conn_del() cannot prevent this because hci_le_create_cis_pending() queues hci_create_cis_sync with data=NULL: hci_cmd_sync_queue(hdev, hci_create_cis_sync, NULL, NULL); While hci_conn_del() dequeues with data=conn: hci_cmd_sync_dequeue(hdev, NULL, conn, NULL); Since NULL != conn, the lookup in _hci_cmd_sync_lookup_entry() never matches, and the pending work item is not cancelled. Fix this by saving conn->conn_timeout into a local variable while the locks are still held, so the stale conn pointer is never dereferenced after unlock. This is the same class of bug as the one fixed by commit 035c25007c9e ("Bluetooth: hci_sync: Fix UAF on le_read_features_complete") which addressed the identical pattern in a different function. This vulnerability was identified using 0sec.ai, an open-source automated security auditing platform (https://github.com/0sec-labs). Fixes: c09b80be6ffc ("Bluetooth: hci_conn: Fix not waiting for HCI_EVT_LE_CIS_ESTABLISHED") Cc: stable@vger.kernel.org Reported-by: Doruk Tan Ozturk Signed-off-by: Doruk Tan Ozturk Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index aff8562a8690..1faf8df6d159 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6699,6 +6699,7 @@ int hci_le_create_cis_sync(struct hci_dev *hdev) DEFINE_FLEX(struct hci_cp_le_create_cis, cmd, cis, num_cis, 0x1f); size_t aux_num_cis = 0; struct hci_conn *conn; + u16 timeout = 0; u8 cig = BT_ISO_QOS_CIG_UNSET; /* The spec allows only one pending LE Create CIS command at a time. If @@ -6769,6 +6770,7 @@ int hci_le_create_cis_sync(struct hci_dev *hdev) set_bit(HCI_CONN_CREATE_CIS, &conn->flags); cis->acl_handle = cpu_to_le16(conn->parent->handle); cis->cis_handle = cpu_to_le16(conn->handle); + timeout = conn->conn_timeout; aux_num_cis++; if (aux_num_cis >= cmd->num_cis) @@ -6788,7 +6790,7 @@ done: return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CIS, struct_size(cmd, cis, cmd->num_cis), cmd, HCI_EVT_LE_CIS_ESTABLISHED, - conn->conn_timeout, NULL); + timeout, NULL); } int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle) -- cgit v1.2.3 From fa21e86caba2347e89eb65af926205a36a097c53 Mon Sep 17 00:00:00 2001 From: Shuai Zhang Date: Mon, 25 May 2026 14:51:56 +0800 Subject: Bluetooth: hci_qca: Use 100 ms SSR delay for rampatch and NVM loading When bt_en is pulled high by hardware, the host does not re-download the firmware after SSR. The controller loads the rampatch and NVM internally. On HMT chip, the rampatch is ~264 KB and the NVM is ~9.4 KB. The loading process takes approximately 70 ms. The previous 50 ms delay is too short, causing the controller to not respond to the reset command sent by the host, which leads to BT initialization failure: Bluetooth: hci0: QCA memdump Done, received 458752, total 458752 Bluetooth: hci0: mem_dump_status: 2 Bluetooth: hci0: Opcode 0x0c03 failed: -110 Increase the delay to 100 ms, which was confirmed as a safe value by the controller, to ensure the controller has finished loading the firmware before the host sends commands. Steps to reproduce: 1. Trigger SSR and wait for SSR to complete: hcitool cmd 0x3f 0c 26 2. Run "bluetoothctl power on" and observe that BT fails to start. Fixes: fce1a9244a0f ("Bluetooth: hci_qca: Fix SSR (SubSystem Restart) fail when BT_EN is pulled up by hw") Cc: stable@vger.kernel.org Reviewed-by: Dmitry Baryshkov Signed-off-by: Shuai Zhang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index ed280399bf47..34500137df2c 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1680,8 +1680,8 @@ static void qca_hw_error(struct hci_dev *hdev, u8 code) mod_timer(&qca->tx_idle_timer, jiffies + msecs_to_jiffies(qca->tx_idle_delay)); - /* Controller reset completion time is 50ms */ - msleep(50); + /* Wait for the controller to load the rampatch and NVM. */ + msleep(100); clear_bit(QCA_SSR_TRIGGERED, &qca->flags); clear_bit(QCA_IBS_DISABLED, &qca->flags); -- cgit v1.2.3 From 426a35d7530722ffa2d89d759c39f5157e0e500d Mon Sep 17 00:00:00 2001 From: Jeremy Erazo Date: Wed, 20 May 2026 18:23:31 +0000 Subject: smb: client: detect short folioq copy in cifs_copy_folioq_to_iter() cifs_copy_folioq_to_iter() copies a requested number of bytes from a folio queue into the destination iterator. Since the encrypted SMB2 READ path was changed to pass the server-declared payload length (data_len) instead of the larger folioq buffer length, the caller can ask for fewer bytes than the folio queue holds. In that case the helper continues walking the remaining folios after data_size has reached zero and calls copy_folio_to_iter() with len = 0, which is unnecessary work. The helper also returns 0 (success) when the folio queue is exhausted before data_size bytes have been copied. The caller has no way to distinguish that from a full copy and the reported transfer count ends up larger than the amount of data placed in the iterator. Add an early exit when data_size reaches zero, and return an error when the folio queue is exhausted before all requested bytes have been copied. Signed-off-by: Jeremy Erazo Reviewed-by: David Howells Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 61b60114e4b8..d4875f9532b4 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4706,9 +4706,15 @@ cifs_copy_folioq_to_iter(struct folio_queue *folioq, size_t data_size, { for (; folioq; folioq = folioq->next) { for (int s = 0; s < folioq_count(folioq); s++) { - struct folio *folio = folioq_folio(folioq, s); - size_t fsize = folio_size(folio); - size_t n, len = umin(fsize - skip, data_size); + struct folio *folio; + size_t fsize, n, len; + + if (data_size == 0) + return 0; + + folio = folioq_folio(folioq, s); + fsize = folio_size(folio); + len = umin(fsize - skip, data_size); n = copy_folio_to_iter(folio, skip, len, iter); if (n != len) { @@ -4721,6 +4727,12 @@ cifs_copy_folioq_to_iter(struct folio_queue *folioq, size_t data_size, } } + if (data_size != 0) { + cifs_dbg(VFS, "%s: short copy, %zu bytes missing\n", + __func__, data_size); + return smb_EIO2(smb_eio_trace_rx_copy_to_iter, 0, data_size); + } + return 0; } -- cgit v1.2.3 From 9d2491197a00acf8c423512078458c2855102b66 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 22 May 2026 18:28:49 -0500 Subject: smb: client: fix uninitialized variable in smb2_writev_callback compiling with W=2 pointed out that "written may be used uninitialized" Fixes: 20d72b00ca81 ("netfs: Fix the request's work item to not require a ref") Cc: stable@vger.kernel.org Reviewed-by: David Howells Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 3bd300347f16..fbeb2156ddb6 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4955,7 +4955,7 @@ smb2_writev_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) unsigned int rreq_debug_id = wdata->rreq->debug_id; unsigned int subreq_debug_index = wdata->subreq.debug_index; ssize_t result = 0; - size_t written; + size_t written = 0; WARN_ONCE(wdata->server != server, "wdata server %p != mid server %p", -- cgit v1.2.3 From a17dc12bfed8868e6a86f3b45c16065a70641acb Mon Sep 17 00:00:00 2001 From: "Alexis Lothoré (eBPF Foundation)" Date: Wed, 27 May 2026 21:12:31 +0200 Subject: x86/ftrace: Relocate %rip-relative percpu refs in dynamic trampolines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With CONFIG_CALL_DEPTH_TRACKING enabled on an x86 retbleed-affected platform (eg: Skylake), with retbleed=stuff, registering a dynamic ftrace trampoline crashes on the first call into the traced function: BUG: unable to handle page fault for address: ffff88817ae18880 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 4b53067 P4D 4b53067 PUD 0 Oops: Oops: 0002 [#1] SMP PTI CPU: 3 UID: 0 PID: 187 Comm: usleep Not tainted 7.0.10 #243 PREEMPT(full) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.17.0-2-2 04/01/2014 Code: 24 78 00 00 00 00 48 89 ea 48 89 54 24 20 48 8b b4 24 b8 00 00 00 48 8b bc 24 b0 00 00 00 48 89 bc 24 80 00 00 00 48 83 ef 05 <65> 48 c1 3d 1f a8 b6 02 05 48 8b 15 f6 00 00 00 4c 89 3c 24 4c 89 Call Trace: ? find_held_lock ? exc_page_fault ? lock_release ? __x64_sys_clock_nanosleep ? lockdep_hardirqs_on_prepare ? trace_hardirqs_on __x64_sys_clock_nanosleep do_syscall_64 ? exc_page_fault ? call_depth_return_thunk entry_SYSCALL_64_after_hwframe ... Kernel panic - not syncing: Fatal exception This small reproducer allows to easily trigger the crash: # echo 'p __x64_sys_clock_nanosleep' > /sys/kernel/tracing/kprobe_events # echo 1 > /sys/kernel/tracing/events/kprobes/p___x64_sys_clock_nanosleep_0/enable # usleep 1 Monitoring the crash under GDB points to the exact instruction in charge of incrementing the call depth: sarq $5, %gs:__x86_call_depth(%rip) This instruction matches the one inserted by the ftrace_regs_caller from ftrace_64.S. This emitted code was likely working fine until the introduction of 59bec00ace28 ("x86/percpu: Introduce %rip-relative addressing to PER_CPU_VAR()"): it has made the call depth accounting addressing relative to $rip, instead of being based on an absolute address. As this code exact location depends on where the trampoline lives in memory, the corresponding displacement needs to be adjusted at runtime to actually correctly find the per-cpu __x86_call_depth value, otherwise the targeted address is wrong, leading to the page fault seen above. Fix the %rip-relative displacement of the copied CALL_DEPTH_ACCOUNT instruction (from ftrace_regs_caller) by calling text_poke_apply_relocation(), as it is done for example by the x86 BPF JIT compiler through x86_call_depth_emit_accounting(). This corrects both CALL_DEPTH_ACCOUNT slots, in ftrace_caller and ftrace_regs_caller. [ bp: Massage. ] Fixes: 59bec00ace28 ("x86/percpu: Introduce %rip-relative addressing to PER_CPU_VAR()") Signed-off-by: Alexis Lothoré (eBPF Foundation) Signed-off-by: Borislav Petkov (AMD) Acked-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt Cc: Link: https://patch.msgid.link/20260527-fix_call_depth_in_trampoline-v1-1-1c1abc8ae310@bootlin.com --- arch/x86/kernel/ftrace.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0543b57f54ee..17d6edfcb7e0 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -375,6 +375,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) goto fail; } + /* + * Generated trampoline may contain rIP-relative addressing which + * displacement needs to be fixed. + */ + text_poke_apply_relocation(trampoline, trampoline, size, + (void *)start_offset, size); + /* * The address of the ftrace_ops that is used for this trampoline * is stored at the end of the trampoline. This will be used to -- cgit v1.2.3 From 8ba68464e4787b6a7ec938826e16124df20fd23d Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 26 May 2026 21:33:19 +0200 Subject: bonding: refuse to enslave CAN devices syzbot reported a kernel paging request crash in can_rx_unregister() inside net/can/af_can.c. The crash occurs because a virtual CAN device (vxcan) is being enslaved to a bonding master. During the enslavement process, the bonding driver mutates and modifies the network device states to fit an Ethernet-like aggregation model. However, CAN devices operate on a completely different Layer 2 architecture, relying on the CAN mid-layer private data structure (can_ml_priv) instead of standard Ethernet structures. Since bonding does not initialize or maintain these CAN structures, subsequent operations on the half-enslaved interface (such as closing associated sockets via isotp_release) lead to a null-pointer dereference when accessing the CAN receiver lists. Bonding CAN interfaces is architecturally invalid as CAN lacks MAC addresses, ARP capabilities, and standard Ethernet link-layer mechanisms. While generic loopback devices are blocked globally in net/core/dev.c, virtual CAN devices bypass this check because they do not carry the IFF_LOOPBACK flag, despite acting as local software-loopbacks. Fix this by explicitly blocking network devices of type ARPHRD_CAN from being enslaved at the very beginning of bond_enslave(). This prevents illegal state mutations, eliminates the resulting KASAN crashes, and avoids potential memory leaks from incomplete socket cleanups. As the CAN support has been added a long time after bonding the Fixes-tag points to the introduction of ARPHRD_CAN that would have needed a specific handling in bonding_main.c. Fixes: cd05acfe65ed ("[CAN]: Allocate protocol numbers for PF_CAN") Reported-by: syzbot+8ed98cbd0161632bce95@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=8ed98cbd0161632bce95 Signed-off-by: Oliver Hartkopp Acked-by: Jay Vosburgh Link: https://patch.msgid.link/20260526-bonding-candev-v1-1-ba1df400918a@hartkopp.net Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index af82a3df2c5d..82e779f7916b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1890,6 +1890,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, struct sockaddr_storage ss; int res = 0, i; + if (slave_dev->type == ARPHRD_CAN) { + BOND_NL_ERR(bond_dev, extack, + "CAN devices cannot be enslaved"); + return -EPERM; + } + if (slave_dev->flags & IFF_MASTER && !netif_is_bond_master(slave_dev)) { BOND_NL_ERR(bond_dev, extack, -- cgit v1.2.3 From 5eec4427b89c2fb2beac54920101e55a2f1c0c21 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 26 May 2026 09:48:16 +0300 Subject: bridge: Fix sleep in atomic context in netlink path Since the introduction of the netlink configuration path for bridge ports in commit 25c71c75ac87 ("bridge: bridge port parameters over netlink"), br_setport() was always called with the bridge lock held around it. Back then this decision made sense: The bridge lock protects the STP state of the bridge and its ports and at that time the function only processed three STP related netlink attributes (cost, priority and state). Nowadays, br_setport() processes a lot more attributes and most of them do not need the bridge lock: * Bridge flags: Only require RTNL. Read locklessly by the data path. Annotations can be added in net-next. * FDB port flushing: Only requires the FDB lock. * Multicast attributes: Only require the multicast lock. * Group forward mask: Only requires RTNL. Read locklessly by the data path. Annotations can be added in net-next. * Backup port and NHID: Only require RTNL. Read locklessly by the data path. This is a problem as the bridge calls dev_set_promiscuity() when certain bridge port flags change and this function can sleep since the commit cited below, resulting in a splat such as [1]. Fix this by reducing the scope of the bridge lock and only take it when processing the three STP related attributes that require it. This is consistent with the multicast attributes where each attribute acquires the multicast lock instead of having one critical section for all relevant attributes. [1] BUG: sleeping function called from invalid context at net/core/dev_addr_lists.c:1262 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 356, name: bridge preempt_count: 201, expected: 0 RCU nest depth: 0, expected: 0 2 locks held by bridge/356: #0: ffffffff919473a0 (rtnl_mutex){+.+.}-{4:4}, at: rtnetlink_rcv_msg (net/core/rtnetlink.c:80 net/core/rtnetlink.c:7002) #1: ffff888115072d58 (&br->lock){+...}-{3:3}, at: br_setlink (./include/linux/spinlock.h:348 net/bridge/br_netlink.c:1117) Preemption disabled at: 0x0 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 Call Trace: dump_stack_lvl (lib/dump_stack.c:94 lib/dump_stack.c:120) __might_resched.cold (kernel/sched/core.c:9163) netif_rx_mode_run (net/core/dev_addr_lists.c:1262) netif_rx_mode_sync (net/core/dev_addr_lists.c:1428) dev_set_promiscuity (net/core/dev_api.c:289) br_manage_promisc (net/bridge/br_if.c:135 net/bridge/br_if.c:172) br_port_flags_change (net/bridge/br_if.c:242 net/bridge/br_if.c:747) br_setport (net/bridge/br_netlink.c:1000) br_setlink (net/bridge/br_netlink.c:1118) rtnl_bridge_setlink (net/core/rtnetlink.c:5572) rtnetlink_rcv_msg (net/core/rtnetlink.c:7005) netlink_rcv_skb (net/netlink/af_netlink.c:2550) netlink_unicast (net/netlink/af_netlink.c:1318 net/netlink/af_netlink.c:1344) netlink_sendmsg (net/netlink/af_netlink.c:1894) __sock_sendmsg (net/socket.c:787 (discriminator 4) net/socket.c:802 (discriminator 4)) ____sys_sendmsg (net/socket.c:2698) ___sys_sendmsg (net/socket.c:2752) __sys_sendmsg (net/socket.c:2784) do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:121) Fixes: 78cd408356fe ("net: add missing instance lock to dev_set_promiscuity") Reviewed-by: Nikolay Aleksandrov Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20260526064818.272516-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_netlink.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index c04a4d0889ae..b9591dd755f9 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1000,19 +1000,25 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[], br_port_flags_change(p, changed_mask); if (tb[IFLA_BRPORT_COST]) { + spin_lock_bh(&p->br->lock); err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); + spin_unlock_bh(&p->br->lock); if (err) return err; } if (tb[IFLA_BRPORT_PRIORITY]) { + spin_lock_bh(&p->br->lock); err = br_stp_set_port_priority(p, nla_get_u16(tb[IFLA_BRPORT_PRIORITY])); + spin_unlock_bh(&p->br->lock); if (err) return err; } if (tb[IFLA_BRPORT_STATE]) { + spin_lock_bh(&p->br->lock); err = br_set_port_state(p, nla_get_u8(tb[IFLA_BRPORT_STATE])); + spin_unlock_bh(&p->br->lock); if (err) return err; } @@ -1114,9 +1120,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, if (err) return err; - spin_lock_bh(&p->br->lock); err = br_setport(p, tb, extack); - spin_unlock_bh(&p->br->lock); } else { /* Binary compatibility with old RSTP */ if (nla_len(protinfo) < sizeof(u8)) @@ -1203,17 +1207,10 @@ static int br_port_slave_changelink(struct net_device *brdev, struct nlattr *data[], struct netlink_ext_ack *extack) { - struct net_bridge *br = netdev_priv(brdev); - int ret; - if (!data) return 0; - spin_lock_bh(&br->lock); - ret = br_setport(br_port_get_rtnl(dev), data, extack); - spin_unlock_bh(&br->lock); - - return ret; + return br_setport(br_port_get_rtnl(dev), data, extack); } static int br_port_fill_slave_info(struct sk_buff *skb, -- cgit v1.2.3 From 6d34594cc619d0d4b07d5afcad8b5984f3526dcf Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 26 May 2026 09:48:17 +0300 Subject: bridge: Fix sleep in atomic context in sysfs path Since the start of the git history, brport_store() always acquired the bridge lock. Back then this decision made sense: The bridge lock protects the STP state of the bridge and its ports and at that time the function was only used by two STP related attributes (cost and priority). Nowadays, brport_store() processes a lot more attributes and most of them do not need the bridge lock: * Bridge flags: Only require RTNL. Read locklessly by the data path. Annotations can be added in net-next. * FDB port flushing: Only requires the FDB lock. * Multicast attributes: Only require the multicast lock. * Group forward mask: Only requires RTNL. Read locklessly by the data path. Annotations can be added in net-next. * Backup port: Only requires RTNL. Read locklessly by the data path. This is a problem as the bridge calls dev_set_promiscuity() when certain bridge port flags change and this function can sleep since the commit cited below, resulting in a splat such as [1]. Fix this by reducing the scope of the bridge lock and only take it when processing the two STP related attributes that require it. Remove the now stale comment from br_switchdev_set_port_flag(). The SWITCHDEV_F_DEFER flag can be removed in net-next. [1] BUG: sleeping function called from invalid context at net/core/dev_addr_lists.c:1262 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 372, name: bash preempt_count: 201, expected: 0 RCU nest depth: 0, expected: 0 5 locks held by bash/372: #0: ffff88810c51c3f0 (sb_writers#7){.+.+}-{0:0}, at: ksys_write (fs/read_write.c:740) #1: ffff888115ce9480 (&of->mutex){+.+.}-{4:4}, at: kernfs_fop_write_iter (fs/kernfs/file.c:343) #2: ffff88810b9fd330 (kn->active#37){.+.+}-{0:0}, at: kernfs_fop_write_iter (fs/kernfs/file.c:80 fs/kernfs/file.c:344) #3: ffffffffa59473a0 (rtnl_mutex){+.+.}-{4:4}, at: brport_store (net/bridge/br_sysfs_if.c:326) #4: ffff8881099d2d58 (&br->lock){+...}-{3:3}, at: brport_store (./include/linux/spinlock.h:348 net/bridge/br_sysfs_if.c:345) Preemption disabled at: 0x0 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 Call Trace: dump_stack_lvl (lib/dump_stack.c:94 lib/dump_stack.c:120) __might_resched.cold (kernel/sched/core.c:9163) netif_rx_mode_run (net/core/dev_addr_lists.c:1262) netif_rx_mode_sync (net/core/dev_addr_lists.c:1428) dev_set_promiscuity (net/core/dev_api.c:289) br_manage_promisc (net/bridge/br_if.c:135 net/bridge/br_if.c:172) br_port_flags_change (net/bridge/br_if.c:242 net/bridge/br_if.c:747) store_learning (net/bridge/br_sysfs_if.c:79 net/bridge/br_sysfs_if.c:235) brport_store (net/bridge/br_sysfs_if.c:346) kernfs_fop_write_iter (fs/kernfs/file.c:352) new_sync_write (fs/read_write.c:595) vfs_write (fs/read_write.c:688) ksys_write (fs/read_write.c:740) do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:121) Fixes: 78cd408356fe ("net: add missing instance lock to dev_set_promiscuity") Reviewed-by: Nikolay Aleksandrov Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20260526064818.272516-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_switchdev.c | 1 - net/bridge/br_sysfs_if.c | 30 ++++++++++++++++++++++-------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 18b558a931ad..ee3ad9dfbab9 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -99,7 +99,6 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p, attr.u.brport_flags.val = flags; attr.u.brport_flags.mask = mask; - /* We run from atomic context here */ err = call_switchdev_notifiers(SWITCHDEV_PORT_ATTR_SET, p->dev, &info.info, extack); err = notifier_to_errno(err); diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 1f57c36a7fc0..d6df81fa0d13 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -86,16 +86,34 @@ static ssize_t show_path_cost(struct net_bridge_port *p, char *buf) return sysfs_emit(buf, "%d\n", p->path_cost); } -static BRPORT_ATTR(path_cost, 0644, - show_path_cost, br_stp_set_path_cost); +static int store_path_cost(struct net_bridge_port *p, unsigned long v) +{ + int ret; + + spin_lock_bh(&p->br->lock); + ret = br_stp_set_path_cost(p, v); + spin_unlock_bh(&p->br->lock); + return ret; +} + +static BRPORT_ATTR(path_cost, 0644, show_path_cost, store_path_cost); static ssize_t show_priority(struct net_bridge_port *p, char *buf) { return sysfs_emit(buf, "%d\n", p->priority); } -static BRPORT_ATTR(priority, 0644, - show_priority, br_stp_set_port_priority); +static int store_priority(struct net_bridge_port *p, unsigned long v) +{ + int ret; + + spin_lock_bh(&p->br->lock); + ret = br_stp_set_port_priority(p, v); + spin_unlock_bh(&p->br->lock); + return ret; +} + +static BRPORT_ATTR(priority, 0644, show_priority, store_priority); static ssize_t show_designated_root(struct net_bridge_port *p, char *buf) { @@ -334,17 +352,13 @@ static ssize_t brport_store(struct kobject *kobj, ret = -ENOMEM; goto out_unlock; } - spin_lock_bh(&p->br->lock); ret = brport_attr->store_raw(p, buf_copy); - spin_unlock_bh(&p->br->lock); kfree(buf_copy); } else if (brport_attr->store) { val = simple_strtoul(buf, &endp, 0); if (endp == buf) goto out_unlock; - spin_lock_bh(&p->br->lock); ret = brport_attr->store(p, val); - spin_unlock_bh(&p->br->lock); } if (!ret) { -- cgit v1.2.3 From 147f3b1f23cbd74f1022cc5689570a06f6bc47c8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 26 May 2026 09:48:18 +0300 Subject: selftests: rtnetlink: Add bridge promiscuity tests Add two test cases that always pass, but trigger sleeping in atomic context BUGs without "bridge: Fix sleep in atomic context in netlink path" and "bridge: Fix sleep in atomic context in sysfs path". Reviewed-by: Nikolay Aleksandrov Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20260526064818.272516-4-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rtnetlink.sh | 63 ++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index c499953d4885..ace3a99023ed 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -24,6 +24,8 @@ ALL_TESTS=" kci_test_macsec kci_test_macsec_vlan kci_test_team_bridge_macvlan + kci_test_bridge_promisc_netlink + kci_test_bridge_promisc_sysfs kci_test_ipsec kci_test_ipsec_offload kci_test_fdb_get @@ -61,6 +63,14 @@ check_fail() fi } +sysfs_write() +{ + local val="$1" + local path="$2" + + echo "$val" > "$path" +} + run_cmd_common() { local cmd="$*" @@ -680,6 +690,59 @@ kci_test_team_bridge_macvlan() end_test "PASS: team_bridge_macvlan" } +# Test that changing bridge port flags via the netlink path does not sleep with +# the bridge spin lock held. +kci_test_bridge_promisc_netlink() +{ + local dummy="test_dummy1" + local bridge="test_br1" + local team="test_team1" + local ret=0 + + run_cmd ip link add $team up type team + run_cmd ip link add $bridge up type bridge vlan_filtering 1 + run_cmd ip link add $dummy up type dummy + run_cmd ip link set $dummy master $bridge + run_cmd ip link set $team master $bridge + + # This causes the bridge driver to sync all the static FDB entries to + # the team device (which supports unicast filtering) and remove it from + # promiscuous mode. The call to dev_set_promiscuity() can sleep due to + # Rx mode inlining, which is a problem if the bridge spin lock is held. + run_cmd bridge link set dev $dummy flood off learning off + + run_cmd ip link del $dummy + run_cmd ip link del $bridge + run_cmd ip link del $team + + end_test "PASS: bridge_promisc_netlink" +} + +# Same as kci_test_bridge_promisc_netlink(), but the flags are changed via the +# sysfs path. +kci_test_bridge_promisc_sysfs() +{ + local dummy="test_dummy1" + local bridge="test_br1" + local team="test_team1" + local ret=0 + + run_cmd ip link add $team up type team + run_cmd ip link add $bridge up type bridge vlan_filtering 1 + run_cmd ip link add $dummy up type dummy + run_cmd ip link set $dummy master $bridge + run_cmd ip link set $team master $bridge + + run_cmd sysfs_write 0 /sys/class/net/$dummy/brport/unicast_flood + run_cmd sysfs_write 0 /sys/class/net/$dummy/brport/learning + + run_cmd ip link del $dummy + run_cmd ip link del $bridge + run_cmd ip link del $team + + end_test "PASS: bridge_promisc_sysfs" +} + #------------------------------------------------------------------- # Example commands # ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \ -- cgit v1.2.3 From 7281b096b072f6c6e30420e3467d738f2e4c4b57 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 May 2026 08:35:24 -0700 Subject: ethtool: coalesce: cap profile updates at NET_DIM_PARAMS_NUM_PROFILES ethnl_update_profile() walks the ETHTOOL_A_PROFILE_IRQ_MODERATION nest list with an index 'i' and writes new_profile[i++] without bounding i. The destination is kmemdup()'d at NET_DIM_PARAMS_NUM_PROFILES entries (5), but the Netlink nest count is entirely user-controlled. Netlink policies do not have support for constraining the number of nested entries (or number of multi-attr entries). Fixes: f750dfe825b9 ("ethtool: provide customized dim profile management") Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20260526153533.2779187-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/coalesce.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c index 1e2c5c7048a8..e73fc3e5a02b 100644 --- a/net/ethtool/coalesce.c +++ b/net/ethtool/coalesce.c @@ -472,6 +472,12 @@ static int ethnl_update_profile(struct net_device *dev, nla_for_each_nested_type(nest, ETHTOOL_A_PROFILE_IRQ_MODERATION, nests, rem) { + if (i >= NET_DIM_PARAMS_NUM_PROFILES) { + NL_SET_BAD_ATTR(extack, nest); + ret = -E2BIG; + goto err_out; + } + ret = nla_parse_nested(tb, len_irq_moder - 1, nest, coalesce_irq_moderation_policy, extack); -- cgit v1.2.3 From a888bbd43940cada72f7686337741ce86d1cf869 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 May 2026 08:35:25 -0700 Subject: ethtool: tsconfig: fix reply error handling A couple of trivial bugs in error handling in tsconfig_send_reply(). If we failed to allocate rskb we need to set the error. If we did allocate it but failed to send it - we need to remember to free it. Fixes: 6e9e2eed4f39 ("net: ethtool: Add support for tsconfig command to get/set hwtstamp config") Reviewed-by: Vadim Fedorenko Reviewed-by: Kory Maincent Link: https://patch.msgid.link/20260526153533.2779187-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/tsconfig.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/ethtool/tsconfig.c b/net/ethtool/tsconfig.c index e4f518e49d4c..e9db4ee2299d 100644 --- a/net/ethtool/tsconfig.c +++ b/net/ethtool/tsconfig.c @@ -224,16 +224,21 @@ static int tsconfig_send_reply(struct net_device *dev, struct genl_info *info) reply_len = ret + ethnl_reply_header_size(); rskb = ethnl_reply_init(reply_len, dev, ETHTOOL_MSG_TSCONFIG_SET_REPLY, ETHTOOL_A_TSCONFIG_HEADER, info, &reply_payload); - if (!rskb) + if (!rskb) { + ret = -ENOMEM; goto err_cleanup; + } ret = tsconfig_fill_reply(rskb, &req_info->base, &reply_data->base); if (ret < 0) - goto err_cleanup; + goto err_free_msg; genlmsg_end(rskb, reply_payload); ret = genlmsg_reply(rskb, info); + rskb = NULL; +err_free_msg: + nlmsg_free(rskb); err_cleanup: kfree(reply_data); kfree(req_info); -- cgit v1.2.3 From 596c51ed9e125b12c4d85b4530dfd4c7847634b7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 May 2026 08:35:26 -0700 Subject: ethtool: linkstate: fix unbalanced ethnl_ops_complete() on PHY lookup error linkstate_prepare_data() calls ethnl_req_get_phydev() before ethnl_ops_begin(), but routes its error path through "goto out" which calls ethnl_ops_complete(). Fixes: fe55b1d401c6 ("ethtool: linkstate: migrate linkstate functions to support multi-PHY setups") Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20260526153533.2779187-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/linkstate.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index 8a5985fd7712..24569e92942c 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -106,10 +106,8 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_LINKSTATE_HEADER, info->extack); - if (IS_ERR(phydev)) { - ret = PTR_ERR(phydev); - goto out; - } + if (IS_ERR(phydev)) + return PTR_ERR(phydev); ret = ethnl_ops_begin(dev); if (ret < 0) -- cgit v1.2.3 From ab5bf428fb6bd361163c7247b92750d1d24ca2ed Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 May 2026 08:35:27 -0700 Subject: ethtool: pse-pd: fix missing ethnl_ops_complete() pse_prepare_data() is missing ethnl_ops_complete() if ethnl_req_get_phydev() returned an error. Move getting phydev up so that we don't have to worry about this (similar order to linkstate_prepare_data()). Note that phydev may still be NULL (this is checked in pse_get_pse_attributes()), the goal isn't really to avoid the _begin() / _complete() calls, only to simplify the error handling. While at it propagate the original error. Why this code overrides the error with -ENODEV but !phydev generates -EOPNOTSUPP is unclear to me... Fixes: 31748765bed3 ("net: ethtool: pse-pd: Target the command to the requested PHY") Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20260526153533.2779187-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/pse-pd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 2eb9bdc2dcb9..757c9e0cc856 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -62,14 +62,14 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - ret = ethnl_ops_begin(dev); - if (ret < 0) - return ret; - phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PSE_HEADER, info->extack); if (IS_ERR(phydev)) - return -ENODEV; + return PTR_ERR(phydev); + + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; ret = pse_get_pse_attributes(phydev, info->extack, data); -- cgit v1.2.3 From 6386bd772de64e6760306eb91c7e86163af6c22f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 May 2026 08:35:28 -0700 Subject: ethtool: tsconfig: fix missing ethnl_ops_complete() tsconfig_prepare_data() calls ethnl_ops_begin(), we need to call ethnl_ops_complete() before returning the error. Fixes: 6e9e2eed4f39 ("net: ethtool: Add support for tsconfig command to get/set hwtstamp config") Reviewed-by: Vadim Fedorenko Reviewed-by: Kory Maincent Link: https://patch.msgid.link/20260526153533.2779187-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/tsconfig.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ethtool/tsconfig.c b/net/ethtool/tsconfig.c index e9db4ee2299d..fc4f93cfa459 100644 --- a/net/ethtool/tsconfig.c +++ b/net/ethtool/tsconfig.c @@ -69,8 +69,10 @@ static int tsconfig_prepare_data(const struct ethnl_req_info *req_base, if (ret) goto out; - if (ts_info.phc_index == -1) - return -ENODEV; + if (ts_info.phc_index == -1) { + ret = -ENODEV; + goto out; + } data->hwprov_desc.index = ts_info.phc_index; data->hwprov_desc.qualifier = ts_info.phc_qualifier; -- cgit v1.2.3 From 1de405699c62c3a9544bcdcfb9eff8a01cfc7582 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 May 2026 08:35:29 -0700 Subject: ethtool: tsinfo: fix uninitialized stats on the by-PHC path tsinfo_prepare_data() has two code paths: a "by-PHC" path for user-specified hardware timestamping providers, and the old path. Commit 89e281ebff72 ("ethtool: init tsinfo stats if requested") added ethtool_stats_init() to mark stat slots as ETHTOOL_STAT_NOT_SET before the driver callback populates them, but placed the call inside the old-path block. When commit b9e3f7dc9ed9 ("net: ethtool: tsinfo: Enhance tsinfo to support several hwtstamp by net topology") added the by-PHC early return, it landed above the stats initialization. On that path the stats array retains the zero-fill from ethnl_init_reply_data()'s zalloc. This leads to the reply including a stats nest with four zero-valued attributes that should have been absent. Reject GET requests for stats with HWTSTAMP_PROVIDER or dump. Fixes: b9e3f7dc9ed9 ("net: ethtool: tsinfo: Enhance tsinfo to support several hwtstamp by net topology") Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20260526153533.2779187-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/tsinfo.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index a865f0fdd26b..f54fe6b662b2 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -83,6 +83,11 @@ tsinfo_parse_request(struct ethnl_req_info *req_base, if (!tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER]) return 0; + if (req_base->flags & ETHTOOL_FLAG_STATS) { + NL_SET_ERR_MSG(extack, "can't query statistics for a provider"); + return -EOPNOTSUPP; + } + return ts_parse_hwtst_provider(tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER], &req->hwprov_desc, extack, &mod); } @@ -523,6 +528,12 @@ int ethnl_tsinfo_start(struct netlink_callback *cb) if (ret < 0) goto free_reply_data; + if (req_info->base.flags & ETHTOOL_FLAG_STATS) { + NL_SET_ERR_MSG(cb->extack, "stats not supported in dump"); + ret = -EOPNOTSUPP; + goto err_dev_put; + } + ctx->req_info = req_info; ctx->reply_data = reply_data; ctx->pos_ifindex = 0; @@ -532,6 +543,8 @@ int ethnl_tsinfo_start(struct netlink_callback *cb) return 0; +err_dev_put: + ethnl_parse_header_dev_put(&req_info->base); free_reply_data: kfree(reply_data); free_req_info: -- cgit v1.2.3 From c3fc9976f686f9a95baf87db9d387f218fd65394 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 May 2026 08:35:30 -0700 Subject: ethtool: tsinfo: don't pass ERR_PTR to genlmsg_cancel on prepare failure The goto err label leads to: genlmsg_cancel(skb, ehdr); return ret; If ethnl_tsinfo_prepare_dump() failed, it has not started a genlmsg. There's nothing to cancel, and passing an error pointer to genlmsg_cancel() would cause a crash. Fixes: b9e3f7dc9ed9 ("net: ethtool: tsinfo: Enhance tsinfo to support several hwtstamp by net topology") Reviewed-by: Maxime Chevallier Reviewed-by: Kory Maincent Link: https://patch.msgid.link/20260526153533.2779187-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/tsinfo.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index f54fe6b662b2..14bf01e3b55c 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -407,10 +407,8 @@ static int ethnl_tsinfo_dump_one_netdev(struct sk_buff *skb, continue; ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb); - if (IS_ERR(ehdr)) { - ret = PTR_ERR(ehdr); - goto err; - } + if (IS_ERR(ehdr)) + return PTR_ERR(ehdr); reply_data->ts_info.phc_qualifier = ctx->pos_phcqualifier; ret = ops->get_ts_info(dev, &reply_data->ts_info); -- cgit v1.2.3 From a8d8bef6b45bf7cc0b1f6110c5cd8d0160a9bad7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 May 2026 08:35:31 -0700 Subject: ethtool: strset: fix header attribute index in ethnl_req_get_phydev() strset_prepare_data() passes ETHTOOL_A_HEADER_FLAGS (3) as the header attribute to ethnl_req_get_phydev(). This is incorrect, in the main attr space 3 is ETHTOOL_A_STRSET_COUNTS_ONLY, not the request header attr. The correct constant is ETHTOOL_A_STRSET_HEADER (1). ethnl_req_get_phydev() only uses this value for the extack, so this is not a "functionally visible"(?) bug. Fixes: e96c93aa4be9 ("net: ethtool: strset: Allow querying phy stats by index") Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20260526153533.2779187-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/strset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index bb1e829ba099..94c4718d31ae 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -311,7 +311,7 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base, return 0; } - phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_HEADER_FLAGS, + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_STRSET_HEADER, info->extack); /* phydev can be NULL, check for errors only */ -- cgit v1.2.3 From 2376586f85f972fefe701f095bb37dcfe7405d21 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 May 2026 08:35:32 -0700 Subject: ethtool: eeprom: add missing ethnl_ops_begin() / _complete() during fallback All ethtool driver op calls should be sandwiched between ethnl_ops_begin() / ethnl_ops_complete(). In Netlink eeprom code, if the paged access failed we fall back to old API, but we first call _complete() and the fallback never does its own ethnl_ops_begin(). Move the fallback into the _begin() / _complete() section. Fixes: 96d971e307cc ("ethtool: Add fallback to get_module_eeprom from netlink command") Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20260526153533.2779187-10-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/eeprom.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index a557e3996c85..836316df3092 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -141,12 +141,11 @@ static int eeprom_prepare_data(const struct ethnl_req_info *req_base, return 0; err_ops: + if (ret == -EOPNOTSUPP) + ret = eeprom_fallback(request, reply); ethnl_ops_complete(dev); err_free: kfree(page_data.data); - - if (ret == -EOPNOTSUPP) - return eeprom_fallback(request, reply); return ret; } -- cgit v1.2.3 From 67cfdd9210b99f260b3e0afeb9525e0acc7be31e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 May 2026 08:35:33 -0700 Subject: ethtool: eeprom: add more safeties to EEPROM Netlink fallback The Netlink fallback path for reading module EEPROM (fallback_set_params()) validates that offset < eeprom_len, but does not check that offset + length stays within eeprom_len. The ioctl equivalent (ethtool_get_any_eeprom() in ioctl.c) has always enforced both bounds: if (eeprom.offset + eeprom.len > total_len) return -EINVAL; This could lead to surprises in both drivers and device FW. Add the missing offset + length validation to fallback_set_params(), mirroring the ioctl. Similarly - ethtool core in general, and ethtool_get_any_eeprom() in particular tries to zero-init all buffers passed to the drivers to avoid any extra work of zeroing things out. eeprom_fallback() uses a plain kmalloc(), change it to zalloc. Fixes: 96d971e307cc ("ethtool: Add fallback to get_module_eeprom from netlink command") Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20260526153533.2779187-11-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/eeprom.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index 836316df3092..0b8cfeddb014 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -44,6 +44,9 @@ static int fallback_set_params(struct eeprom_req_info *request, if (offset >= modinfo->eeprom_len) return -EINVAL; + if (length > modinfo->eeprom_len - offset) + return -EINVAL; + eeprom->cmd = ETHTOOL_GMODULEEEPROM; eeprom->len = length; eeprom->offset = offset; @@ -69,7 +72,7 @@ static int eeprom_fallback(struct eeprom_req_info *request, if (err < 0) return err; - data = kmalloc(eeprom.len, GFP_KERNEL); + data = kzalloc(eeprom.len, GFP_KERNEL); if (!data) return -ENOMEM; err = ethtool_get_module_eeprom_call(dev, &eeprom, data); -- cgit v1.2.3 From 9d5e7a46a9f6d8f503b41bfefef70659845f1679 Mon Sep 17 00:00:00 2001 From: Rahul Chandelkar Date: Mon, 25 May 2026 21:10:31 +0530 Subject: ipv6: rpl: fix hdrlen overflow in ipv6_rpl_srh_decompress() ipv6_rpl_srh_decompress() computes: outhdr->hdrlen = (((n + 1) * sizeof(struct in6_addr)) >> 3); hdrlen is __u8. For n >= 127 the result exceeds 255 and silently truncates. With n=127 (cmpri=15, cmpre=15, pad=0, hdrlen=16): (128 * 16) >> 3 = 256, truncated to 0 as __u8 The caller in ipv6_rpl_srh_rcv() then places the compressed header at buf + ((ohdr->hdrlen + 1) << 3). With hdrlen=0 this is buf + 8, but the decompressed region occupies buf[0..2055] (8-byte header plus 128 full addresses). The compressed header overlaps the decompressed data, and ipv6_rpl_srh_compress() writes into this overlap, corrupting the routing header of the forwarded packet. The existing guard at exthdrs.c:546 checks (n + 1) > 255, which prevents n+1 from overflowing unsigned char (the segments_left field), but does not prevent the computed hdrlen from overflowing __u8. n=127 passes because 128 <= 255, yet hdrlen=256 does not fit. Tighten the bound to (n + 1) > 127. This caps n at 126, giving hdrlen = (127 * 16) >> 3 = 254, which fits in __u8. The compressed header then lands at buf + ((254 + 1) << 3) = buf + 2040, exactly past the decompressed region (buf[0..2039]). No overlap. 127 segments is well beyond any realistic RPL deployment. Fixes: 8610c7c6e3bd ("net: ipv6: add support for rpl sr exthdr") Signed-off-by: Rahul Chandelkar Link: https://patch.msgid.link/20260525154031.2290876-1-rc@rexion.ai Signed-off-by: Jakub Kicinski --- net/ipv6/exthdrs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index aca2a2abd2df..43f46ef9c53b 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -548,7 +548,7 @@ looped_back: * unsigned char which is segments_left field. Should not be * higher than that. */ - if (r || (n + 1) > 255) { + if (r || (n + 1) > 127) { kfree_skb(skb); return -1; } -- cgit v1.2.3 From 4c9ad387aa2d6785299722e54224d34764edaeb3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2026 16:53:54 +0200 Subject: iommu, debugobjects: avoid gcc-16.1 section mismatch warnings gcc-16 has gained some more advanced inter-procedual optimization techniques that enable it to inline the dummy_tlb_add_page() and dummy_tlb_flush() function pointers into a specialized version of __arm_v7s_unmap: WARNING: modpost: vmlinux: section mismatch in reference: __arm_v7s_unmap+0x2cc (section: .text) -> dummy_tlb_add_page (section: .init.text) ERROR: modpost: Section mismatches detected. >From what I can tell, the transformation is correct, as this is only called when __arm_v7s_unmap() is called from arm_v7s_do_selftests(), which is also __init. Since __arm_v7s_unmap() however is not __init, gcc cannot inline the inner function calls directly. In debug_objects_selftest(), the same thing happens. Both the caller and the leaf function are __init, but the IPA pulls it into a non-init one: WARNING: modpost: vmlinux: section mismatch in reference: lookup_object_or_alloc+0x7c (section: .text.lookup_object_or_alloc) -> is_static_object (section: .init.text) Marking the affected functions as not "__init" would reliably avoid this issue but is not a good solution because it removes an otherwise correct annotation. I tried marking the functions as 'noinline', but that ended up not covering all the affected configurations. With some more experimenting, I found that marking these functions as __attribute__((noipa)) is both logical and reliable. In order to keep the syntax readable, add a custom macro for this in include/linux/compiler_attributes.h next to other related macros and use it to annotate both files. Link: https://lore.kernel.org/all/abRB6g-48ZX6Yl2r@willie-the-truck/ Cc: Will Deacon Cc: Thomas Gleixner Cc: Andrew Morton Cc: Miguel Ojeda Cc: linux-kbuild@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Acked-by: Will Deacon Acked-by: Thomas Gleixner Acked-by: Miguel Ojeda Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm-v7s.c | 18 ++++++++++++------ include/linux/compiler_attributes.h | 11 +++++++++++ lib/debugobjects.c | 2 +- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 40e33257d3c2..1dbef8c55007 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -777,21 +777,27 @@ struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = { static struct io_pgtable_cfg *cfg_cookie __initdata; -static void __init dummy_tlb_flush_all(void *cookie) +/* + * __noipa prevents gcc from turning indirect iommu_flush_ops calls + * into direct calls from a specialized __arm_v7s_unmap() that triggers + * a build time section mismatch assertion. + */ +static __noipa void __init dummy_tlb_flush_all(void *cookie) { WARN_ON(cookie != cfg_cookie); } -static void __init dummy_tlb_flush(unsigned long iova, size_t size, - size_t granule, void *cookie) +static __noipa void __init dummy_tlb_flush(unsigned long iova, size_t size, + size_t granule, void *cookie) { WARN_ON(cookie != cfg_cookie); WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); } -static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather, - unsigned long iova, size_t granule, - void *cookie) +static __noipa void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, + size_t granule, + void *cookie) { dummy_tlb_flush(iova, granule, granule, cookie); } diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index c16d4199bf92..836a50f5917a 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -396,6 +396,17 @@ # define __disable_sanitizer_instrumentation #endif +/* + * Optional: not supported by clang + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Attributes.html#index-noipa + */ +#if __has_attribute(noipa) +# define __noipa __attribute__((noipa)) +#else +# define __noipa +#endif + /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 12e2e42e6a31..c93b7ca3e1ab 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -1212,7 +1212,7 @@ struct self_test { static __initconst const struct debug_obj_descr descr_type_test; -static bool __init is_static_object(void *addr) +static __noipa bool __init is_static_object(void *addr) { struct self_test *obj = addr; -- cgit v1.2.3 From 8aeb879baf12fe64889f019da9a4f8347c604e91 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 May 2026 11:06:31 +0200 Subject: x86/kvm/vmx: Fix x86_64 CFI build It was missed that idt_do_interrupt_irqoff() gets compiled on x84_64; this is a problem for CFI builds because it includes an unadorned indirect call. It is however completely dead code. Rework things to not emit this function at all. Fixes: 0701c9e17bd9 ("x86/kvm/vmx: Move IRQ/NMI dispatch from KVM into x86 core") Reported-by: Nathan Chancellor Reported-by: Calvin Owens Signed-off-by: Peter Zijlstra (Intel) Tested-by: Nathan Chancellor Link: https://patch.msgid.link/20260526090631.GA4149641@noisy.programming.kicks-ass.net --- arch/x86/entry/common.c | 2 +- arch/x86/entry/entry.S | 2 ++ arch/x86/kernel/idt.c | 12 ++---------- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 06c7c6ebd6f9..14cd43d4da6c 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -55,7 +55,7 @@ noinstr void x86_entry_from_kvm(unsigned int event_type, unsigned int vector) * The FRED NMI context is significantly different and will not work * right (specifically FRED fixed the NMI recursion issue). */ - idt_entry_from_kvm(vector); + idt_do_nmi_irqoff(); } EXPORT_SYMBOL_FOR_KVM(x86_entry_from_kvm); #endif diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index a56e043b266d..2bc217bb5475 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -109,11 +109,13 @@ EXPORT_SYMBOL(__ref_stack_chk_guard); RET .endm +#ifndef CONFIG_X86_64 .pushsection .text, "ax" SYM_FUNC_START(idt_do_interrupt_irqoff) IDT_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1 SYM_FUNC_END(idt_do_interrupt_irqoff) .popsection +#endif .pushsection .noinstr.text, "ax" SYM_FUNC_START(idt_do_nmi_irqoff) diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 7bcf1decc034..90a22e24a9eb 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -268,18 +268,10 @@ void __init idt_setup_early_pf(void) } #endif -#if IS_ENABLED(CONFIG_KVM_INTEL) -noinstr void idt_entry_from_kvm(unsigned int vector) +#if IS_ENABLED(CONFIG_KVM_INTEL) && !defined(CONFIG_X86_64) +void idt_entry_from_kvm(unsigned int vector) { - if (vector == NMI_VECTOR) - return idt_do_nmi_irqoff(); - - /* - * Only the NMI path requires noinstr. - */ - instrumentation_begin(); idt_do_interrupt_irqoff(gate_offset(idt_table + vector)); - instrumentation_end(); } #endif -- cgit v1.2.3 From 98b34f3e8c3492cfc89ff943c9d92b4d52863d1d Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 25 May 2026 08:25:48 -0400 Subject: net: Introduce skb tc depth field to track packet loops Add a 2-bit per-skb tc depth field to track packet loops across the stack. The previous per-CPU loop counters like MIRRED_NEST_LIMIT assume a single call stack and lose state in two cases: 1) When a packet is queued and reprocessed later (e.g., egress->ingress via backlog), the per-cpu state is gone by the time it is dequeued. 2) With XPS/RPS a packet may arrive on one CPU and be processed on another. A per-skb field solves both by travelling with the packet itself. The field fits in existing padding, using 2 bits that were previously a hole: pahole before(-) and after (+) diff looks like: __u8 slow_gro:1; /* 132: 3 1 */ __u8 csum_not_inet:1; /* 132: 4 1 */ __u8 unreadable:1; /* 132: 5 1 */ + __u8 tc_depth:2; /* 132: 6 1 */ - /* XXX 2 bits hole, try to pack */ /* XXX 1 byte hole, try to pack */ __u16 tc_index; /* 134 2 */ There used to be a ttl field which was removed as part of tc_verd in commit aec745e2c520 ("net-tc: remove unused tc_verd fields"). It was already unused by that time, due to remove earlier in commit c19ae86a510c ("tc: remove unused redirect ttl"). The first user of this field is netem, which increments tc_depth on duplicated packets before re-enqueueing them at the root qdisc. On re-entry, netem skips duplication for any skb with tc_depth already set, bounding recursion to a single level regardless of tree topology. The other user is mirred which increments it on each pass and limits to depth to MIRRED_DEFER_LIMIT (3). The new field was called ttl in earlier versions of this patch but renamed to tc_depth to avoid confusion with IP ttl. Note (looking at you Sashiko! Dont ignore me and continue bringing this up): 1. Since both mirred and netem utilize the same 2-bit tc_depth field it is possible when netem and mirred are used together that netem qdisc to skip the duplication step. This is a known trade-off, as a 2-bit field cannot independently track both features' recursion depths and it is not considered sane to have a setup that addresses both features on at the same time. 2. skb_scrub_packet does not clear tc_depth. This means a packet's loop history is preserved even across namespaces. While this might be restrictive for some topologies, it is also design intent to provide robustness against loops across namespaces. Reviewed-by: Stephen Hemminger Signed-off-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260525122556.973584-2-jhs@mojatatu.com Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2bcf78a4de7b..3f06254ab1b7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -821,6 +821,7 @@ enum skb_tstamp_type { * @_sk_redir: socket redirection information for skmsg * @_nfct: Associated connection, if any (with nfctinfo bits) * @skb_iif: ifindex of device we arrived on + * @tc_depth: counter for packet duplication * @tc_index: Traffic control index * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices @@ -1030,6 +1031,7 @@ struct sk_buff { __u8 csum_not_inet:1; #endif __u8 unreadable:1; + __u8 tc_depth:2; #if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS) __u16 tc_index; /* traffic control index */ #endif -- cgit v1.2.3 From eda0b7f203bb166c98d1418b204135bd566ac83b Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 25 May 2026 08:25:49 -0400 Subject: net/sched: Revert "net/sched: Restrict conditions for adding duplicating netems to qdisc tree" This reverts commit ec8e0e3d7adef940cdf9475e2352c0680189d14e. The original patch rejects any tree containing two netems when either has duplication set, even when they sit on unrelated classes of the same classful parent. That broke configurations that have worked since netem was introduced. The re-entrancy problem the original commit was trying to solve is handled by later patch using tc_depth flag. Doing this revert will (re)expose the original bug with multiple netem duplication. When this patch is backported make sure and get the full series. Fixes: ec8e0e3d7ade ("net/sched: Restrict conditions for adding duplicating netems to qdisc tree") Reported-by: Ji-Soo Chung Reported-by: Gerlinde Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220774 Reported-by: zyc zyc Closes: https://lore.kernel.org/all/19adda5a1e2.12410b78222774.9191120410578703463@zohomail.cn/ Reported-by: Manas Ghandat Closes: https://lore.kernel.org/netdev/f69b2c8f-8325-4c2e-a011-6dbc089f30e4@gmail.com/ Reviewed-by: Stephen Hemminger Signed-off-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260525122556.973584-3-jhs@mojatatu.com Signed-off-by: Paolo Abeni --- net/sched/sch_netem.c | 40 ---------------------------------------- 1 file changed, 40 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index bc18e1976b6e..d97acd2f3923 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1007,41 +1007,6 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, return 0; } -static const struct Qdisc_class_ops netem_class_ops; - -static int check_netem_in_tree(struct Qdisc *sch, bool duplicates, - struct netlink_ext_ack *extack) -{ - struct Qdisc *root, *q; - unsigned int i; - - root = qdisc_root_sleeping(sch); - - if (sch != root && root->ops->cl_ops == &netem_class_ops) { - if (duplicates || - ((struct netem_sched_data *)qdisc_priv(root))->duplicate) - goto err; - } - - if (!qdisc_dev(root)) - return 0; - - hash_for_each(qdisc_dev(root)->qdisc_hash, i, q, hash) { - if (sch != q && q->ops->cl_ops == &netem_class_ops) { - if (duplicates || - ((struct netem_sched_data *)qdisc_priv(q))->duplicate) - goto err; - } - } - - return 0; - -err: - NL_SET_ERR_MSG(extack, - "netem: cannot mix duplicating netems with other netems in tree"); - return -EINVAL; -} - /* Parse netlink message to set options */ static int netem_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) @@ -1118,11 +1083,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, q->gap = qopt->gap; q->counter = 0; q->loss = qopt->loss; - - ret = check_netem_in_tree(sch, qopt->duplicate, extack); - if (ret) - goto unlock; - q->duplicate = qopt->duplicate; /* for compatibility with earlier versions. -- cgit v1.2.3 From b213a4c6074fc4ee4f1cdef9a73b34732606b637 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 25 May 2026 08:25:50 -0400 Subject: Revert "selftests/tc-testing: Add tests for restrictions on netem duplication" This reverts commit ecdec65ec78d67d3ebd17edc88b88312054abe0d. The tests added were related to check_netem_in_tree() which was just reverted in the previous patch. Reviewed-by: Stephen Hemminger Signed-off-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260525122556.973584-4-jhs@mojatatu.com Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/qdiscs.json | 5 +- .../tc-testing/tc-tests/qdiscs/netem.json | 81 ---------------------- 2 files changed, 3 insertions(+), 83 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 848696c373fc..82c38a13dfbf 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -702,6 +702,7 @@ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1", "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2", "ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true", "$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1", @@ -714,8 +715,8 @@ { "kind": "hfsc", "handle": "1:", - "bytes": 294, - "packets": 3 + "bytes": 392, + "packets": 4 } ], "matchCount": "1", diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json index 718d2df2aafa..3c4444961488 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json @@ -336,86 +336,5 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] - }, - { - "id": "d34d", - "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change root", - "category": ["qdisc", "netem"], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DUMMY root handle 1: netem limit 1", - "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1" - ], - "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: netem duplicate 50%", - "expExitCode": "2", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc netem", - "matchCount": "2", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1:0 root" - ] - }, - { - "id": "b33f", - "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change non-root", - "category": ["qdisc", "netem"], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DUMMY root handle 1: netem limit 1", - "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1" - ], - "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 2: netem duplicate 50%", - "expExitCode": "2", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc netem", - "matchCount": "2", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1:0 root" - ] - }, - { - "id": "cafe", - "name": "NETEM test qdisc duplication restriction in qdisc tree", - "category": ["qdisc", "netem"], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DUMMY root handle 1: netem limit 1 duplicate 100%" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1: handle 2: netem duplicate 100%", - "expExitCode": "2", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc netem", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1:0 root" - ] - }, - { - "id": "1337", - "name": "NETEM test qdisc duplication restriction in qdisc tree across branches", - "category": ["qdisc", "netem"], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DUMMY parent root handle 1:0 hfsc", - "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc rt m2 10Mbit", - "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem", - "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc rt m2 10Mbit" - ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", - "expExitCode": "2", - "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc netem", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DUMMY handle 1:0 root" - ] } ] -- cgit v1.2.3 From 9552b11e3edabc97cfcd9f29103d5afbce7ae183 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 25 May 2026 08:25:51 -0400 Subject: net/sched: fix packet loop on netem when duplicate is on When netem duplicates a packet it re-enqueues the copy at the root qdisc. If another netem sits in the tree the copy can be duplicated again, recursing until the stack or memory is exhausted. The original duplication guard temporarily zeroed q->duplicate around the re-enqueue, but that does not cover all cases because it is per-qdisc state shared across all concurrent enqueue paths and is not safe without additional locking. Use the skb tc_depth field introduced in an earlier patch: - increment it on the duplicate before re-enqueue - skip duplication for any skb whose tc_depth is already non-zero. This marks the packet itself rather than mutating qdisc state, therefore it is safe regardless of tree topology or concurrency. Fixes: 0afb51e72855 ("[PKT_SCHED]: netem: reinsert for duplication") Reported-by: William Liu Reported-by: Savino Dicanosa Closes: https://lore.kernel.org/netdev/8DuRWwfqjoRDLDmBMlIfbrsZg9Gx50DHJc1ilxsEBNe2D6NMoigR_eIRIG0LOjMc3r10nUUZtArXx4oZBIdUfZQrwjcQhdinnMis_0G7VEk=@willsroot.io/ Co-developed-by: Victor Nogueira Signed-off-by: Victor Nogueira Reviewed-by: William Liu Reviewed-by: Stephen Hemminger Signed-off-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260525122556.973584-5-jhs@mojatatu.com Signed-off-by: Paolo Abeni --- net/sched/sch_netem.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index d97acd2f3923..17a79fe2f091 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -461,7 +461,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, skb->prev = NULL; /* Random duplication */ - if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng)) + if (q->duplicate && skb->tc_depth == 0 && + q->duplicate >= get_crandom(&q->dup_cor, &q->prng)) ++count; /* Drop packet? */ @@ -540,11 +541,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, */ if (skb2) { struct Qdisc *rootq = qdisc_root_bh(sch); - u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ - q->duplicate = 0; + skb2->tc_depth++; /* prevent duplicating a dup... */ rootq->enqueue(skb2, rootq, to_free); - q->duplicate = dupsave; skb2 = NULL; } -- cgit v1.2.3 From db875221ab08d213a83bf30196ae8b64d55a3403 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 25 May 2026 08:25:52 -0400 Subject: net/sched: Fix ethx:ingress -> ethy:egress -> ethx:ingress mirred loop When mirred redirects to ingress (from either ingress or egress) the loop state from sched_mirred_dev array dev is lost because of 1) the packet deferral into the backlog and 2) the fact the sched_mirred_dev array is cleared. In such cases, if there was a loop we won't discover it. Here's a simple test to reproduce: ip a add dev port0 10.10.10.11/24 tc qdisc add dev port0 clsact tc filter add dev port0 egress protocol ip \ prio 10 matchall action mirred ingress redirect dev port1 tc qdisc add dev port1 clsact tc filter add dev port1 ingress protocol ip \ prio 10 matchall action mirred egress redirect dev port0 ping -c 1 -W0.01 10.10.10.10 Fixes: fe946a751d9b ("net/sched: act_mirred: add loop detection") Tested-by: Victor Nogueira Reviewed-by: Stephen Hemminger Signed-off-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260525122556.973584-6-jhs@mojatatu.com Signed-off-by: Paolo Abeni --- net/sched/act_mirred.c | 47 ++++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 2c5a7a321a94..dd5e7ea7ef26 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -26,6 +26,10 @@ #include #include +#define MIRRED_DEFER_LIMIT 3 +_Static_assert(MIRRED_DEFER_LIMIT <= 3, + "MIRRED_DEFER_LIMIT exceeds tc_depth bitfield width"); + static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); @@ -234,12 +238,15 @@ tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb) { int err; - if (!want_ingress) + if (!want_ingress) { err = tcf_dev_queue_xmit(skb, dev_queue_xmit); - else if (!at_ingress) - err = netif_rx(skb); - else - err = netif_receive_skb(skb); + } else { + skb->tc_depth++; + if (!at_ingress) + err = netif_rx(skb); + else + err = netif_receive_skb(skb); + } return err; } @@ -426,6 +433,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, struct netdev_xmit *xmit; bool m_mac_header_xmit; struct net_device *dev; + bool want_ingress; int i, m_eaction; u32 blockid; @@ -434,7 +442,8 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, #else xmit = this_cpu_ptr(&softnet_data.xmit); #endif - if (unlikely(xmit->sched_mirred_nest >= MIRRED_NEST_LIMIT)) { + if (unlikely(xmit->sched_mirred_nest >= MIRRED_NEST_LIMIT || + skb->tc_depth >= MIRRED_DEFER_LIMIT)) { net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", netdev_name(skb->dev)); return TC_ACT_SHOT; @@ -453,23 +462,27 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, tcf_action_inc_overlimit_qstats(&m->common); return retval; } - for (i = 0; i < xmit->sched_mirred_nest; i++) { - if (xmit->sched_mirred_dev[i] != dev) - continue; - pr_notice_once("tc mirred: loop on device %s\n", - netdev_name(dev)); - tcf_action_inc_overlimit_qstats(&m->common); - return retval; - } - xmit->sched_mirred_dev[xmit->sched_mirred_nest++] = dev; + m_eaction = READ_ONCE(m->tcfm_eaction); + want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + if (!want_ingress) { + for (i = 0; i < xmit->sched_mirred_nest; i++) { + if (xmit->sched_mirred_dev[i] != dev) + continue; + pr_notice_once("tc mirred: loop on device %s\n", + netdev_name(dev)); + tcf_action_inc_overlimit_qstats(&m->common); + return retval; + } + xmit->sched_mirred_dev[xmit->sched_mirred_nest++] = dev; + } m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); - m_eaction = READ_ONCE(m->tcfm_eaction); retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction, retval); - xmit->sched_mirred_nest--; + if (!want_ingress) + xmit->sched_mirred_nest--; return retval; } -- cgit v1.2.3 From a005fa5d7502eefec7ee6e1c01adadc06de2f9ad Mon Sep 17 00:00:00 2001 From: "Kito Xu (veritas501)" Date: Mon, 25 May 2026 08:25:53 -0400 Subject: net/sched: act_mirred: Fix blockcast recursion bypass leading to stack overflow tcf_mirred_act() checks sched_mirred_nest against MIRRED_NEST_LIMIT (4) to prevent deep recursion. However, when the action uses blockcast (tcfm_blockid != 0), the function returns at the tcf_blockcast() call BEFORE reaching the counter increment. As a result, the recursion counter never advances and the limit check is entirely bypassed. When two devices share a TC egress block with a mirred blockcast rule, a packet egressing on device A is mirrored to device B via blockcast; device B's egress TC re-enters tcf_mirred_act() via blockcast and mirrors back to A, creating an unbounded recursion loop: tcf_mirred_act -> tcf_blockcast -> tcf_mirred_to_dev -> dev_queue_xmit -> sch_handle_egress -> tcf_classify -> tcf_mirred_act -> (repeat) This recursion continues until the kernel stack overflows. The bug is reachable from an unprivileged user via unshare(CLONE_NEWUSER | CLONE_NEWNET): user namespaces grant CAP_NET_ADMIN in the new network namespace, which is sufficient to create dummy devices, attach clsact qdiscs with shared blocks, and install mirred blockcast filters. BUG: TASK stack guard page was hit at ffffc90000b7fff8 Oops: stack guard page: 0000 [#1] SMP KASAN NOPTI CPU: 2 UID: 1000 PID: 169 Comm: poc Not tainted 7.0.0-rc7-next-20260410 RIP: 0010:xas_find+0x17/0x480 Call Trace: xa_find+0x17b/0x1d0 tcf_mirred_act+0x640/0x1060 tcf_action_exec+0x400/0x530 basic_classify+0x128/0x1d0 tcf_classify+0xd83/0x1150 tc_run+0x328/0x620 __dev_queue_xmit+0x797/0x3100 tcf_mirred_to_dev+0x7b1/0xf70 tcf_mirred_act+0x68a/0x1060 [repeating ~30+ times until stack overflow] Kernel panic - not syncing: Fatal exception in interrupt Fix this by incrementing sched_mirred_nest before calling tcf_blockcast() and decrementing it on return, mirroring the non-blockcast path. This ensures subsequent recursive entries see the updated counter and are correctly limited by MIRRED_NEST_LIMIT. Fixes: fe946a751d9b ("net/sched: act_mirred: add loop detection") Signed-off-by: Kito Xu (veritas501) Link: https://patch.msgid.link/20260525122556.973584-7-jhs@mojatatu.com Signed-off-by: Paolo Abeni --- net/sched/act_mirred.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index dd5e7ea7ef26..dbe4a4ff3e08 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -396,14 +396,12 @@ static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m, static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m, const u32 blockid, struct tcf_result *res, - int retval) + int m_eaction, int retval) { const u32 exception_ifindex = skb->dev->ifindex; struct tcf_block *block; bool is_redirect; - int m_eaction; - m_eaction = READ_ONCE(m->tcfm_eaction); is_redirect = tcf_mirred_is_act_redirect(m_eaction); /* we are already under rcu protection, so can call block lookup @@ -453,8 +451,16 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, tcf_action_update_bstats(&m->common, skb); blockid = READ_ONCE(m->tcfm_blockid); - if (blockid) - return tcf_blockcast(skb, m, blockid, res, retval); + m_eaction = READ_ONCE(m->tcfm_eaction); + want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + if (blockid) { + if (!want_ingress) + xmit->sched_mirred_dev[xmit->sched_mirred_nest++] = NULL; + retval = tcf_blockcast(skb, m, blockid, res, m_eaction, retval); + if (!want_ingress) + xmit->sched_mirred_nest--; + return retval; + } dev = rcu_dereference_bh(m->tcfm_dev); if (unlikely(!dev)) { @@ -463,8 +469,6 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, return retval; } - m_eaction = READ_ONCE(m->tcfm_eaction); - want_ingress = tcf_mirred_act_wants_ingress(m_eaction); if (!want_ingress) { for (i = 0; i < xmit->sched_mirred_nest; i++) { if (xmit->sched_mirred_dev[i] != dev) -- cgit v1.2.3 From e80ad525fc7e8c933ad78478c5dda286cfd55c60 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Mon, 25 May 2026 08:25:54 -0400 Subject: net/sched: act_mirred: Fix return code in early mirred redirect error paths Since retval is set as TC_ACT_STOLEN in the mirred redirect case, returning retval in cases where redirect failed will make the callers not register the skb as being dropped. Fix this by returning TC_ACT_SHOT instead in such scenarios. Fixes: 16085e48cb48 ("net/sched: act_mirred: Create function tcf_mirred_to_dev and improve readability") Reported-by: Sashiko Closes: https://sashiko.dev/#/patchset/20260413082027.2244884-1-hxzene%40gmail.com Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20260525122556.973584-8-jhs@mojatatu.com Signed-off-by: Paolo Abeni --- net/sched/act_mirred.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index dbe4a4ff3e08..553342c55cf7 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -372,7 +372,8 @@ assign_prev: dev_is_mac_header_xmit(dev_prev), m_eaction, retval); - return retval; + /* If the packet wasn't redirected, we have to register as a drop */ + return TC_ACT_SHOT; } static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m, @@ -410,7 +411,7 @@ static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m, block = tcf_block_lookup(dev_net(skb->dev), blockid); if (!block || xa_empty(&block->ports)) { tcf_action_inc_overlimit_qstats(&m->common); - return retval; + return is_redirect ? TC_ACT_SHOT : retval; } if (is_redirect) @@ -428,8 +429,8 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, { struct tcf_mirred *m = to_mirred(a); int retval = READ_ONCE(m->tcf_action); + bool m_mac_header_xmit, is_redirect; struct netdev_xmit *xmit; - bool m_mac_header_xmit; struct net_device *dev; bool want_ingress; int i, m_eaction; @@ -462,11 +463,13 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, return retval; } + is_redirect = tcf_mirred_is_act_redirect(m_eaction); + dev = rcu_dereference_bh(m->tcfm_dev); if (unlikely(!dev)) { pr_notice_once("tc mirred: target device is gone\n"); tcf_action_inc_overlimit_qstats(&m->common); - return retval; + goto err_out; } if (!want_ingress) { @@ -476,7 +479,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, pr_notice_once("tc mirred: loop on device %s\n", netdev_name(dev)); tcf_action_inc_overlimit_qstats(&m->common); - return retval; + goto err_out; } xmit->sched_mirred_dev[xmit->sched_mirred_nest++] = dev; } @@ -489,6 +492,11 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, xmit->sched_mirred_nest--; return retval; + +err_out: + if (is_redirect) + retval = TC_ACT_SHOT; + return retval; } static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, -- cgit v1.2.3 From d38dc56a0225664e494221b5b251931b35d125ef Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Mon, 25 May 2026 08:25:55 -0400 Subject: selftests/tc-testing: Add mirred test cases exercising loops Add mirred loop test cases to validate that those will be caught and other test cases that were previously misinterpreted as loops by mirred. This commit adds 12 test cases: - Redirect multiport: dummy egress -> dev1 ingress -> dummy egress (Loop) - Redirect singleport: dev1 ingress -> dev1 egress -> dev1 ingress (Loop) - Redirect multiport: dev1 ingress -> dummy ingress -> dev1 egress (No Loop) - Redirect multiport: dev1 ingress -> dummy ingress -> dev1 ingress (Loop) - Redirect multiport: dev1 ingress -> dummy egress -> dev1 ingress (Loop) - Redirect multiport: dummy egress -> dev1 ingress -> dummy egress, different prios (Loop) - Redirect multiport: dev1 ingress -> dummy ingress -> dummy egress -> dev1 egress (No Loop) - Redirect multiport: dev1 ingress -> dummy egress -> dev1 egress (No Loop) - Redirect multiport: dev1 ingress -> dummy egress -> dummy ingress (No Loop) - Redirect singleport: dev1 ingress -> dev1 ingress (Loop) - Redirect singleport: dummy egress -> dummy ingress (No Loop) - Redirect multiport: dev1 ingress -> dummy ingress -> dummy egress (No Loop) Acked-by: Jamal Hadi Salim Acked-by: Stephen Hemminger Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20260525122556.973584-9-jhs@mojatatu.com Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/actions/mirred.json | 616 ++++++++++++++++++++- 1 file changed, 615 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index b056eb966871..d0cad6571691 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -1144,6 +1144,620 @@ "teardown": [ "$TC qdisc del dev $DUMMY clsact" ] + }, + { + "id": "531c", + "name": "Redirect multiport: dummy egress -> dev1 ingress -> dummy egress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY clsact", + "$TC filter add dev $DUMMY egress protocol ip prio 10 matchall action mirred ingress redirect dev $DEV1 index 1", + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 2" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 3 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY clsact", + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "b1d7", + "name": "Redirect singleport: dev1 ingress -> dev1 egress -> dev1 ingress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DEV1 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 egress protocol ip prio 11 matchall action mirred ingress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 3 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "c66d", + "name": "Redirect multiport: dev1 ingress -> dummy ingress -> dev1 egress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY ingress protocol ip prio 11 matchall action mirred egress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "aa99", + "name": "Redirect multiport: dev1 ingress -> dummy ingress -> dev1 ingress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY ingress protocol ip prio 11 matchall action mirred ingress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 2, + "overlimits": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "37d7", + "name": "Redirect multiport: dev1 ingress -> dummy egress -> dev1 ingress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY egress protocol ip prio 11 matchall action mirred ingress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 3 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "6d02", + "name": "Redirect multiport: dummy egress -> dev1 ingress -> dummy egress, different prios (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY clsact", + "$TC filter add dev $DUMMY egress protocol ip prio 10 matchall action mirred ingress redirect dev $DEV1 index 1", + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 11 matchall action mirred egress redirect dev $DUMMY index 2" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 3 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY clsact", + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "8115", + "name": "Redirect multiport: dev1 ingress -> dummy ingress -> dummy egress -> dev1 egress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact", + "$TC filter add dev $DUMMY ingress protocol ip prio 11 matchall action mirred egress redirect dev $DUMMY index 2" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY egress protocol ip prio 12 matchall action mirred egress redirect dev $DEV1 index 3", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "9eb3", + "name": "Redirect multiport: dev1 ingress -> dummy egress -> dev1 egress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY egress protocol ip prio 11 matchall action mirred egress redirect dev $DEV1 index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "d837", + "name": "Redirect multiport: dev1 ingress -> dummy egress -> dummy ingress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY egress protocol ip prio 11 matchall action mirred ingress redirect dev $DUMMY index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "2071", + "name": "Redirect singleport: dev1 ingress -> dev1 ingress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DEV1 index 1", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1, + "overlimits": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] + }, + { + "id": "0101", + "name": "Redirect singleport: dummy egress -> dummy ingress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY clsact", + "$TC filter add dev $DUMMY egress protocol ip prio 11 matchall action mirred ingress redirect dev $DUMMY index 1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY clsact" + ] + }, + { + "id": "cf97", + "name": "Redirect multiport: dev1 ingress -> dummy ingress -> dummy egress (No Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip prio 10 matchall action mirred ingress redirect dev $DUMMY index 1", + "$TC qdisc add dev $DUMMY clsact" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY ingress protocol ip prio 11 matchall action mirred egress redirect dev $DUMMY index 2", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "index": 1, + "stats": { + "packets": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DEV1 clsact", + "$TC qdisc del dev $DUMMY clsact" + ] } - ] -- cgit v1.2.3 From 0f6e00aa5f652f5653e0039b9c9a8835f4b4174b Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Mon, 25 May 2026 08:25:56 -0400 Subject: selftests/tc-testing: Add netem test case exercising loops Add a netem nested duplicate test case to validate that it won't cause an infinite loop Acked-by: Jamal Hadi Salim Acked-by: Stephen Hemminger Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20260525122556.973584-10-jhs@mojatatu.com Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/qdiscs/netem.json | 33 +++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json index 3c4444961488..472b672a600d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json @@ -336,5 +336,36 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] - } + }, + { + "id": "8c17", + "name": "Test netem's recursive duplicate", + "category": [ + "qdisc", + "netem" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: netem limit 1000 duplicate 100%", + "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1000 duplicate 100%" + ], + "cmdUnderTest": "ping -c 1 10.10.11.11 -W 0.01", + "expExitCode": "1", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY root", + "matchJSON": [ + { + "kind": "netem", + "handle": "1:", + "bytes": 294, + "packets": 3 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + } ] -- cgit v1.2.3 From 463a1271aa26eac992851b9d98cc75bc3cd4a1ed Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Mon, 25 May 2026 22:45:24 +0800 Subject: net: hibmcge: disable Relaxed Ordering to fix RX packet corruption When SMMU is disabled, the hibmcge driver may receive corrupted packets. The hardware writes packet data and descriptors to the same page, but with Relaxed Ordering enabled, PCI write transactions may not be strictly ordered. This can cause the driver to observe a valid descriptor before the corresponding packet data is fully written. Fix this by clearing PCI_EXP_DEVCTL_RELAX_EN in the PCI bridge control register to ensure strict write ordering between packet data and descriptors. Fixes: f72e25594061 ("net: hibmcge: Implement rx_poll function to receive packets") Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20260525144525.94884-2-shaojijie@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c index 068da2fd1fea..f721e9893804 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c @@ -420,6 +420,9 @@ static int hbg_pci_init(struct pci_dev *pdev) return -ENOMEM; pci_set_master(pdev); + pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_RELAX_EN); + pci_save_state(pdev); return 0; } -- cgit v1.2.3 From b545b6ea1802b32436fa97f1d2918718212cc831 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Mon, 25 May 2026 22:45:25 +0800 Subject: net: hibmcge: move dma_rmb() after dma_sync_single_for_cpu() in RX path The dma_rmb() barrier was placed before dma_sync_single_for_cpu(), which is incorrect. DMA sync must complete first to make the buffer accessible to the CPU, then the rmb barrier ensures subsequent descriptor reads observe the latest data written by the hardware. Reorder the operations so dma_sync_single_for_cpu() is called before dma_rmb() to guarantee the driver reads consistent data from the DMA buffer. Fixes: f72e25594061 ("net: hibmcge: Implement rx_poll function to receive packets") Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20260525144525.94884-3-shaojijie@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c index a4ea92c31c2f..0ae314994676 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c @@ -452,12 +452,12 @@ static bool hbg_sync_data_from_hw(struct hbg_priv *priv, { struct hbg_rx_desc *rx_desc; - /* make sure HW write desc complete */ - dma_rmb(); - dma_sync_single_for_cpu(&priv->pdev->dev, buffer->page_dma, buffer->page_size, DMA_FROM_DEVICE); + /* make sure HW write desc complete */ + dma_rmb(); + rx_desc = (struct hbg_rx_desc *)buffer->page_addr; return FIELD_GET(HBG_RX_DESC_W2_PKT_LEN_M, rx_desc->word2) != 0; } -- cgit v1.2.3 From 98d0912e9f841e5529a5b89a972805f34cb1c69d Mon Sep 17 00:00:00 2001 From: Minh Nguyen Date: Tue, 26 May 2026 11:12:39 +0700 Subject: net: skbuff: fix missing zerocopy reference in pskb_carve helpers pskb_carve_inside_header() and pskb_carve_inside_nonlinear() both copy the old skb_shared_info header into a new buffer via memcpy(), which includes the destructor_arg pointer (uarg) for MSG_ZEROCOPY skbs. Neither function calls net_zcopy_get() for the new shinfo, creating an unaccounted holder: every skb_shared_info with destructor_arg set will call skb_zcopy_clear() once when freed, but the corresponding net_zcopy_get() was never called for the new copy. Repeated calls drive uarg->refcnt to zero prematurely, freeing ubuf_info_msgzc while TX skbs still hold live destructor_arg pointers. KASAN reports use-after-free on a freed ubuf_info_msgzc: BUG: KASAN: slab-use-after-free in skb_release_data+0x77b/0x810 Read of size 8 at addr ffff88801574d3e8 by task poc/220 Call Trace: skb_release_data+0x77b/0x810 kfree_skb_list_reason+0x13e/0x610 skb_release_data+0x4cd/0x810 sk_skb_reason_drop+0xf3/0x340 skb_queue_purge_reason+0x282/0x440 rds_tcp_inc_free+0x1e/0x30 rds_recvmsg+0x354/0x1780 __sys_recvmsg+0xdf/0x180 Allocated by task 219: msg_zerocopy_realloc+0x157/0x7b0 tcp_sendmsg_locked+0x2892/0x3ba0 Freed by task 219: ip_recv_error+0x74a/0xb10 tcp_recvmsg+0x475/0x530 The skb consuming the late access still referenced the same uarg via shinfo->destructor_arg copied by pskb_carve_inside_nonlinear() without a refcount bump. This has been verified to be reliably exploitable: a working proof-of-concept achieves full root privilege escalation from an unprivileged local user on a default kernel configuration. The fix follows the pattern of pskb_expand_head() which has the same memcpy/cloned structure. For pskb_carve_inside_header(), net_zcopy_get() is placed after skb_orphan_frags() succeeds, so the orphan error path needs no cleanup. For pskb_carve_inside_nonlinear(), net_zcopy_get() is placed after all failure points and just before skb_release_data(), so no error path needs cleanup at all -- matching pskb_expand_head() more closely and avoiding the need for a balancing net_zcopy_put(). Fixes: 6fa01ccd8830 ("skbuff: Add pskb_extract() helper function") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-sonnet-4-6 Signed-off-by: Minh Nguyen Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260526041240.329462-1-minhnguyen.080505@gmail.com Signed-off-by: Paolo Abeni --- net/core/skbuff.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d247acd447e4..0d3cc115f2e7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -6833,6 +6833,8 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, skb_kfree_head(data); return -ENOMEM; } + if (skb_zcopy(skb)) + net_zcopy_get(skb_zcopy(skb)); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) @@ -6976,6 +6978,8 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, skb_kfree_head(data); return -ENOMEM; } + if (skb_zcopy(skb)) + net_zcopy_get(skb_zcopy(skb)); skb_release_data(skb, SKB_CONSUMED); skb->head = data; -- cgit v1.2.3 From cc993e0927ec8bd98ea33377ada03295fcda0f24 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 25 May 2026 12:51:15 -0400 Subject: net/handshake: Use spin_lock_bh for hn_lock nvmet_tcp_state_change(), a socket callback that runs in BH context, can reach handshake_req_cancel() via nvmet_tcp_schedule_release_queue() and tls_handshake_cancel(). handshake_req_cancel() acquires hn->hn_lock with plain spin_lock(). If a process-context thread on the same CPU holds hn->hn_lock when a softirq invokes the cancel path, the lock attempt deadlocks. This is the only caller that invokes tls_handshake_cancel() from BH context; every other consumer calls it from process context. Deferring the cancel to process context in the NVMe target is not straightforward: nvmet_tcp_schedule_release_queue() must call tls_handshake_cancel() atomically with its state transition to DISCONNECTING. If the cancel were deferred, the handshake completion callback could fire in the window before the cancel runs, observe the unexpected state, and return without dropping its kref on the queue. Reworking that interlock is considerably more invasive than hardening the handshake lock. Convert all hn->hn_lock acquisitions from spin_lock/spin_unlock to spin_lock_bh/spin_unlock_bh so the lock is never taken with softirqs enabled. Fixes: 675b453e0241 ("nvmet-tcp: enable TLS handshake upcall") Signed-off-by: Chuck Lever Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20260525-handshake-file-pin-v3-1-66c616906ead@oracle.com Signed-off-by: Paolo Abeni --- net/handshake/netlink.c | 4 ++-- net/handshake/request.c | 14 +++++++------- net/handshake/tlshd.c | 2 ++ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c index b989456fc4c5..97114ec8027a 100644 --- a/net/handshake/netlink.c +++ b/net/handshake/netlink.c @@ -202,10 +202,10 @@ static void __net_exit handshake_net_exit(struct net *net) * accepted and are in progress will be destroyed when * the socket is closed. */ - spin_lock(&hn->hn_lock); + spin_lock_bh(&hn->hn_lock); set_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags); list_splice_init(&requests, &hn->hn_requests); - spin_unlock(&hn->hn_lock); + spin_unlock_bh(&hn->hn_lock); while (!list_empty(&requests)) { req = list_first_entry(&requests, struct handshake_req, hr_list); diff --git a/net/handshake/request.c b/net/handshake/request.c index 2829adbeb149..5d4a17f902d2 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -167,12 +167,12 @@ static bool remove_pending(struct handshake_net *hn, struct handshake_req *req) { bool ret = false; - spin_lock(&hn->hn_lock); + spin_lock_bh(&hn->hn_lock); if (!list_empty(&req->hr_list)) { __remove_pending_locked(hn, req); ret = true; } - spin_unlock(&hn->hn_lock); + spin_unlock_bh(&hn->hn_lock); return ret; } @@ -182,7 +182,7 @@ struct handshake_req *handshake_req_next(struct handshake_net *hn, int class) struct handshake_req *req, *pos; req = NULL; - spin_lock(&hn->hn_lock); + spin_lock_bh(&hn->hn_lock); list_for_each_entry(pos, &hn->hn_requests, hr_list) { if (pos->hr_proto->hp_handler_class != class) continue; @@ -190,7 +190,7 @@ struct handshake_req *handshake_req_next(struct handshake_net *hn, int class) req = pos; break; } - spin_unlock(&hn->hn_lock); + spin_unlock_bh(&hn->hn_lock); return req; } @@ -249,7 +249,7 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, if (READ_ONCE(hn->hn_pending) >= hn->hn_pending_max) goto out_err; - spin_lock(&hn->hn_lock); + spin_lock_bh(&hn->hn_lock); ret = -EOPNOTSUPP; if (test_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags)) goto out_unlock; @@ -258,7 +258,7 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, goto out_unlock; if (!__add_pending_locked(hn, req)) goto out_unlock; - spin_unlock(&hn->hn_lock); + spin_unlock_bh(&hn->hn_lock); ret = handshake_genl_notify(net, req->hr_proto, flags); if (ret) { @@ -274,7 +274,7 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, return 0; out_unlock: - spin_unlock(&hn->hn_lock); + spin_unlock_bh(&hn->hn_lock); out_err: /* Restore original destructor so socket teardown still runs on failure */ req->hr_sk->sk_destruct = req->hr_odestruct; diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c index 8f9532a15f43..af294c6cc717 100644 --- a/net/handshake/tlshd.c +++ b/net/handshake/tlshd.c @@ -425,6 +425,8 @@ EXPORT_SYMBOL(tls_server_hello_psk); * Request cancellation races with request completion. To determine * who won, callers examine the return value from this function. * + * Context: May be called from process or softirq context. + * * Return values: * %true - Uncompleted handshake request was canceled * %false - Handshake request already completed or not found -- cgit v1.2.3 From 9015985b5eb1a90eb86caf5bce1dfcf1aa38f8ad Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 25 May 2026 12:51:16 -0400 Subject: nvme-tcp: store negative errno in queue->tls_err nvme_tcp_tls_done() assigns queue->tls_err in three branches. The ENOKEY lookup failure and the EOPNOTSUPP initializer both store negative errnos. The third branch, reached when the handshake layer reports a non-zero status, stores -status. The handshake layer delivers status to the consumer callback as a negative errno; the other in-tree consumers -- xs_tls_handshake_done() and the nvmet target callback -- treat their status argument that way. The extra negation in nvme_tcp_tls_done() flips the sign, leaving tls_err as a positive value (for instance, +EIO), which nvme_tcp_start_tls() then returns to its caller. Drop the extra negation so queue->tls_err uniformly carries a negative errno on failure. Fixes: be8e82caa685 ("nvme-tcp: enable TLS handshake upcall") Signed-off-by: Chuck Lever Reviewed-by: Hannes Reinecke Reviewed-by: Alistair Francis Link: https://patch.msgid.link/20260525-handshake-file-pin-v3-2-66c616906ead@oracle.com Signed-off-by: Paolo Abeni --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 15d36d6a728e..68a1d7640494 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1702,7 +1702,7 @@ static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid) qid, pskid, status); if (status) { - queue->tls_err = -status; + queue->tls_err = status; goto out_complete; } -- cgit v1.2.3 From 6b22d433aa13f68e3cd9534ca9a5f4277bfa01c2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 25 May 2026 12:51:17 -0400 Subject: net/handshake: Pass negative errno through handshake_complete() handshake_complete() declares status as unsigned int and tls_handshake_done() negates that value (-status) before handing it to the TLS consumer. Consumers match on negative errno constants -- xs_tls_handshake_done() has switch (status) { case 0: case -EACCES: case -ETIMEDOUT: lower_transport->xprt_err = status; break; default: lower_transport->xprt_err = -EACCES; } so the API as designed expects callers to pass positive errno values that the tlshd shim then negates. Three internal callers in handshake_nl_accept_doit(), the net-exit drain, and a kunit test follow kernel convention and pass negative errnos -- -EIO, -ETIMEDOUT, -ETIMEDOUT. The implicit conversion to unsigned int turns -ETIMEDOUT into 0xFFFFFF92; the subsequent -status in tls_handshake_done() wraps back to 110, the consumer's switch falls through, and the xprt reports -EACCES on what should be -ETIMEDOUT or -EIO. Fix the API rather than the call sites. The natural kernel convention is negative errno in, negative errno out. Change handshake_complete() and hp_done to take int status, drop the negation in tls_handshake_done(), and negate once in handshake_nl_done_doit() where status arrives from the wire as an unsigned netlink attribute. The three internal callers were already correct under that convention and need no change. At the same wire boundary, declare MAX_ERRNO as the netlink policy upper bound for HANDSHAKE_A_DONE_STATUS. Attribute validation rejects out-of-range values before handshake_nl_done_doit() runs, and negating a bounded u32 there stays within int range -- closing the UBSAN-visible signed- integer overflow that an unconstrained u32 would invoke. Fixes: 3b3009ea8abb ("net/handshake: Create a NETLINK service for handling handshake requests") Signed-off-by: Chuck Lever Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20260525-handshake-file-pin-v3-3-66c616906ead@oracle.com Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/handshake.yaml | 8 ++++++++ net/handshake/genl.c | 3 ++- net/handshake/genl.h | 1 + net/handshake/handshake-test.c | 2 +- net/handshake/handshake.h | 4 ++-- net/handshake/netlink.c | 2 +- net/handshake/request.c | 2 +- net/handshake/tlshd.c | 4 ++-- 8 files changed, 18 insertions(+), 8 deletions(-) diff --git a/Documentation/netlink/specs/handshake.yaml b/Documentation/netlink/specs/handshake.yaml index 95c3fade7a8d..1024297b3851 100644 --- a/Documentation/netlink/specs/handshake.yaml +++ b/Documentation/netlink/specs/handshake.yaml @@ -12,6 +12,12 @@ protocol: genetlink doc: Netlink protocol to request a transport layer security handshake. definitions: + - + type: const + name: max-errno + value: 4095 + header: linux/err.h + scope: kernel - type: enum name: handler-class @@ -80,6 +86,8 @@ attribute-sets: - name: status type: u32 + checks: + max: max-errno - name: sockfd type: s32 diff --git a/net/handshake/genl.c b/net/handshake/genl.c index 870612609491..4b20cd9cdd0e 100644 --- a/net/handshake/genl.c +++ b/net/handshake/genl.c @@ -10,6 +10,7 @@ #include "genl.h" #include +#include /* HANDSHAKE_CMD_ACCEPT - do */ static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HANDLER_CLASS + 1] = { @@ -18,7 +19,7 @@ static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HAN /* HANDSHAKE_CMD_DONE - do */ static const struct nla_policy handshake_done_nl_policy[HANDSHAKE_A_DONE_REMOTE_AUTH + 1] = { - [HANDSHAKE_A_DONE_STATUS] = { .type = NLA_U32, }, + [HANDSHAKE_A_DONE_STATUS] = NLA_POLICY_MAX(NLA_U32, MAX_ERRNO), [HANDSHAKE_A_DONE_SOCKFD] = { .type = NLA_S32, }, [HANDSHAKE_A_DONE_REMOTE_AUTH] = { .type = NLA_U32, }, }; diff --git a/net/handshake/genl.h b/net/handshake/genl.h index 8d3e18672daf..46b65f131669 100644 --- a/net/handshake/genl.h +++ b/net/handshake/genl.h @@ -11,6 +11,7 @@ #include #include +#include int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info); int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info); diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c index 55442b2f518a..df3948e807a0 100644 --- a/net/handshake/handshake-test.c +++ b/net/handshake/handshake-test.c @@ -25,7 +25,7 @@ static int test_accept_func(struct handshake_req *req, struct genl_info *info, return 0; } -static void test_done_func(struct handshake_req *req, unsigned int status, +static void test_done_func(struct handshake_req *req, int status, struct genl_info *info) { } diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h index a48163765a7a..2289b0e274f4 100644 --- a/net/handshake/handshake.h +++ b/net/handshake/handshake.h @@ -57,7 +57,7 @@ struct handshake_proto { int (*hp_accept)(struct handshake_req *req, struct genl_info *info, int fd); void (*hp_done)(struct handshake_req *req, - unsigned int status, + int status, struct genl_info *info); void (*hp_destroy)(struct handshake_req *req); }; @@ -86,7 +86,7 @@ struct handshake_req *handshake_req_hash_lookup(struct sock *sk); struct handshake_req *handshake_req_next(struct handshake_net *hn, int class); int handshake_req_submit(struct socket *sock, struct handshake_req *req, gfp_t flags); -void handshake_complete(struct handshake_req *req, unsigned int status, +void handshake_complete(struct handshake_req *req, int status, struct genl_info *info); bool handshake_req_cancel(struct sock *sk); diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c index 97114ec8027a..039344979de9 100644 --- a/net/handshake/netlink.c +++ b/net/handshake/netlink.c @@ -160,7 +160,7 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info) status = -EIO; if (info->attrs[HANDSHAKE_A_DONE_STATUS]) - status = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]); + status = -(int)nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]); handshake_complete(req, status, info); sockfd_put(sock); diff --git a/net/handshake/request.c b/net/handshake/request.c index 5d4a17f902d2..97f9f8239949 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -284,7 +284,7 @@ out_err: } EXPORT_SYMBOL(handshake_req_submit); -void handshake_complete(struct handshake_req *req, unsigned int status, +void handshake_complete(struct handshake_req *req, int status, struct genl_info *info) { struct sock *sk = req->hr_sk; diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c index af294c6cc717..7567150c2a4f 100644 --- a/net/handshake/tlshd.c +++ b/net/handshake/tlshd.c @@ -93,7 +93,7 @@ static void tls_handshake_remote_peerids(struct tls_handshake_req *treq, * */ static void tls_handshake_done(struct handshake_req *req, - unsigned int status, struct genl_info *info) + int status, struct genl_info *info) { struct tls_handshake_req *treq = handshake_req_private(req); @@ -104,7 +104,7 @@ static void tls_handshake_done(struct handshake_req *req, if (!status) set_bit(HANDSHAKE_F_REQ_SESSION, &req->hr_flags); - treq->th_consumer_done(treq->th_consumer_data, -status, + treq->th_consumer_done(treq->th_consumer_data, status, treq->th_peerid[0]); } -- cgit v1.2.3 From 09dba37eee70d0596e26645015f1aa95a9848e9d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 25 May 2026 12:51:18 -0400 Subject: net/handshake: Take a long-lived file reference at submit handshake_nl_accept_doit() needs the file pointer backing req->hr_sk->sk_socket to survive the window between handshake_req_next() and the subsequent FD_PREPARE() and get_file(). The submit-side sock_hold() does not provide that. sk_refcnt keeps struct sock alive, but struct socket is owned by sock->file: when the consumer fputs the last file reference, sock_release() tears the socket down regardless of any sock_hold. Add an hr_file pointer to struct handshake_req and acquire an explicit reference on sock->file during handshake_req_submit(). handshake_complete() and handshake_req_cancel() release the reference on the completion-bit-winning path. The submit error path must also release the file reference, but after rhashtable insertion a concurrent handshake_req_cancel() can discover the request and race the error path. Gate the error-path cleanup -- sk_destruct restoration, fput, and request destruction -- with test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED), the same serialization handshake_complete() and handshake_req_cancel() already use. When cancel has already claimed ownership, the submit error path returns without touching the request; socket teardown handles final destruction. The accept-side dereferences are not yet retargeted; that change comes in the next patch. Signed-off-by: Chuck Lever Link: https://patch.msgid.link/20260525-handshake-file-pin-v3-4-66c616906ead@oracle.com Signed-off-by: Paolo Abeni --- net/handshake/handshake.h | 2 ++ net/handshake/netlink.c | 6 ------ net/handshake/request.c | 42 +++++++++++++++++++++++++++++++++++------- 3 files changed, 37 insertions(+), 13 deletions(-) diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h index 2289b0e274f4..da61cadd1ad3 100644 --- a/net/handshake/handshake.h +++ b/net/handshake/handshake.h @@ -24,6 +24,7 @@ enum hn_flags_bits { HANDSHAKE_F_NET_DRAINING, }; +struct file; struct handshake_proto; /* One handshake request */ @@ -32,6 +33,7 @@ struct handshake_req { struct rhash_head hr_rhash; unsigned long hr_flags; const struct handshake_proto *hr_proto; + struct file *hr_file; struct sock *hr_sk; void (*hr_odestruct)(struct sock *sk); diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c index 039344979de9..1a5821eb7184 100644 --- a/net/handshake/netlink.c +++ b/net/handshake/netlink.c @@ -210,12 +210,6 @@ static void __net_exit handshake_net_exit(struct net *net) while (!list_empty(&requests)) { req = list_first_entry(&requests, struct handshake_req, hr_list); list_del(&req->hr_list); - - /* - * Requests on this list have not yet been - * accepted, so they do not have an fd to put. - */ - handshake_complete(req, -ETIMEDOUT, NULL); } } diff --git a/net/handshake/request.c b/net/handshake/request.c index 97f9f8239949..da064511ab86 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -215,9 +216,16 @@ EXPORT_SYMBOL_IF_KUNIT(handshake_req_next); * A zero return value from handshake_req_submit() means that * exactly one subsequent completion callback is guaranteed. * - * A negative return value from handshake_req_submit() means that - * no completion callback will be done and that @req has been - * destroyed. + * A negative return value from handshake_req_submit() guarantees that + * no completion callback will occur and that @req is no longer owned by + * the caller. If cancellation wins the completion race after the request + * has been published, final destruction is deferred until socket teardown. + * + * The caller must hold a reference on @sock->file for the duration + * of this call. Once the request is published to the accept side, a + * concurrent completion or cancellation may release the request's pin on + * @sock->file; the caller's reference is what keeps @sock->sk valid until + * handshake_req_submit() returns. */ int handshake_req_submit(struct socket *sock, struct handshake_req *req, gfp_t flags) @@ -236,6 +244,14 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, kfree(req); return -EINVAL; } + + /* + * Pin sock->file for the lifetime of the request so the + * accept side does not race a consumer that releases the + * socket while a handshake is pending. + */ + req->hr_file = get_file(sock->file); + req->hr_odestruct = req->hr_sk->sk_destruct; req->hr_sk->sk_destruct = handshake_sk_destruct; @@ -267,7 +283,11 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, goto out_err; } - /* Prevent socket release while a handshake request is pending */ + /* + * Pin struct sock so sk_destruct does not run until the + * handshake completion path releases it; struct socket is + * held separately via hr_file above. + */ sock_hold(req->hr_sk); trace_handshake_submit(net, req, req->hr_sk); @@ -276,10 +296,13 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, out_unlock: spin_unlock_bh(&hn->hn_lock); out_err: - /* Restore original destructor so socket teardown still runs on failure */ - req->hr_sk->sk_destruct = req->hr_odestruct; trace_handshake_submit_err(net, req, req->hr_sk, ret); - handshake_req_destroy(req); + if (!test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) { + /* Restore original destructor so socket teardown still runs. */ + req->hr_sk->sk_destruct = req->hr_odestruct; + fput(req->hr_file); + handshake_req_destroy(req); + } return ret; } EXPORT_SYMBOL(handshake_req_submit); @@ -291,11 +314,15 @@ void handshake_complete(struct handshake_req *req, int status, struct net *net = sock_net(sk); if (!test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) { + struct file *file = req->hr_file; + trace_handshake_complete(net, req, sk, status); req->hr_proto->hp_done(req, status, info); /* Handshake request is no longer pending */ sock_put(sk); + + fput(file); } } EXPORT_SYMBOL_IF_KUNIT(handshake_complete); @@ -344,6 +371,7 @@ out_true: /* Handshake request is no longer pending */ sock_put(sk); + fput(req->hr_file); return true; } EXPORT_SYMBOL(handshake_req_cancel); -- cgit v1.2.3 From f4251190e58b209999c1ba9e6d2976136a1be055 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 25 May 2026 12:51:19 -0400 Subject: net/handshake: hand off the pinned file reference to accept_doit handshake_req_next() removes the request from the per-net pending list and drops hn_lock before handshake_nl_accept_doit() reads req->hr_sk->sk_socket and dereferences sock->file (once in FD_PREPARE() and again in get_file()). In that window a consumer running tls_handshake_cancel() followed by sockfd_put() (svc_sock_free) or __fput_sync() (xs_reset_transport) releases sock->file. sock_release() then runs sock_orphan(), zeroing sk_socket, and frees the struct socket. The accept-side code either reads NULL through sk_socket or chases freed memory. The submit-side sock_hold() does not prevent this. sk_refcnt protects struct sock, but struct socket and sock->file are independently refcounted via the file descriptor the consumer owns. Pinning sk leaves sock and sock->file unprotected. Retarget the accept-side dereferences at req->hr_file, which was pinned at submit time, instead of req->hr_sk->sk_socket->file. Pinning on its own is not sufficient: a consumer that cancels between handshake_req_next() returning and accept_doit reaching FD_PREPARE() takes the !remove_pending() branch in handshake_req_cancel() and drops hr_file before the accept side takes its own reference. Hand off an additional file reference inside handshake_req_next(), under hn_lock, so the accept side operates on a reference that no concurrent handshake_req_cancel() can revoke. FD_PREPARE() consumes that handed-off reference, either by transferring it to the new fd in fd_publish() or by dropping it in the cleanup destructor on error; the explicit get_file() that previously balanced FD_PREPARE() is therefore redundant and goes away. Update handshake_req_cancel_test2 and _test3 to simulate the FD_PREPARE() consumption with an fput() so the kunit file-count assertions stay balanced. Reported-by: Chris Mason Fixes: 3b3009ea8abb ("net/handshake: Create a NETLINK service for handling handshake requests") Signed-off-by: Chuck Lever Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20260525-handshake-file-pin-v3-5-66c616906ead@oracle.com Signed-off-by: Paolo Abeni --- net/handshake/handshake-test.c | 8 ++++++++ net/handshake/netlink.c | 7 ++----- net/handshake/request.c | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c index df3948e807a0..9cc7a95f4120 100644 --- a/net/handshake/handshake-test.c +++ b/net/handshake/handshake-test.c @@ -375,6 +375,10 @@ static void handshake_req_cancel_test2(struct kunit *test) /* Pretend to accept this request */ next = handshake_req_next(hn, HANDSHAKE_HANDLER_CLASS_TLSHD); KUNIT_ASSERT_PTR_EQ(test, req, next); + /* Simulate FD_PREPARE() consuming the file reference handed + * off by handshake_req_next(); see handshake_nl_accept_doit(). + */ + fput(filp); /* Act */ result = handshake_req_cancel(sock->sk); @@ -417,6 +421,10 @@ static void handshake_req_cancel_test3(struct kunit *test) /* Pretend to accept this request */ next = handshake_req_next(hn, HANDSHAKE_HANDLER_CLASS_TLSHD); KUNIT_ASSERT_PTR_EQ(test, req, next); + /* Simulate FD_PREPARE() consuming the file reference handed + * off by handshake_req_next(); see handshake_nl_accept_doit(). + */ + fput(filp); /* Pretend to complete this request */ handshake_complete(next, -ETIMEDOUT, NULL); diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c index 1a5821eb7184..21d6cbd52fcd 100644 --- a/net/handshake/netlink.c +++ b/net/handshake/netlink.c @@ -92,7 +92,6 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) struct net *net = sock_net(skb->sk); struct handshake_net *hn = handshake_pernet(net); struct handshake_req *req = NULL; - struct socket *sock; int class, err; err = -EOPNOTSUPP; @@ -107,15 +106,13 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) err = -EAGAIN; req = handshake_req_next(hn, class); if (req) { - sock = req->hr_sk->sk_socket; - - FD_PREPARE(fdf, O_CLOEXEC, sock->file); + FD_PREPARE(fdf, O_CLOEXEC, req->hr_file); if (fdf.err) { + fput(req->hr_file); /* drop ref from handshake_req_next() */ err = fdf.err; goto out_complete; } - get_file(sock->file); /* FD_PREPARE() consumes a reference. */ err = req->hr_proto->hp_accept(req, info, fd_prepare_fd(fdf)); if (err) goto out_complete; /* Automatic cleanup handles fput */ diff --git a/net/handshake/request.c b/net/handshake/request.c index da064511ab86..e2d7ee7ce6e0 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -178,6 +178,17 @@ static bool remove_pending(struct handshake_net *hn, struct handshake_req *req) return ret; } +/** + * handshake_req_next - Return the next queued handshake request + * @hn: per-net handshake state + * @class: handler class to match + * + * On a non-NULL return, the caller owns an extra reference + * on @req->hr_file. FD_PREPARE() consumes it on success; on + * the FD_PREPARE() failure path the caller must fput() it. + * + * Return: pointer to a removed handshake_req, or NULL. + */ struct handshake_req *handshake_req_next(struct handshake_net *hn, int class) { struct handshake_req *req, *pos; @@ -188,6 +199,13 @@ struct handshake_req *handshake_req_next(struct handshake_net *hn, int class) if (pos->hr_proto->hp_handler_class != class) continue; __remove_pending_locked(hn, pos); + /* Hand off a file reference to the accept side under + * hn_lock. A concurrent handshake_req_cancel() can drop + * hr_file before accept reaches FD_PREPARE(); this extra + * reference keeps the file alive until FD_PREPARE() takes + * ownership. + */ + get_file(pos->hr_file); req = pos; break; } -- cgit v1.2.3 From 5da98f55b13173c08f003011b76531b25c821c07 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 25 May 2026 12:51:20 -0400 Subject: net/handshake: Close the submit-side sock_hold race handshake_req_submit() publishes the request via handshake_req_hash_add() and __add_pending_locked(), drops hn_lock, and calls handshake_genl_notify() (which can sleep) before taking sock_hold() on req->hr_sk. A fast tlshd ACCEPT followed by DONE can drive handshake_complete()'s sock_put() into the window between the spin_unlock and the late sock_hold(); on a system where the consumer's fd held the only sk reference, the late sock_hold() then operates on an sk whose refcount has reached zero. The preceding two patches install an explicit file reference on struct handshake_req. That file pins sock->file, which pins the embedded struct socket, which defers inet_release()'s sock_put(). As long as hr_file is held, sk cannot reach refcount zero from the consumer side, and the submit-side sock_hold() with its matching sock_put() calls in handshake_complete() and handshake_req_cancel() is now redundant. Drop all three. The file reference already keeps each request's socket alive, and the lifetime story is contained in a single get_file()/fput() pair. Fixes: 3b3009ea8abb ("net/handshake: Create a NETLINK service for handling handshake requests") Signed-off-by: Chuck Lever Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20260525-handshake-file-pin-v3-6-66c616906ead@oracle.com Signed-off-by: Paolo Abeni --- net/handshake/request.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/net/handshake/request.c b/net/handshake/request.c index e2d7ee7ce6e0..bd3d9467ab91 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -301,13 +301,6 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, goto out_err; } - /* - * Pin struct sock so sk_destruct does not run until the - * handshake completion path releases it; struct socket is - * held separately via hr_file above. - */ - sock_hold(req->hr_sk); - trace_handshake_submit(net, req, req->hr_sk); return 0; @@ -337,9 +330,6 @@ void handshake_complete(struct handshake_req *req, int status, trace_handshake_complete(net, req, sk, status); req->hr_proto->hp_done(req, status, info); - /* Handshake request is no longer pending */ - sock_put(sk); - fput(file); } } @@ -387,8 +377,6 @@ bool handshake_req_cancel(struct sock *sk) out_true: trace_handshake_cancel(net, req, sk); - /* Handshake request is no longer pending */ - sock_put(sk); fput(req->hr_file); return true; } -- cgit v1.2.3 From 204a5efde5ed52932840ee1d15d3b581cfda48e2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 25 May 2026 12:51:21 -0400 Subject: net/handshake: Verify file-reference balance in submit paths The new file-reference contract on struct handshake_req is silently breakable: a missing get_file() at submit or a missing fput() on an error path leaves the file leaked but does not crash the test, so the existing absence-of-crash checks pass either way. Snapshot file_count(filp) before each handshake_req_submit() in the submit-success, EAGAIN, EBUSY, and cancel tests, and assert the expected balance after submit and again after cancel. The already-completed cancel test also asserts the post-complete balance, which pins down that handshake_complete() drops the reference and that the subsequent cancel does not double-fput. The destroy test gets the same treatment before __fput_sync(), which double-checks that cancel's fput() ran and the only remaining reference is the one sock_alloc_file() established. Signed-off-by: Chuck Lever Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20260525-handshake-file-pin-v3-7-66c616906ead@oracle.com Signed-off-by: Paolo Abeni --- net/handshake/handshake-test.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c index 9cc7a95f4120..3dd507470d5f 100644 --- a/net/handshake/handshake-test.c +++ b/net/handshake/handshake-test.c @@ -208,6 +208,7 @@ static void handshake_req_submit_test3(struct kunit *test) static void handshake_req_submit_test4(struct kunit *test) { struct handshake_req *req, *result; + unsigned long fcount_before; struct socket *sock; struct file *filp; int err; @@ -224,8 +225,10 @@ static void handshake_req_submit_test4(struct kunit *test) KUNIT_ASSERT_NOT_NULL(test, sock->sk); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); /* Act */ result = handshake_req_hash_lookup(sock->sk); @@ -235,11 +238,13 @@ static void handshake_req_submit_test4(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, req, result); handshake_req_cancel(sock->sk); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } static void handshake_req_submit_test5(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req; struct handshake_net *hn; struct socket *sock; @@ -265,12 +270,14 @@ static void handshake_req_submit_test5(struct kunit *test) saved = hn->hn_pending; hn->hn_pending = hn->hn_pending_max + 1; + fcount_before = file_count(filp); /* Act */ err = handshake_req_submit(sock, req, GFP_KERNEL); /* Assert */ KUNIT_EXPECT_EQ(test, err, -EAGAIN); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); hn->hn_pending = saved; @@ -279,6 +286,7 @@ static void handshake_req_submit_test5(struct kunit *test) static void handshake_req_submit_test6(struct kunit *test) { struct handshake_req *req1, *req2; + unsigned long fcount_before; struct socket *sock; struct file *filp; int err; @@ -296,21 +304,26 @@ static void handshake_req_submit_test6(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); KUNIT_ASSERT_NOT_NULL(test, sock->sk); sock->file = filp; + fcount_before = file_count(filp); /* Act */ err = handshake_req_submit(sock, req1, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); err = handshake_req_submit(sock, req2, GFP_KERNEL); /* Assert */ KUNIT_EXPECT_EQ(test, err, -EBUSY); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); handshake_req_cancel(sock->sk); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } static void handshake_req_cancel_test1(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req; struct socket *sock; struct file *filp; @@ -329,8 +342,10 @@ static void handshake_req_cancel_test1(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); /* NB: handshake_req hasn't been accepted */ @@ -339,12 +354,14 @@ static void handshake_req_cancel_test1(struct kunit *test) /* Assert */ KUNIT_EXPECT_TRUE(test, result); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } static void handshake_req_cancel_test2(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req, *next; struct handshake_net *hn; struct socket *sock; @@ -365,8 +382,10 @@ static void handshake_req_cancel_test2(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); net = sock_net(sock->sk); hn = handshake_pernet(net); @@ -385,12 +404,14 @@ static void handshake_req_cancel_test2(struct kunit *test) /* Assert */ KUNIT_EXPECT_TRUE(test, result); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } static void handshake_req_cancel_test3(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req, *next; struct handshake_net *hn; struct socket *sock; @@ -411,8 +432,10 @@ static void handshake_req_cancel_test3(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); net = sock_net(sock->sk); hn = handshake_pernet(net); @@ -428,12 +451,14 @@ static void handshake_req_cancel_test3(struct kunit *test) /* Pretend to complete this request */ handshake_complete(next, -ETIMEDOUT, NULL); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); /* Act */ result = handshake_req_cancel(sock->sk); /* Assert */ KUNIT_EXPECT_FALSE(test, result); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } @@ -454,6 +479,7 @@ static struct handshake_proto handshake_req_alloc_proto_destroy = { static void handshake_req_destroy_test1(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req; struct socket *sock; struct file *filp; @@ -473,10 +499,12 @@ static void handshake_req_destroy_test1(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); handshake_req_cancel(sock->sk); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); /* Act */ /* Ensure the close/release/put process has run to -- cgit v1.2.3 From ea5fe6a73ca57e5150b8a38b341aef2636eb72f0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 25 May 2026 12:51:22 -0400 Subject: net/handshake: Drain pending requests at net namespace exit The arguments to list_splice_init() in handshake_net_exit() are reversed. The call moves the local empty "requests" list onto hn->hn_requests, leaving the local list empty, so the subsequent drain loop runs zero iterations. Pending handshake requests that had not yet been accepted are not torn down when the net namespace is destroyed; each one keeps a reference on a socket file and on the handshake_req allocation. Pass the source and destination in the documented order (list_splice_init(list, head) moves list onto head) so the pending list is transferred to the local scratch list and drained through handshake_complete(). Fixing the splice direction exposes a list-corruption race. After the splice each req->hr_list still has non-empty link pointers, threading the stack-local scratch list rather than hn_requests. A concurrent handshake_req_cancel() -- for example, from sunrpc's TLS timeout on a kernel socket whose netns reference was not taken -- finds the request through the rhashtable, calls remove_pending(), and sees !list_empty(&req->hr_list). __remove_pending_locked() then list_del_init()s an entry off the scratch list while the drain iterates, corrupting it. The same call arriving after the drain loop has run list_del() on an entry hits LIST_POISON instead. Have remove_pending() check HANDSHAKE_F_NET_DRAINING under hn_lock and report not-found when drain is in progress. The drain has already taken ownership; handshake_complete()'s existing test_and_set on HANDSHAKE_F_REQ_COMPLETED still arbitrates between drain and cancel for who calls the consumer's hp_done. Use list_del_init() rather than list_del() in the drain so req->hr_list does not carry LIST_POISON after drain releases the entry. The DRAINING guard in remove_pending() makes cancel return false, but cancel still falls through to test_and_set_bit on HANDSHAKE_F_REQ_COMPLETED and drops the request's hr_file reference. Without another pin, if that is the last reference, sk_destruct frees the request while it is still linked on the drain loop's local list. Pin each request's hr_file under hn_lock before releasing the list, and drop that drain pin after the loop finishes with the request. Fixes: 3b3009ea8abb ("net/handshake: Create a NETLINK service for handling handshake requests") Signed-off-by: Chuck Lever Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20260525-handshake-file-pin-v3-8-66c616906ead@oracle.com Signed-off-by: Paolo Abeni --- net/handshake/netlink.c | 10 ++++++++-- net/handshake/request.c | 5 ++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c index 21d6cbd52fcd..3fd4fef9bab1 100644 --- a/net/handshake/netlink.c +++ b/net/handshake/netlink.c @@ -201,13 +201,19 @@ static void __net_exit handshake_net_exit(struct net *net) */ spin_lock_bh(&hn->hn_lock); set_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags); - list_splice_init(&requests, &hn->hn_requests); + list_splice_init(&hn->hn_requests, &requests); + list_for_each_entry(req, &requests, hr_list) + get_file(req->hr_file); spin_unlock_bh(&hn->hn_lock); while (!list_empty(&requests)) { + struct file *file; + req = list_first_entry(&requests, struct handshake_req, hr_list); - list_del(&req->hr_list); + file = req->hr_file; + list_del_init(&req->hr_list); handshake_complete(req, -ETIMEDOUT, NULL); + fput(file); } } diff --git a/net/handshake/request.c b/net/handshake/request.c index bd3d9467ab91..cd30d54d0501 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -163,13 +163,16 @@ static void __remove_pending_locked(struct handshake_net *hn, * otherwise %false. * * If @req was on a pending list, it has not yet been accepted. + * Returns %false when the net namespace is draining; the drain + * loop has taken ownership of the pending list. */ static bool remove_pending(struct handshake_net *hn, struct handshake_req *req) { bool ret = false; spin_lock_bh(&hn->hn_lock); - if (!list_empty(&req->hr_list)) { + if (!test_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags) && + !list_empty(&req->hr_list)) { __remove_pending_locked(hn, req); ret = true; } -- cgit v1.2.3 From 2e4eed207cb4ba513e4a1fdcecbb3732e98f4914 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 28 May 2026 09:53:17 +0200 Subject: MAINTAINERS: Add Vasant Hegde to reviewers of AMD IOMMU Vasant has a long history of providing valuable feedback and testing results for the AMD IOMMU code. Still, too often he gets not Cc'ed on code changes, so make his reviewer status official. Acked-by: Vasant Hegde Signed-off-by: Joerg Roedel --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c2c6d79275c6..34293e53129f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1152,6 +1152,7 @@ F: drivers/platform/x86/amd/hfi/ AMD IOMMU (AMD-VI) M: Joerg Roedel R: Suravee Suthikulpanit +R: Vasant Hegde L: iommu@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git -- cgit v1.2.3 From 00c257948900fae69dae2a055b378edf09aacf6e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 28 May 2026 09:53:18 +0200 Subject: MAINTAINERS: Add my employer to my entries AMD pays for my IOMMU maintainer work, so mention that in the MAINTAINERS file as well. Signed-off-by: Joerg Roedel --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 34293e53129f..52e538030cf9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1150,7 +1150,7 @@ F: Documentation/arch/x86/amd-hfi.rst F: drivers/platform/x86/amd/hfi/ AMD IOMMU (AMD-VI) -M: Joerg Roedel +M: Joerg Roedel (AMD) R: Suravee Suthikulpanit R: Vasant Hegde L: iommu@lists.linux.dev @@ -13482,7 +13482,7 @@ F: include/linux/iommu-dma.h F: include/linux/iova.h IOMMU SUBSYSTEM -M: Joerg Roedel +M: Joerg Roedel (AMD) M: Will Deacon R: Robin Murphy L: iommu@lists.linux.dev -- cgit v1.2.3 From 20040b2a3cb992f84d3db4c086b909eb9b906b31 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 26 May 2026 09:45:23 +0200 Subject: dpll: export __dpll_device_change_ntf() for use under dpll_lock Export __dpll_device_change_ntf() so that drivers can send device change notifications from within device callbacks, which are already called under dpll_lock. Using dpll_device_change_ntf() in that context would deadlock. Add lockdep_assert_held() to catch misuse without the lock held. Signed-off-by: Ivan Vecera Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20260526074525.1451008-2-ivecera@redhat.com Signed-off-by: Paolo Abeni --- drivers/dpll/dpll_netlink.c | 13 +++++++++++-- include/linux/dpll.h | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 0ff1658c2dc1..75e3ae0c16d0 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -829,12 +829,21 @@ int dpll_device_delete_ntf(struct dpll_device *dpll) return dpll_device_event_send(DPLL_CMD_DEVICE_DELETE_NTF, dpll); } -static int -__dpll_device_change_ntf(struct dpll_device *dpll) +/** + * __dpll_device_change_ntf - notify that the dpll device has been changed + * @dpll: registered dpll pointer + * + * Context: caller must hold dpll_lock. Suitable for use inside device + * callbacks which are already invoked under dpll_lock. + * Return: 0 if succeeds, error code otherwise. + */ +int __dpll_device_change_ntf(struct dpll_device *dpll) { + lockdep_assert_held(&dpll_lock); dpll_device_notify(dpll, DPLL_DEVICE_CHANGED); return dpll_device_event_send(DPLL_CMD_DEVICE_CHANGE_NTF, dpll); } +EXPORT_SYMBOL_GPL(__dpll_device_change_ntf); /** * dpll_device_change_ntf - notify that the dpll device has been changed diff --git a/include/linux/dpll.h b/include/linux/dpll.h index f8037f1ab20b..2dbe8567eafc 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -284,6 +284,7 @@ void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, int dpll_pin_ref_sync_pair_add(struct dpll_pin *pin, struct dpll_pin *ref_sync_pin); +int __dpll_device_change_ntf(struct dpll_device *dpll); int dpll_device_change_ntf(struct dpll_device *dpll); int __dpll_pin_change_ntf(struct dpll_pin *pin); -- cgit v1.2.3 From d733f519f6443540f8359461a34e3b0042099bbe Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 26 May 2026 09:45:24 +0200 Subject: dpll: zl3073x: use __dpll_device_change_ntf() and remove change_work The change_work was introduced to send device change notifications from DPLL device callbacks without deadlocking on dpll_lock, since the callbacks are already invoked under that lock. Now that __dpll_device_change_ntf() is exported for callers that already hold dpll_lock, use it directly and remove the change_work infrastructure entirely. This eliminates a race condition where change_work could be re-scheduled after cancel_work_sync() during device teardown, potentially causing the handler to dereference a freed or NULL dpll_dev pointer. Fixes: 9363b4837659 ("dpll: zl3073x: Allow to configure phase offset averaging factor") Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260526074525.1451008-3-ivecera@redhat.com Signed-off-by: Paolo Abeni --- drivers/dpll/zl3073x/dpll.c | 26 +++++++++----------------- drivers/dpll/zl3073x/dpll.h | 2 -- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index 64b4e9e3e8fe..0770bd895de9 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -1079,15 +1079,6 @@ zl3073x_dpll_phase_offset_avg_factor_get(const struct dpll_device *dpll, return 0; } -static void -zl3073x_dpll_change_work(struct work_struct *work) -{ - struct zl3073x_dpll *zldpll; - - zldpll = container_of(work, struct zl3073x_dpll, change_work); - dpll_device_change_ntf(zldpll->dpll_dev); -} - static int zl3073x_dpll_phase_offset_avg_factor_set(const struct dpll_device *dpll, void *dpll_priv, u32 factor, @@ -1113,8 +1104,10 @@ zl3073x_dpll_phase_offset_avg_factor_set(const struct dpll_device *dpll, * we have to send a notification for other DPLL devices. */ list_for_each_entry(item, &zldpll->dev->dplls, list) { - if (item != zldpll) - schedule_work(&item->change_work); + struct dpll_device *dpll_dev = READ_ONCE(item->dpll_dev); + + if (item != zldpll && dpll_dev) + __dpll_device_change_ntf(dpll_dev); } return 0; @@ -1627,13 +1620,13 @@ zl3073x_dpll_device_register(struct zl3073x_dpll *zldpll) static void zl3073x_dpll_device_unregister(struct zl3073x_dpll *zldpll) { - WARN(!zldpll->dpll_dev, "DPLL device is not registered\n"); + struct dpll_device *dpll_dev = READ_ONCE(zldpll->dpll_dev); - cancel_work_sync(&zldpll->change_work); + WARN(!dpll_dev, "DPLL device is not registered\n"); - dpll_device_unregister(zldpll->dpll_dev, &zldpll->ops, zldpll); - dpll_device_put(zldpll->dpll_dev, &zldpll->tracker); - zldpll->dpll_dev = NULL; + WRITE_ONCE(zldpll->dpll_dev, NULL); + dpll_device_unregister(dpll_dev, &zldpll->ops, zldpll); + dpll_device_put(dpll_dev, &zldpll->tracker); } /** @@ -1926,7 +1919,6 @@ zl3073x_dpll_alloc(struct zl3073x_dev *zldev, u8 ch) zldpll->dev = zldev; zldpll->id = ch; INIT_LIST_HEAD(&zldpll->pins); - INIT_WORK(&zldpll->change_work, zl3073x_dpll_change_work); return zldpll; } diff --git a/drivers/dpll/zl3073x/dpll.h b/drivers/dpll/zl3073x/dpll.h index 434c32a7db12..c8bc8437a709 100644 --- a/drivers/dpll/zl3073x/dpll.h +++ b/drivers/dpll/zl3073x/dpll.h @@ -21,7 +21,6 @@ * @tracker: tracking object for the acquired reference * @lock_status: last saved DPLL lock status * @pins: list of pins - * @change_work: device change notification work */ struct zl3073x_dpll { struct list_head list; @@ -35,7 +34,6 @@ struct zl3073x_dpll { dpll_tracker tracker; enum dpll_lock_status lock_status; struct list_head pins; - struct work_struct change_work; }; struct zl3073x_dpll *zl3073x_dpll_alloc(struct zl3073x_dev *zldev, u8 ch); -- cgit v1.2.3 From c1224569cef038b040db0459510cd7948ecd467b Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 26 May 2026 09:45:25 +0200 Subject: dpll: zl3073x: make frequency monitor a per-device attribute The frequency monitoring feature uses shared hardware registers that measure input reference frequencies independently of individual DPLL channels. However, the freq_monitor flag was incorrectly placed in the per-DPLL structure, causing each channel to track its own enable/disable state independently. Since the DPLL core calls measured_freq_get() only for the first pin registration, the measured_freq_check() in the periodic worker was gated by the per-DPLL freq_monitor flag of whichever channel happens to be checked. If the first DPLL channel had frequency monitoring disabled while another had it enabled, measurements were never reported. Move freq_monitor from struct zl3073x_dpll to struct zl3073x_dev so all DPLL channels share a single flag, matching the hardware behavior. Update freq_monitor_set() to notify other DPLL devices about the change (like phase_offset_avg_factor_set() already does) and remove the mode-dependent guard in zl3073x_dpll_changes_check() since all input pin monitoring (pin state, phase offset, FFO, and measured frequency) works correctly in all DPLL modes. Fixes: bfc923b642874 ("dpll: zl3073x: implement frequency monitoring") Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260526074525.1451008-4-ivecera@redhat.com Signed-off-by: Paolo Abeni --- drivers/dpll/zl3073x/core.c | 19 ++++++++----------- drivers/dpll/zl3073x/core.h | 4 +++- drivers/dpll/zl3073x/dpll.c | 29 ++++++++++++++--------------- drivers/dpll/zl3073x/dpll.h | 2 -- 4 files changed, 25 insertions(+), 29 deletions(-) diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index 5f1e70f3e40a..0a133b0f2d97 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -762,18 +762,15 @@ zl3073x_dev_periodic_work(struct kthread_work *work) dev_warn(zldev->dev, "Failed to update phase offsets: %pe\n", ERR_PTR(rc)); - /* Update measured input reference frequencies if any DPLL has - * frequency monitoring enabled. + /* Update measured input reference frequencies if frequency + * monitoring is enabled. */ - list_for_each_entry(zldpll, &zldev->dplls, list) { - if (zldpll->freq_monitor) { - rc = zl3073x_ref_freq_meas_update(zldev); - if (rc) - dev_warn(zldev->dev, - "Failed to update measured frequencies: %pe\n", - ERR_PTR(rc)); - break; - } + if (zldev->freq_monitor) { + rc = zl3073x_ref_freq_meas_update(zldev); + if (rc) + dev_warn(zldev->dev, + "Failed to update measured frequencies: %pe\n", + ERR_PTR(rc)); } /* Update references' fractional frequency offsets */ diff --git a/drivers/dpll/zl3073x/core.h b/drivers/dpll/zl3073x/core.h index 99440620407d..addba378b0df 100644 --- a/drivers/dpll/zl3073x/core.h +++ b/drivers/dpll/zl3073x/core.h @@ -57,6 +57,7 @@ struct zl3073x_chip_info { * @work: periodic work * @clock_id: clock id of the device * @phase_avg_factor: phase offset measurement averaging factor + * @freq_monitor: is frequency monitor enabled */ struct zl3073x_dev { struct device *dev; @@ -77,9 +78,10 @@ struct zl3073x_dev { struct kthread_worker *kworker; struct kthread_delayed_work work; - /* Devlink parameters */ + /* Per-chip parameters */ u64 clock_id; u8 phase_avg_factor; + bool freq_monitor; }; extern const struct regmap_config zl3073x_regmap_config; diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index 0770bd895de9..0bfcbae2109f 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -1212,7 +1212,7 @@ zl3073x_dpll_freq_monitor_get(const struct dpll_device *dpll, { struct zl3073x_dpll *zldpll = dpll_priv; - if (zldpll->freq_monitor) + if (zldpll->dev->freq_monitor) *state = DPLL_FEATURE_STATE_ENABLE; else *state = DPLL_FEATURE_STATE_DISABLE; @@ -1226,9 +1226,19 @@ zl3073x_dpll_freq_monitor_set(const struct dpll_device *dpll, enum dpll_feature_state state, struct netlink_ext_ack *extack) { - struct zl3073x_dpll *zldpll = dpll_priv; + struct zl3073x_dpll *item, *zldpll = dpll_priv; - zldpll->freq_monitor = (state == DPLL_FEATURE_STATE_ENABLE); + zldpll->dev->freq_monitor = (state == DPLL_FEATURE_STATE_ENABLE); + + /* The frequency monitoring is common for all DPLL channels so after + * change we have to send a notification for other DPLL devices. + */ + list_for_each_entry(item, &zldpll->dev->dplls, list) { + struct dpll_device *dpll_dev = READ_ONCE(item->dpll_dev); + + if (item != zldpll && dpll_dev) + __dpll_device_change_ntf(dpll_dev); + } return 0; } @@ -1745,7 +1755,7 @@ zl3073x_dpll_pin_measured_freq_check(struct zl3073x_dpll_pin *pin) u8 ref_id; u32 freq; - if (!zldpll->freq_monitor) + if (!zldpll->dev->freq_monitor) return false; ref_id = zl3073x_input_pin_ref_get(pin->id); @@ -1778,10 +1788,8 @@ zl3073x_dpll_changes_check(struct zl3073x_dpll *zldpll) struct zl3073x_dev *zldev = zldpll->dev; enum dpll_lock_status lock_status; struct device *dev = zldev->dev; - const struct zl3073x_chan *chan; struct zl3073x_dpll_pin *pin; int rc; - u8 mode; zldpll->check_count++; @@ -1800,15 +1808,6 @@ zl3073x_dpll_changes_check(struct zl3073x_dpll *zldpll) dpll_device_change_ntf(zldpll->dpll_dev); } - /* Input pin monitoring does make sense only in automatic - * or forced reference modes. - */ - chan = zl3073x_chan_state_get(zldev, zldpll->id); - mode = zl3073x_chan_mode_get(chan); - if (mode != ZL_DPLL_MODE_REFSEL_MODE_AUTO && - mode != ZL_DPLL_MODE_REFSEL_MODE_REFLOCK) - return; - /* Update phase offset latch registers for this DPLL if the phase * offset monitor feature is enabled. */ diff --git a/drivers/dpll/zl3073x/dpll.h b/drivers/dpll/zl3073x/dpll.h index c8bc8437a709..21adcc18e45e 100644 --- a/drivers/dpll/zl3073x/dpll.h +++ b/drivers/dpll/zl3073x/dpll.h @@ -15,7 +15,6 @@ * @id: DPLL index * @check_count: periodic check counter * @phase_monitor: is phase offset monitor enabled - * @freq_monitor: is frequency monitor enabled * @ops: DPLL device operations for this instance * @dpll_dev: pointer to registered DPLL device * @tracker: tracking object for the acquired reference @@ -28,7 +27,6 @@ struct zl3073x_dpll { u8 id; u8 check_count; bool phase_monitor; - bool freq_monitor; struct dpll_device_ops ops; struct dpll_device *dpll_dev; dpll_tracker tracker; -- cgit v1.2.3 From 79378db6a86c7014cce40b65252e6c18f5b8bcc2 Mon Sep 17 00:00:00 2001 From: Santhosh Kumar K Date: Wed, 27 May 2026 23:07:36 +0530 Subject: spi: spi-mem: avoid mutating op template in spi_mem_supports_op() spi_mem_supports_op() accepts a const struct spi_mem_op pointer but casts away const internally to call spi_mem_adjust_op_freq(). This mutates the caller's op template, which causes stale max_freq values when callers reuse persistent templates - subsequent calls won't re-apply the device frequency cap since spi_mem_adjust_op_freq() skips non-zero values. Fix by operating on a stack-local copy instead. Fixes: a4f8e70d75dd ("spi: spi-mem: add spi_mem_adjust_op_freq() in spi_mem_supports_op()") Cc: Tianyu Xu Cc: stable@vger.kernel.org Signed-off-by: Santhosh Kumar K Reviewed-by: Miquel Raynal Link: https://patch.msgid.link/20260527173736.2243004-1-s-k6@ti.com Signed-off-by: Mark Brown --- drivers/spi/spi-mem.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index a09371a075d2..93266848c6df 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -279,13 +279,20 @@ static bool spi_mem_internal_supports_op(struct spi_mem *mem, */ bool spi_mem_supports_op(struct spi_mem *mem, const struct spi_mem_op *op) { - /* Make sure the operation frequency is correct before going futher */ - spi_mem_adjust_op_freq(mem, (struct spi_mem_op *)op); + struct spi_mem_op eval_op = *op; + + /* + * Work on a local copy; this is a pure capability check and must + * not modify the caller's op. Stored templates with max_freq == 0 + * must remain unset so their frequency is always re-capped to the + * current device maximum at execution time. + */ + spi_mem_adjust_op_freq(mem, &eval_op); - if (spi_mem_check_op(op)) + if (spi_mem_check_op(&eval_op)) return false; - return spi_mem_internal_supports_op(mem, op); + return spi_mem_internal_supports_op(mem, &eval_op); } EXPORT_SYMBOL_GPL(spi_mem_supports_op); -- cgit v1.2.3 From 00e1950716c6ed67d74777b2db286b0fa23b4be9 Mon Sep 17 00:00:00 2001 From: Zhenghang Xiao Date: Tue, 26 May 2026 18:51:52 +0800 Subject: Bluetooth: l2cap: clear chan->ident on ECRED reconfiguration success l2cap_ecred_reconf_rsp() returns early on success without clearing chan->ident. Every other L2CAP response handler (l2cap_ecred_conn_rsp, l2cap_le_connect_rsp, l2cap_config_rsp) clears chan->ident after a successful transaction to prevent the channel from matching subsequent responses with the recycled ident value. A remote attacker that completed a reconfiguration as the peer can replay a failure response with the stale ident, causing the kernel to match and destroy the already-established channel via l2cap_chan_del(chan, ECONNRESET). Clear chan->ident for all matching channels on success, and harden the failure path by using l2cap_chan_hold_unless_zero() consistent with other L2CAP handlers (l2cap_le_command_rej, __l2cap_get_chan_by_ident). Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode") Signed-off-by: Zhenghang Xiao Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 5668c92b3f58..ff13c43e3588 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5460,14 +5460,20 @@ static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn, BT_DBG("result 0x%4.4x", result); - if (!result) + if (!result) { + list_for_each_entry(chan, &conn->chan_l, list) { + if (chan->ident == cmd->ident) + chan->ident = 0; + } return 0; + } list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { if (chan->ident != cmd->ident) continue; - l2cap_chan_hold(chan); + if (!l2cap_chan_hold_unless_zero(chan)) + continue; l2cap_chan_lock(chan); l2cap_chan_del(chan, ECONNRESET); -- cgit v1.2.3 From 41c2713b204e6cb6a94587bc6bf6935107df5479 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 11 May 2026 12:09:42 -0400 Subject: Bluetooth: L2CAP: Fix possible crash on l2cap_ecred_conn_rsp If dcid is received for an already-assigned destination CID the spec requires that both channels to be discarded, but calling l2cap_chan_del may invalidate the tmp cursor created by list_for_each_entry_safe and in fact it is the wrong procedure as the chan->dcid may be assigned previously it really needs to be disconnected. Calling l2cap_chan_clone directly may still lead to l2cap_chan_del so instead schedule l2cap_chan_timeout with delay 0 to close the channel asynchronously. Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ff13c43e3588..45b175399e8d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5262,6 +5262,7 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, cmd_len -= sizeof(*rsp); list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { + struct l2cap_chan *orig; u16 dcid; if (chan->ident != cmd->ident || @@ -5283,8 +5284,10 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, BT_DBG("dcid[%d] 0x%4.4x", i, dcid); + orig = __l2cap_get_chan_by_dcid(conn, dcid); + /* Check if dcid is already in use */ - if (dcid && __l2cap_get_chan_by_dcid(conn, dcid)) { + if (dcid && orig) { /* If a device receives a * L2CAP_CREDIT_BASED_CONNECTION_RSP packet with an * already-assigned Destination CID, then both the @@ -5293,10 +5296,24 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, */ l2cap_chan_del(chan, ECONNREFUSED); l2cap_chan_unlock(chan); - chan = __l2cap_get_chan_by_dcid(conn, dcid); - l2cap_chan_lock(chan); - l2cap_chan_del(chan, ECONNRESET); - l2cap_chan_unlock(chan); + + /* Check that the dcid channel mode is + * L2CAP_MODE_EXT_FLOWCTL since this procedure is only + * valid for that mode and shouldn't disconnect a dcid + * in other modes. + */ + if (orig->mode == L2CAP_MODE_EXT_FLOWCTL) { + l2cap_chan_lock(orig); + /* Disconnect the original channel as it may be + * considered connected since dcid has already + * been assigned; don't call l2cap_chan_close + * directly since that could lead to + * l2cap_chan_del and then removing the channel + * from the list while we're iterating over it. + */ + __set_chan_timer(orig, 0); + l2cap_chan_unlock(orig); + } continue; } -- cgit v1.2.3 From 47f23a259517abbdb8032c057a1e8a6bf3734878 Mon Sep 17 00:00:00 2001 From: Muhammad Bilal Date: Wed, 27 May 2026 04:59:17 +0000 Subject: Bluetooth: ISO: fix UAF in iso_recv_frame iso_recv_frame reads conn->sk under iso_conn_lock but releases the lock before using sk, with no reference held. A concurrent iso_sock_kill() can free sk in that window, causing use-after-free on sk->sk_state and sock_queue_rcv_skb(). Fix by replacing the bare pointer read with iso_sock_hold(conn), which calls sock_hold() while the spinlock is held, atomically elevating the refcount before the lock drops. Add a drop_put label so sock_put() is called on all exit paths where the hold succeeded. Fixes: ccf74f2390d60a2f9a75ef496d2564abb478f46a ("Bluetooth: Add BTPROTO_ISO socket type") Cc: stable@vger.kernel.org Signed-off-by: Muhammad Bilal Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index d7af617cda45..f03b7fa5dccc 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -564,7 +564,7 @@ static void iso_recv_frame(struct iso_conn *conn, struct sk_buff *skb) struct sock *sk; iso_conn_lock(conn); - sk = conn->sk; + sk = iso_sock_hold(conn); iso_conn_unlock(conn); if (!sk) @@ -573,11 +573,15 @@ static void iso_recv_frame(struct iso_conn *conn, struct sk_buff *skb) BT_DBG("sk %p len %d", sk, skb->len); if (sk->sk_state != BT_CONNECTED) - goto drop; + goto drop_put; - if (!sock_queue_rcv_skb(sk, skb)) + if (!sock_queue_rcv_skb(sk, skb)) { + sock_put(sk); return; + } +drop_put: + sock_put(sk); drop: kfree_skb(skb); } -- cgit v1.2.3 From 4b5f8e608749b7e8fa386c6e4301cf9272595859 Mon Sep 17 00:00:00 2001 From: Muhammad Bilal Date: Wed, 27 May 2026 04:59:18 +0000 Subject: Bluetooth: ISO: serialize iso_sock_clear_timer with socket lock iso_sock_close() calls iso_sock_clear_timer() before acquiring lock_sock(sk). iso_sock_clear_timer() reads iso_pi(sk)->conn twice without the socket lock held: if (!iso_pi(sk)->conn) return; cancel_delayed_work(&iso_pi(sk)->conn->timeout_work); Concurrently, iso_conn_del() executes under lock_sock(sk) and calls iso_chan_del(), which sets iso_pi(sk)->conn to NULL and may result in the final reference to the connection being dropped: CPU0 CPU1 ---- ---- iso_sock_clear_timer() if (conn != NULL) ... lock_sock(sk) iso_chan_del() iso_pi(sk)->conn = NULL cancel_delayed_work(conn) /* NULL deref or UAF */ iso_pi(sk)->conn is not stable across the unlock window, causing a NULL pointer dereference or use-after-free. Serialize iso_sock_clear_timer() with the socket lock by moving it inside lock_sock()/release_sock(), matching the pattern used in iso_conn_del() and all other call sites. Fixes: ccf74f2390d60a2f9a75ef496d2564abb478f46a ("Bluetooth: Add BTPROTO_ISO socket type") Cc: stable@vger.kernel.org Signed-off-by: Muhammad Bilal Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index f03b7fa5dccc..876649556d3c 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -864,8 +864,8 @@ static void __iso_sock_close(struct sock *sk) /* Must be called on unlocked socket. */ static void iso_sock_close(struct sock *sk) { - iso_sock_clear_timer(sk); lock_sock(sk); + iso_sock_clear_timer(sk); __iso_sock_close(sk); release_sock(sk); iso_sock_kill(sk); -- cgit v1.2.3 From 40b87657200cfae93e48904fd9c9c8fc3e192cae Mon Sep 17 00:00:00 2001 From: Heitor Alves de Siqueira Date: Tue, 26 May 2026 10:50:57 -0300 Subject: Bluetooth: hci_core: Rework hci_dev_do_reset() to use hci_sync functions The current HCI reset function in hci_core.c duplicates most of the work done by hci_dev_close_sync(), and doesn't handle LE, advertising or discovery. Instead of porting these to hci_dev_do_reset(), directly call the close/open functions from hci_sync to reset the hdev. MGMT now notifies when a user performs a reset. Suggested-by: Luiz Augusto von Dentz Signed-off-by: Heitor Alves de Siqueira Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 43 +++---------------------------------------- 1 file changed, 3 insertions(+), 40 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index c46c1236ebfa..28d7929dc593 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -539,46 +539,9 @@ static int hci_dev_do_reset(struct hci_dev *hdev) hci_req_sync_lock(hdev); - /* Drop queues */ - skb_queue_purge(&hdev->rx_q); - skb_queue_purge(&hdev->cmd_q); - - /* Cancel these to avoid queueing non-chained pending work */ - hci_dev_set_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); - /* Wait for - * - * if (!hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) - * queue_delayed_work(&hdev->{cmd,ncmd}_timer) - * - * inside RCU section to see the flag or complete scheduling. - */ - synchronize_rcu(); - /* Explicitly cancel works in case scheduled after setting the flag. */ - cancel_delayed_work(&hdev->cmd_timer); - cancel_delayed_work(&hdev->ncmd_timer); - - /* Avoid potential lockdep warnings from the *_flush() calls by - * ensuring the workqueue is empty up front. - */ - drain_workqueue(hdev->workqueue); - - hci_dev_lock(hdev); - hci_inquiry_cache_flush(hdev); - hci_conn_hash_flush(hdev); - hci_dev_unlock(hdev); - - if (hdev->flush) - hdev->flush(hdev); - - hci_dev_clear_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); - - atomic_set(&hdev->cmd_cnt, 1); - hdev->acl_cnt = 0; - hdev->sco_cnt = 0; - hdev->le_cnt = 0; - hdev->iso_cnt = 0; - - ret = hci_reset_sync(hdev); + ret = hci_dev_close_sync(hdev); + if (!ret) + ret = hci_dev_open_sync(hdev); hci_req_sync_unlock(hdev); return ret; -- cgit v1.2.3 From 525daaea459fc215f432de1b8debbd9144bf97b0 Mon Sep 17 00:00:00 2001 From: Heitor Alves de Siqueira Date: Tue, 26 May 2026 10:50:58 -0300 Subject: Bluetooth: hci_sync: Set HCI_CMD_DRAIN_WORKQUEUE during device close Since hci_dev_close_sync() can now be called during the reset path, we should also set HCI_CMD_DRAIN_WORKQUEUE. This avoids queuing timeouts while the hdev workqueue is being drained. Fixes: 877afadad2dc ("Bluetooth: When HCI work queue is drained, only queue chained work") Signed-off-by: Heitor Alves de Siqueira Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 1faf8df6d159..0f016d269c62 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5301,6 +5301,12 @@ int hci_dev_close_sync(struct hci_dev *hdev) bt_dev_dbg(hdev, ""); + /* Set HCI_DRAIN_WORKQUEUE flag to prevent queuing work during + * reset/close. See hci_cmd_work() and handle_cmd_cnt_and_timer(). + */ + hci_dev_set_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); + synchronize_rcu(); + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { disable_delayed_work(&hdev->power_off); disable_delayed_work(&hdev->ncmd_timer); @@ -5324,6 +5330,7 @@ int hci_dev_close_sync(struct hci_dev *hdev) if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { cancel_delayed_work_sync(&hdev->cmd_timer); + hci_dev_clear_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); return err; } @@ -5423,6 +5430,7 @@ int hci_dev_close_sync(struct hci_dev *hdev) /* Clear flags */ hdev->flags &= BIT(HCI_RAW); hci_dev_clear_volatile_flags(hdev); + hci_dev_clear_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); memset(hdev->eir, 0, sizeof(hdev->eir)); memset(hdev->dev_class, 0, sizeof(hdev->dev_class)); -- cgit v1.2.3 From cdf88b35e06f1b385f7f6228060ae541d44fbb72 Mon Sep 17 00:00:00 2001 From: Heitor Alves de Siqueira Date: Tue, 26 May 2026 10:50:59 -0300 Subject: Bluetooth: hci_sync: Reset device counters in hci_dev_close_sync() Before resetting or closing the device, protocol counters should also be zeroed. Fixes: d0b137062b2d ("Bluetooth: hci_sync: Rework init stages") Signed-off-by: Heitor Alves de Siqueira Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 0f016d269c62..aeccd8084cba 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5393,6 +5393,10 @@ int hci_dev_close_sync(struct hci_dev *hdev) /* Reset device */ skb_queue_purge(&hdev->cmd_q); atomic_set(&hdev->cmd_cnt, 1); + hdev->acl_cnt = 0; + hdev->sco_cnt = 0; + hdev->le_cnt = 0; + hdev->iso_cnt = 0; if (hci_test_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE) && !auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { set_bit(HCI_INIT, &hdev->flags); -- cgit v1.2.3 From bbec30f7e19d9a1c604da7164b8057ccee590e72 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 22 May 2026 09:49:35 +0200 Subject: gpio: shared: undo the vote of the proxy on GPIO free When the user of a shared GPIO managed by gpio-shared-proxy calls gpiod_put() to release it, we never undo the potential "vote" for driving the shared line "high". In the free() callback, check if this proxy voted for "high" and - if so - decrease the number of votes and potentially revert the value to low if this is the last user. Cc: stable@vger.kernel.org Fixes: e992d54c6f97 ("gpio: shared-proxy: implement the shared GPIO proxy driver") Closes: https://sashiko.dev/#/patchset/20260513-gpio-shared-dynamic-voting-v1-1-8e1c49961b7d%40oss.qualcomm.com Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20260522-gpio-shared-free-vote-v3-1-8a4fddc6bedb@oss.qualcomm.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-shared-proxy.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpio/gpio-shared-proxy.c b/drivers/gpio/gpio-shared-proxy.c index 29d7d2e4dfc0..6941e4be6cf1 100644 --- a/drivers/gpio/gpio-shared-proxy.c +++ b/drivers/gpio/gpio-shared-proxy.c @@ -103,9 +103,18 @@ static void gpio_shared_proxy_free(struct gpio_chip *gc, unsigned int offset) { struct gpio_shared_proxy_data *proxy = gpiochip_get_data(gc); struct gpio_shared_desc *shared_desc = proxy->shared_desc; + int ret; guard(gpio_shared_desc_lock)(shared_desc); + if (proxy->voted_high) { + ret = gpio_shared_proxy_set_unlocked(proxy, + shared_desc->can_sleep ? gpiod_set_value_cansleep : gpiod_set_value, 0); + if (ret) + dev_err(proxy->dev, + "Failed to unset the shared GPIO value on release: %d\n", ret); + } + proxy->shared_desc->usecnt--; dev_dbg(proxy->dev, "Shared GPIO freed, number of users: %u\n", -- cgit v1.2.3 From a5c627d90809b793fc053849b3a00609db305776 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 22 May 2026 09:35:27 +0200 Subject: gpio: adnp: fix flow control regression caused by scoped_guard() scoped_guard() is implemented as a for loop. Using it to protect code using the continue statement changes the flow as we now only break out of the hidden loop inside scoped_guard(), not the original for loop. Use a regular code block instead. Fixes: c7fe19ed3973 ("gpio: adnp: use lock guards for the I2C lock") Reported-by: David Lechner Closes: https://lore.kernel.org/all/cde2abb2-4cc8-4fc9-b34a-0c5d2b95779f@baylibre.com/ Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20260522073527.9812-1-bartosz.golaszewski@oss.qualcomm.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-adnp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-adnp.c b/drivers/gpio/gpio-adnp.c index e5ac2d211013..fe5bcaa90496 100644 --- a/drivers/gpio/gpio-adnp.c +++ b/drivers/gpio/gpio-adnp.c @@ -237,7 +237,9 @@ static irqreturn_t adnp_irq(int irq, void *data) unsigned long pending; int err; - scoped_guard(mutex, &adnp->i2c_lock) { + { + guard(mutex)(&adnp->i2c_lock); + err = adnp_read(adnp, GPIO_PLR(adnp) + i, &level); if (err < 0) continue; -- cgit v1.2.3 From a1b836607304f71051f9f9dcccf8b5097b86a1fb Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 22 May 2026 11:12:36 +0200 Subject: gpio: shared: fix deadlock on shared proxy's parent removal Commit 710abda58055 ("gpio: shared: call gpio_chip::of_xlate() if set") used the mutex embedded in struct gpio_shared_entry to protect the offset field which now can be modified after assignment. The critical section however is too wide and introduced a potential deadlock on the removal of the shared GPIO proxy's parent. Make the critical section shorter - only protect the offset when it's being read. While at it: mention the fact that the entry lock is now also used to protect against concurrent access to the offset field in the structure's documentation. Cc: stable@vger.kernel.org Fixes: 710abda58055 ("gpio: shared: call gpio_chip::of_xlate() if set") Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20260522-gpio-shared-deadlock-v1-1-76bca088f8c0@oss.qualcomm.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-shared.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib-shared.c b/drivers/gpio/gpiolib-shared.c index e02d6b93a4ab..087b64c06c9f 100644 --- a/drivers/gpio/gpiolib-shared.c +++ b/drivers/gpio/gpiolib-shared.c @@ -53,7 +53,7 @@ struct gpio_shared_entry { unsigned int offset; /* Index in the property value array. */ size_t index; - /* Synchronizes the modification of shared_desc. */ + /* Synchronizes the modification of shared_desc and offset. */ struct mutex lock; struct gpio_shared_desc *shared_desc; struct kref ref; @@ -598,12 +598,11 @@ void gpio_device_teardown_shared(struct gpio_device *gdev) struct gpio_shared_ref *ref; list_for_each_entry(entry, &gpio_shared_list, list) { - guard(mutex)(&entry->lock); - if (!device_match_fwnode(&gdev->dev, entry->fwnode)) continue; - gpiod_free_commit(&gdev->descs[entry->offset]); + scoped_guard(mutex, &entry->lock) + gpiod_free_commit(&gdev->descs[entry->offset]); list_for_each_entry(ref, &entry->refs, list) { guard(mutex)(&ref->lock); -- cgit v1.2.3 From 9d7697fabbc72428f981c01ddbe0a6be0ce8b6fa Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 22 May 2026 11:12:37 +0200 Subject: gpio: shared: fix lockdep false positive by removing unneeded lock By the time gpio_device_teardown_shared() is called, the parent device is gone from the global list of GPIO devices and all outstanding SRCU read-side critical sections have completed. That means that no concurrent gpio_find_and_request() can call gpio_shared_add_proxy_lookup() for this device at this time. There's also no risk of the parent device being re-bound to the driver before the unbinding completes (including the child devices). Lockdep produces a false-positive report about a possible circular dependency as it doesn't know the ordering guarantee. Not taking the ref->lock in gpio_device_teardown_shared() silences it and is safe to do. Cc: stable@vger.kernel.org Fixes: ea513dd3c066 ("gpio: shared: make locking more fine-grained") Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20260522-gpio-shared-deadlock-v1-2-76bca088f8c0@oss.qualcomm.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-shared.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpio/gpiolib-shared.c b/drivers/gpio/gpiolib-shared.c index 087b64c06c9f..de72776fb154 100644 --- a/drivers/gpio/gpiolib-shared.c +++ b/drivers/gpio/gpiolib-shared.c @@ -605,8 +605,6 @@ void gpio_device_teardown_shared(struct gpio_device *gdev) gpiod_free_commit(&gdev->descs[entry->offset]); list_for_each_entry(ref, &entry->refs, list) { - guard(mutex)(&ref->lock); - if (ref->lookup) { gpiod_remove_lookup_table(ref->lookup); kfree(ref->lookup->table[0].key); -- cgit v1.2.3 From 8a122b5e72cc0043705f0d524bcd15f0c0b3ec15 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 25 May 2026 10:15:16 +0300 Subject: gpio: virtuser: Fix uninitialized data bug in gpio_virtuser_direction_do_write() If *ppos is non-zero (user-space write split over multiple calls to write()) then simple_write_to_buffer() won't initialize the start of the buffer. Really, non-zero values for *ppos aren't going to work at all. Check for that and return -EINVAL at the start of the function. Fixes: 91581c4b3f29 ("gpio: virtuser: new virtual testing driver for the GPIO API") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/ahP3BJWWy-m_qI0X@stanley.mountain Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-virtuser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-virtuser.c b/drivers/gpio/gpio-virtuser.c index 128520d340d4..846f8688fec5 100644 --- a/drivers/gpio/gpio-virtuser.c +++ b/drivers/gpio/gpio-virtuser.c @@ -397,7 +397,7 @@ static ssize_t gpio_virtuser_direction_do_write(struct file *file, char buf[32], *trimmed; int ret, dir, val = 0; - if (count >= sizeof(buf)) + if (*ppos != 0 || count >= sizeof(buf)) return -EINVAL; ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count); @@ -622,7 +622,7 @@ static ssize_t gpio_virtuser_consumer_write(struct file *file, char buf[GPIO_VIRTUSER_NAME_BUF_LEN + 2]; int ret; - if (count >= sizeof(buf)) + if (*ppos != 0 || count >= sizeof(buf)) return -EINVAL; ret = simple_write_to_buffer(buf, GPIO_VIRTUSER_NAME_BUF_LEN, ppos, -- cgit v1.2.3 From 3e46c18d5d87f063a93ae0fe7662fbf6660459d5 Mon Sep 17 00:00:00 2001 From: Marco Scardovi Date: Tue, 26 May 2026 19:02:45 +0200 Subject: gpio: rockchip: convert bank->clk to devm_clk_get_enabled() The bank->clk was previously obtained via of_clk_get() and manually prepared/enabled. However, it was missing a corresponding clk_put() in both the error paths and the remove function, leading to a reference leak. Convert the allocation to devm_clk_get_enabled(), which also properly propagates failures from clk_prepare_enable() that were previously ignored. The GPIO bank device uses the same OF node as the previous of_clk_get() call, so devm_clk_get_enabled(dev, NULL) correctly resolves the same clock provider entry. Fix the reference leak and simplify the code by removing the manual clk_disable_unprepare() calls in the probe error paths and in the remove function. Fixes: 936ee2675eee ("gpio/rockchip: add driver for rockchip gpio") Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Marco Scardovi Link: https://patch.msgid.link/20260526171050.12785-2-scardracs@disroot.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-rockchip.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index 44d7ebd12724..33580093a4e7 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -656,11 +656,10 @@ static int rockchip_get_bank_data(struct rockchip_pin_bank *bank) if (!bank->irq) return -EINVAL; - bank->clk = of_clk_get(bank->of_node, 0); + bank->clk = devm_clk_get_enabled(bank->dev, NULL); if (IS_ERR(bank->clk)) return PTR_ERR(bank->clk); - clk_prepare_enable(bank->clk); id = readl(bank->reg_base + gpio_regs_v2.version_id); switch (id) { @@ -672,7 +671,6 @@ static int rockchip_get_bank_data(struct rockchip_pin_bank *bank) bank->db_clk = of_clk_get(bank->of_node, 1); if (IS_ERR(bank->db_clk)) { dev_err(bank->dev, "cannot find debounce clk\n"); - clk_disable_unprepare(bank->clk); return -EINVAL; } break; @@ -751,7 +749,6 @@ static int rockchip_gpio_probe(struct platform_device *pdev) ret = rockchip_gpiolib_register(bank); if (ret) { - clk_disable_unprepare(bank->clk); mutex_unlock(&bank->deferred_lock); return ret; } @@ -792,7 +789,6 @@ static void rockchip_gpio_remove(struct platform_device *pdev) { struct rockchip_pin_bank *bank = platform_get_drvdata(pdev); - clk_disable_unprepare(bank->clk); gpiochip_remove(&bank->gpio_chip); } -- cgit v1.2.3 From 9500077678230e36d22bf16d2b9539c13e59a801 Mon Sep 17 00:00:00 2001 From: Marco Scardovi Date: Tue, 26 May 2026 19:02:46 +0200 Subject: gpio: rockchip: teardown bugs and resource leaks Address several teardown issues and resource leaks in the driver's remove path and error handling: 1. Debounce clock reference leak: The debounce clock (bank->db_clk) is obtained using of_clk_get() which increments the clock's reference count, but clk_put() is never called. Register a devm action to cleanly release it on unbind. Note that of_clk_get(..., 1) remains necessary over devm_clk_get() because the DT binding does not define clock-names, precluding name-based lookup. 2. Unregistered chained IRQ handler: The chained IRQ handler is not disconnected in remove(). If a stray interrupt fires after the driver is removed, the kernel attempts to execute a stale handler, leading to a panic. Fix this by clearing the handler in remove(). 3. IRQ domain leak: The linear IRQ domain and its generic chips are allocated manually during probe but never removed. Remove the IRQ domain during driver teardown to free the associated generic chips and mappings. Fixes: 936ee2675eee ("gpio/rockchip: add driver for rockchip gpio") Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Marco Scardovi Link: https://patch.msgid.link/20260526171050.12785-3-scardracs@disroot.org [Bartosz: don't emit an error message on devres allocation failure] Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-rockchip.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index 33580093a4e7..bc97d5d5d329 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -638,10 +638,17 @@ fail: return ret; } +static void rockchip_clk_put(void *data) +{ + struct clk *clk = data; + + clk_put(clk); +} + static int rockchip_get_bank_data(struct rockchip_pin_bank *bank) { struct resource res; - int id = 0; + int id = 0, ret; if (of_address_to_resource(bank->of_node, 0, &res)) { dev_err(bank->dev, "cannot find IO resource for bank\n"); @@ -673,6 +680,11 @@ static int rockchip_get_bank_data(struct rockchip_pin_bank *bank) dev_err(bank->dev, "cannot find debounce clk\n"); return -EINVAL; } + + ret = devm_add_action_or_reset(bank->dev, rockchip_clk_put, + bank->db_clk); + if (ret) + return ret; break; case GPIO_TYPE_V1: bank->gpio_regs = &gpio_regs_v1; @@ -789,6 +801,9 @@ static void rockchip_gpio_remove(struct platform_device *pdev) { struct rockchip_pin_bank *bank = platform_get_drvdata(pdev); + irq_set_chained_handler_and_data(bank->irq, NULL, NULL); + if (bank->domain) + irq_domain_remove(bank->domain); gpiochip_remove(&bank->gpio_chip); } -- cgit v1.2.3 From 175db11786bde9061db526bf1ac5107d915f5163 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 16 May 2026 04:34:14 +0900 Subject: Disable -Wattribute-alias for clang-23 and newer Clang recently added support for -Wattribute-alias [1], which results in the same warnings that necessitated commit bee20031772a ("disable -Wattribute-alias warning for SYSCALL_DEFINEx()") for GCC. kernel/time/itimer.c:325:1: error: alias and aliasee have different types 'long (unsigned int)' and 'long (typeof (__builtin_choose_expr((__builtin_types_compatible_p(typeof ((unsigned int)0), typeof (0LL)) || __builtin_types_compatible_p(typeof ((unsigned int)0), typeof (0ULL))), 0LL, 0L)))' (aka 'long (long)') [-Werror,-Wattribute-alias] 325 | SYSCALL_DEFINE1(alarm, unsigned int, seconds) | ^ include/linux/syscalls.h:225:36: note: expanded from macro 'SYSCALL_DEFINE1' 225 | #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) | ^ include/linux/syscalls.h:236:2: note: expanded from macro 'SYSCALL_DEFINEx' 236 | __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) | ^ include/linux/syscalls.h:251:18: note: expanded from macro '__SYSCALL_DEFINEx' 251 | __attribute__((alias(__stringify(__se_sys##name)))); \ | ^ kernel/time/itimer.c:325:1: note: aliasee is declared here include/linux/syscalls.h:225:36: note: expanded from macro 'SYSCALL_DEFINE1' 225 | #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) | ^ include/linux/syscalls.h:236:2: note: expanded from macro 'SYSCALL_DEFINEx' 236 | __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) | ^ include/linux/syscalls.h:255:18: note: expanded from macro '__SYSCALL_DEFINEx' 255 | asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ | ^ :16:1: note: expanded from here 16 | __se_sys_alarm | ^ Disable the warnings in the same way for clang-23 and newer. Disable the warning about unknown warning options to avoid breaking the build for versions of clang-23 that do not have -Wattribute-alias, such as ones deployed by vendors like Android or CI systems or when bisecting LLVM between llvmorg-23-init and release/23.x. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/2163 Link: https://github.com/llvm/llvm-project/commit/40da6920a0d71d49dfa2392b09153600b0759f5e [1] Link: https://patch.msgid.link/20260515-syscall-disable-attribute-alias-for-clang-v1-1-9a9d95d41df6@kernel.org Signed-off-by: Nathan Chancellor --- arch/riscv/include/asm/syscall_wrapper.h | 4 ++++ include/linux/compat.h | 4 ++++ include/linux/compiler-clang.h | 6 ++++++ include/linux/compiler_types.h | 4 ++++ include/linux/syscalls.h | 4 ++++ 5 files changed, 22 insertions(+) diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h index ac80216549ff..226289c3b5c8 100644 --- a/arch/riscv/include/asm/syscall_wrapper.h +++ b/arch/riscv/include/asm/syscall_wrapper.h @@ -32,6 +32,10 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *); __diag_push(); \ __diag_ignore(GCC, 8, "-Wattribute-alias", \ "Type aliasing is used to sanitize syscall arguments"); \ + __diag_ignore(clang, 23, "-Wunknown-warning-option", \ + "Avoid breaking versions without -Wattribute-alias"); \ + __diag_ignore(clang, 23, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments"); \ static long __se_##prefix##name(ulong, ulong, ulong, ulong, ulong, ulong, \ ulong) \ __attribute__((alias(__stringify(___se_##prefix##name)))); \ diff --git a/include/linux/compat.h b/include/linux/compat.h index 56cebaff0c91..8da0a15c95f4 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -72,6 +72,10 @@ __diag_push(); \ __diag_ignore(GCC, 8, "-Wattribute-alias", \ "Type aliasing is used to sanitize syscall arguments");\ + __diag_ignore(clang, 23, "-Wunknown-warning-option", \ + "Avoid breaking versions without -Wattribute-alias"); \ + __diag_ignore(clang, 23, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments"); \ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_compat_sys##name)))); \ ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index e1123dd28486..527e4e136020 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -131,6 +131,12 @@ #define __diag_str(s) __diag_str1(s) #define __diag(s) _Pragma(__diag_str(clang diagnostic s)) +#if CONFIG_CLANG_VERSION >= 230000 +#define __diag_clang_23(s) __diag(s) +#else +#define __diag_clang_23(s) +#endif + #define __diag_clang_13(s) __diag(s) #define __diag_ignore_all(option, comment) \ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index e8fd77593b68..369966598a2c 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -711,6 +711,10 @@ struct ftrace_likely_data { #define __diag_GCC(version, severity, string) #endif +#ifndef __diag_clang +#define __diag_clang(version, severity, string) +#endif + #define __diag_push() __diag(push) #define __diag_pop() __diag(pop) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f5639d5ac331..4fb7291f54b6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -247,6 +247,10 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) __diag_push(); \ __diag_ignore(GCC, 8, "-Wattribute-alias", \ "Type aliasing is used to sanitize syscall arguments");\ + __diag_ignore(clang, 23, "-Wunknown-warning-option", \ + "Avoid breaking versions without -Wattribute-alias");\ + __diag_ignore(clang, 23, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments");\ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_sys##name)))); \ ALLOW_ERROR_INJECTION(sys##name, ERRNO); \ -- cgit v1.2.3 From 006c66d1d52f1905e6ccfb615cf27235e4e6e745 Mon Sep 17 00:00:00 2001 From: bui duc phuc Date: Thu, 28 May 2026 12:32:04 +0700 Subject: regmap: reject volatile update_bits() in cache-only mode Prevent _regmap_update_bits() from accessing hardware when the register map is in cache-only mode. Unlike regmap_raw_read() and _regmap_read(), the volatile _regmap_update_bits() fast path bypasses the cache_only check. This can result in unexpected hardware accesses while the device is suspended. Return -EBUSY to ensure behavior is consistent with other cache-only access paths. Signed-off-by: bui duc phuc Link: https://patch.msgid.link/20260528053204.46783-1-phucduc.bui@gmail.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index b2b26f07f4e3..e6e022b02637 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -3257,6 +3257,9 @@ static int _regmap_update_bits(struct regmap *map, unsigned int reg, *change = false; if (regmap_volatile(map, reg) && map->reg_update_bits) { + if (map->cache_only) + return -EBUSY; + reg = regmap_reg_addr(map, reg); ret = map->reg_update_bits(map->bus_context, reg, mask, val); if (ret == 0 && change) -- cgit v1.2.3 From ead6680f354f83966c796fc7f9463a3171789616 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 23 May 2026 19:14:46 +0100 Subject: dma-buf: fix UAF in dma_buf_fd() tracepoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once FD_ADD() returns, the fd is live in the file descriptor table and a thread sharing that table can close() it before DMA_BUF_TRACE() runs. The close drops the last reference, __fput() frees the dma_buf, and the tracepoint then dereferences dmabuf to take dmabuf->name_lock -- slab-use-after-free. Split FD_ADD() back into get_unused_fd_flags() + fd_install() and emit the tracepoint between them. While the fdtable slot is reserved with a NULL file pointer, a racing close() returns -EBADF without entering __fput(), so the dma_buf stays alive across the trace. Same approach as commit 2d76319c4cbb ("dma-buf: fix UAF in dma_buf_put() tracepoint"). This undoes the FD_ADD() conversion done in commit 34dfce523c90 ("dma: convert dma_buf_fd() to FD_ADD()"); FD_ADD() has no place to hook the tracepoint safely. Reported-by: syzbot+7f4987d0afb97dd090cb@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7f4987d0afb97dd090cb Fixes: 281a22631423 ("dma-buf: add some tracepoints to debug.") Cc: stable@vger.kernel.org # 7.0.x Signed-off-by: David Carlier Reviewed-by: Christian König Signed-off-by: Sumit Semwal Link: https://patch.msgid.link/20260523181446.69525-1-devnexen@gmail.com --- drivers/dma-buf/dma-buf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 71f37544a5c6..d504c636dc29 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -792,9 +792,13 @@ int dma_buf_fd(struct dma_buf *dmabuf, int flags) if (!dmabuf || !dmabuf->file) return -EINVAL; - fd = FD_ADD(flags, dmabuf->file); + fd = get_unused_fd_flags(flags); + if (fd < 0) + return fd; + DMA_BUF_TRACE(trace_dma_buf_fd, dmabuf, fd); + fd_install(fd, dmabuf->file); return fd; } EXPORT_SYMBOL_NS_GPL(dma_buf_fd, "DMA_BUF"); -- cgit v1.2.3 From c0a8899e02ddebd51e2589835182c239c2e224ae Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 27 May 2026 17:05:26 +0100 Subject: HID: wacom: Fix OOB write in wacom_hid_set_device_mode() wacom_hid_set_device_mode() currently assumes that the HID_DG_INPUTMODE usage is always located in the first field (field[0]) of the feature report. However, a device can specify HID_DG_INPUTMODE in a different field. If HID_DG_INPUTMODE is in a field other than the first one and the first field has a report_count smaller than the usage_index of HID_DG_INPUTMODE, this leads to an out-of-bounds write to r->field[0]->value. Fix this by storing the field index of HID_DG_INPUTMODE in 'struct hid_data' during feature mapping. In wacom_hid_set_device_mode(), use this stored field index to access the correct field and add bounds checks to ensure both the field index and the value index are within valid ranges before writing. Cc: stable@vger.kernel.org Fixes: 5ae6e89f7409 ("HID: wacom: implement the finger part of the HID generic handling") Tested-by: Ping Cheng Reviewed-by: Ping Cheng Signed-off-by: Lee Jones Signed-off-by: Benjamin Tissoires --- drivers/hid/wacom_sys.c | 13 ++++++++++--- drivers/hid/wacom_wac.h | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index a32320b351e3..2220168bf116 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -356,6 +356,7 @@ static void wacom_feature_mapping(struct hid_device *hdev, hid_data->inputmode = field->report->id; hid_data->inputmode_index = usage->usage_index; + hid_data->inputmode_field_index = field->index; break; case HID_UP_DIGITIZER: @@ -571,9 +572,14 @@ static int wacom_hid_set_device_mode(struct hid_device *hdev) re = &(hdev->report_enum[HID_FEATURE_REPORT]); r = re->report_id_hash[hid_data->inputmode]; - if (r) { - r->field[0]->value[hid_data->inputmode_index] = 2; - hid_hw_request(hdev, r, HID_REQ_SET_REPORT); + if (r && hid_data->inputmode_field_index >= 0 && + hid_data->inputmode_field_index < r->maxfield) { + struct hid_field *field = r->field[hid_data->inputmode_field_index]; + + if (field && hid_data->inputmode_index < field->report_count) { + field->value[hid_data->inputmode_index] = 2; + hid_hw_request(hdev, r, HID_REQ_SET_REPORT); + } } return 0; } @@ -2846,6 +2852,7 @@ static int wacom_probe(struct hid_device *hdev, return -ENODEV; wacom_wac->hid_data.inputmode = -1; + wacom_wac->hid_data.inputmode_field_index = -1; wacom_wac->mode_report = -1; if (hid_is_usb(hdev)) { diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h index d4f7d8ca1e7e..126bec6e5c0c 100644 --- a/drivers/hid/wacom_wac.h +++ b/drivers/hid/wacom_wac.h @@ -295,6 +295,7 @@ struct wacom_shared { struct hid_data { __s16 inputmode; /* InputMode HID feature, -1 if non-existent */ __s16 inputmode_index; /* InputMode HID feature index in the report */ + __s16 inputmode_field_index; /* InputMode HID feature field index in the report */ bool sense_state; bool inrange_state; bool eraser; -- cgit v1.2.3 From 43a1e3744548e6fd85873e6fb43e293eb4010694 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 May 2026 11:45:41 -0700 Subject: security/keys: fix missed RCU read section on lookup Nicholas Carlini reports that the keyring code calls assoc_array_find() in find_key_to_update() without holding the RCU read lock, while the assoc_array_gc() code really is designed around removing the node from the tree and then freeing it after an RCU grace-period. The regular key handling doesn't see this because holding the keyring semaphore hides any lifetime issues, but the persistent key handling uses a different model. Instead of extending the keyring locking, just do the simple RCU locking that the assoc_array was designed for. Reported-by: Nicholas Carlini Cc: David Howells Cc: Jarkko Sakkinen Cc: Paul Moore Cc: James Morris James Morris Cc: Serge E. Hallyn Signed-off-by: Linus Torvalds --- security/keys/keyring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/keys/keyring.c b/security/keys/keyring.c index b39038f7dd31..5a9887d6b7be 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -1109,6 +1109,7 @@ key_ref_t find_key_to_update(key_ref_t keyring_ref, kenter("{%d},{%s,%s}", keyring->serial, index_key->type->name, index_key->description); + guard(rcu)(); object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops, index_key); -- cgit v1.2.3 From 17bfe0a8c014ee1d542ad352cd6a0a505361664a Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Mon, 25 May 2026 01:08:24 -0700 Subject: net: mana: Add NULL guards in teardown path to prevent panic on attach failure When queue allocation fails partway through, the error cleanup frees and NULLs apc->tx_qp and apc->rxqs. Multiple teardown paths such as mana_remove(), mana_change_mtu() recovery, and internal error handling in mana_alloc_queues() can subsequently call into functions that dereference these pointers without NULL checks: - mana_chn_setxdp() dereferences apc->rxqs[0], causing a NULL pointer dereference panic (CR2: 0000000000000000 at mana_chn_setxdp+0x26). - mana_destroy_vport() iterates apc->rxqs without a NULL check. - mana_fence_rqs() iterates apc->rxqs without a NULL check. - mana_dealloc_queues() iterates apc->tx_qp without a NULL check. Add NULL guards for apc->rxqs in mana_fence_rqs(), mana_destroy_vport(), and before the mana_chn_setxdp() call. Add a NULL guard for apc->tx_qp in mana_dealloc_queues() to skip TX queue draining when TX queues were never allocated or already freed. Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") Reviewed-by: Haiyang Zhang Signed-off-by: Dipayaan Roy Link: https://patch.msgid.link/20260525081129.1230035-2-dipayanroy@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/mana_en.c | 70 ++++++++++++++++----------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 9afc786b297a..9e7e4bf526bf 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1727,6 +1727,9 @@ static void mana_fence_rqs(struct mana_port_context *apc) struct mana_rxq *rxq; int err; + if (!apc->rxqs) + return; + for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { rxq = apc->rxqs[rxq_idx]; err = mana_fence_rq(apc, rxq); @@ -2858,13 +2861,16 @@ static void mana_destroy_vport(struct mana_port_context *apc) struct mana_rxq *rxq; u32 rxq_idx; - for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { - rxq = apc->rxqs[rxq_idx]; - if (!rxq) - continue; + if (apc->rxqs) { - mana_destroy_rxq(apc, rxq, true); - apc->rxqs[rxq_idx] = NULL; + for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { + rxq = apc->rxqs[rxq_idx]; + if (!rxq) + continue; + + mana_destroy_rxq(apc, rxq, true); + apc->rxqs[rxq_idx] = NULL; + } } mana_destroy_txq(apc); @@ -3269,7 +3275,8 @@ static int mana_dealloc_queues(struct net_device *ndev) if (apc->port_is_up) return -EINVAL; - mana_chn_setxdp(apc, NULL); + if (apc->rxqs) + mana_chn_setxdp(apc, NULL); if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) mana_pf_deregister_filter(apc); @@ -3287,33 +3294,38 @@ static int mana_dealloc_queues(struct net_device *ndev) * number of queues. */ - for (i = 0; i < apc->num_queues; i++) { - txq = &apc->tx_qp[i].txq; - tsleep = 1000; - while (atomic_read(&txq->pending_sends) > 0 && - time_before(jiffies, timeout)) { - usleep_range(tsleep, tsleep + 1000); - tsleep <<= 1; - } - if (atomic_read(&txq->pending_sends)) { - err = pcie_flr(to_pci_dev(gd->gdma_context->dev)); - if (err) { - netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n", - err, atomic_read(&txq->pending_sends), - txq->gdma_txq_id); + if (apc->tx_qp) { + for (i = 0; i < apc->num_queues; i++) { + txq = &apc->tx_qp[i].txq; + tsleep = 1000; + while (atomic_read(&txq->pending_sends) > 0 && + time_before(jiffies, timeout)) { + usleep_range(tsleep, tsleep + 1000); + tsleep <<= 1; + } + if (atomic_read(&txq->pending_sends)) { + err = + pcie_flr(to_pci_dev(gd->gdma_context->dev)); + if (err) { + netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n", + err, + atomic_read(&txq->pending_sends), + txq->gdma_txq_id); + } + break; } - break; } - } - for (i = 0; i < apc->num_queues; i++) { - txq = &apc->tx_qp[i].txq; - while ((skb = skb_dequeue(&txq->pending_skbs))) { - mana_unmap_skb(skb, apc); - dev_kfree_skb_any(skb); + for (i = 0; i < apc->num_queues; i++) { + txq = &apc->tx_qp[i].txq; + while ((skb = skb_dequeue(&txq->pending_skbs))) { + mana_unmap_skb(skb, apc); + dev_kfree_skb_any(skb); + } + atomic_set(&txq->pending_sends, 0); } - atomic_set(&txq->pending_sends, 0); } + /* We're 100% sure the queues can no longer be woken up, because * we're sure now mana_poll_tx_cq() can't be running. */ -- cgit v1.2.3 From 5b05aa36ee24297d7296ca58dfd8c448d0e4cda3 Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Mon, 25 May 2026 01:08:25 -0700 Subject: net: mana: Skip redundant detach on already-detached port When mana_per_port_queue_reset_work_handler() runs after a previous detach succeeded but attach failed, the port is left in a detached state with apc->tx_qp and apc->rxqs already freed. Calling mana_detach() again unconditionally leads to NULL pointer dereferences during queue teardown. Add an early exit in mana_detach() when the port is already in detached state (!netif_device_present) for non-close callers, making it safe to call idempotently. This allows the queue reset handler and other recovery paths to simply retry mana_attach() without redundant teardown. Fixes: 3b194343c250 ("net: mana: Implement ndo_tx_timeout and serialize queue resets per port.") Reviewed-by: Haiyang Zhang Signed-off-by: Dipayaan Roy Link: https://patch.msgid.link/20260525081129.1230035-3-dipayanroy@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/mana_en.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 9e7e4bf526bf..c9b1df1ed109 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -3350,6 +3350,12 @@ int mana_detach(struct net_device *ndev, bool from_close) ASSERT_RTNL(); + /* If already detached (indicates detach succeeded but attach failed + * previously). Now skip mana detach and just retry mana_attach. + */ + if (!from_close && !netif_device_present(ndev)) + return 0; + apc->port_st_save = apc->port_is_up; apc->port_is_up = false; -- cgit v1.2.3 From f14fe6395a8b3d961a61e138ad7b36ba3626dd4e Mon Sep 17 00:00:00 2001 From: Zhenghang Xiao Date: Wed, 27 May 2026 11:24:11 +0800 Subject: sctp: fix race between sctp_wait_for_connect and peeloff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sctp_wait_for_connect() drops and re-acquires the socket lock while waiting for the association to reach ESTABLISHED state. During this window, another thread can peeloff the association to a new socket via getsockopt(SCTP_SOCKOPT_PEELOFF), changing asoc->base.sk. After re-acquiring the old socket lock, sctp_wait_for_connect() returns success without noticing the migration — the caller then accesses the association under the wrong lock in sctp_datamsg_from_user(). Add the same sk != asoc->base.sk check that sctp_wait_for_sndbuf() already has, returning an error if the association was migrated while we slept. Fixes: 668c9beb9020 ("sctp: implement assign_number for sctp_stream_interleave") Signed-off-by: Zhenghang Xiao Acked-by: Xin Long Link: https://patch.msgid.link/20260527032411.60959-1-kipreyyy@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1d2568bb6bc2..66e12fb0c646 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9403,6 +9403,8 @@ static int sctp_wait_for_connect(struct sctp_association *asoc, long *timeo_p) release_sock(sk); current_timeo = schedule_timeout(current_timeo); lock_sock(sk); + if (sk != asoc->base.sk) + goto do_error; *timeo_p = current_timeo; } -- cgit v1.2.3 From 422b5233b607476ac7176bfa2a101b9a103d7653 Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Tue, 26 May 2026 17:32:38 +0200 Subject: net: pcs: pcs-mtk-lynxi: fix bpi-r3 serdes configuration Commit 8871389da151 introduces common pcs dts properties which writes rx=normal,tx=normal polarity to register SGMSYS_QPHY_WRAP_CTRL of switch. This is initialized with tx-bit set and so change inverts polarity compared to before. It looks like mt7531 has tx polarity inverted in hardware and set tx-bit by default to restore the normal polarity. The MT7531 datasheet quite clearly states: Register 000050EC QPHY_WRAP_CTRL -- QPHY wrapper control Reset value: 0x00000501 BIT 1 RX_BIT_POLARITY -- RX bit polarity control 1'b0: normal 1'b1: inverted BIT 0 TX_BIT_POLARITY -- TX bit polarity control (TX default inversed in MT7531) 1'b0: normal 1'b1: inverted Till this patch the register write was only called when mediatek,pnswap property was set which cannot be done for switch because the fw-node param was always NULL from switch driver in the mtk_pcs_lynxi_create call. Do not configure switch side like it's done before. Fixes: 8871389da151 ("net: pcs: pcs-mtk-lynxi: deprecate "mediatek,pnswap"") Signed-off-by: Frank Wunderlich Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20260526153239.30194-1-linux@fw-web.de Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-mtk-lynxi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/pcs/pcs-mtk-lynxi.c b/drivers/net/pcs/pcs-mtk-lynxi.c index c12f8087af9b..a753bd88cbc2 100644 --- a/drivers/net/pcs/pcs-mtk-lynxi.c +++ b/drivers/net/pcs/pcs-mtk-lynxi.c @@ -129,6 +129,9 @@ static int mtk_pcs_config_polarity(struct mtk_pcs_lynxi *mpcs, unsigned int val = 0; int ret; + if (!fwnode) + return 0; + if (fwnode_property_read_bool(fwnode, "mediatek,pnswap")) default_pol = PHY_POL_INVERT; -- cgit v1.2.3 From 5ab62dd3687bcc2cc542b99385aabac5c996db6f Mon Sep 17 00:00:00 2001 From: Rajat Gupta Date: Wed, 20 May 2026 22:11:21 -0700 Subject: drm: prevent integer overflows in dumb buffer creation helpers Fix integer overflow issues in the dumb buffer creation path: 1. drm_mode_create_dumb() does not bound width, height, or bpp before passing them to driver callbacks. Downstream helpers (e.g. drm_gem_dma_dumb_create_internal) perform pitch/size alignment in u32 arithmetic that can overflow for extreme values. Add hard limits: width and height < 8192, bpp <= 32. No legitimate software rendering use case exceeds these. 2. drm_mode_align_dumb() uses roundup(pitch, hw_pitch_align) without checking for overflow. If pitch is near U32_MAX, roundup() wraps to a small value, making subsequent check_mul_overflow() pass with a much smaller pitch than intended. Add an overflow check after roundup. 3. drm_mode_align_dumb() uses ALIGN(size, hw_size_align) which only works correctly for power-of-two alignment values. Replace with roundup() which works for any alignment. Suggested-by: Thomas Zimmermann Signed-off-by: Rajat Gupta Signed-off-by: Thomas Zimmermann --- drivers/gpu/drm/drm_dumb_buffers.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_dumb_buffers.c b/drivers/gpu/drm/drm_dumb_buffers.c index e2b62e5fb891..cc99681a9ed0 100644 --- a/drivers/gpu/drm/drm_dumb_buffers.c +++ b/drivers/gpu/drm/drm_dumb_buffers.c @@ -70,8 +70,11 @@ static int drm_mode_align_dumb(struct drm_mode_create_dumb *args, if (!pitch) return -EINVAL; - if (hw_pitch_align) + if (hw_pitch_align) { pitch = roundup(pitch, hw_pitch_align); + if (pitch < hw_pitch_align) + return -EINVAL; + } if (!hw_size_align) hw_size_align = PAGE_SIZE; @@ -80,7 +83,7 @@ static int drm_mode_align_dumb(struct drm_mode_create_dumb *args, if (check_mul_overflow(args->height, pitch, &size)) return -EINVAL; - size = ALIGN(size, hw_size_align); + size = roundup(size, hw_size_align); if (!size) return -EINVAL; @@ -199,6 +202,13 @@ int drm_mode_create_dumb(struct drm_device *dev, if (!args->width || !args->height || !args->bpp) return -EINVAL; + /* Reject unreasonable inputs early. Dumb buffers are for software + * rendering; nothing legitimate needs more than 8192x8192 at 32bpp. + * This prevents overflows in downstream alignment helpers. + */ + if (args->width >= 8192 || args->height >= 8192 || args->bpp > 32) + return -EINVAL; + /* overflow checks for 32bit size calculations */ if (args->bpp > U32_MAX - 8) return -EINVAL; -- cgit v1.2.3 From 6851161feb01cea41358c9ec304bd2f981fc8505 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 29 May 2026 10:23:25 +0200 Subject: Revert "esp: fix page frag reference leak on skb_to_sgvec failure" This reverts commit 2982e599fff6faa21c8df147d96fc7af6c1a2f24. The patch does not fully fix the issue and the Author does not match the 'Signed-off-by:' tag, so revert it for now. Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 12 +++++------- net/ipv6/esp6.c | 12 +++++------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 5d3a8656687e..513c8215c947 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -96,7 +96,7 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, __alignof__(struct scatterlist)); } -static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb, bool already_unref) +static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) { struct crypto_aead *aead = x->data; int extralen = 0; @@ -113,7 +113,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb, /* Unref skb_frag_pages in the src scatterlist if necessary. * Skip the first sg which comes from skb->data. */ - if (already_unref || req->src != req->dst) + if (req->src != req->dst) for (sg = sg_next(req->src); sg; sg = sg_next(sg)) skb_page_unref(page_to_netmem(sg_page(sg)), skb->pp_recycle); @@ -220,7 +220,7 @@ static void esp_output_done(void *data, int err) } tmp = ESP_SKB_CB(skb)->tmp; - esp_ssg_unref(x, tmp, skb, false); + esp_ssg_unref(x, tmp, skb); kfree(tmp); if (xo && (xo->flags & XFRM_DEV_RESUME)) { @@ -569,10 +569,8 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * err = skb_to_sgvec(skb, dsg, (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); - if (unlikely(err < 0)) { - esp_ssg_unref(x, tmp, skb, true); + if (unlikely(err < 0)) goto error_free; - } } if ((x->props.flags & XFRM_STATE_ESN)) @@ -604,7 +602,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * } if (sg != dsg) - esp_ssg_unref(x, tmp, skb, false); + esp_ssg_unref(x, tmp, skb); if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) err = esp_output_tail_tcp(x, skb); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index b963b8e72604..57481e423e59 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -113,7 +113,7 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, __alignof__(struct scatterlist)); } -static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb, bool already_unref) +static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) { struct crypto_aead *aead = x->data; int extralen = 0; @@ -130,7 +130,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb, /* Unref skb_frag_pages in the src scatterlist if necessary. * Skip the first sg which comes from skb->data. */ - if (already_unref || req->src != req->dst) + if (req->src != req->dst) for (sg = sg_next(req->src); sg; sg = sg_next(sg)) skb_page_unref(page_to_netmem(sg_page(sg)), skb->pp_recycle); @@ -254,7 +254,7 @@ static void esp_output_done(void *data, int err) } tmp = ESP_SKB_CB(skb)->tmp; - esp_ssg_unref(x, tmp, skb, false); + esp_ssg_unref(x, tmp, skb); kfree(tmp); esp_output_encap_csum(skb); @@ -600,10 +600,8 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info err = skb_to_sgvec(skb, dsg, (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); - if (unlikely(err < 0)) { - esp_ssg_unref(x, tmp, skb, true); + if (unlikely(err < 0)) goto error_free; - } } if ((x->props.flags & XFRM_STATE_ESN)) @@ -636,7 +634,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info } if (sg != dsg) - esp_ssg_unref(x, tmp, skb, false); + esp_ssg_unref(x, tmp, skb); if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) err = esp_output_tail_tcp(x, skb); -- cgit v1.2.3 From 83726330748981372bde86ed5411d7b306612991 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 29 May 2026 00:01:44 +0100 Subject: KVM: arm64: Correctly cap ZCR_EL2 provided by a guest hypervisor ZCR_EL2 can be updated by a VHE guest hypervisor either using ZCR_EL2 (which traps) or ZCR_EL1 (which does not trap). KVM handles both in different way: - on ZCR_EL2 trap, ZCR_EL2.LEN is immediately capped at the VM's own VL limit. This has the potential to break existing SW that relies on the full LEN field to be stateful. - on ZCR_EL1 access, we do absolutely nothing. On restoring the SVE context for an L2 guest, we directly restore the guest hypervisor's view of ZCR_EL2 into the physical ZCR_EL2. If the guest's view of the register was updated using the ZCR_EL2 accessor, the value has already been sanitised (with the caveat mentioned above). But if the guest used ZCR_EL1, the raw value is written into the HW, and the L2 guest can now access VLs that it shouldn't. Fix all the above by moving the VL capping to the restore points, ensuring that: - the HW is always programmed with a capped value, irrespective of the accessor being used, - the ZCR_EL2.LEN field is always completely stateful, irrespective of the accessor being used. Additionally, move ZCR_EL2 to be a sanitised register, ensuring that only the LEN field is actually stateful. This requires some creative construction of the RES0 mask, as the sysreg generation script does not yet generate RAZ/WI fields. Fixes: b3d29a823099 ("KVM: arm64: nv: Handle ZCR_EL2 traps") Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260529-kvm-arm64-fix-zcr-len-nv-v2-1-86cad51992bd@kernel.org [maz: rewrote commit message, tidy up access_zcr_el2()] Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/hyp/include/hyp/switch.h | 16 ++++++++++------ arch/arm64/kvm/nested.c | 5 +++++ arch/arm64/kvm/sys_regs.c | 11 +++-------- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 65eead8362e0..a49042bfa801 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -511,7 +511,6 @@ enum vcpu_sysreg { ACTLR_EL2, /* Auxiliary Control Register (EL2) */ CPTR_EL2, /* Architectural Feature Trap Register (EL2) */ HACR_EL2, /* Hypervisor Auxiliary Control Register */ - ZCR_EL2, /* SVE Control Register (EL2) */ TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */ TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */ TCR_EL2, /* Translation Control Register (EL2) */ @@ -543,6 +542,7 @@ enum vcpu_sysreg { SCTLR2_EL2, /* System Control Register 2 (EL2) */ MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ + ZCR_EL2, /* SVE Control Register (EL2) */ /* Any VNCR-capable reg goes after this point */ MARKER(__VNCR_START__), diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index bf0eb5e43427..320cd45d49c5 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -462,11 +462,13 @@ static inline bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code) static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) { + u64 zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; + /* * The vCPU's saved SVE state layout always matches the max VL of the * vCPU. Start off with the max VL so we can load the SVE state. */ - sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2); + sve_cond_update_zcr_vq(zcr_el2, SYS_ZCR_EL2); __sve_restore_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true); @@ -476,8 +478,10 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) * nested guest, as the guest hypervisor could select a smaller VL. Slap * that into hardware before wrapping up. */ - if (is_nested_ctxt(vcpu)) - sve_cond_update_zcr_vq(__vcpu_sys_reg(vcpu, ZCR_EL2), SYS_ZCR_EL2); + if (is_nested_ctxt(vcpu)) { + zcr_el2 = min(zcr_el2, __vcpu_sys_reg(vcpu, ZCR_EL2)); + sve_cond_update_zcr_vq(zcr_el2, SYS_ZCR_EL2); + } write_sysreg_el1(__vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)), SYS_ZCR); } @@ -501,11 +505,11 @@ static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu) return; if (vcpu_has_sve(vcpu)) { + zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; + /* A guest hypervisor may restrict the effective max VL. */ if (is_nested_ctxt(vcpu)) - zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2); - else - zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; + zcr_el2 = min(zcr_el2, __vcpu_sys_reg(vcpu, ZCR_EL2)); write_sysreg_el2(zcr_el2, SYS_ZCR); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 883b6c1008fb..38f672e94087 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1834,6 +1834,11 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu) resx.res1 = VNCR_EL2_RES1; set_sysreg_masks(kvm, VNCR_EL2, resx); + /* ZCR_EL2 - bits 8:4 are RAZ/WI so treat them as RES0 */ + resx.res0 = ZCR_ELx_RES0 | GENMASK_ULL(8, 4); + resx.res1 = ZCR_ELx_RES1; + set_sysreg_masks(kvm, ZCR_EL2, resx); + out: for (enum vcpu_sysreg sr = __SANITISED_REG_START__; sr < NR_SYS_REGS; sr++) __vcpu_rmw_sys_reg(vcpu, sr, |=, 0); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 148fc3400ea8..fa5c93c7a135 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2862,21 +2862,16 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - unsigned int vq; - if (guest_hyp_sve_traps_enabled(vcpu)) { kvm_inject_nested_sve_trap(vcpu); return false; } - if (!p->is_write) { + if (!p->is_write) p->regval = __vcpu_sys_reg(vcpu, ZCR_EL2); - return true; - } + else + __vcpu_assign_sys_reg(vcpu, ZCR_EL2, p->regval); - vq = SYS_FIELD_GET(ZCR_ELx, LEN, p->regval) + 1; - vq = min(vq, vcpu_sve_max_vq(vcpu)); - __vcpu_assign_sys_reg(vcpu, ZCR_EL2, vq - 1); return true; } -- cgit v1.2.3 From a0d8f7ac03e387634c5a7efe3dc162f7d605e2cd Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 7 May 2026 00:56:49 +0300 Subject: Revert "media: renesas: vsp1: Initialize format on all pads" This reverts commit 133ac42af0a1b389e8b7b3dc7c1cc8c30ff162b6. The change to format initialization, along with the change to format propagation in the BRx in commit 937f3e6b51f1 ("media: renesas: vsp1: brx: Fix format propagation"), broke configuration of the DRM pipeline. Revert it to fix the regression. The original commit was meant to fix a v4l2-compliance failure, with no known userspace applications being affected beside test tools. Reverting is the simplest option, a more comprehensive fix can be developed (and tested more thoroughly) later. Fixes: 133ac42af0a1 ("media: renesas: vsp1: Initialize format on all pads") Tested-by: Lad Prabhakar # On RZ/T2H Reviewed-by: Lad Prabhakar Link: https://patch.msgid.link/20260506215650.1897177-2-laurent.pinchart+renesas@ideasonboard.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- drivers/media/platform/renesas/vsp1/vsp1_entity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/renesas/vsp1/vsp1_entity.c b/drivers/media/platform/renesas/vsp1/vsp1_entity.c index 1dad9589768c..839b75b62ceb 100644 --- a/drivers/media/platform/renesas/vsp1/vsp1_entity.c +++ b/drivers/media/platform/renesas/vsp1/vsp1_entity.c @@ -380,7 +380,7 @@ static int vsp1_entity_init_state(struct v4l2_subdev *subdev, unsigned int pad; /* Initialize all pad formats with default values. */ - for (pad = 0; pad < subdev->entity.num_pads; ++pad) { + for (pad = 0; pad < subdev->entity.num_pads - 1; ++pad) { struct v4l2_subdev_format format = { .pad = pad, .which = sd_state ? V4L2_SUBDEV_FORMAT_TRY -- cgit v1.2.3 From f78073e84c800ae146ce62447e7a685a5ceeb92d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 7 May 2026 00:56:50 +0300 Subject: Revert "media: renesas: vsp1: brx: Fix format propagation" This reverts commit 937f3e6b51f1cea079be9ba642665f2bf8bcc31f. The change to format propagation in the BRx broke configuration of the DRM pipeline. Revert it to fix the regression. The original commit was meant to fix a v4l2-compliance failure, with no known userspace applications being affected beside test tools. Reverting is the simplest option, a more comprehensive fix can be developed (and tested more thoroughly) later. Reported-by: Lad Prabhakar Closes: https://lore.kernel.org/linux-media/CA+V-a8t481xuwava0nb7uY9CUPqFWZ_8EP0xrK3BgumP7HDcLg@mail.gmail.com Fixes: 937f3e6b51f1 ("media: renesas: vsp1: brx: Fix format propagation") Tested-by: Lad Prabhakar # On RZ/T2H Reviewed-by: Lad Prabhakar Link: https://patch.msgid.link/20260506215650.1897177-3-laurent.pinchart+renesas@ideasonboard.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- drivers/media/platform/renesas/vsp1/vsp1_brx.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/media/platform/renesas/vsp1/vsp1_brx.c b/drivers/media/platform/renesas/vsp1/vsp1_brx.c index b1a2c68e9944..9d93cb8b8e82 100644 --- a/drivers/media/platform/renesas/vsp1/vsp1_brx.c +++ b/drivers/media/platform/renesas/vsp1/vsp1_brx.c @@ -156,20 +156,14 @@ static int brx_set_format(struct v4l2_subdev *subdev, compose->height = format->height; } - /* - * Propagate the format code to all pads, and the whole format to the - * source pad. - */ + /* Propagate the format code to all pads. */ if (fmt->pad == BRX_PAD_SINK(0)) { unsigned int i; - for (i = 0; i < brx->entity.source_pad; ++i) { + for (i = 0; i <= brx->entity.source_pad; ++i) { format = v4l2_subdev_state_get_format(state, i); format->code = fmt->format.code; } - - format = v4l2_subdev_state_get_format(state, i); - *format = fmt->format; } done: -- cgit v1.2.3 From db3f2195d29344a3cf1e9dd9ab7f21ced7308cf7 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Fri, 1 May 2026 13:22:26 -0700 Subject: KVM: SEV: Require in-GHCB scratch area if GHCB v2+ is in use As per the GHCB spec, when using GHCB v2+ require the software scratch area to reside in the GHCB's shared buffer. Note, things like Page State Change (PSC) requests _rely_ on this behavior, as the guest can't provide a length when making the request, i.e. the size of the guest payload is bounded by the size of the shared buffer. Failure to force usage of the GHCB, and a slew of other flaws, lets a malicious SNP guest corrupt host kernel heap memory, and leak host heap layout information. setup_vmgexit_scratch() allocates a buffer via kvzalloc(exit_info_2), where exit_info_2 is guest-controlled. With exit_info_2=24, this yields a 24-byte allocation in kmalloc-cg-32 (32-byte slab objects). The buffer holds an 8-byte psc_hdr followed by 8-byte psc_entry structs, so only entries[0] and entries[1] are in-bounds. snp_begin_psc() validates end_entry against VMGEXIT_PSC_MAX_COUNT (253) but NOT against the actual buffer size: idx_end = hdr->end_entry; if (idx_end >= VMGEXIT_PSC_MAX_COUNT) { // checks 253, not buffer snp_complete_psc(svm, ...); return 1; } for (idx = idx_start; idx <= idx_end; idx++) { entry_start = entries[idx]; // OOB when idx >= 2 The guest sets end_entry=10+, causing the host to iterate entries[2+] which are OOB into adjacent slab objects. For each OOB entry: - The host reads 8 bytes (OOB READ / info leak oracle) - If the data passes PSC validation, __snp_complete_one_psc() writes cur_page = 1 or 512 into the entry (OOB WRITE, sev.c:3806) - If validation fails, the error response reveals whether adjacent memory is zero vs non-zero (information disclosure to guest) The guest controls allocation size (exit_info_2), entry range (cur_entry/end_entry), and can fire unlimited VMGEXITs to repeatedly hit different slab positions. By exploiting the variety of bugs, a malicious SEV-SNP guest can: - OOB read adjacent kmalloc-cg-32 objects (heap layout disclosure) - OOB write cur_page bits into adjacent objects (heap corruption) - Trigger use-after-free conditions across VMGEXITs E.g. with KASAN enabled, a single insmod of the PoC guest module produces 73 KASAN reports: BUG: KASAN: slab-out-of-bounds in snp_begin_psc+0x126/0x890 Read of size 8 at addr ffff888219ffb5e0 by task qemu-system-x86/2199 BUG: KASAN: slab-out-of-bounds in snp_begin_psc+0x468/0x890 Write of size 8 at addr ffff888351566648 by task qemu-system-x86/2199 The buggy address belongs to the object at ffff888XXXXXXXXX which belongs to the cache kmalloc-cg-32 of size 32 The buggy address is located N bytes to the right of allocated 32-byte region [ffff888XXXXXXXXX, ffff888XXXXXXXXX) Breakdown: 62 slab-out-of-bounds (reads + writes past allocation) 7 slab-use-after-free 4 use-after-free All credit to Stan for the wonderful description and reproducer! Reported-by: Stan Shaw Cc: Michael Roth Cc: Tom Lendacky Cc: Peter Gonda Cc: Jacky Li Fixes: 4af663c2f64a ("KVM: SEV: Allow per-guest configuration of GHCB protocol version") Cc: stable@vger.kernel.org Signed-off-by: Michael Roth [sean: write changelog] Reviewed-by: Tom Lendacky Signed-off-by: Sean Christopherson Message-ID: <20260501202250.2115252-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c2126b3c3072..23170b64f4a3 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3703,6 +3703,10 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) scratch_va = (void *)svm->sev_es.ghcb; scratch_va += (scratch_gpa_beg - control->ghcb_gpa); } else { + /* GHCB v2 requires the scratch area to be within the GHCB. */ + if (to_kvm_sev_info(svm->vcpu.kvm)->ghcb_version >= 2) + goto e_scratch; + /* * The guest memory must be read into a kernel buffer, so * limit the size -- cgit v1.2.3 From 1aa8a6dc7dac8b83234b53518311bf78231f4fa5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 1 May 2026 13:22:27 -0700 Subject: KVM: SEV: Ignore MMIO requests of length '0' Explicitly ignore MMIO requests of length '0', so that setting up the software scratch area (and other code) doesn't have to worry about underflowing the length, and to allow for special casing '0' in the future. Fixes: 8f423a80d299 ("KVM: SVM: Support MMIO for an SEV-ES guest") Cc: stable@vger.kernel.org Reviewed-by: Tom Lendacky Signed-off-by: Sean Christopherson Message-ID: <20260501202250.2115252-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 23170b64f4a3..fb2174b6d1ba 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -4497,13 +4497,17 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) case SVM_VMGEXIT_MMIO_READ: case SVM_VMGEXIT_MMIO_WRITE: { bool is_write = control->exit_code == SVM_VMGEXIT_MMIO_WRITE; + u64 len = control->exit_info_2; - ret = setup_vmgexit_scratch(svm, !is_write, control->exit_info_2); + if (!len) + return 1; + + ret = setup_vmgexit_scratch(svm, !is_write, len); if (ret) break; - ret = kvm_sev_es_mmio(vcpu, is_write, control->exit_info_1, - control->exit_info_2, svm->sev_es.ghcb_sa); + ret = kvm_sev_es_mmio(vcpu, is_write, control->exit_info_1, len, + svm->sev_es.ghcb_sa); break; } case SVM_VMGEXIT_NMI_COMPLETE: -- cgit v1.2.3 From dcf1b2d4b0564a27e4ca7c654871aab4f9620046 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 1 May 2026 13:22:28 -0700 Subject: KVM: SEV: Reject MMIO requests larger than 8 bytes with GHCB v2+ When using GHCB v2+, reject MMIO requests that are larger than 8 bytes. Per the GHCB spec: SW_EXITINFO2 must be less than or equal to 0x7fffffff for version 1 and less than or equal to 0x8 for all other versions. Fixes: 4af663c2f64a ("KVM: SEV: Allow per-guest configuration of GHCB protocol version") Cc: stable@vger.kernel.org Reviewed-by: Tom Lendacky Signed-off-by: Sean Christopherson Message-ID: <20260501202250.2115252-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index fb2174b6d1ba..e6579ca9f364 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -4502,6 +4502,11 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) if (!len) return 1; + if (to_kvm_sev_info(vcpu->kvm)->ghcb_version >= 2 && len > 8) { + svm_vmgexit_bad_input(svm, GHCB_ERR_INVALID_INPUT); + return 1; + } + ret = setup_vmgexit_scratch(svm, !is_write, len); if (ret) break; -- cgit v1.2.3 From 3988bd2723de407ae90fa7a6f6029b4e60238c58 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 1 May 2026 13:22:29 -0700 Subject: KVM: SEV: Ignore Port I/O requests of length '0' Explicitly ignore Port I/O requests of length '0' (or count '0'), so that setting up the software scratch area (and other code) doesn't have to worry about underflowing the length, and to allow for WARNing on trying to configure the scratch area with len==0. Fixes: 291bd20d5d88 ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT") Cc: stable@vger.kernel.org Reviewed-by: Tom Lendacky Signed-off-by: Sean Christopherson Message-ID: <20260501202250.2115252-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index e6579ca9f364..52703c954856 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -4585,6 +4585,11 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) control->exit_info_1, control->exit_info_2); ret = -EINVAL; break; + case SVM_EXIT_IOIO: + if (!((control->exit_info_1 & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT)) + return 1; + + fallthrough; default: ret = svm_invoke_exit_handler(vcpu, control->exit_code); } @@ -4605,6 +4610,9 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) if (unlikely(check_mul_overflow(count, size, &bytes))) return -EINVAL; + if (!bytes) + return 1; + r = setup_vmgexit_scratch(svm, in, bytes); if (r) return r; -- cgit v1.2.3 From 2be54670bdc017004c4a4b8bddb6ff02ebe7dbe2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 1 May 2026 13:22:30 -0700 Subject: KVM: SEV: Use the size of the PSC header as the minimum size for PSC requests When handling a Page State Change (PSC) #VMGEXIT use the size of the PSC header as the minimum size for the scratch area. Per the GHCB spec, PSC requests do NOT provide the length, i.e. using control->exit_info_2 for the length is completely made up behavior. The existing code "works", e.g. even though Linux-as-a-guest always passes '0', because KVM doesn't do anything with the length when the request is in the GHCB's shared buffer. Use the header as the min length. Once the header is retrieved, KVM can use the specified indices to compute the full size of the request. Fixes: 9b54e248d264 ("KVM: SEV: Add support to handle Page State Change VMGEXIT") Cc: stable@vger.kernel.org Reviewed-by: Tom Lendacky Reviewed-by: Michael Roth Signed-off-by: Sean Christopherson Message-ID: <20260501202250.2115252-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 52703c954856..cbb3040e0778 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -4559,7 +4559,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) vcpu->run->system_event.data[0] = control->ghcb_gpa; break; case SVM_VMGEXIT_PSC: - ret = setup_vmgexit_scratch(svm, true, control->exit_info_2); + ret = setup_vmgexit_scratch(svm, true, sizeof(struct psc_hdr)); if (ret) break; -- cgit v1.2.3 From 5867d7e202e09f037cefe77f7af4413c7c0fa088 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 1 May 2026 13:22:31 -0700 Subject: KVM: SEV: Compute the correct max length of the in-GHCB scratch area When setting the length of the GHCB scratch area, and the area is in the GHCB shared buffer, set the effective length of the scratch area to the max possible size given the start of the guest-provided pointer, and the end of the shared buffer. The code was "fine" when first introduced, as KVM doesn't consult the length of the buffer when emulating MMIO, because the passed in @len always specifies the *max* size required. But for PSC requests, the incoming @len is just the minimum length (to process the header), and KVM needs to know the full size of the scratch area to avoid buffer overflows (spoiler alert). Opportunistically rename @len => @min_len to better reflect its role. Fixes: 9b54e248d264 ("KVM: SEV: Add support to handle Page State Change VMGEXIT") Cc: stable@vger.kernel.org Reviewed-by: Tom Lendacky Reviewed-by: Michael Roth Signed-off-by: Sean Christopherson Message-ID: <20260501202250.2115252-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index cbb3040e0778..6072fecfe994 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3662,7 +3662,7 @@ int pre_sev_run(struct vcpu_svm *svm, int cpu) } #define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) -static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) +static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 min_len) { struct vmcb_control_area *control = &svm->vmcb->control; u64 ghcb_scratch_beg, ghcb_scratch_end; @@ -3675,10 +3675,10 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) goto e_scratch; } - scratch_gpa_end = scratch_gpa_beg + len; + scratch_gpa_end = scratch_gpa_beg + min_len; if (scratch_gpa_end < scratch_gpa_beg) { pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n", - len, scratch_gpa_beg); + min_len, scratch_gpa_beg); goto e_scratch; } @@ -3702,6 +3702,8 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) scratch_va = (void *)svm->sev_es.ghcb; scratch_va += (scratch_gpa_beg - control->ghcb_gpa); + + svm->sev_es.ghcb_sa_len = ghcb_scratch_end - scratch_gpa_beg; } else { /* GHCB v2 requires the scratch area to be within the GHCB. */ if (to_kvm_sev_info(svm->vcpu.kvm)->ghcb_version >= 2) @@ -3711,16 +3713,16 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) * The guest memory must be read into a kernel buffer, so * limit the size */ - if (len > GHCB_SCRATCH_AREA_LIMIT) { + if (min_len > GHCB_SCRATCH_AREA_LIMIT) { pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n", - len, GHCB_SCRATCH_AREA_LIMIT); + min_len, GHCB_SCRATCH_AREA_LIMIT); goto e_scratch; } - scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT); + scratch_va = kvzalloc(min_len, GFP_KERNEL_ACCOUNT); if (!scratch_va) return -ENOMEM; - if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) { + if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, min_len)) { /* Unable to copy scratch area from guest */ pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); @@ -3736,11 +3738,10 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) */ svm->sev_es.ghcb_sa_sync = sync; svm->sev_es.ghcb_sa_free = true; + svm->sev_es.ghcb_sa_len = min_len; } svm->sev_es.ghcb_sa = scratch_va; - svm->sev_es.ghcb_sa_len = len; - return 0; e_scratch: -- cgit v1.2.3 From f185e05dce6f170f83c4ba602e969b1c3c7a22e6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 1 May 2026 13:22:32 -0700 Subject: KVM: SEV: WARN if KVM attempts to setup scratch area with min_len==0 Now that all paths in KVM properly validate the length needed for the scratch area, and are guaranteed to pass in a non-zero length, WARN if KVM attempts to configured the scratch area with min_len==0 to guard against future bugs. Cc: stable@vger.kernel.org Reviewed-by: Tom Lendacky Reviewed-by: Michael Roth Signed-off-by: Sean Christopherson Message-ID: <20260501202250.2115252-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6072fecfe994..a3e85348ace9 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3669,6 +3669,9 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 min_len) u64 scratch_gpa_beg, scratch_gpa_end; void *scratch_va; + if (WARN_ON_ONCE(!min_len)) + goto e_scratch; + scratch_gpa_beg = svm->sev_es.sw_scratch; if (!scratch_gpa_beg) { pr_err("vmgexit: scratch gpa not provided\n"); -- cgit v1.2.3 From ebe4b2dc9cfbfb2d8f665667c4d08f4c6c9bec05 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 1 May 2026 13:22:33 -0700 Subject: KVM: SEV: Don't explicitly pass PSC buffer to snp_begin_psc() Stop explicitly passing the PSC buffer to snp_begin_psc(): it *must* be the scratch area. This will allow fixing a variety of bugs without further complicating the code. No functional change intended. Cc: stable@vger.kernel.org Reviewed-by: Tom Lendacky Reviewed-by: Michael Roth Signed-off-by: Sean Christopherson Message-ID: <20260501202250.2115252-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index a3e85348ace9..8577451b82b2 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3841,7 +3841,7 @@ struct psc_buffer { struct psc_entry entries[]; } __packed; -static int snp_begin_psc(struct vcpu_svm *svm, struct psc_buffer *psc); +static int snp_begin_psc(struct vcpu_svm *svm); static void snp_complete_psc(struct vcpu_svm *svm, u64 psc_ret) { @@ -3883,7 +3883,6 @@ static void __snp_complete_one_psc(struct vcpu_svm *svm) static int snp_complete_one_psc(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct psc_buffer *psc = svm->sev_es.ghcb_sa; if (vcpu->run->hypercall.ret) { snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC); @@ -3893,11 +3892,13 @@ static int snp_complete_one_psc(struct kvm_vcpu *vcpu) __snp_complete_one_psc(svm); /* Handle the next range (if any). */ - return snp_begin_psc(svm, psc); + return snp_begin_psc(svm); } -static int snp_begin_psc(struct vcpu_svm *svm, struct psc_buffer *psc) +static int snp_begin_psc(struct vcpu_svm *svm) { + struct vcpu_sev_es_state *sev_es = &svm->sev_es; + struct psc_buffer *psc = sev_es->ghcb_sa; struct psc_entry *entries = psc->entries; struct kvm_vcpu *vcpu = &svm->vcpu; struct psc_hdr *hdr = &psc->hdr; @@ -4567,7 +4568,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) if (ret) break; - ret = snp_begin_psc(svm, svm->sev_es.ghcb_sa); + ret = snp_begin_psc(svm); break; case SVM_VMGEXIT_AP_CREATION: ret = sev_snp_ap_creation(svm); -- cgit v1.2.3 From 121d88de56bc5c0ba0ce2f6381af67f948a7e7c1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 1 May 2026 13:22:34 -0700 Subject: KVM: SEV: Check PSC request indices against the actual size of the buffer When processing Page State Change (PSC) requests, validate the PSC buffer against the effective size of the scratch area, which could be less than the maximum size if the guest provided a pointer that isn't exactly at the start of the GHCB shared buffer. Fixes: 9b54e248d264 ("KVM: SEV: Add support to handle Page State Change VMGEXIT") Cc: stable@vger.kernel.org Reviewed-by: Tom Lendacky Reviewed-by: Michael Roth Signed-off-by: Sean Christopherson Message-ID: <20260501202250.2115252-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 8577451b82b2..6e8cbae2135a 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3903,7 +3903,7 @@ static int snp_begin_psc(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; struct psc_hdr *hdr = &psc->hdr; struct psc_entry entry_start; - u16 idx, idx_start, idx_end; + u16 idx, idx_start, idx_end, max_nr_entries; int npages; bool huge; u64 gfn; @@ -3913,6 +3913,19 @@ static int snp_begin_psc(struct vcpu_svm *svm) return 1; } + /* + * GHCB v2 requires the scratch area to reside within the GHCB itself, + * and PSC requests are only supported for GHCB v2+. Thus it should be + * impossible to exceed the max PSC entry count (which is derived from + * the size of the shared GHCB buffer). + */ + max_nr_entries = (sev_es->ghcb_sa_len - sizeof(struct psc_hdr)) / + sizeof(struct psc_entry); + if (WARN_ON_ONCE(max_nr_entries > VMGEXIT_PSC_MAX_COUNT)) { + snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC); + return 1; + } + next_range: /* There should be no other PSCs in-flight at this point. */ if (WARN_ON_ONCE(svm->sev_es.psc_inflight)) { @@ -3928,7 +3941,7 @@ next_range: idx_start = hdr->cur_entry; idx_end = hdr->end_entry; - if (idx_end >= VMGEXIT_PSC_MAX_COUNT) { + if (idx_end >= max_nr_entries) { snp_complete_psc(svm, VMGEXIT_PSC_ERROR_INVALID_HDR); return 1; } -- cgit v1.2.3 From c8cc238093ca6c99267032f6cfe78f59389f3157 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 1 May 2026 13:22:35 -0700 Subject: KVM: SEV: Use READ_ONCE() when reading entries/indices from PSC buffer Use READ_ONCE() when reading entries/indices from the guest-accessible Page State Change buffer to defend against TOCTOU bugs. Don't bother with READ_ONCE()/WRITE_ONCE() for cases where KVM is writing (and not consuming the result!), as the guest isn't supposed to touch the buffer while it's being processed. I.e. using READ_ONCE() is all about protecting against misbehaving guests. Fixes: 9b54e248d264 ("KVM: SEV: Add support to handle Page State Change VMGEXIT") Cc: stable@vger.kernel.org Reviewed-by: Tom Lendacky Signed-off-by: Sean Christopherson Message-ID: <20260501202250.2115252-11-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6e8cbae2135a..62b5befe0eed 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3872,9 +3872,9 @@ static void __snp_complete_one_psc(struct vcpu_svm *svm) */ for (idx = svm->sev_es.psc_idx; svm->sev_es.psc_inflight; svm->sev_es.psc_inflight--, idx++) { - struct psc_entry *entry = &entries[idx]; + struct psc_entry entry = READ_ONCE(entries[idx]); - entry->cur_page = entry->pagesize ? 512 : 1; + entries[idx].cur_page = entry.pagesize ? 512 : 1; } hdr->cur_entry = idx; @@ -3938,8 +3938,8 @@ next_range: * validation, so take care to only use validated copies of values used * for things like array indexing. */ - idx_start = hdr->cur_entry; - idx_end = hdr->end_entry; + idx_start = READ_ONCE(hdr->cur_entry); + idx_end = READ_ONCE(hdr->end_entry); if (idx_end >= max_nr_entries) { snp_complete_psc(svm, VMGEXIT_PSC_ERROR_INVALID_HDR); @@ -3948,7 +3948,7 @@ next_range: /* Find the start of the next range which needs processing. */ for (idx = idx_start; idx <= idx_end; idx++, hdr->cur_entry++) { - entry_start = entries[idx]; + entry_start = READ_ONCE(entries[idx]); gfn = entry_start.gfn; huge = entry_start.pagesize; @@ -3992,7 +3992,7 @@ next_range: * KVM_HC_MAP_GPA_RANGE exit. */ while (++idx <= idx_end) { - struct psc_entry entry = entries[idx]; + struct psc_entry entry = READ_ONCE(entries[idx]); if (entry.operation != entry_start.operation || entry.gfn != entry_start.gfn + npages || -- cgit v1.2.3 From 1e584c304cfb94a759417130b1fc6d30b30c4cce Mon Sep 17 00:00:00 2001 From: Jingguo Tan Date: Wed, 27 May 2026 10:33:01 +0800 Subject: vsock/virtio: bind uarg before filling zerocopy skb virtio_transport_send_pkt_info() allocates or reuses the zerocopy uarg before entering the send loop, but virtio_transport_alloc_skb() still fills the skb before it inherits that uarg. When fixed-buffer vectored zerocopy hits MAX_SKB_FRAGS, io_sg_from_iter() may partially attach managed frags and return -EMSGSIZE. The rollback path call kfree_skb() to free an skb that carries SKBFL_MANAGED_FRAG_REFS but no uarg, so skb_release_data() falls through to ordinary frag unref. Pass the uarg into virtio_transport_alloc_skb() and bind it immediately before virtio_transport_fill_skb(). This keeps control or no-payload skbs untouched while ensuring success and rollback share one lifetime rule. Fixes: 581512a6dc93 ("vsock/virtio: MSG_ZEROCOPY flag support") Signed-off-by: Lin Ma Signed-off-by: Rongzhen Cui Signed-off-by: Jingguo Tan Acked-by: Arseniy Krasnov Acked-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20260527023301.1075581-1-malin89@huawei.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport_common.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index b143290a311d..b10666937c49 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -205,6 +205,7 @@ static u16 virtio_transport_get_type(struct sock *sk) static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, size_t payload_len, bool zcopy, + struct ubuf_info *uarg, u32 src_cid, u32 src_port, u32 dst_cid, @@ -245,6 +246,12 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info * if (info->msg && payload_len > 0) { int err; + /* Bind the zerocopy lifetime before filling frags so error + * rollback frees managed fixed-buffer pages through + * the uarg-aware path. + */ + skb_zcopy_set(skb, uarg, NULL); + err = virtio_transport_fill_skb(skb, info, payload_len, zcopy); if (err) goto out; @@ -364,6 +371,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, skb_len = min(max_skb_len, rest_len); skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy, + uarg, src_cid, src_port, dst_cid, dst_port); if (!skb) { @@ -371,8 +379,6 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, break; } - skb_zcopy_set(skb, uarg, NULL); - virtio_transport_inc_tx_pkt(vvs, skb); ret = t_ops->send_pkt(skb, info->net); @@ -1183,7 +1189,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t, if (!t) return -ENOTCONN; - reply = virtio_transport_alloc_skb(&info, 0, false, + reply = virtio_transport_alloc_skb(&info, 0, false, NULL, le64_to_cpu(hdr->dst_cid), le32_to_cpu(hdr->dst_port), le64_to_cpu(hdr->src_cid), -- cgit v1.2.3 From f72eed9b84fb771019a955908132410a9ba9ea3f Mon Sep 17 00:00:00 2001 From: Yuqi Xu Date: Wed, 27 May 2026 11:48:15 +0800 Subject: bpf: sockmap: fix tail fragment offset in bpf_msg_push_data When bpf_msg_push_data() inserts data in the middle of a scatterlist entry, it splits the original entry into a left fragment and a right fragment. The right fragment offset is page-local, but the code advances it with `start`, which is the message-global insertion point. For inserts into a non-first SG entry, this over-advances the offset and leaves the split layout inconsistent. Advance the right fragment offset by the fragment-local delta, `start - offset`, which matches the length removed from the front of the original entry. Fixes: 6fff607e2f14 ("bpf: sk_msg program helper bpf_msg_push_data") Cc: stable@kernel.org Reported-by: Yuan Tan Reported-by: Zhengchuan Liang Reported-by: Xin Liu Signed-off-by: Yuqi Xu Signed-off-by: Ren Wei Link: https://patch.msgid.link/8b129d10566aa3eb43f61a8f9757bcf51707d324.1779636774.git.xuyq21@lenovo.com Signed-off-by: Jakub Kicinski --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 9590877b0714..80439767e0ee 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2869,7 +2869,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, psge->length = start - offset; rsge.length -= psge->length; - rsge.offset += start; + rsge.offset += start - offset; sk_msg_iter_var_next(i); sg_unmark_end(psge); -- cgit v1.2.3 From 9f72412bcf60144f252b0d6205106abf14344abc Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 27 May 2026 13:31:30 +0800 Subject: ipv6: fix possible infinite loop in rt6_fill_node() Sashiko reported this issue [1]. Apply the same fix as commit f8d8ce1b515a ("ipv6: fix possible infinite loop in fib6_info_uses_dev()"). Writers holding tb6_lock can list_del_rcu(&rt->fib6_siblings) without waiting for RCU readers; rt->fib6_siblings.next then still points into the old ring and this softirq-side walker never reaches &rt->fib6_siblings, causing a CPU stall. fib6_del_route() always WRITE_ONCE()s rt->fib6_nsiblings to 0 before list_del_rcu(), so an inside-loop check is a reliable detach signal. [1] https://sashiko.dev/#/patchset/20260526020227.4857-1-jiayuan.chen%40linux.dev Fixes: d9ccb18f83ea ("ipv6: Fix soft lockups in fib6_select_path under high next hop churn") Signed-off-by: Jiayuan Chen Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260527053133.180695-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b106e5fef9cb..dad416fdc585 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5902,6 +5902,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; } + if (!READ_ONCE(rt->fib6_nsiblings)) + break; } rcu_read_unlock(); -- cgit v1.2.3 From 9c7da87c2dc860bb17ca1ece942495d28b1ce3b9 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 27 May 2026 13:31:31 +0800 Subject: ipv6: fix possible infinite loop in fib6_select_path() Found while auditing the same pattern Sashiko reported in rt6_fill_node() [1]. Apply the same fix as commit f8d8ce1b515a ("ipv6: fix possible infinite loop in fib6_info_uses_dev()"). Writers holding tb6_lock can list_del_rcu(&first->fib6_siblings) without waiting for RCU readers; first->fib6_siblings.next then still points into the old ring and this softirq-side walker never reaches &first->fib6_siblings as its terminator. fib6_purge_rt() always WRITE_ONCE()s first->fib6_nsiblings to 0 before list_del_rcu(), so an inside-loop check is a reliable detach signal. [1] https://sashiko.dev/#/patchset/20260526020227.4857-1-jiayuan.chen%40linux.dev Fixes: d9ccb18f83ea ("ipv6: Fix soft lockups in fib6_select_path under high next hop churn") Signed-off-by: Jiayuan Chen Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260527053133.180695-2-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dad416fdc585..636f0120d7e3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -481,6 +481,9 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, const struct fib6_nh *nh = sibling->fib6_nh; int nh_upper_bound; + if (!READ_ONCE(first->fib6_nsiblings)) + break; + nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound); if (hash > nh_upper_bound) continue; -- cgit v1.2.3 From ff6e798c2eac3ebd0501ad7e796f583fab928de8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 28 May 2026 19:43:53 +0100 Subject: net: skbuff: fix pskb_carve leaking zcopy pages When SKBFL_MANAGED_FRAG_REFS is set, frag pages are not refcounted but their lifetime is controlled by the attached ubuf_info. To make a copy of the skb_shared_info, we either should clear the flag and reference the frags, or keep the flag and have frags unreferenced. pskb_carve_inside_header() and pskb_carve_inside_nonlinear() don't follow the rule and thus can leak page references. Let's clear SKBFL_MANAGED_FRAG_REFS from the original skb to fix it. It's the simplest way to address it, but there are more performant ways to do that if it ever becomes a problem. Link: https://lore.kernel.org/all/20260523085809.26331-1-nvminh232@clc.fitus.edu.vn/ Fixes: 753f1ca4e1e50 ("net: introduce managed frags infrastructure") Reported-by: Minh Nguyen Reported-by: Willem de Bruijn Signed-off-by: Pavel Begunkov Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/1e2086aa69217d7f9c8da3d38f5be7160f1b4cd1.1779993185.git.asml.silence@gmail.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0d3cc115f2e7..c02f0a507ba8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -6823,6 +6823,11 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, skb_copy_from_linear_data_offset(skb, off, data, new_hlen); skb->len -= off; + /* Remove SKBFL_MANAGED_FRAG_REFS instead of trying to honour it + * while refcounting frags below. + */ + skb_zcopy_downgrade_managed(skb); + memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, @@ -6936,6 +6941,11 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, return -ENOMEM; size = SKB_WITH_OVERHEAD(size); + /* Remove SKBFL_MANAGED_FRAG_REFS instead of trying to honour it + * while refcounting frags below. + */ + skb_zcopy_downgrade_managed(skb); + memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0])); if (skb_orphan_frags(skb, gfp_mask)) { -- cgit v1.2.3 From 072aa0f5c3d8f11f3159037418ec45edce7440b8 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Fri, 29 May 2026 13:23:57 +0200 Subject: Revert "ipv6: preserve insertion order for same-scope addresses" Chris Adams reported that preserving insertion order for same-scope addresses is causing SSH connections to be dropped after stopping a VM while running NetworkManager. NetworkManager caches the IPv6 address configuration, when a RA arrives, it determines the list of addresses to configure and checks if the addresses are already in the right order in the kernel. If they aren't, NetworkManager removes and re-adds them to achieve the desired order. As the order changes, NetworkManager is confused and reconfigures the addresses on every update. In addition, this would also affect to cloud tooling that relies on IPv6 addresses order to identify primary and secondaries addresses. This reverts commit cb3de96eea66f5e4a580086c6a1be46e765f97f4. Fixes: cb3de96eea66 ("ipv6: preserve insertion order for same-scope addresses") Reported-by: Chris Adams Closes: https://lore.kernel.org/netdev/20260521135310.GC977@cmadams.net/ Signed-off-by: Fernando Fernandez Mancera Link: https://patch.msgid.link/20260529112357.5079-1-fmancera@suse.de Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 2 +- tools/testing/selftests/net/ioam6.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5476b6536eb7..bb84a78b80f6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1013,7 +1013,7 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) list_for_each(p, &idev->addr_list) { struct inet6_ifaddr *ifa = list_entry(p, struct inet6_ifaddr, if_list); - if (ifp_scope > ipv6_addr_src_scope(&ifa->addr)) + if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) break; } diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index b2b99889942f..845c26dd01a9 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -273,8 +273,8 @@ setup() ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 &>/dev/null ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 &>/dev/null - ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null + ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null ip -netns $ioam_node_alpha link set veth0 up &>/dev/null ip -netns $ioam_node_alpha link set lo up &>/dev/null ip -netns $ioam_node_alpha route add 2001:db8:2::/64 \ -- cgit v1.2.3 From f75e3eb08fe31d30a9af6ed80cdd22e6772837e2 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 29 May 2026 19:31:34 +0200 Subject: wireguard: send: append trailer after expanding head With how this is currently written, we add the trailer, zero it out, and then add the header space on. If that header space requires a reallocation + copy, the zeros in the trailer aren't copied, because the skb len hasn't actually been yet expanded to cover that. Instead add the padding at the end of the process rather than at the beginning. Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Cc: stable@vger.kernel.org Signed-off-by: Jason A. Donenfeld Link: https://patch.msgid.link/20260529173134.3080773-2-Jason@zx2c4.com Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/send.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index 26e09c30d596..67d01478eb76 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -177,16 +177,6 @@ static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) trailer_len = padding_len + noise_encrypted_len(0); plaintext_len = skb->len + padding_len; - /* Expand data section to have room for padding and auth tag. */ - num_frags = skb_cow_data(skb, trailer_len, &trailer); - if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) - return false; - - /* Set the padding to zeros, and make sure it and the auth tag are part - * of the skb. - */ - memset(skb_tail_pointer(trailer), 0, padding_len); - /* Expand head section to have room for our header and the network * stack's headers. */ @@ -198,6 +188,16 @@ static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) skb_checksum_help(skb))) return false; + /* Expand data section to have room for padding and auth tag. */ + num_frags = skb_cow_data(skb, trailer_len, &trailer); + if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) + return false; + + /* Set the padding to zeros, and make sure it and the auth tag are part + * of the skb. + */ + memset(skb_tail_pointer(trailer), 0, padding_len); + /* Only after checksumming can we safely add on the padding at the end * and the header. */ -- cgit v1.2.3 From 7164d78559b0ff29931a366a840a9e5dd53d4b7c Mon Sep 17 00:00:00 2001 From: Zhenghang Xiao Date: Tue, 26 May 2026 16:53:13 +0800 Subject: drm/gem: fix race between change_handle and handle_delete drm_gem_change_handle_ioctl leaves the old handle live in the IDR during the window between spin_unlock(table_lock) and the final spin_lock(table_lock). A concurrent drm_gem_handle_delete on the old handle succeeds in this window, decrements handle_count to 0, and frees the GEM object while the new handle's IDR entry still references it. NULL the old handle's IDR entry before dropping table_lock so that any concurrent GEM_CLOSE on the old handle sees NULL and returns -EINVAL. Restore the old entry on the prime-bookkeeping error path. Fixes: 5e28b7b94408 ("drm: Set old handle to NULL before prime swap in change_handle") Signed-off-by: Zhenghang Xiao Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Link: https://patch.msgid.link/20260526085313.26791-1-kipreyyy@gmail.com --- drivers/gpu/drm/drm_gem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index e3b8a1f353cb..e12cdf91f4dc 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1065,6 +1065,7 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, goto out_unlock; } + idr_replace(&file_priv->object_idr, NULL, args->handle); spin_unlock(&file_priv->table_lock); if (obj->dma_buf) { @@ -1073,6 +1074,7 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, if (ret < 0) { spin_lock(&file_priv->table_lock); idr_remove(&file_priv->object_idr, handle); + idr_replace(&file_priv->object_idr, obj, args->handle); spin_unlock(&file_priv->table_lock); goto out_unlock; } -- cgit v1.2.3 From 44eeff9bc467bc7d1fec34fc3f6001f385fe462c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 26 May 2026 20:50:43 +0000 Subject: Revert "x86/fpu: Refine and simplify the magic number check during signal return" This reverts dc8aa31a7ac2 ("x86/fpu: Refine and simplify the magic number check during signal return"). The aforementioned commit broke applications that construct signal frames in userspace (such as CRIU and gVisor) if the frame's xstate size is smaller than the kernel's fpstate->user_size. Furthermore, this introduces a critical issue for checkpoint/restore tools like CRIU. If a process is checkpointed while inside a signal handler, its stack contains a signal frame formatted according to the source host's xstate capabilities. If that process is later restored on a destination host with larger xstate capabilities (e.g., a newer CPU with more features enabled, resulting in a larger fpstate->user_size), the kernel will look for FP_XSTATE_MAGIC2 at the destination host's larger user_size offset instead of the offset encoded in the frame's fx_sw->xstate_size. This causes the magic2 check to fail, forcing sigreturn to silently fall back to "FX-only" mode. Upon return from the signal handler, the process's extended state is reset to initial values instead of being restored, leading to silent data corruption. The aforementioned commit cited d877550eaf2d ("x86/fpu: Stop relying on userspace for info to fault in xsave buffer") as justification to stop relying on userspace for the magic number check. However, these two changes are fundamentally different. The last one only changed how much memory the kernel ensures is paged-in before running XRSTOR to prevent an infinite loop. It did not change the signal frame format or how the layout is validated. Reverting this change restores the use of fx_sw->xstate_size for locating magic2 and restores the necessary sanity checks, ensuring that the signal frame remains self-describing and portable. [ bp: Massage commit message. ] Fixes: dc8aa31a7ac2 ("x86/fpu: Refine and simplify the magic number check during signal return") Signed-off-by: Andrei Vagin Signed-off-by: Borislav Petkov (AMD) Acked-by: Chang S. Bae Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20260429000623.3356606-1-avagin@google.com --- arch/x86/kernel/fpu/signal.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index c3ec2512f2bb..20b638c507ca 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -27,14 +27,19 @@ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, struct _fpx_sw_bytes *fx_sw) { + int min_xstate_size = sizeof(struct fxregs_state) + + sizeof(struct xstate_header); void __user *fpstate = fxbuf; unsigned int magic2; if (__copy_from_user(fx_sw, &fxbuf->sw_reserved[0], sizeof(*fx_sw))) return false; - /* Check for the first magic field */ - if (fx_sw->magic1 != FP_XSTATE_MAGIC1) + /* Check for the first magic field and other error scenarios. */ + if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || + fx_sw->xstate_size < min_xstate_size || + fx_sw->xstate_size > x86_task_fpu(current)->fpstate->user_size || + fx_sw->xstate_size > fx_sw->extended_size) goto setfx; /* @@ -43,7 +48,7 @@ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf, * fpstate layout with out copying the extended state information * in the memory layout. */ - if (__get_user(magic2, (__u32 __user *)(fpstate + x86_task_fpu(current)->fpstate->user_size))) + if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))) return false; if (likely(magic2 == FP_XSTATE_MAGIC2)) -- cgit v1.2.3 From 05d5d79440c2cc0784f91b61f2012753e66be472 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 30 May 2026 12:25:36 +0200 Subject: Revert "gpib: cb7210: Fix region leak when request_irq fails" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 2eae90a457baa0048a96ed38ad93090ee38c8b2f. Turns out not to be correct. Link: https://lore.kernel.org/r/PpNUbGhrvT8I_KayoDvQYI2PYjmMw1QEkuVBDZz2PwBsVVgPkBXJarc2mBM0IhiH3AQG0GtgqEsDRXNj3yUKEDBaZa25u73pAjvcE6vfRsg=@protonmail.com Reported-by: Dominik Karol Piątkowski Cc: Mark Brown Cc: Hongling Zeng Cc: Hongling Zeng Signed-off-by: Greg Kroah-Hartman --- drivers/gpib/cb7210/cb7210.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpib/cb7210/cb7210.c b/drivers/gpib/cb7210/cb7210.c index 673b5bfe2e7d..6dd8637c5964 100644 --- a/drivers/gpib/cb7210/cb7210.c +++ b/drivers/gpib/cb7210/cb7210.c @@ -1049,8 +1049,7 @@ static int cb_isa_attach(struct gpib_board *board, const struct gpib_board_confi if (!request_region(config->ibbase, cb7210_iosize, DRV_NAME)) { dev_err(board->gpib_dev, "ioports starting at 0x%x are already in use\n", config->ibbase); - retval = -EBUSY; - goto err_release_region; + return -EBUSY; } nec_priv->iobase = config->ibbase; cb_priv->fifo_iobase = nec7210_iobase(cb_priv); @@ -1063,16 +1062,11 @@ static int cb_isa_attach(struct gpib_board *board, const struct gpib_board_confi // install interrupt handler if (request_irq(config->ibirq, cb7210_interrupt, isr_flags, DRV_NAME, board)) { dev_err(board->gpib_dev, "failed to obtain IRQ %d\n", config->ibirq); - retval = -EBUSY; - goto err_release_region; + return -EBUSY; } cb_priv->irq = config->ibirq; return cb7210_init(cb_priv, board); - -err_release_region: - release_region(nec7210_iobase(cb_priv), cb7210_iosize); - return retval; } static void cb_isa_detach(struct gpib_board *board) -- cgit v1.2.3 From 1d774589f924056b8403e271fdecaf9a803a50fc Mon Sep 17 00:00:00 2001 From: Alexis Bouzigues Date: Fri, 29 May 2026 09:28:14 -0500 Subject: i2c: virtio: mark device ready before registering the adapter virtio_i2c_probe() synchronously probes child i2c drivers on the bus, but peripherals may use the bus at probe for tasks like reading a chip id. The vhost-user-i2c backend stalls at such probes unless DRIVER_OK is already set before the virtqueue is first kicked. Set DRIVER_OK explicitly before i2c_add_adapter(), as done for the same reason in commit f5866db64f34 ("virtio_console: enable VQs early") and commit 71e4b8bf0482 ("virtio_rpmsg: set DRIVER_OK before using device"). Signed-off-by: Alexis Bouzigues Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-virtio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-virtio.c b/drivers/i2c/busses/i2c-virtio.c index 7b0b0bff8000..5da6fef92bec 100644 --- a/drivers/i2c/busses/i2c-virtio.c +++ b/drivers/i2c/busses/i2c-virtio.c @@ -222,6 +222,8 @@ static int virtio_i2c_probe(struct virtio_device *vdev) */ ACPI_COMPANION_SET(&vi->adap.dev, ACPI_COMPANION(vdev->dev.parent)); + virtio_device_ready(vdev); + ret = i2c_add_adapter(&vi->adap); if (ret) virtio_i2c_del_vqs(vdev); -- cgit v1.2.3 From 171022c7d594c133a45f92357a2a91475edabe20 Mon Sep 17 00:00:00 2001 From: Henri A Date: Wed, 20 May 2026 10:25:44 -0400 Subject: media: rc: igorplugusb: fix control request setup packet Commit eac69475b01f ("media: rc: igorplugusb: heed coherency rules") changed the control request storage from an embedded struct to an allocated pointer so it can obey DMA coherency rules. However, the driver still passes &ir->request to usb_fill_control_urb(). That points the URB setup packet at the pointer field itself rather than at the allocated struct usb_ctrlrequest. USB core then interprets pointer bytes as the setup packet. This can produce an invalid bRequestType and trigger the control direction warning reported by syzbot: usb 2-1: BOGUS control dir, pipe 80003580 doesn't match bRequestType 0 Pass ir->request itself as the setup packet. Fixes: eac69475b01f ("media: rc: igorplugusb: heed coherency rules") Reported-by: syzbot+11f0e4f957c7c3bf3d51@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=11f0e4f957c7c3bf3d51 Tested-by: syzbot+11f0e4f957c7c3bf3d51@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Assisted-by: Codex:GPT-5.5 Signed-off-by: Henri A Signed-off-by: Sean Young Signed-off-by: Hans Verkuil --- drivers/media/rc/igorplugusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/rc/igorplugusb.c b/drivers/media/rc/igorplugusb.c index 3e10f6fe89f8..b5117ee9f5fa 100644 --- a/drivers/media/rc/igorplugusb.c +++ b/drivers/media/rc/igorplugusb.c @@ -184,7 +184,7 @@ static int igorplugusb_probe(struct usb_interface *intf, if (!ir->buf_in) goto fail; usb_fill_control_urb(ir->urb, udev, - usb_rcvctrlpipe(udev, 0), (uint8_t *)&ir->request, + usb_rcvctrlpipe(udev, 0), (uint8_t *)ir->request, ir->buf_in, MAX_PACKET, igorplugusb_callback, ir); usb_make_path(udev, ir->phys, sizeof(ir->phys)); -- cgit v1.2.3 From e43ffb69e0438cddd72aaa30898b4dc446f664f8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 31 May 2026 15:14:24 -0700 Subject: Linux 7.1-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f056c921ea9c..d8da45199699 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 7 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION* -- cgit v1.2.3