From 2619da73bb2f10d88f7e1087125c40144fdf0987 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 5 Mar 2026 20:49:55 +0100 Subject: KVM: x86: Use __DECLARE_FLEX_ARRAY() for UAPI structures with VLAs Commit 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") broke the userspace API for C++. These structures ending in VLAs are typically a *header*, which can be followed by an arbitrary number of entries. Userspace typically creates a larger structure with some non-zero number of entries, for example in QEMU's kvm_arch_get_supported_msr_feature(): struct { struct kvm_msrs info; struct kvm_msr_entry entries[1]; } msr_data = {}; While that works in C, it fails in C++ with an error like: flexible array member 'kvm_msrs::entries' not at end of 'struct msr_data' Fix this by using __DECLARE_FLEX_ARRAY() for the VLA, which uses [0] for C++ compilation. Fixes: 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") Cc: stable@vger.kernel.org Signed-off-by: David Woodhouse Link: https://patch.msgid.link/3abaf6aefd6e5efeff3b860ac38421d9dec908db.camel@infradead.org [sean: tag for stable@] Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 80364d4dbebb..3f0d8d3c3daf 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -542,7 +543,7 @@ struct kvm_coalesced_mmio { struct kvm_coalesced_mmio_ring { __u32 first, last; - struct kvm_coalesced_mmio coalesced_mmio[]; + __DECLARE_FLEX_ARRAY(struct kvm_coalesced_mmio, coalesced_mmio); }; #define KVM_COALESCED_MMIO_MAX \ @@ -592,7 +593,7 @@ struct kvm_clear_dirty_log { /* for KVM_SET_SIGNAL_MASK */ struct kvm_signal_mask { __u32 len; - __u8 sigset[]; + __DECLARE_FLEX_ARRAY(__u8, sigset); }; /* for KVM_TPR_ACCESS_REPORTING */ @@ -1051,7 +1052,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing { __u32 nr; __u32 flags; - struct kvm_irq_routing_entry entries[]; + __DECLARE_FLEX_ARRAY(struct kvm_irq_routing_entry, entries); }; #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) @@ -1142,7 +1143,7 @@ struct kvm_dirty_tlb { struct kvm_reg_list { __u64 n; /* number of regs */ - __u64 reg[]; + __DECLARE_FLEX_ARRAY(__u64, reg); }; struct kvm_one_reg { @@ -1608,7 +1609,7 @@ struct kvm_stats_desc { #ifdef __KERNEL__ char name[KVM_STATS_NAME_SIZE]; #else - char name[]; + __DECLARE_FLEX_ARRAY(char, name); #endif }; -- cgit v1.2.3 From 641f6fda143b879da1515f821ee475073678cf2a Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Thu, 29 Jan 2026 20:53:20 +0530 Subject: soc: qcom: pd-mapper: Fix element length in servreg_loc_pfr_req_ei It looks element length declared in servreg_loc_pfr_req_ei for reason not matching servreg_loc_pfr_req's reason field due which we could observe decoding error on PD crash. qmi_decode_string_elem: String len 81 >= Max Len 65 Fix this by matching with servreg_loc_pfr_req's reason field. Fixes: 1ebcde047c54 ("soc: qcom: add pd-mapper implementation") Signed-off-by: Mukesh Ojha Reviewed-by: Dmitry Baryshkov Tested-by: Nikita Travkin Link: https://lore.kernel.org/r/20260129152320.3658053-2-mukesh.ojha@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/pdr_internal.h | 2 +- drivers/soc/qcom/qcom_pdr_msg.c | 2 +- include/linux/soc/qcom/pdr.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/soc/qcom/pdr_internal.h b/drivers/soc/qcom/pdr_internal.h index 039508c1bbf7..047c0160b617 100644 --- a/drivers/soc/qcom/pdr_internal.h +++ b/drivers/soc/qcom/pdr_internal.h @@ -84,7 +84,7 @@ struct servreg_set_ack_resp { struct servreg_loc_pfr_req { char service[SERVREG_NAME_LENGTH + 1]; - char reason[257]; + char reason[SERVREG_PFR_LENGTH + 1]; }; struct servreg_loc_pfr_resp { diff --git a/drivers/soc/qcom/qcom_pdr_msg.c b/drivers/soc/qcom/qcom_pdr_msg.c index ca98932140d8..02022b11ecf0 100644 --- a/drivers/soc/qcom/qcom_pdr_msg.c +++ b/drivers/soc/qcom/qcom_pdr_msg.c @@ -325,7 +325,7 @@ const struct qmi_elem_info servreg_loc_pfr_req_ei[] = { }, { .data_type = QMI_STRING, - .elem_len = SERVREG_NAME_LENGTH + 1, + .elem_len = SERVREG_PFR_LENGTH + 1, .elem_size = sizeof(char), .array_type = VAR_LEN_ARRAY, .tlv_type = 0x02, diff --git a/include/linux/soc/qcom/pdr.h b/include/linux/soc/qcom/pdr.h index 83a8ea612e69..2b7691e47c2a 100644 --- a/include/linux/soc/qcom/pdr.h +++ b/include/linux/soc/qcom/pdr.h @@ -5,6 +5,7 @@ #include #define SERVREG_NAME_LENGTH 64 +#define SERVREG_PFR_LENGTH 256 struct pdr_service; struct pdr_handle; -- cgit v1.2.3 From a0e0c2f8c5f32b675f58e25a9338283cedb5ad2b Mon Sep 17 00:00:00 2001 From: Yixun Lan Date: Fri, 20 Mar 2026 11:06:17 +0000 Subject: reset: spacemit: k3: Decouple composite reset lines Instead of grouping several different reset lines into one composite reset, decouple them to individual ones which make it more aligned with underlying hardware. And for DWC USB driver, it will match well with the number of the reset property in the DT bindings. The DWC3 USB host controller in K3 SoC has three reset lines - AHB, VCC, PHY. The PCIe controller also has three reset lines - DBI, Slave, Master. Also three reset lines each for UCIE and RCPU block. As an agreement with maintainer, the reset IDs has been rearranged as contiguous number but keep most part unchanged to avoid break patches which already sent to mailing list. The changes of DT binding header file and reset driver are merged together as one single commit to avoid git-bisect breakage. Fixes: 938ce3b16582 ("reset: spacemit: Add SpacemiT K3 reset driver") Fixes: 216e0a5e98e5 ("dt-bindings: soc: spacemit: Add K3 reset support and IDs") Signed-off-by: Yixun Lan Reviewed-by: Philipp Zabel Acked-by: Conor Dooley Signed-off-by: Philipp Zabel --- drivers/reset/spacemit/reset-spacemit-k3.c | 60 +++++++++++++++----------- include/dt-bindings/reset/spacemit,k3-resets.h | 48 +++++++++++++++------ 2 files changed, 72 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/drivers/reset/spacemit/reset-spacemit-k3.c b/drivers/reset/spacemit/reset-spacemit-k3.c index e9e32e4c1ba5..9841f5e057b2 100644 --- a/drivers/reset/spacemit/reset-spacemit-k3.c +++ b/drivers/reset/spacemit/reset-spacemit-k3.c @@ -112,16 +112,21 @@ static const struct ccu_reset_data k3_apmu_resets[] = { [RESET_APMU_SDH0] = RESET_DATA(APMU_SDH0_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_SDH1] = RESET_DATA(APMU_SDH1_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_SDH2] = RESET_DATA(APMU_SDH2_CLK_RES_CTRL, 0, BIT(1)), - [RESET_APMU_USB2] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(1)|BIT(2)|BIT(3)), - [RESET_APMU_USB3_PORTA] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(5)|BIT(6)|BIT(7)), - [RESET_APMU_USB3_PORTB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(9)|BIT(10)|BIT(11)), - [RESET_APMU_USB3_PORTC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(13)|BIT(14)|BIT(15)), - [RESET_APMU_USB3_PORTD] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, - BIT(17)|BIT(18)|BIT(19)), + [RESET_APMU_USB2_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(1)), + [RESET_APMU_USB2_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(2)), + [RESET_APMU_USB2_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(3)), + [RESET_APMU_USB3_A_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(5)), + [RESET_APMU_USB3_A_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(6)), + [RESET_APMU_USB3_A_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(7)), + [RESET_APMU_USB3_B_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(9)), + [RESET_APMU_USB3_B_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(10)), + [RESET_APMU_USB3_B_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(11)), + [RESET_APMU_USB3_C_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(13)), + [RESET_APMU_USB3_C_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(14)), + [RESET_APMU_USB3_C_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(15)), + [RESET_APMU_USB3_D_AHB] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(17)), + [RESET_APMU_USB3_D_VCC] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(18)), + [RESET_APMU_USB3_D_PHY] = RESET_DATA(APMU_USB_CLK_RES_CTRL, 0, BIT(19)), [RESET_APMU_QSPI] = RESET_DATA(APMU_QSPI_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_QSPI_BUS] = RESET_DATA(APMU_QSPI_CLK_RES_CTRL, 0, BIT(0)), [RESET_APMU_DMA] = RESET_DATA(APMU_DMA_CLK_RES_CTRL, 0, BIT(0)), @@ -151,10 +156,12 @@ static const struct ccu_reset_data k3_apmu_resets[] = { [RESET_APMU_CPU7_SW] = RESET_DATA(APMU_PMU_CC2_AP, BIT(26), 0), [RESET_APMU_C1_MPSUB_SW] = RESET_DATA(APMU_PMU_CC2_AP, BIT(28), 0), [RESET_APMU_MPSUB_DBG] = RESET_DATA(APMU_PMU_CC2_AP, BIT(29), 0), - [RESET_APMU_UCIE] = RESET_DATA(APMU_UCIE_CTRL, - BIT(1) | BIT(2) | BIT(3), 0), - [RESET_APMU_RCPU] = RESET_DATA(APMU_RCPU_CLK_RES_CTRL, 0, - BIT(3) | BIT(2) | BIT(0)), + [RESET_APMU_UCIE_IP] = RESET_DATA(APMU_UCIE_CTRL, BIT(1), 0), + [RESET_APMU_UCIE_HOT] = RESET_DATA(APMU_UCIE_CTRL, BIT(2), 0), + [RESET_APMU_UCIE_MON] = RESET_DATA(APMU_UCIE_CTRL, BIT(3), 0), + [RESET_APMU_RCPU_AUDIO_SYS] = RESET_DATA(APMU_RCPU_CLK_RES_CTRL, 0, BIT(0)), + [RESET_APMU_RCPU_MCU_CORE] = RESET_DATA(APMU_RCPU_CLK_RES_CTRL, 0, BIT(2)), + [RESET_APMU_RCPU_AUDIO_APMU] = RESET_DATA(APMU_RCPU_CLK_RES_CTRL, 0, BIT(3)), [RESET_APMU_DSI4LN2_ESCCLK] = RESET_DATA(APMU_LCD_CLK_RES_CTRL3, 0, BIT(3)), [RESET_APMU_DSI4LN2_LCD_SW] = RESET_DATA(APMU_LCD_CLK_RES_CTRL3, 0, BIT(4)), [RESET_APMU_DSI4LN2_LCD_MCLK] = RESET_DATA(APMU_LCD_CLK_RES_CTRL4, 0, BIT(9)), @@ -164,16 +171,21 @@ static const struct ccu_reset_data k3_apmu_resets[] = { [RESET_APMU_UFS_ACLK] = RESET_DATA(APMU_UFS_CLK_RES_CTRL, 0, BIT(0)), [RESET_APMU_EDP0] = RESET_DATA(APMU_LCD_EDP_CTRL, 0, BIT(0)), [RESET_APMU_EDP1] = RESET_DATA(APMU_LCD_EDP_CTRL, 0, BIT(16)), - [RESET_APMU_PCIE_PORTA] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_A, 0, - BIT(5) | BIT(4) | BIT(3)), - [RESET_APMU_PCIE_PORTB] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_B, 0, - BIT(5) | BIT(4) | BIT(3)), - [RESET_APMU_PCIE_PORTC] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_C, 0, - BIT(5) | BIT(4) | BIT(3)), - [RESET_APMU_PCIE_PORTD] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_D, 0, - BIT(5) | BIT(4) | BIT(3)), - [RESET_APMU_PCIE_PORTE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_E, 0, - BIT(5) | BIT(4) | BIT(3)), + [RESET_APMU_PCIE_A_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_A, 0, BIT(3)), + [RESET_APMU_PCIE_A_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_A, 0, BIT(4)), + [RESET_APMU_PCIE_A_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_A, 0, BIT(5)), + [RESET_APMU_PCIE_B_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_B, 0, BIT(3)), + [RESET_APMU_PCIE_B_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_B, 0, BIT(4)), + [RESET_APMU_PCIE_B_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_B, 0, BIT(5)), + [RESET_APMU_PCIE_C_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_C, 0, BIT(3)), + [RESET_APMU_PCIE_C_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_C, 0, BIT(4)), + [RESET_APMU_PCIE_C_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_C, 0, BIT(5)), + [RESET_APMU_PCIE_D_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_D, 0, BIT(3)), + [RESET_APMU_PCIE_D_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_D, 0, BIT(4)), + [RESET_APMU_PCIE_D_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_D, 0, BIT(5)), + [RESET_APMU_PCIE_E_DBI] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_E, 0, BIT(3)), + [RESET_APMU_PCIE_E_SLAVE] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_E, 0, BIT(4)), + [RESET_APMU_PCIE_E_MASTER] = RESET_DATA(APMU_PCIE_CLK_RES_CTRL_E, 0, BIT(5)), [RESET_APMU_EMAC0] = RESET_DATA(APMU_EMAC0_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_EMAC1] = RESET_DATA(APMU_EMAC1_CLK_RES_CTRL, 0, BIT(1)), [RESET_APMU_EMAC2] = RESET_DATA(APMU_EMAC2_CLK_RES_CTRL, 0, BIT(1)), diff --git a/include/dt-bindings/reset/spacemit,k3-resets.h b/include/dt-bindings/reset/spacemit,k3-resets.h index 79ac1c22b7b5..dc1ef009ba79 100644 --- a/include/dt-bindings/reset/spacemit,k3-resets.h +++ b/include/dt-bindings/reset/spacemit,k3-resets.h @@ -97,11 +97,11 @@ #define RESET_APMU_SDH0 13 #define RESET_APMU_SDH1 14 #define RESET_APMU_SDH2 15 -#define RESET_APMU_USB2 16 -#define RESET_APMU_USB3_PORTA 17 -#define RESET_APMU_USB3_PORTB 18 -#define RESET_APMU_USB3_PORTC 19 -#define RESET_APMU_USB3_PORTD 20 +#define RESET_APMU_USB2_AHB 16 +#define RESET_APMU_USB2_VCC 17 +#define RESET_APMU_USB2_PHY 18 +#define RESET_APMU_USB3_A_AHB 19 +#define RESET_APMU_USB3_A_VCC 20 #define RESET_APMU_QSPI 21 #define RESET_APMU_QSPI_BUS 22 #define RESET_APMU_DMA 23 @@ -132,8 +132,8 @@ #define RESET_APMU_CPU7_SW 48 #define RESET_APMU_C1_MPSUB_SW 49 #define RESET_APMU_MPSUB_DBG 50 -#define RESET_APMU_UCIE 51 -#define RESET_APMU_RCPU 52 +#define RESET_APMU_USB3_A_PHY 51 /* USB3 A */ +#define RESET_APMU_USB3_B_AHB 52 #define RESET_APMU_DSI4LN2_ESCCLK 53 #define RESET_APMU_DSI4LN2_LCD_SW 54 #define RESET_APMU_DSI4LN2_LCD_MCLK 55 @@ -143,16 +143,40 @@ #define RESET_APMU_UFS_ACLK 59 #define RESET_APMU_EDP0 60 #define RESET_APMU_EDP1 61 -#define RESET_APMU_PCIE_PORTA 62 -#define RESET_APMU_PCIE_PORTB 63 -#define RESET_APMU_PCIE_PORTC 64 -#define RESET_APMU_PCIE_PORTD 65 -#define RESET_APMU_PCIE_PORTE 66 +#define RESET_APMU_USB3_B_VCC 62 /* USB3 B */ +#define RESET_APMU_USB3_B_PHY 63 +#define RESET_APMU_USB3_C_AHB 64 +#define RESET_APMU_USB3_C_VCC 65 +#define RESET_APMU_USB3_C_PHY 66 #define RESET_APMU_EMAC0 67 #define RESET_APMU_EMAC1 68 #define RESET_APMU_EMAC2 69 #define RESET_APMU_ESPI_MCLK 70 #define RESET_APMU_ESPI_SCLK 71 +#define RESET_APMU_USB3_D_AHB 72 /* USB3 D */ +#define RESET_APMU_USB3_D_VCC 73 +#define RESET_APMU_USB3_D_PHY 74 +#define RESET_APMU_UCIE_IP 75 +#define RESET_APMU_UCIE_HOT 76 +#define RESET_APMU_UCIE_MON 77 +#define RESET_APMU_RCPU_AUDIO_SYS 78 +#define RESET_APMU_RCPU_MCU_CORE 79 +#define RESET_APMU_RCPU_AUDIO_APMU 80 +#define RESET_APMU_PCIE_A_DBI 81 +#define RESET_APMU_PCIE_A_SLAVE 82 +#define RESET_APMU_PCIE_A_MASTER 83 +#define RESET_APMU_PCIE_B_DBI 84 +#define RESET_APMU_PCIE_B_SLAVE 85 +#define RESET_APMU_PCIE_B_MASTER 86 +#define RESET_APMU_PCIE_C_DBI 87 +#define RESET_APMU_PCIE_C_SLAVE 88 +#define RESET_APMU_PCIE_C_MASTER 89 +#define RESET_APMU_PCIE_D_DBI 90 +#define RESET_APMU_PCIE_D_SLAVE 91 +#define RESET_APMU_PCIE_D_MASTER 92 +#define RESET_APMU_PCIE_E_DBI 93 +#define RESET_APMU_PCIE_E_SLAVE 94 +#define RESET_APMU_PCIE_E_MASTER 95 /* DCIU resets*/ #define RESET_DCIU_HDMA 0 -- cgit v1.2.3 From 45065a5095c7773fb98c35d60c20c3b513540597 Mon Sep 17 00:00:00 2001 From: Akshai Murari Date: Fri, 27 Mar 2026 06:54:45 +0000 Subject: Input: add keycodes for contextual AI usages (HUTRR119) HUTRR119 introduces new usages for keys intended to invoke AI agents based on the current context. These are useful with the increasing number of operating systems with integrated Large Language Models Add new key definitions for KEY_ACTION_ON_SELECTION, KEY_CONTEXTUAL_INSERT and KEY_CONTEXTUAL_QUERY Signed-off-by: Akshai Murari Acked-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- drivers/hid/hid-debug.c | 6 ++++++ drivers/hid/hid-input.c | 3 +++ include/uapi/linux/input-event-codes.h | 4 ++++ 3 files changed, 13 insertions(+) (limited to 'include') diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index c5865b0d2aaa..a8d2b36a7852 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -990,6 +990,9 @@ static const struct hid_usage_entry hid_usage_table[] = { { 0x0c, 0x01c9, "ALContactSync" }, { 0x0c, 0x01ca, "ALNavigation" }, { 0x0c, 0x01cb, "ALContextawareDesktopAssistant" }, + { 0x0c, 0x01cc, "ALActionOnSelection" }, + { 0x0c, 0x01cd, "ALContextualInsertion" }, + { 0x0c, 0x01ce, "ALContextualQuery" }, { 0x0c, 0x0200, "GenericGUIApplicationControls" }, { 0x0c, 0x0201, "ACNew" }, { 0x0c, 0x0202, "ACOpen" }, @@ -3375,6 +3378,9 @@ static const char *keys[KEY_MAX + 1] = { [KEY_BRIGHTNESS_MIN] = "BrightnessMin", [KEY_BRIGHTNESS_MAX] = "BrightnessMax", [KEY_BRIGHTNESS_AUTO] = "BrightnessAuto", + [KEY_ACTION_ON_SELECTION] = "ActionOnSelection", + [KEY_CONTEXTUAL_INSERT] = "ContextualInsert", + [KEY_CONTEXTUAL_QUERY] = "ContextualQuery", [KEY_KBDINPUTASSIST_PREV] = "KbdInputAssistPrev", [KEY_KBDINPUTASSIST_NEXT] = "KbdInputAssistNext", [KEY_KBDINPUTASSIST_PREVGROUP] = "KbdInputAssistPrevGroup", diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 8fc20df99b97..ce9c3251287d 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1227,6 +1227,9 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x1bc: map_key_clear(KEY_MESSENGER); break; case 0x1bd: map_key_clear(KEY_INFO); break; case 0x1cb: map_key_clear(KEY_ASSISTANT); break; + case 0x1cc: map_key_clear(KEY_ACTION_ON_SELECTION); break; + case 0x1cd: map_key_clear(KEY_CONTEXTUAL_INSERT); break; + case 0x1ce: map_key_clear(KEY_CONTEXTUAL_QUERY); break; case 0x201: map_key_clear(KEY_NEW); break; case 0x202: map_key_clear(KEY_OPEN); break; case 0x203: map_key_clear(KEY_CLOSE); break; diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 4bdb6a165987..3528168f7c6d 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -643,6 +643,10 @@ #define KEY_EPRIVACY_SCREEN_ON 0x252 #define KEY_EPRIVACY_SCREEN_OFF 0x253 +#define KEY_ACTION_ON_SELECTION 0x254 /* AL Action on Selection (HUTRR119) */ +#define KEY_CONTEXTUAL_INSERT 0x255 /* AL Contextual Insertion (HUTRR119) */ +#define KEY_CONTEXTUAL_QUERY 0x256 /* AL Contextual Query (HUTRR119) */ + #define KEY_KBDINPUTASSIST_PREV 0x260 #define KEY_KBDINPUTASSIST_NEXT 0x261 #define KEY_KBDINPUTASSIST_PREVGROUP 0x262 -- cgit v1.2.3 From 88c4bd90725557796c15878b7cb70066e9e6b5ab Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Thu, 3 Apr 2025 15:10:51 +0200 Subject: firmware: thead: Fix buffer overflow and use standard endian macros Addresses two issues in the TH1520 AON firmware protocol driver: 1. Fix a potential buffer overflow where the code used unsafe pointer arithmetic to access the 'mode' field through the 'resource' pointer with an offset. This was flagged by Smatch static checker as: "buffer overflow 'data' 2 <= 3" 2. Replace custom RPC_SET_BE* and RPC_GET_BE* macros with standard kernel endianness conversion macros (cpu_to_be16, etc.) for better portability and maintainability. The functionality was re-tested with the GPU power-up sequence, confirming the GPU powers up correctly and the driver probes successfully. [ 12.702370] powervr ffef400000.gpu: [drm] loaded firmware powervr/rogue_36.52.104.182_v1.fw [ 12.711043] powervr ffef400000.gpu: [drm] FW version v1.0 (build 6645434 OS) [ 12.719787] [drm] Initialized powervr 1.0.0 for ffef400000.gpu on minor 0 Fixes: e4b3cbd840e5 ("firmware: thead: Add AON firmware protocol driver") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/17a0ccce-060b-4b9d-a3c4-8d5d5823b1c9@stanley.mountain/ Signed-off-by: Michal Wilczynski Reviewed-by: Dan Carpenter Acked-by: Drew Fustini Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/firmware/thead,th1520-aon.c | 7 +-- include/linux/firmware/thead/thead,th1520-aon.h | 74 ------------------------- 2 files changed, 3 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/drivers/firmware/thead,th1520-aon.c b/drivers/firmware/thead,th1520-aon.c index a35fd5e2a07f..d7f19b5b5f46 100644 --- a/drivers/firmware/thead,th1520-aon.c +++ b/drivers/firmware/thead,th1520-aon.c @@ -170,10 +170,9 @@ int th1520_aon_power_update(struct th1520_aon_chan *aon_chan, u16 rsrc, hdr->func = TH1520_AON_PM_FUNC_SET_RESOURCE_POWER_MODE; hdr->size = TH1520_AON_RPC_MSG_NUM; - RPC_SET_BE16(&msg.resource, 0, rsrc); - RPC_SET_BE16(&msg.resource, 2, - (power_on ? TH1520_AON_PM_PW_MODE_ON : - TH1520_AON_PM_PW_MODE_OFF)); + msg.resource = cpu_to_be16(rsrc); + msg.mode = cpu_to_be16(power_on ? TH1520_AON_PM_PW_MODE_ON : + TH1520_AON_PM_PW_MODE_OFF); ret = th1520_aon_call_rpc(aon_chan, &msg); if (ret) diff --git a/include/linux/firmware/thead/thead,th1520-aon.h b/include/linux/firmware/thead/thead,th1520-aon.h index dae132b66873..d81f5f6f5b90 100644 --- a/include/linux/firmware/thead/thead,th1520-aon.h +++ b/include/linux/firmware/thead/thead,th1520-aon.h @@ -97,80 +97,6 @@ struct th1520_aon_rpc_ack_common { #define RPC_GET_SVC_FLAG_ACK_TYPE(MESG) (((MESG)->svc & 0x40) >> 6) #define RPC_SET_SVC_FLAG_ACK_TYPE(MESG, ACK) ((MESG)->svc |= (ACK) << 6) -#define RPC_SET_BE64(MESG, OFFSET, SET_DATA) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - u64 _set_data = (SET_DATA); \ - data[_offset + 7] = _set_data & 0xFF; \ - data[_offset + 6] = (_set_data & 0xFF00) >> 8; \ - data[_offset + 5] = (_set_data & 0xFF0000) >> 16; \ - data[_offset + 4] = (_set_data & 0xFF000000) >> 24; \ - data[_offset + 3] = (_set_data & 0xFF00000000) >> 32; \ - data[_offset + 2] = (_set_data & 0xFF0000000000) >> 40; \ - data[_offset + 1] = (_set_data & 0xFF000000000000) >> 48; \ - data[_offset + 0] = (_set_data & 0xFF00000000000000) >> 56; \ - } while (0) - -#define RPC_SET_BE32(MESG, OFFSET, SET_DATA) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - u64 _set_data = (SET_DATA); \ - data[_offset + 3] = (_set_data) & 0xFF; \ - data[_offset + 2] = (_set_data & 0xFF00) >> 8; \ - data[_offset + 1] = (_set_data & 0xFF0000) >> 16; \ - data[_offset + 0] = (_set_data & 0xFF000000) >> 24; \ - } while (0) - -#define RPC_SET_BE16(MESG, OFFSET, SET_DATA) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - u64 _set_data = (SET_DATA); \ - data[_offset + 1] = (_set_data) & 0xFF; \ - data[_offset + 0] = (_set_data & 0xFF00) >> 8; \ - } while (0) - -#define RPC_SET_U8(MESG, OFFSET, SET_DATA) \ - do { \ - u8 *data = (u8 *)(MESG); \ - data[OFFSET] = (SET_DATA) & 0xFF; \ - } while (0) - -#define RPC_GET_BE64(MESG, OFFSET, PTR) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - *(u32 *)(PTR) = \ - (data[_offset + 7] | data[_offset + 6] << 8 | \ - data[_offset + 5] << 16 | data[_offset + 4] << 24 | \ - data[_offset + 3] << 32 | data[_offset + 2] << 40 | \ - data[_offset + 1] << 48 | data[_offset + 0] << 56); \ - } while (0) - -#define RPC_GET_BE32(MESG, OFFSET, PTR) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - *(u32 *)(PTR) = \ - (data[_offset + 3] | data[_offset + 2] << 8 | \ - data[_offset + 1] << 16 | data[_offset + 0] << 24); \ - } while (0) - -#define RPC_GET_BE16(MESG, OFFSET, PTR) \ - do { \ - u8 *data = (u8 *)(MESG); \ - u64 _offset = (OFFSET); \ - *(u16 *)(PTR) = (data[_offset + 1] | data[_offset + 0] << 8); \ - } while (0) - -#define RPC_GET_U8(MESG, OFFSET, PTR) \ - do { \ - u8 *data = (u8 *)(MESG); \ - *(u8 *)(PTR) = (data[OFFSET]); \ - } while (0) - /* * Defines for SC PM Power Mode */ -- cgit v1.2.3 From 77facb35227c421467cdb49268de433168c2dcef Mon Sep 17 00:00:00 2001 From: Chris J Arges Date: Thu, 2 Apr 2026 17:23:16 -0500 Subject: net: increase IP_TUNNEL_RECURSION_LIMIT to 5 In configurations with multiple tunnel layers and MPLS lwtunnel routing, a single tunnel hop can increment the counter beyond this limit. This causes packets to be dropped with the "Dead loop on virtual device" message even when a routing loop doesn't exist. Increase IP_TUNNEL_RECURSION_LIMIT from 4 to 5 to handle this use-case. Fixes: 6f1a9140ecda ("net: add xmit recursion limit to tunnel xmit functions") Link: https://lore.kernel.org/netdev/88deb91b-ef1b-403c-8eeb-0f971f27e34f@redhat.com/ Signed-off-by: Chris J Arges Link: https://patch.msgid.link/20260402222401.3408368-1-carges@cloudflare.com Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 1f577a4f8ce9..d708b66e55cd 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -32,7 +32,7 @@ * recursion involves route lookups and full IP output, consuming much * more stack per level, so a lower limit is needed. */ -#define IP_TUNNEL_RECURSION_LIMIT 4 +#define IP_TUNNEL_RECURSION_LIMIT 5 /* Keep error state on tunnel for 30 sec */ #define IPTUNNEL_ERR_TIMEO (30*HZ) -- cgit v1.2.3 From 16cbec24897624051b324aa3a85859c38ca65fde Mon Sep 17 00:00:00 2001 From: Stanislav Kinsburskii Date: Tue, 24 Mar 2026 23:57:40 +0000 Subject: mshv: Fix infinite fault loop on permission-denied GPA intercepts Prevent infinite fault loops when guests access memory regions without proper permissions. Currently, mshv_handle_gpa_intercept() attempts to remap pages for all faults on movable memory regions, regardless of whether the access type is permitted. When a guest writes to a read-only region, the remap succeeds but the region remains read-only, causing immediate re-fault and spinning the vCPU indefinitely. Validate intercept access type against region permissions before attempting remaps. Reject writes to non-writable regions and executes to non-executable regions early, returning false to let the VMM handle the intercept appropriately. This also closes a potential DoS vector where malicious guests could intentionally trigger these fault loops to consume host resources. Fixes: b9a66cd5ccbb ("mshv: Add support for movable memory regions") Signed-off-by: Stanislav Kinsburskii Reviewed-by: Anirudh Rayabharam (Microsoft) Signed-off-by: Wei Liu --- drivers/hv/mshv_root_main.c | 15 ++++++++++++--- include/hyperv/hvgdk_mini.h | 6 ++++++ include/hyperv/hvhdk.h | 4 ++-- 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c index 6f42423f7faa..c8e5523a52b6 100644 --- a/drivers/hv/mshv_root_main.c +++ b/drivers/hv/mshv_root_main.c @@ -630,7 +630,7 @@ static bool mshv_handle_gpa_intercept(struct mshv_vp *vp) { struct mshv_partition *p = vp->vp_partition; struct mshv_mem_region *region; - bool ret; + bool ret = false; u64 gfn; #if defined(CONFIG_X86_64) struct hv_x64_memory_intercept_message *msg = @@ -641,6 +641,8 @@ static bool mshv_handle_gpa_intercept(struct mshv_vp *vp) (struct hv_arm64_memory_intercept_message *) vp->vp_intercept_msg_page->u.payload; #endif + enum hv_intercept_access_type access_type = + msg->header.intercept_access_type; gfn = HVPFN_DOWN(msg->guest_physical_address); @@ -648,12 +650,19 @@ static bool mshv_handle_gpa_intercept(struct mshv_vp *vp) if (!region) return false; + if (access_type == HV_INTERCEPT_ACCESS_WRITE && + !(region->hv_map_flags & HV_MAP_GPA_WRITABLE)) + goto put_region; + + if (access_type == HV_INTERCEPT_ACCESS_EXECUTE && + !(region->hv_map_flags & HV_MAP_GPA_EXECUTABLE)) + goto put_region; + /* Only movable memory ranges are supported for GPA intercepts */ if (region->mreg_type == MSHV_REGION_TYPE_MEM_MOVABLE) ret = mshv_region_handle_gfn_fault(region, gfn); - else - ret = false; +put_region: mshv_region_put(region); return ret; diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index 1823a290a7b7..f9600f87186a 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -1533,4 +1533,10 @@ struct hv_mmio_write_input { u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH]; } __packed; +enum hv_intercept_access_type { + HV_INTERCEPT_ACCESS_READ = 0, + HV_INTERCEPT_ACCESS_WRITE = 1, + HV_INTERCEPT_ACCESS_EXECUTE = 2 +}; + #endif /* _HV_HVGDK_MINI_H */ diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h index 245f3db53bf1..5e83d3714966 100644 --- a/include/hyperv/hvhdk.h +++ b/include/hyperv/hvhdk.h @@ -779,7 +779,7 @@ struct hv_x64_intercept_message_header { u32 vp_index; u8 instruction_length:4; u8 cr8:4; /* Only set for exo partitions */ - u8 intercept_access_type; + u8 intercept_access_type; /* enum hv_intercept_access_type */ union hv_x64_vp_execution_state execution_state; struct hv_x64_segment_register cs_segment; u64 rip; @@ -825,7 +825,7 @@ union hv_arm64_vp_execution_state { struct hv_arm64_intercept_message_header { u32 vp_index; u8 instruction_length; - u8 intercept_access_type; + u8 intercept_access_type; /* enum hv_intercept_access_type */ union hv_arm64_vp_execution_state execution_state; u64 pc; u64 cpsr; -- cgit v1.2.3 From adfc80dd0d7831335b5105fb3d8747094bf42878 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sat, 4 Apr 2026 18:40:58 -0600 Subject: prctl: rename branch landing pad implementation functions to be more explicit Per Linus' comments about the unreadability of abbreviations such as "indir_br_lp", rename the three prctl() implementation functions to be more explicit. This involves renaming "indir_br_lp_status" in the function names to "branch_landing_pad_state". While here, add _prctl_ into the function names, following the speculation control prctl implementation functions. Link: https://lore.kernel.org/linux-riscv/CAHk-=whhSLGZAx3N5jJpb4GLFDqH_QvS07D+6BnkPWmCEzTAgw@mail.gmail.com/ Cc: Deepak Gupta Cc: Linus Torvalds Cc: Mark Brown Signed-off-by: Paul Walmsley --- arch/riscv/kernel/usercfi.c | 16 ++++++++-------- include/linux/cpu.h | 6 +++--- kernel/sys.c | 15 ++++++++------- 3 files changed, 19 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c index 9052171c1a8c..04ab1eb8df29 100644 --- a/arch/riscv/kernel/usercfi.c +++ b/arch/riscv/kernel/usercfi.c @@ -457,7 +457,8 @@ int arch_lock_shadow_stack_status(struct task_struct *task, return 0; } -int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status) +int arch_prctl_get_branch_landing_pad_state(struct task_struct *t, + unsigned long __user *state) { unsigned long fcfi_status = 0; @@ -467,10 +468,10 @@ int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *sta /* indirect branch tracking is enabled on the task or not */ fcfi_status |= (is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0); - return copy_to_user(status, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0; + return copy_to_user(state, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0; } -int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status) +int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state) { bool enable_indir_lp = false; @@ -482,24 +483,23 @@ int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status) return -EINVAL; /* Reject unknown flags */ - if (status & ~PR_INDIR_BR_LP_ENABLE) + if (state & ~PR_INDIR_BR_LP_ENABLE) return -EINVAL; - enable_indir_lp = (status & PR_INDIR_BR_LP_ENABLE); + enable_indir_lp = (state & PR_INDIR_BR_LP_ENABLE); set_indir_lp_status(t, enable_indir_lp); return 0; } -int arch_lock_indir_br_lp_status(struct task_struct *task, - unsigned long arg) +int arch_prctl_lock_branch_landing_pad_state(struct task_struct *task) { /* * If indirect branch tracking is not supported or not enabled on task, * nothing to lock here */ if (!is_user_lpad_enabled() || - !is_indir_lp_enabled(task) || arg != 0) + !is_indir_lp_enabled(task)) return -EINVAL; set_indir_lp_lock(task, true); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 8239cd95a005..9b6b0d87fdb0 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -229,8 +229,8 @@ static inline bool cpu_attack_vector_mitigated(enum cpu_attack_vectors v) #define smt_mitigations SMT_MITIGATIONS_OFF #endif -int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status); -int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status); -int arch_lock_indir_br_lp_status(struct task_struct *t, unsigned long status); +int arch_prctl_get_branch_landing_pad_state(struct task_struct *t, unsigned long __user *state); +int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state); +int arch_prctl_lock_branch_landing_pad_state(struct task_struct *t); #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/sys.c b/kernel/sys.c index c86eba9aa7e9..a5e0c187bbbf 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2388,17 +2388,18 @@ int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long st return -EINVAL; } -int __weak arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status) +int __weak arch_prctl_get_branch_landing_pad_state(struct task_struct *t, + unsigned long __user *state) { return -EINVAL; } -int __weak arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status) +int __weak arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state) { return -EINVAL; } -int __weak arch_lock_indir_br_lp_status(struct task_struct *t, unsigned long status) +int __weak arch_prctl_lock_branch_landing_pad_state(struct task_struct *t) { return -EINVAL; } @@ -2891,17 +2892,17 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_INDIR_BR_LP_STATUS: if (arg3 || arg4 || arg5) return -EINVAL; - error = arch_get_indir_br_lp_status(me, (unsigned long __user *)arg2); + error = arch_prctl_get_branch_landing_pad_state(me, (unsigned long __user *)arg2); break; case PR_SET_INDIR_BR_LP_STATUS: if (arg3 || arg4 || arg5) return -EINVAL; - error = arch_set_indir_br_lp_status(me, arg2); + error = arch_prctl_set_branch_landing_pad_state(me, arg2); break; case PR_LOCK_INDIR_BR_LP_STATUS: - if (arg3 || arg4 || arg5) + if (arg2 || arg3 || arg4 || arg5) return -EINVAL; - error = arch_lock_indir_br_lp_status(me, arg2); + error = arch_prctl_lock_branch_landing_pad_state(me); break; default: trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); -- cgit v1.2.3 From 08ee1559052be302f1d3752f48360b89517d9f8d Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sat, 4 Apr 2026 18:40:58 -0600 Subject: prctl: cfi: change the branch landing pad prctl()s to be more descriptive Per Linus' comments requesting the replacement of "INDIR_BR_LP" in the indirect branch tracking prctl()s with something more readable, and suggesting the use of the speculation control prctl()s as an exemplar, reimplement the prctl()s and related constants that control per-task forward-edge control flow integrity. This primarily involves two changes. First, the prctls are restructured to resemble the style of the speculative execution workaround control prctls PR_{GET,SET}_SPECULATION_CTRL, to make them easier to extend in the future. Second, the "indir_br_lp" abbrevation is expanded to "branch_landing_pads" to be less telegraphic. The kselftest and documentation is adjusted accordingly. Link: https://lore.kernel.org/linux-riscv/CAHk-=whhSLGZAx3N5jJpb4GLFDqH_QvS07D+6BnkPWmCEzTAgw@mail.gmail.com/ Cc: Deepak Gupta Cc: Linus Torvalds Cc: Mark Brown Signed-off-by: Paul Walmsley --- Documentation/arch/riscv/zicfilp.rst | 63 +++++++++++++--------- arch/riscv/kernel/usercfi.c | 15 +++--- include/uapi/linux/prctl.h | 37 ++++++------- kernel/sys.c | 23 ++++---- tools/perf/trace/beauty/include/uapi/linux/prctl.h | 36 ++++++------- tools/testing/selftests/riscv/cfi/cfitests.c | 4 +- 6 files changed, 91 insertions(+), 87 deletions(-) (limited to 'include') diff --git a/Documentation/arch/riscv/zicfilp.rst b/Documentation/arch/riscv/zicfilp.rst index 78a3e01ff68c..ab7d8e62ddaf 100644 --- a/Documentation/arch/riscv/zicfilp.rst +++ b/Documentation/arch/riscv/zicfilp.rst @@ -76,34 +76,49 @@ the program. 4. prctl() enabling -------------------- -:c:macro:`PR_SET_INDIR_BR_LP_STATUS` / :c:macro:`PR_GET_INDIR_BR_LP_STATUS` / -:c:macro:`PR_LOCK_INDIR_BR_LP_STATUS` are three prctls added to manage indirect -branch tracking. These prctls are architecture-agnostic and return -EINVAL if -the underlying functionality is not supported. +Per-task indirect branch tracking state can be monitored and +controlled via the :c:macro:`PR_GET_CFI` and :c:macro:`PR_SET_CFI` +``prctl()` arguments (respectively), by supplying +:c:macro:`PR_CFI_BRANCH_LANDING_PADS` as the second argument. These +are architecture-agnostic, and will return -EINVAL if the underlying +functionality is not supported. -* prctl(PR_SET_INDIR_BR_LP_STATUS, unsigned long arg) +* prctl(:c:macro:`PR_SET_CFI`, :c:macro:`PR_CFI_BRANCH_LANDING_PADS`, unsigned long arg) -If arg1 is :c:macro:`PR_INDIR_BR_LP_ENABLE` and if CPU supports -``zicfilp`` then the kernel will enable indirect branch tracking for the -task. The dynamic loader can issue this :c:macro:`prctl` once it has -determined that all the objects loaded in the address space support -indirect branch tracking. Additionally, if there is a `dlopen` to an -object which wasn't compiled with ``zicfilp``, the dynamic loader can -issue this prctl with arg1 set to 0 (i.e. :c:macro:`PR_INDIR_BR_LP_ENABLE` -cleared). - -* prctl(PR_GET_INDIR_BR_LP_STATUS, unsigned long * arg) +arg is a bitmask. -Returns the current status of indirect branch tracking. If enabled -it'll return :c:macro:`PR_INDIR_BR_LP_ENABLE` - -* prctl(PR_LOCK_INDIR_BR_LP_STATUS, unsigned long arg) +If :c:macro:`PR_CFI_ENABLE` is set in arg, and the CPU supports +``zicfilp``, then the kernel will enable indirect branch tracking for +the task. The dynamic loader can issue this ``prctl()`` once it has +determined that all the objects loaded in the address space support +indirect branch tracking. + +Indirect branch tracking state can also be locked once enabled. This +prevents the task from subsequently disabling it. This is done by +setting the bit :c:macro:`PR_CFI_LOCK` in arg. Either indirect branch +tracking must already be enabled for the task, or the bit +:c:macro:`PR_CFI_ENABLE` must also be set in arg. This is intended +for environments that wish to run with a strict security posture that +do not wish to load objects without ``zicfilp`` support. + +Indirect branch tracking can also be disabled for the task, assuming +that it has not previously been enabled and locked. If there is a +``dlopen()`` to an object which wasn't compiled with ``zicfilp``, the +dynamic loader can issue this ``prctl()`` with arg set to +:c:macro:`PR_CFI_DISABLE`. Disabling indirect branch tracking for the +task is not possible if it has previously been enabled and locked. + + +* prctl(:c:macro:`PR_GET_CFI`, :c:macro:`PR_CFI_BRANCH_LANDING_PADS`, unsigned long * arg) + +Returns the current status of indirect branch tracking into a bitmask +stored into the memory location pointed to by arg. The bitmask will +have the :c:macro:`PR_CFI_ENABLE` bit set if indirect branch tracking +is currently enabled for the task, and if it is locked, will +additionally have the :c:macro:`PR_CFI_LOCK` bit set. If indirect +branch tracking is currently disabled for the task, the +:c:macro:`PR_CFI_DISABLE` bit will be set. -Locks the current status of indirect branch tracking on the task. User -space may want to run with a strict security posture and wouldn't want -loading of objects without ``zicfilp`` support in them, to disallow -disabling of indirect branch tracking. In this case, user space can -use this prctl to lock the current settings. 5. violations related to indirect branch tracking -------------------------------------------------- diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c index 04ab1eb8df29..2c535737511d 100644 --- a/arch/riscv/kernel/usercfi.c +++ b/arch/riscv/kernel/usercfi.c @@ -465,16 +465,14 @@ int arch_prctl_get_branch_landing_pad_state(struct task_struct *t, if (!is_user_lpad_enabled()) return -EINVAL; - /* indirect branch tracking is enabled on the task or not */ - fcfi_status |= (is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0); + fcfi_status = (is_indir_lp_enabled(t) ? PR_CFI_ENABLE : PR_CFI_DISABLE); + fcfi_status |= (is_indir_lp_locked(t) ? PR_CFI_LOCK : 0); return copy_to_user(state, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0; } int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state) { - bool enable_indir_lp = false; - if (!is_user_lpad_enabled()) return -EINVAL; @@ -482,12 +480,13 @@ int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long if (is_indir_lp_locked(t)) return -EINVAL; - /* Reject unknown flags */ - if (state & ~PR_INDIR_BR_LP_ENABLE) + if (!(state & (PR_CFI_ENABLE | PR_CFI_DISABLE))) + return -EINVAL; + + if (state & PR_CFI_ENABLE && state & PR_CFI_DISABLE) return -EINVAL; - enable_indir_lp = (state & PR_INDIR_BR_LP_ENABLE); - set_indir_lp_status(t, enable_indir_lp); + set_indir_lp_status(t, !!(state & PR_CFI_ENABLE)); return 0; } diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 55b0446fff9d..b6ec6f693719 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -397,30 +397,23 @@ struct prctl_mm_map { # define PR_RSEQ_SLICE_EXT_ENABLE 0x01 /* - * Get the current indirect branch tracking configuration for the current - * thread, this will be the value configured via PR_SET_INDIR_BR_LP_STATUS. + * Get or set the control flow integrity (CFI) configuration for the + * current thread. + * + * Some per-thread control flow integrity settings are not yet + * controlled through this prctl(); see for example + * PR_{GET,SET,LOCK}_SHADOW_STACK_STATUS */ -#define PR_GET_INDIR_BR_LP_STATUS 80 - +#define PR_GET_CFI 80 +#define PR_SET_CFI 81 /* - * Set the indirect branch tracking configuration. PR_INDIR_BR_LP_ENABLE will - * enable cpu feature for user thread, to track all indirect branches and ensure - * they land on arch defined landing pad instruction. - * x86 - If enabled, an indirect branch must land on an ENDBRANCH instruction. - * arch64 - If enabled, an indirect branch must land on a BTI instruction. - * riscv - If enabled, an indirect branch must land on an lpad instruction. - * PR_INDIR_BR_LP_DISABLE will disable feature for user thread and indirect - * branches will no more be tracked by cpu to land on arch defined landing pad - * instruction. - */ -#define PR_SET_INDIR_BR_LP_STATUS 81 -# define PR_INDIR_BR_LP_ENABLE (1UL << 0) - -/* - * Prevent further changes to the specified indirect branch tracking - * configuration. All bits may be locked via this call, including - * undefined bits. + * Forward-edge CFI variants (excluding ARM64 BTI, which has its own + * prctl()s). */ -#define PR_LOCK_INDIR_BR_LP_STATUS 82 +#define PR_CFI_BRANCH_LANDING_PADS 0 +/* Return and control values for PR_{GET,SET}_CFI */ +# define PR_CFI_ENABLE _BITUL(0) +# define PR_CFI_DISABLE _BITUL(1) +# define PR_CFI_LOCK _BITUL(2) #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index a5e0c187bbbf..62e842055cc9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2889,20 +2889,23 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; error = rseq_slice_extension_prctl(arg2, arg3); break; - case PR_GET_INDIR_BR_LP_STATUS: - if (arg3 || arg4 || arg5) + case PR_GET_CFI: + if (arg2 != PR_CFI_BRANCH_LANDING_PADS) return -EINVAL; - error = arch_prctl_get_branch_landing_pad_state(me, (unsigned long __user *)arg2); - break; - case PR_SET_INDIR_BR_LP_STATUS: - if (arg3 || arg4 || arg5) + if (arg4 || arg5) return -EINVAL; - error = arch_prctl_set_branch_landing_pad_state(me, arg2); + error = arch_prctl_get_branch_landing_pad_state(me, (unsigned long __user *)arg3); break; - case PR_LOCK_INDIR_BR_LP_STATUS: - if (arg2 || arg3 || arg4 || arg5) + case PR_SET_CFI: + if (arg2 != PR_CFI_BRANCH_LANDING_PADS) return -EINVAL; - error = arch_prctl_lock_branch_landing_pad_state(me); + if (arg4 || arg5) + return -EINVAL; + error = arch_prctl_set_branch_landing_pad_state(me, arg3); + if (error) + break; + if (arg3 & PR_CFI_LOCK && !(arg3 & PR_CFI_DISABLE)) + error = arch_prctl_lock_branch_landing_pad_state(me); break; default: trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 55b0446fff9d..560f99bc4782 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -397,30 +397,24 @@ struct prctl_mm_map { # define PR_RSEQ_SLICE_EXT_ENABLE 0x01 /* - * Get the current indirect branch tracking configuration for the current - * thread, this will be the value configured via PR_SET_INDIR_BR_LP_STATUS. + * Get or set the control flow integrity (CFI) configuration for the + * current thread. + * + * Some per-thread control flow integrity settings are not yet + * controlled through this prctl(); see for example + * PR_{GET,SET,LOCK}_SHADOW_STACK_STATUS */ -#define PR_GET_INDIR_BR_LP_STATUS 80 - +#define PR_GET_CFI 80 +#define PR_SET_CFI 81 /* - * Set the indirect branch tracking configuration. PR_INDIR_BR_LP_ENABLE will - * enable cpu feature for user thread, to track all indirect branches and ensure - * they land on arch defined landing pad instruction. - * x86 - If enabled, an indirect branch must land on an ENDBRANCH instruction. - * arch64 - If enabled, an indirect branch must land on a BTI instruction. - * riscv - If enabled, an indirect branch must land on an lpad instruction. - * PR_INDIR_BR_LP_DISABLE will disable feature for user thread and indirect - * branches will no more be tracked by cpu to land on arch defined landing pad - * instruction. + * Forward-edge CFI variants (excluding ARM64 BTI, which has its own + * prctl()s). */ -#define PR_SET_INDIR_BR_LP_STATUS 81 -# define PR_INDIR_BR_LP_ENABLE (1UL << 0) +#define PR_CFI_BRANCH_LANDING_PADS 0 +/* Return and control values for PR_{GET,SET}_CFI */ +# define PR_CFI_ENABLE _BITUL(0) +# define PR_CFI_DISABLE _BITUL(1) +# define PR_CFI_LOCK _BITUL(2) -/* - * Prevent further changes to the specified indirect branch tracking - * configuration. All bits may be locked via this call, including - * undefined bits. - */ -#define PR_LOCK_INDIR_BR_LP_STATUS 82 #endif /* _LINUX_PRCTL_H */ diff --git a/tools/testing/selftests/riscv/cfi/cfitests.c b/tools/testing/selftests/riscv/cfi/cfitests.c index 0dac74b8553c..39d097b6881f 100644 --- a/tools/testing/selftests/riscv/cfi/cfitests.c +++ b/tools/testing/selftests/riscv/cfi/cfitests.c @@ -146,11 +146,11 @@ int main(int argc, char *argv[]) * pads for user mode except lighting up a bit in senvcfg via a prctl. * Enable landing pad support throughout the execution of the test binary. */ - ret = my_syscall5(__NR_prctl, PR_GET_INDIR_BR_LP_STATUS, &lpad_status, 0, 0, 0); + ret = my_syscall5(__NR_prctl, PR_GET_CFI, PR_CFI_BRANCH_LANDING_PADS, &lpad_status, 0, 0); if (ret) ksft_exit_fail_msg("Get landing pad status failed with %d\n", ret); - if (!(lpad_status & PR_INDIR_BR_LP_ENABLE)) + if (!(lpad_status & PR_CFI_ENABLE)) ksft_exit_fail_msg("Landing pad is not enabled, should be enabled via glibc\n"); ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &ss_status, 0, 0, 0); -- cgit v1.2.3 From 1ee1605138fc94cc8f8f273321dd2471c64977f9 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 2 Apr 2026 17:49:52 +0200 Subject: xsk: respect tailroom for ZC setups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multi-buffer XDP stores information about frags in skb_shared_info that sits at the tailroom of a packet. The storage space is reserved via xdp_data_hard_end(): ((xdp)->data_hard_start + (xdp)->frame_sz - \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) and then we refer to it via macro below: static inline struct skb_shared_info * xdp_get_shared_info_from_buff(const struct xdp_buff *xdp) { return (struct skb_shared_info *)xdp_data_hard_end(xdp); } Currently we do not respect this tailroom space in multi-buffer AF_XDP ZC scenario. To address this, introduce xsk_pool_get_tailroom() and use it within xsk_pool_get_rx_frame_size() which is used in ZC drivers to configure length of HW Rx buffer. Typically drivers on Rx Hw buffers side work on 128 byte alignment so let us align the value returned by xsk_pool_get_rx_frame_size() in order to avoid addressing this on driver's side. This addresses the fact that idpf uses mentioned function *before* pool->dev being set so we were at risk that after subtracting tailroom we would not provide 128-byte aligned value to HW. Since xsk_pool_get_rx_frame_size() is actively used in xsk_rcv_check() and __xsk_rcv(), add a variant of this routine that will not include 128 byte alignment and therefore old behavior is preserved. Reviewed-by: Björn Töpel Acked-by: Stanislav Fomichev Fixes: 24ea50127ecf ("xsk: support mbuf on ZC RX") Signed-off-by: Maciej Fijalkowski Link: https://patch.msgid.link/20260402154958.562179-3-maciej.fijalkowski@intel.com Signed-off-by: Jakub Kicinski --- include/net/xdp_sock_drv.h | 23 ++++++++++++++++++++++- net/xdp/xsk.c | 4 ++-- 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 6b9ebae2dc95..46797645a0c2 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -41,16 +41,37 @@ static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool) return XDP_PACKET_HEADROOM + pool->headroom; } +static inline u32 xsk_pool_get_tailroom(bool mbuf) +{ + return mbuf ? SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : 0; +} + static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool) { return pool->chunk_size; } -static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) +static inline u32 __xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) { return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool); } +static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) +{ + u32 frame_size = __xsk_pool_get_rx_frame_size(pool); + struct xdp_umem *umem = pool->umem; + bool mbuf; + + /* Reserve tailroom only for zero-copy pools that opted into + * multi-buffer. The reserved area is used for skb_shared_info, + * matching the XDP core's xdp_data_hard_end() layout. + */ + mbuf = pool->dev && (umem->flags & XDP_UMEM_SG_FLAG); + frame_size -= xsk_pool_get_tailroom(mbuf); + + return ALIGN_DOWN(frame_size, 128); +} + static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) { return pool->unaligned ? 0 : xsk_pool_get_chunk_size(pool); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 6149f6a79897..c8ef9e427c9c 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -239,7 +239,7 @@ static u32 xsk_copy_xdp(void *to, void **from, u32 to_len, static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { - u32 frame_size = xsk_pool_get_rx_frame_size(xs->pool); + u32 frame_size = __xsk_pool_get_rx_frame_size(xs->pool); void *copy_from = xsk_copy_xdp_start(xdp), *copy_to; u32 from_len, meta_len, rem, num_desc; struct xdp_buff_xsk *xskb; @@ -338,7 +338,7 @@ static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) return -EINVAL; - if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) { + if (len > __xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) { xs->rx_dropped++; return -ENOSPC; } -- cgit v1.2.3 From 93e84fe45b752d17a5a46b306ed78f0133bbc719 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 2 Apr 2026 17:49:53 +0200 Subject: xsk: fix XDP_UMEM_SG_FLAG issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently xp_assign_dev_shared() is missing XDP_USE_SG being propagated to flags so set it in order to preserve mtu check that is supposed to be done only when no multi-buffer setup is in picture. Also, this flag has the same value as XDP_UMEM_TX_SW_CSUM so we could get unexpected SG setups for software Tx checksums. Since csum flag is UAPI, modify value of XDP_UMEM_SG_FLAG. Fixes: d609f3d228a8 ("xsk: add multi-buffer support for sockets sharing umem") Reviewed-by: Björn Töpel Signed-off-by: Maciej Fijalkowski Link: https://patch.msgid.link/20260402154958.562179-4-maciej.fijalkowski@intel.com Signed-off-by: Jakub Kicinski --- include/net/xdp_sock.h | 2 +- net/xdp/xsk_buff_pool.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 23e8861e8b25..ebac60a3d8a1 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -14,7 +14,7 @@ #include #include -#define XDP_UMEM_SG_FLAG (1 << 1) +#define XDP_UMEM_SG_FLAG BIT(3) struct net_device; struct xsk_queue; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 37b7a68b89b3..729602a3cec0 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -247,6 +247,10 @@ int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs, struct xdp_umem *umem = umem_xs->umem; flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY; + + if (umem->flags & XDP_UMEM_SG_FLAG) + flags |= XDP_USE_SG; + if (umem_xs->pool->uses_need_wakeup) flags |= XDP_USE_NEED_WAKEUP; -- cgit v1.2.3 From f8dca15a1b190787bbd03285304b569631160eda Mon Sep 17 00:00:00 2001 From: Tuan Do Date: Fri, 3 Apr 2026 00:33:17 -0700 Subject: netfilter: nft_ct: fix use-after-free in timeout object destroy nft_ct_timeout_obj_destroy() frees the timeout object with kfree() immediately after nf_ct_untimeout(), without waiting for an RCU grace period. Concurrent packet processing on other CPUs may still hold RCU-protected references to the timeout object obtained via rcu_dereference() in nf_ct_timeout_data(). Add an rcu_head to struct nf_ct_timeout and use kfree_rcu() to defer freeing until after an RCU grace period, matching the approach already used in nfnetlink_cttimeout.c. KASAN report: BUG: KASAN: slab-use-after-free in nf_conntrack_tcp_packet+0x1381/0x29d0 Read of size 4 at addr ffff8881035fe19c by task exploit/80 Call Trace: nf_conntrack_tcp_packet+0x1381/0x29d0 nf_conntrack_in+0x612/0x8b0 nf_hook_slow+0x70/0x100 __ip_local_out+0x1b2/0x210 tcp_sendmsg_locked+0x722/0x1580 __sys_sendto+0x2d8/0x320 Allocated by task 75: nft_ct_timeout_obj_init+0xf6/0x290 nft_obj_init+0x107/0x1b0 nf_tables_newobj+0x680/0x9c0 nfnetlink_rcv_batch+0xc29/0xe00 Freed by task 26: nft_obj_destroy+0x3f/0xa0 nf_tables_trans_destroy_work+0x51c/0x5c0 process_one_work+0x2c4/0x5a0 Fixes: 7e0b2b57f01d ("netfilter: nft_ct: add ct timeout support") Cc: stable@vger.kernel.org Signed-off-by: Tuan Do Signed-off-by: Florian Westphal --- include/net/netfilter/nf_conntrack_timeout.h | 1 + net/netfilter/nft_ct.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 9fdaba911de6..3a66d4abb6d6 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -14,6 +14,7 @@ struct nf_ct_timeout { __u16 l3num; const struct nf_conntrack_l4proto *l4proto; + struct rcu_head rcu; char data[]; }; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 128ff8155b5d..04c74ccf9b84 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1020,7 +1020,7 @@ static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx, nf_queue_nf_hook_drop(ctx->net); nf_ct_untimeout(ctx->net, timeout); nf_ct_netns_put(ctx->net, ctx->family); - kfree(priv->timeout); + kfree_rcu(priv->timeout, rcu); } static int nft_ct_timeout_obj_dump(struct sk_buff *skb, -- cgit v1.2.3 From 936206e3f6ff411581e615e930263d6f8b78df9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 7 Apr 2026 17:00:01 +0200 Subject: netfilter: nfnetlink_queue: make hash table per queue Sharing a global hash table among all queues is tempting, but it can cause crash: BUG: KASAN: slab-use-after-free in nfqnl_recv_verdict+0x11ac/0x15e0 [nfnetlink_queue] [..] nfqnl_recv_verdict+0x11ac/0x15e0 [nfnetlink_queue] nfnetlink_rcv_msg+0x46a/0x930 kmem_cache_alloc_node_noprof+0x11e/0x450 struct nf_queue_entry is freed via kfree, but parallel cpu can still encounter such an nf_queue_entry when walking the list. Alternative fix is to free the nf_queue_entry via kfree_rcu() instead, but as we have to alloc/free for each skb this will cause more mem pressure. Cc: Scott Mitchell Fixes: e19079adcd26 ("netfilter: nfnetlink_queue: optimize verdict lookup with hash table") Signed-off-by: Florian Westphal --- include/net/netfilter/nf_queue.h | 1 - net/netfilter/nfnetlink_queue.c | 139 ++++++++++++++------------------------- 2 files changed, 49 insertions(+), 91 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 45eb26b2e95b..d17035d14d96 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -23,7 +23,6 @@ struct nf_queue_entry { struct nf_hook_state state; bool nf_ct_is_unconfirmed; u16 size; /* sizeof(entry) + saved route keys */ - u16 queue_num; /* extra space to store route keys */ }; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 47f7f62906e2..8e02f84784da 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -49,8 +49,8 @@ #endif #define NFQNL_QMAX_DEFAULT 1024 -#define NFQNL_HASH_MIN 1024 -#define NFQNL_HASH_MAX 1048576 +#define NFQNL_HASH_MIN 8 +#define NFQNL_HASH_MAX 32768 /* We're using struct nlattr which has 16bit nla_len. Note that nla_len * includes the header length. Thus, the maximum packet length that we @@ -60,29 +60,10 @@ */ #define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN) -/* Composite key for packet lookup: (net, queue_num, packet_id) */ -struct nfqnl_packet_key { - possible_net_t net; - u32 packet_id; - u16 queue_num; -} __aligned(sizeof(u32)); /* jhash2 requires 32-bit alignment */ - -/* Global rhashtable - one for entire system, all netns */ -static struct rhashtable nfqnl_packet_map __read_mostly; - -/* Helper to initialize composite key */ -static inline void nfqnl_init_key(struct nfqnl_packet_key *key, - struct net *net, u32 packet_id, u16 queue_num) -{ - memset(key, 0, sizeof(*key)); - write_pnet(&key->net, net); - key->packet_id = packet_id; - key->queue_num = queue_num; -} - struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ - struct rcu_head rcu; + struct rhashtable nfqnl_packet_map; + struct rcu_work rwork; u32 peer_portid; unsigned int queue_maxlen; @@ -106,6 +87,7 @@ struct nfqnl_instance { typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); +static struct workqueue_struct *nfq_cleanup_wq __read_mostly; static unsigned int nfnl_queue_net_id __read_mostly; #define INSTANCE_BUCKETS 16 @@ -124,34 +106,10 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num) return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS; } -/* Extract composite key from nf_queue_entry for hashing */ -static u32 nfqnl_packet_obj_hashfn(const void *data, u32 len, u32 seed) -{ - const struct nf_queue_entry *entry = data; - struct nfqnl_packet_key key; - - nfqnl_init_key(&key, entry->state.net, entry->id, entry->queue_num); - - return jhash2((u32 *)&key, sizeof(key) / sizeof(u32), seed); -} - -/* Compare stack-allocated key against entry */ -static int nfqnl_packet_obj_cmpfn(struct rhashtable_compare_arg *arg, - const void *obj) -{ - const struct nfqnl_packet_key *key = arg->key; - const struct nf_queue_entry *entry = obj; - - return !net_eq(entry->state.net, read_pnet(&key->net)) || - entry->queue_num != key->queue_num || - entry->id != key->packet_id; -} - static const struct rhashtable_params nfqnl_rhashtable_params = { .head_offset = offsetof(struct nf_queue_entry, hash_node), - .key_len = sizeof(struct nfqnl_packet_key), - .obj_hashfn = nfqnl_packet_obj_hashfn, - .obj_cmpfn = nfqnl_packet_obj_cmpfn, + .key_offset = offsetof(struct nf_queue_entry, id), + .key_len = sizeof(u32), .automatic_shrinking = true, .min_size = NFQNL_HASH_MIN, .max_size = NFQNL_HASH_MAX, @@ -190,6 +148,10 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid) spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); + err = rhashtable_init(&inst->nfqnl_packet_map, &nfqnl_rhashtable_params); + if (err < 0) + goto out_free; + spin_lock(&q->instances_lock); if (instance_lookup(q, queue_num)) { err = -EEXIST; @@ -210,6 +172,8 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid) out_unlock: spin_unlock(&q->instances_lock); + rhashtable_destroy(&inst->nfqnl_packet_map); +out_free: kfree(inst); return ERR_PTR(err); } @@ -217,15 +181,18 @@ out_unlock: static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data); -static void -instance_destroy_rcu(struct rcu_head *head) +static void instance_destroy_work(struct work_struct *work) { - struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance, - rcu); + struct nfqnl_instance *inst; + inst = container_of(to_rcu_work(work), struct nfqnl_instance, + rwork); rcu_read_lock(); nfqnl_flush(inst, NULL, 0); rcu_read_unlock(); + + rhashtable_destroy(&inst->nfqnl_packet_map); + kfree(inst); module_put(THIS_MODULE); } @@ -234,7 +201,9 @@ static void __instance_destroy(struct nfqnl_instance *inst) { hlist_del_rcu(&inst->hlist); - call_rcu(&inst->rcu, instance_destroy_rcu); + + INIT_RCU_WORK(&inst->rwork, instance_destroy_work); + queue_rcu_work(nfq_cleanup_wq, &inst->rwork); } static void @@ -250,9 +219,7 @@ __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) { int err; - entry->queue_num = queue->queue_num; - - err = rhashtable_insert_fast(&nfqnl_packet_map, &entry->hash_node, + err = rhashtable_insert_fast(&queue->nfqnl_packet_map, &entry->hash_node, nfqnl_rhashtable_params); if (unlikely(err)) return err; @@ -266,23 +233,19 @@ __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) static void __dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) { - rhashtable_remove_fast(&nfqnl_packet_map, &entry->hash_node, + rhashtable_remove_fast(&queue->nfqnl_packet_map, &entry->hash_node, nfqnl_rhashtable_params); list_del(&entry->list); queue->queue_total--; } static struct nf_queue_entry * -find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id, - struct net *net) +find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) { - struct nfqnl_packet_key key; struct nf_queue_entry *entry; - nfqnl_init_key(&key, net, id, queue->queue_num); - spin_lock_bh(&queue->lock); - entry = rhashtable_lookup_fast(&nfqnl_packet_map, &key, + entry = rhashtable_lookup_fast(&queue->nfqnl_packet_map, &id, nfqnl_rhashtable_params); if (entry) @@ -1531,7 +1494,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info, verdict = ntohl(vhdr->verdict); - entry = find_dequeue_entry(queue, ntohl(vhdr->id), info->net); + entry = find_dequeue_entry(queue, ntohl(vhdr->id)); if (entry == NULL) return -ENOENT; @@ -1880,40 +1843,38 @@ static int __init nfnetlink_queue_init(void) { int status; - status = rhashtable_init(&nfqnl_packet_map, &nfqnl_rhashtable_params); - if (status < 0) - return status; + nfq_cleanup_wq = alloc_ordered_workqueue("nfq_workqueue", 0); + if (!nfq_cleanup_wq) + return -ENOMEM; status = register_pernet_subsys(&nfnl_queue_net_ops); - if (status < 0) { - pr_err("failed to register pernet ops\n"); - goto cleanup_rhashtable; - } + if (status < 0) + goto cleanup_pernet_subsys; - netlink_register_notifier(&nfqnl_rtnl_notifier); - status = nfnetlink_subsys_register(&nfqnl_subsys); - if (status < 0) { - pr_err("failed to create netlink socket\n"); - goto cleanup_netlink_notifier; - } + status = netlink_register_notifier(&nfqnl_rtnl_notifier); + if (status < 0) + goto cleanup_rtnl_notifier; status = register_netdevice_notifier(&nfqnl_dev_notifier); - if (status < 0) { - pr_err("failed to register netdevice notifier\n"); - goto cleanup_netlink_subsys; - } + if (status < 0) + goto cleanup_dev_notifier; + + status = nfnetlink_subsys_register(&nfqnl_subsys); + if (status < 0) + goto cleanup_nfqnl_subsys; nf_register_queue_handler(&nfqh); return status; -cleanup_netlink_subsys: - nfnetlink_subsys_unregister(&nfqnl_subsys); -cleanup_netlink_notifier: +cleanup_nfqnl_subsys: + unregister_netdevice_notifier(&nfqnl_dev_notifier); +cleanup_dev_notifier: netlink_unregister_notifier(&nfqnl_rtnl_notifier); +cleanup_rtnl_notifier: unregister_pernet_subsys(&nfnl_queue_net_ops); -cleanup_rhashtable: - rhashtable_destroy(&nfqnl_packet_map); +cleanup_pernet_subsys: + destroy_workqueue(nfq_cleanup_wq); return status; } @@ -1924,9 +1885,7 @@ static void __exit nfnetlink_queue_fini(void) nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); unregister_pernet_subsys(&nfnl_queue_net_ops); - - rhashtable_destroy(&nfqnl_packet_map); - + destroy_workqueue(nfq_cleanup_wq); rcu_barrier(); /* Wait for completion of call_rcu()'s */ } -- cgit v1.2.3 From 52f657e34d7b21b47434d9d8b26fa7f6778b63a0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Apr 2026 13:18:57 -0700 Subject: x86: shadow stacks: proper error handling for mmap lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 김영민 reports that shstk_pop_sigframe() doesn't check for errors from mmap_read_lock_killable(), which is a silly oversight, and also shows that we haven't marked those functions with "__must_check", which would have immediately caught it. So let's fix both issues. Reported-by: 김영민 Acked-by: Oleg Nesterov Acked-by: Dave Hansen Acked-by: Rick Edgecombe Signed-off-by: Linus Torvalds --- arch/x86/kernel/shstk.c | 3 ++- include/linux/mmap_lock.h | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c index 978232b6d48d..ff8edea8511b 100644 --- a/arch/x86/kernel/shstk.c +++ b/arch/x86/kernel/shstk.c @@ -351,7 +351,8 @@ static int shstk_pop_sigframe(unsigned long *ssp) need_to_check_vma = PAGE_ALIGN(*ssp) == *ssp; if (need_to_check_vma) - mmap_read_lock_killable(current->mm); + if (mmap_read_lock_killable(current->mm)) + return -EINTR; err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp); if (unlikely(err)) diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 93eca48bc443..04b8f61ece5d 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -546,7 +546,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass) __mmap_lock_trace_acquire_returned(mm, true, true); } -static inline int mmap_write_lock_killable(struct mm_struct *mm) +static inline int __must_check mmap_write_lock_killable(struct mm_struct *mm) { int ret; @@ -593,7 +593,7 @@ static inline void mmap_read_lock(struct mm_struct *mm) __mmap_lock_trace_acquire_returned(mm, false, true); } -static inline int mmap_read_lock_killable(struct mm_struct *mm) +static inline int __must_check mmap_read_lock_killable(struct mm_struct *mm) { int ret; @@ -603,7 +603,7 @@ static inline int mmap_read_lock_killable(struct mm_struct *mm) return ret; } -static inline bool mmap_read_trylock(struct mm_struct *mm) +static inline bool __must_check mmap_read_trylock(struct mm_struct *mm) { bool ret; -- cgit v1.2.3 From 146d4ab94cf129ee06cd467cb5c71368a6b5bad6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 8 Apr 2026 13:12:32 +0100 Subject: rxrpc: Fix call removal to use RCU safe deletion Fix rxrpc call removal from the rxnet->calls list to use list_del_rcu() rather than list_del_init() to prevent stuffing up reading /proc/net/rxrpc/calls from potentially getting into an infinite loop. This, however, means that list_empty() no longer works on an entry that's been deleted from the list, making it harder to detect prior deletion. Fix this by: Firstly, make rxrpc_destroy_all_calls() only dump the first ten calls that are unexpectedly still on the list. Limiting the number of steps means there's no need to call cond_resched() or to remove calls from the list here, thereby eliminating the need for rxrpc_put_call() to check for that. rxrpc_put_call() can then be fixed to unconditionally delete the call from the list as it is the only place that the deletion occurs. Fixes: 2baec2c3f854 ("rxrpc: Support network namespacing") Closes: https://sashiko.dev/#/patchset/20260319150150.4189381-1-dhowells%40redhat.com Signed-off-by: David Howells cc: Marc Dionne cc: Jeffrey Altman cc: Linus Torvalds cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-5-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 2 +- net/rxrpc/call_object.c | 24 +++++++++--------------- 2 files changed, 10 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 869f97c9bf73..a826cd80007b 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -347,7 +347,7 @@ EM(rxrpc_call_see_release, "SEE release ") \ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ EM(rxrpc_call_see_waiting_call, "SEE q-conn ") \ - E_(rxrpc_call_see_zap, "SEE zap ") + E_(rxrpc_call_see_still_live, "SEE !still-l") #define rxrpc_txqueue_traces \ EM(rxrpc_txqueue_await_reply, "AWR") \ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 918f41d97a2f..59329cfe1532 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -654,11 +654,9 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why) if (dead) { ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE); - if (!list_empty(&call->link)) { - spin_lock(&rxnet->call_lock); - list_del_init(&call->link); - spin_unlock(&rxnet->call_lock); - } + spin_lock(&rxnet->call_lock); + list_del_rcu(&call->link); + spin_unlock(&rxnet->call_lock); rxrpc_cleanup_call(call); } @@ -730,24 +728,20 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) _enter(""); if (!list_empty(&rxnet->calls)) { - spin_lock(&rxnet->call_lock); + int shown = 0; - while (!list_empty(&rxnet->calls)) { - call = list_entry(rxnet->calls.next, - struct rxrpc_call, link); - _debug("Zapping call %p", call); + spin_lock(&rxnet->call_lock); - rxrpc_see_call(call, rxrpc_call_see_zap); - list_del_init(&call->link); + list_for_each_entry(call, &rxnet->calls, link) { + rxrpc_see_call(call, rxrpc_call_see_still_live); pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", call, refcount_read(&call->ref), rxrpc_call_states[__rxrpc_call_state(call)], call->flags, call->events); - spin_unlock(&rxnet->call_lock); - cond_resched(); - spin_lock(&rxnet->call_lock); + if (++shown >= 10) + break; } spin_unlock(&rxnet->call_lock); -- cgit v1.2.3 From b33f5741bb187db8ff32e8f5b96def77cc94dfca Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Wed, 8 Apr 2026 13:12:34 +0100 Subject: rxrpc: Fix use of wrong skb when comparing queued RESP challenge serial In rxrpc_post_response(), the code should be comparing the challenge serial number from the cached response before deciding to switch to a newer response, but looks at the newer packet private data instead, rendering the comparison always false. Fix this by switching to look at the older packet. Fix further[1] to substitute the new packet in place of the old one if newer and also to release whichever we don't use. Fixes: 5800b1cf3fd8 ("rxrpc: Allow CHALLENGEs to the passed to the app for a RESPONSE") Signed-off-by: Alok Tiwari Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://sashiko.dev/#/patchset/20260319150150.4189381-1-dhowells%40redhat.com [1] Link: https://patch.msgid.link/20260408121252.2249051-7-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 1 + net/rxrpc/conn_event.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index a826cd80007b..f7f559204b87 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -185,6 +185,7 @@ EM(rxrpc_skb_put_input, "PUT input ") \ EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \ EM(rxrpc_skb_put_oob, "PUT oob ") \ + EM(rxrpc_skb_put_old_response, "PUT old-resp ") \ EM(rxrpc_skb_put_purge, "PUT purge ") \ EM(rxrpc_skb_put_purge_oob, "PUT purge-oob") \ EM(rxrpc_skb_put_response, "PUT response ") \ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 98ad9b51ca2c..c50cbfc5a313 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -557,11 +557,11 @@ void rxrpc_post_response(struct rxrpc_connection *conn, struct sk_buff *skb) spin_lock_irq(&local->lock); old = conn->tx_response; if (old) { - struct rxrpc_skb_priv *osp = rxrpc_skb(skb); + struct rxrpc_skb_priv *osp = rxrpc_skb(old); /* Always go with the response to the most recent challenge. */ if (after(sp->resp.challenge_serial, osp->resp.challenge_serial)) - conn->tx_response = old; + conn->tx_response = skb; else old = skb; } else { @@ -569,4 +569,5 @@ void rxrpc_post_response(struct rxrpc_connection *conn, struct sk_buff *skb) } spin_unlock_irq(&local->lock); rxrpc_poke_conn(conn, rxrpc_conn_get_poke_response); + rxrpc_free_skb(old, rxrpc_skb_put_old_response); } -- cgit v1.2.3 From 0cd3e3f3f2ec1a45aa559e2c0f3d57fac5eb3c25 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Wed, 8 Apr 2026 13:12:37 +0100 Subject: rxrpc: Fix to request an ack if window is limited Peers may only send immediate acks for every 2 UDP packets received. When sending a jumbogram, it is important to check that there is sufficient window space to send another same sized jumbogram following the current one, and request an ack if there isn't. Failure to do so may cause the call to stall waiting for an ack until the resend timer fires. Where jumbograms are in use this causes a very significant drop in performance. Fixes: fe24a5494390 ("rxrpc: Send jumbo DATA packets") Signed-off-by: Marc Dionne Signed-off-by: David Howells cc: Jeffrey Altman cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/20260408121252.2249051-10-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 1 + net/rxrpc/ar-internal.h | 2 +- net/rxrpc/output.c | 2 ++ net/rxrpc/proc.c | 5 +++-- 4 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index f7f559204b87..578b8038b211 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -521,6 +521,7 @@ #define rxrpc_req_ack_traces \ EM(rxrpc_reqack_ack_lost, "ACK-LOST ") \ EM(rxrpc_reqack_app_stall, "APP-STALL ") \ + EM(rxrpc_reqack_jumbo_win, "JUMBO-WIN ") \ EM(rxrpc_reqack_more_rtt, "MORE-RTT ") \ EM(rxrpc_reqack_no_srv_last, "NO-SRVLAST") \ EM(rxrpc_reqack_old_rtt, "OLD-RTT ") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 36d6ca0d1089..96ecb83c9071 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -117,7 +117,7 @@ struct rxrpc_net { atomic_t stat_tx_jumbo[10]; atomic_t stat_rx_jumbo[10]; - atomic_t stat_why_req_ack[8]; + atomic_t stat_why_req_ack[9]; atomic_t stat_io_loop; }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index d70db367e358..870e59bf06af 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -479,6 +479,8 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, why = rxrpc_reqack_old_rtt; else if (!last && !after(READ_ONCE(call->send_top), txb->seq)) why = rxrpc_reqack_app_stall; + else if (call->tx_winsize <= (2 * req->n) || call->cong_cwnd <= (2 * req->n)) + why = rxrpc_reqack_jumbo_win; else goto dont_set_request_ack; diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 59292f7f9205..7755fca5beb8 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -518,11 +518,12 @@ int rxrpc_stats_show(struct seq_file *seq, void *v) atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_IDLE]), atomic_read(&rxnet->stat_rx_acks[0])); seq_printf(seq, - "Why-Req-A: acklost=%u mrtt=%u ortt=%u stall=%u\n", + "Why-Req-A: acklost=%u mrtt=%u ortt=%u stall=%u jwin=%u\n", atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_ack_lost]), atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_more_rtt]), atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_old_rtt]), - atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_app_stall])); + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_app_stall]), + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_jumbo_win])); seq_printf(seq, "Why-Req-A: nolast=%u retx=%u slows=%u smtxw=%u\n", atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_no_srv_last]), -- cgit v1.2.3 From e3b2cf6e5dba416a03152f299d99982dfe1e861d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 1 Apr 2026 12:15:58 +0200 Subject: kernfs: pass struct ns_common instead of const void * for namespace tags kernfs has historically used const void * to pass around namespace tags used for directory-level namespace filtering. The only current user of this is sysfs network namespace tagging where struct net pointers are cast to void *. Replace all const void * namespace parameters with const struct ns_common * throughout the kernfs, sysfs, and kobject namespace layers. This includes the kobj_ns_type_operations callbacks, kobject_namespace(), and all sysfs/kernfs APIs that accept or return namespace tags. Passing struct ns_common is needed because various codepaths require access to the underlying namespace. A struct ns_common can always be converted back to the concrete namespace type (e.g., struct net) via container_of() or to_ns_common() in the reverse direction. This is a preparatory change for switching to ns_id-based directory iteration to prevent a KASLR pointer leak through the current use of raw namespace pointers as hash seeds and comparison keys. Signed-off-by: Christian Brauner --- drivers/base/class.c | 4 +-- drivers/base/core.c | 7 +++--- drivers/infiniband/core/device.c | 5 ++-- drivers/infiniband/ulp/srp/ib_srp.c | 7 +++--- drivers/net/bonding/bond_sysfs.c | 4 +-- drivers/net/ipvlan/ipvtap.c | 5 ++-- drivers/net/macvtap.c | 5 ++-- fs/kernfs/dir.c | 30 ++++++++++++---------- fs/kernfs/file.c | 2 +- fs/kernfs/kernfs-internal.h | 2 +- fs/kernfs/mount.c | 2 +- fs/nfs/sysfs.c | 16 +++++++----- fs/sysfs/dir.c | 6 ++--- fs/sysfs/file.c | 8 +++--- fs/sysfs/mount.c | 10 +++++--- fs/sysfs/symlink.c | 7 +++--- fs/sysfs/sysfs.h | 4 +-- include/linux/device/class.h | 6 ++--- include/linux/kernfs.h | 40 +++++++++++++++++------------ include/linux/kobject.h | 4 +-- include/linux/kobject_ns.h | 13 +++++----- include/linux/netdevice.h | 4 +-- include/linux/sysfs.h | 24 +++++++++--------- include/net/net_namespace.h | 8 +++--- lib/kobject.c | 8 +++--- lib/kobject_uevent.c | 13 ++++++---- net/core/net-sysfs.c | 50 ++++++++++++++++++------------------- net/core/net_namespace.c | 8 +++--- net/sunrpc/sysfs.c | 17 +++++++------ net/wireless/sysfs.c | 4 +-- 30 files changed, 175 insertions(+), 148 deletions(-) (limited to 'include') diff --git a/drivers/base/class.c b/drivers/base/class.c index 827fc7adacc7..ffab0a9c8ccb 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -127,7 +127,7 @@ static const struct kobj_type class_ktype = { }; int class_create_file_ns(const struct class *cls, const struct class_attribute *attr, - const void *ns) + const struct ns_common *ns) { struct subsys_private *sp = class_to_subsys(cls); int error; @@ -143,7 +143,7 @@ int class_create_file_ns(const struct class *cls, const struct class_attribute * EXPORT_SYMBOL_GPL(class_create_file_ns); void class_remove_file_ns(const struct class *cls, const struct class_attribute *attr, - const void *ns) + const struct ns_common *ns) { struct subsys_private *sp = class_to_subsys(cls); diff --git a/drivers/base/core.c b/drivers/base/core.c index 09b98f02f559..0613de0fbe44 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2570,15 +2570,14 @@ static void device_release(struct kobject *kobj) kfree(p); } -static const void *device_namespace(const struct kobject *kobj) +static const struct ns_common *device_namespace(const struct kobject *kobj) { const struct device *dev = kobj_to_dev(kobj); - const void *ns = NULL; if (dev->class && dev->class->namespace) - ns = dev->class->namespace(dev); + return dev->class->namespace(dev); - return ns; + return NULL; } static void device_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 558b73940d66..7945614be36d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -509,12 +509,13 @@ static int ib_device_uevent(const struct device *device, return 0; } -static const void *net_namespace(const struct device *d) +static const struct ns_common *net_namespace(const struct device *d) { const struct ib_core_device *coredev = container_of(d, struct ib_core_device, dev); + struct net *net = read_pnet(&coredev->rdma_net); - return read_pnet(&coredev->rdma_net); + return net ? to_ns_common(net) : NULL; } static struct class ib_class = { diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 30339dcabb4d..b58868e1cf11 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -1048,7 +1049,7 @@ static void srp_remove_target(struct srp_target_port *target) scsi_remove_host(target->scsi_host); srp_stop_rport_timers(target->rport); srp_disconnect_target(target); - kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net); + kobj_ns_drop(KOBJ_NS_TYPE_NET, to_ns_common(target->net)); for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; srp_free_ch_ib(target, ch); @@ -3713,7 +3714,7 @@ static ssize_t add_target_store(struct device *dev, target = host_to_target(target_host); - target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); + target->net = to_net_ns(kobj_ns_grab_current(KOBJ_NS_TYPE_NET)); target->io_class = SRP_REV16A_IB_IO_CLASS; target->scsi_host = target_host; target->srp_host = host; @@ -3905,7 +3906,7 @@ put: * earlier in this function. */ if (target->state != SRP_TARGET_REMOVED) - kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net); + kobj_ns_drop(KOBJ_NS_TYPE_NET, to_ns_common(target->net)); scsi_host_put(target->scsi_host); } diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 9a75ad3181ab..eaba44c76a5e 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -808,7 +808,7 @@ int __net_init bond_create_sysfs(struct bond_net *bn) sysfs_attr_init(&bn->class_attr_bonding_masters.attr); ret = netdev_class_create_file_ns(&bn->class_attr_bonding_masters, - bn->net); + to_ns_common(bn->net)); /* Permit multiple loads of the module by ignoring failures to * create the bonding_masters sysfs file. Bonding devices * created by second or subsequent loads of the module will @@ -835,7 +835,7 @@ int __net_init bond_create_sysfs(struct bond_net *bn) /* Remove /sys/class/net/bonding_masters. */ void __net_exit bond_destroy_sysfs(struct bond_net *bn) { - netdev_class_remove_file_ns(&bn->class_attr_bonding_masters, bn->net); + netdev_class_remove_file_ns(&bn->class_attr_bonding_masters, to_ns_common(bn->net)); } /* Initialize sysfs for each bond. This sets up and registers diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c index edd13916831a..2d6bbddd1edd 100644 --- a/drivers/net/ipvlan/ipvtap.c +++ b/drivers/net/ipvlan/ipvtap.c @@ -30,10 +30,11 @@ static dev_t ipvtap_major; static struct cdev ipvtap_cdev; -static const void *ipvtap_net_namespace(const struct device *d) +static const struct ns_common *ipvtap_net_namespace(const struct device *d) { const struct net_device *dev = to_net_dev(d->parent); - return dev_net(dev); + + return to_ns_common(dev_net(dev)); } static struct class ipvtap_class = { diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index b391a0f740a3..cc975dfb7380 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -35,10 +35,11 @@ struct macvtap_dev { */ static dev_t macvtap_major; -static const void *macvtap_net_namespace(const struct device *d) +static const struct ns_common *macvtap_net_namespace(const struct device *d) { const struct net_device *dev = to_net_dev(d->parent); - return dev_net(dev); + + return to_ns_common(dev_net(dev)); } static struct class macvtap_class = { diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 8d40c4b1db9f..be262145ae08 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -313,7 +313,8 @@ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) * * Return: 31-bit hash of ns + name (so it fits in an off_t) */ -static unsigned int kernfs_name_hash(const char *name, const void *ns) +static unsigned int kernfs_name_hash(const char *name, + const struct ns_common *ns) { unsigned long hash = init_name_hash(ns); unsigned int len = strlen(name); @@ -330,7 +331,7 @@ static unsigned int kernfs_name_hash(const char *name, const void *ns) } static int kernfs_name_compare(unsigned int hash, const char *name, - const void *ns, const struct kernfs_node *kn) + const struct ns_common *ns, const struct kernfs_node *kn) { if (hash < kn->hash) return -1; @@ -856,7 +857,7 @@ out_unlock: */ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, const unsigned char *name, - const void *ns) + const struct ns_common *ns) { struct rb_node *node = parent->dir.children.rb_node; bool has_ns = kernfs_ns_enabled(parent); @@ -889,7 +890,7 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, const unsigned char *path, - const void *ns) + const struct ns_common *ns) { ssize_t len; char *p, *name; @@ -930,7 +931,8 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, * Return: pointer to the found kernfs_node on success, %NULL on failure. */ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, - const char *name, const void *ns) + const char *name, + const struct ns_common *ns) { struct kernfs_node *kn; struct kernfs_root *root = kernfs_root(parent); @@ -956,7 +958,8 @@ EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); * Return: pointer to the found kernfs_node on success, %NULL on failure. */ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, - const char *path, const void *ns) + const char *path, + const struct ns_common *ns) { struct kernfs_node *kn; struct kernfs_root *root = kernfs_root(parent); @@ -1079,7 +1082,8 @@ struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root) struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, - void *priv, const void *ns) + void *priv, + const struct ns_common *ns) { struct kernfs_node *kn; int rc; @@ -1221,7 +1225,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, struct kernfs_node *kn; struct kernfs_root *root; struct inode *inode = NULL; - const void *ns = NULL; + const struct ns_common *ns = NULL; root = kernfs_root(parent); down_read(&root->kernfs_rwsem); @@ -1702,7 +1706,7 @@ bool kernfs_remove_self(struct kernfs_node *kn) * Return: %0 on success, -ENOENT if such entry doesn't exist. */ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, - const void *ns) + const struct ns_common *ns) { struct kernfs_node *kn; struct kernfs_root *root; @@ -1741,7 +1745,7 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, * Return: %0 on success, -errno on failure. */ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, - const char *new_name, const void *new_ns) + const char *new_name, const struct ns_common *new_ns) { struct kernfs_node *old_parent; struct kernfs_root *root; @@ -1832,7 +1836,7 @@ static int kernfs_dir_fop_release(struct inode *inode, struct file *filp) return 0; } -static struct kernfs_node *kernfs_dir_pos(const void *ns, +static struct kernfs_node *kernfs_dir_pos(const struct ns_common *ns, struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) { if (pos) { @@ -1867,7 +1871,7 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, return pos; } -static struct kernfs_node *kernfs_dir_next_pos(const void *ns, +static struct kernfs_node *kernfs_dir_next_pos(const struct ns_common *ns, struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) { pos = kernfs_dir_pos(ns, parent, ino, pos); @@ -1889,7 +1893,7 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) struct kernfs_node *parent = kernfs_dentry_node(dentry); struct kernfs_node *pos = file->private_data; struct kernfs_root *root; - const void *ns = NULL; + const struct ns_common *ns = NULL; if (!dir_emit_dots(file, ctx)) return 0; diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index e32406d62c0d..1163aa769738 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -1045,7 +1045,7 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, - void *priv, const void *ns, + void *priv, const struct ns_common *ns, struct lock_class_key *key) { struct kernfs_node *kn; diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 6061b6f70d2a..b1fd9622a5e3 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -97,7 +97,7 @@ struct kernfs_super_info { * instance. If multiple tags become necessary, make the following * an array and compare kernfs_node tag against every entry. */ - const void *ns; + const struct ns_common *ns; /* anchored at kernfs_root->supers, protected by kernfs_rwsem */ struct list_head node; diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 048f00b73b71..6e3217b6e481 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -345,7 +345,7 @@ static int kernfs_set_super(struct super_block *sb, struct fs_context *fc) * * Return: the namespace tag associated with kernfs super_block @sb. */ -const void *kernfs_super_ns(struct super_block *sb) +const struct ns_common *kernfs_super_ns(struct super_block *sb) { struct kernfs_super_info *info = kernfs_info(sb); diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 7d8921f524a6..1da4f707f9ef 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -127,9 +128,10 @@ static void nfs_netns_client_release(struct kobject *kobj) kfree(rcu_dereference_raw(c->identifier)); } -static const void *nfs_netns_client_namespace(const struct kobject *kobj) +static const struct ns_common *nfs_netns_client_namespace(const struct kobject *kobj) { - return container_of(kobj, struct nfs_netns_client, kobject)->net; + return to_ns_common(container_of(kobj, struct nfs_netns_client, + kobject)->net); } static struct kobj_attribute nfs_netns_client_id = __ATTR(identifier, @@ -156,9 +158,10 @@ static void nfs_netns_object_release(struct kobject *kobj) kfree(c); } -static const void *nfs_netns_namespace(const struct kobject *kobj) +static const struct ns_common *nfs_netns_namespace(const struct kobject *kobj) { - return container_of(kobj, struct nfs_netns_client, nfs_net_kobj)->net; + return to_ns_common(container_of(kobj, struct nfs_netns_client, + nfs_net_kobj)->net); } static struct kobj_type nfs_netns_object_type = { @@ -350,9 +353,10 @@ static void nfs_sysfs_sb_release(struct kobject *kobj) /* no-op: why? see lib/kobject.c kobject_cleanup() */ } -static const void *nfs_netns_server_namespace(const struct kobject *kobj) +static const struct ns_common *nfs_netns_server_namespace(const struct kobject *kobj) { - return container_of(kobj, struct nfs_server, kobj)->nfs_client->cl_net; + return to_ns_common(container_of(kobj, struct nfs_server, + kobj)->nfs_client->cl_net); } static struct kobj_type nfs_sb_ktype = { diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 94e12efd92f2..ffdcd4153c58 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -37,7 +37,7 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name) * @kobj: object we're creating directory for * @ns: the namespace tag to use */ -int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) +int sysfs_create_dir_ns(struct kobject *kobj, const struct ns_common *ns) { struct kernfs_node *parent, *kn; kuid_t uid; @@ -103,7 +103,7 @@ void sysfs_remove_dir(struct kobject *kobj) } int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, - const void *new_ns) + const struct ns_common *new_ns) { struct kernfs_node *parent; int ret; @@ -115,7 +115,7 @@ int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, } int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, - const void *new_ns) + const struct ns_common *new_ns) { struct kernfs_node *kn = kobj->sd; struct kernfs_node *new_parent; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index a8176c875f55..5709cede1d75 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -272,7 +272,7 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = { int sysfs_add_file_mode_ns(struct kernfs_node *parent, const struct attribute *attr, umode_t mode, kuid_t uid, - kgid_t gid, const void *ns) + kgid_t gid, const struct ns_common *ns) { struct kobject *kobj = parent->priv; const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops; @@ -322,7 +322,7 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent, int sysfs_add_bin_file_mode_ns(struct kernfs_node *parent, const struct bin_attribute *battr, umode_t mode, size_t size, - kuid_t uid, kgid_t gid, const void *ns) + kuid_t uid, kgid_t gid, const struct ns_common *ns) { const struct attribute *attr = &battr->attr; struct lock_class_key *key = NULL; @@ -362,7 +362,7 @@ int sysfs_add_bin_file_mode_ns(struct kernfs_node *parent, * @ns: namespace the new file should belong to */ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns) + const struct ns_common *ns) { kuid_t uid; kgid_t gid; @@ -505,7 +505,7 @@ EXPORT_SYMBOL_GPL(sysfs_unbreak_active_protection); * Hash the attribute name and namespace tag and kill the victim. */ void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns) + const struct ns_common *ns) { struct kernfs_node *parent = kobj->sd; diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index e65c60158a04..b199e8ff79b1 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -55,7 +55,7 @@ static const struct fs_context_operations sysfs_fs_context_ops = { static int sysfs_init_fs_context(struct fs_context *fc) { struct kernfs_fs_context *kfc; - struct net *netns; + struct ns_common *ns; if (!(fc->sb_flags & SB_KERNMOUNT)) { if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) @@ -66,12 +66,14 @@ static int sysfs_init_fs_context(struct fs_context *fc) if (!kfc) return -ENOMEM; - kfc->ns_tag = netns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); + kfc->ns_tag = ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); kfc->root = sysfs_root; kfc->magic = SYSFS_MAGIC; fc->fs_private = kfc; fc->ops = &sysfs_fs_context_ops; - if (netns) { + if (ns) { + struct net *netns = to_net_ns(ns); + put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(netns->user_ns); } @@ -81,7 +83,7 @@ static int sysfs_init_fs_context(struct fs_context *fc) static void sysfs_kill_sb(struct super_block *sb) { - void *ns = (void *)kernfs_super_ns(sb); + struct ns_common *ns = (struct ns_common *)kernfs_super_ns(sb); kernfs_kill_sb(sb); kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 5603530a1a52..5f9c05fb1394 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -121,7 +121,7 @@ EXPORT_SYMBOL_GPL(sysfs_create_link_nowarn); void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, const char *name) { - const void *ns = NULL; + const struct ns_common *ns = NULL; /* * We don't own @target and it may be removed at any time. @@ -164,10 +164,11 @@ EXPORT_SYMBOL_GPL(sysfs_remove_link); * A helper function for the common rename symlink idiom. */ int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ, - const char *old, const char *new, const void *new_ns) + const char *old, const char *new, + const struct ns_common *new_ns) { struct kernfs_node *parent, *kn = NULL; - const void *old_ns = NULL; + const struct ns_common *old_ns = NULL; int result; if (!kobj) diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 8e012f25e1c0..f4583dcafcd1 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -29,10 +29,10 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name); */ int sysfs_add_file_mode_ns(struct kernfs_node *parent, const struct attribute *attr, umode_t amode, kuid_t uid, - kgid_t gid, const void *ns); + kgid_t gid, const struct ns_common *ns); int sysfs_add_bin_file_mode_ns(struct kernfs_node *parent, const struct bin_attribute *battr, umode_t mode, size_t size, - kuid_t uid, kgid_t gid, const void *ns); + kuid_t uid, kgid_t gid, const struct ns_common *ns); /* * symlink.c diff --git a/include/linux/device/class.h b/include/linux/device/class.h index 65880e60c720..021da0d61796 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -62,7 +62,7 @@ struct class { int (*shutdown_pre)(struct device *dev); const struct kobj_ns_type_operations *ns_type; - const void *(*namespace)(const struct device *dev); + const struct ns_common *(*namespace)(const struct device *dev); void (*get_ownership)(const struct device *dev, kuid_t *uid, kgid_t *gid); @@ -180,9 +180,9 @@ struct class_attribute { struct class_attribute class_attr_##_name = __ATTR_WO(_name) int __must_check class_create_file_ns(const struct class *class, const struct class_attribute *attr, - const void *ns); + const struct ns_common *ns); void class_remove_file_ns(const struct class *class, const struct class_attribute *attr, - const void *ns); + const struct ns_common *ns); static inline int __must_check class_create_file(const struct class *class, const struct class_attribute *attr) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index b5a5f32fdfd1..4f0ab88a1b31 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -23,6 +23,7 @@ struct file; struct dentry; struct iattr; +struct ns_common; struct seq_file; struct vm_area_struct; struct vm_operations_struct; @@ -209,7 +210,7 @@ struct kernfs_node { struct rb_node rb; - const void *ns; /* namespace tag */ + const struct ns_common *ns; /* namespace tag */ unsigned int hash; /* ns + name hash */ unsigned short flags; umode_t mode; @@ -331,7 +332,7 @@ struct kernfs_ops { */ struct kernfs_fs_context { struct kernfs_root *root; /* Root of the hierarchy being mounted */ - void *ns_tag; /* Namespace tag of the mount (or NULL) */ + struct ns_common *ns_tag; /* Namespace tag of the mount (or NULL) */ unsigned long magic; /* File system specific magic number */ /* The following are set/used by kernfs_mount() */ @@ -406,9 +407,11 @@ void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, - const char *name, const void *ns); + const char *name, + const struct ns_common *ns); struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, - const char *path, const void *ns); + const char *path, + const struct ns_common *ns); void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); @@ -426,7 +429,8 @@ unsigned int kernfs_root_flags(struct kernfs_node *kn); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, - void *priv, const void *ns); + void *priv, + const struct ns_common *ns); struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, @@ -434,7 +438,8 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, - void *priv, const void *ns, + void *priv, + const struct ns_common *ns, struct lock_class_key *key); struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, @@ -446,9 +451,9 @@ void kernfs_break_active_protection(struct kernfs_node *kn); void kernfs_unbreak_active_protection(struct kernfs_node *kn); bool kernfs_remove_self(struct kernfs_node *kn); int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, - const void *ns); + const struct ns_common *ns); int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, - const char *new_name, const void *new_ns); + const char *new_name, const struct ns_common *new_ns); int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr); __poll_t kernfs_generic_poll(struct kernfs_open_file *of, struct poll_table_struct *pt); @@ -459,7 +464,7 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name, int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags); -const void *kernfs_super_ns(struct super_block *sb); +const struct ns_common *kernfs_super_ns(struct super_block *sb); int kernfs_get_tree(struct fs_context *fc); void kernfs_free_fs_context(struct fs_context *fc); void kernfs_kill_sb(struct super_block *sb); @@ -494,11 +499,11 @@ static inline struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) static inline struct kernfs_node * kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, - const void *ns) + const struct ns_common *ns) { return NULL; } static inline struct kernfs_node * kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, - const void *ns) + const struct ns_common *ns) { return NULL; } static inline void kernfs_get(struct kernfs_node *kn) { } @@ -526,14 +531,15 @@ static inline unsigned int kernfs_root_flags(struct kernfs_node *kn) static inline struct kernfs_node * kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, - void *priv, const void *ns) + void *priv, const struct ns_common *ns) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * __kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, - void *priv, const void *ns, struct lock_class_key *key) + void *priv, const struct ns_common *ns, + struct lock_class_key *key) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * @@ -549,12 +555,14 @@ static inline bool kernfs_remove_self(struct kernfs_node *kn) { return false; } static inline int kernfs_remove_by_name_ns(struct kernfs_node *kn, - const char *name, const void *ns) + const char *name, + const struct ns_common *ns) { return -ENOSYS; } static inline int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, - const char *new_name, const void *new_ns) + const char *new_name, + const struct ns_common *new_ns) { return -ENOSYS; } static inline int kernfs_setattr(struct kernfs_node *kn, @@ -575,7 +583,7 @@ static inline int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags) { return -ENOSYS; } -static inline const void *kernfs_super_ns(struct super_block *sb) +static inline const struct ns_common *kernfs_super_ns(struct super_block *sb) { return NULL; } static inline int kernfs_get_tree(struct fs_context *fc) diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c8219505a79f..bcb5d4e32001 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -109,7 +109,7 @@ struct kobject *kobject_get(struct kobject *kobj); struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj); void kobject_put(struct kobject *kobj); -const void *kobject_namespace(const struct kobject *kobj); +const struct ns_common *kobject_namespace(const struct kobject *kobj); void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid); char *kobject_get_path(const struct kobject *kobj, gfp_t flag); @@ -118,7 +118,7 @@ struct kobj_type { const struct sysfs_ops *sysfs_ops; const struct attribute_group **default_groups; const struct kobj_ns_type_operations *(*child_ns_type)(const struct kobject *kobj); - const void *(*namespace)(const struct kobject *kobj); + const struct ns_common *(*namespace)(const struct kobject *kobj); void (*get_ownership)(const struct kobject *kobj, kuid_t *uid, kgid_t *gid); }; diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 150fe2ae1b6b..4f0990e09b93 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -16,6 +16,7 @@ #ifndef _LINUX_KOBJECT_NS_H #define _LINUX_KOBJECT_NS_H +struct ns_common; struct sock; struct kobject; @@ -39,10 +40,10 @@ enum kobj_ns_type { struct kobj_ns_type_operations { enum kobj_ns_type type; bool (*current_may_mount)(void); - void *(*grab_current_ns)(void); - const void *(*netlink_ns)(struct sock *sk); - const void *(*initial_ns)(void); - void (*drop_ns)(void *); + struct ns_common *(*grab_current_ns)(void); + const struct ns_common *(*netlink_ns)(struct sock *sk); + const struct ns_common *(*initial_ns)(void); + void (*drop_ns)(struct ns_common *); }; int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); @@ -51,7 +52,7 @@ const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *pa const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj); bool kobj_ns_current_may_mount(enum kobj_ns_type type); -void *kobj_ns_grab_current(enum kobj_ns_type type); -void kobj_ns_drop(enum kobj_ns_type type, void *ns); +struct ns_common *kobj_ns_grab_current(enum kobj_ns_type type); +void kobj_ns_drop(enum kobj_ns_type type, struct ns_common *ns); #endif /* _LINUX_KOBJECT_NS_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ca01eb3f7d2..85c20bdd36fb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5339,9 +5339,9 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi } int netdev_class_create_file_ns(const struct class_attribute *class_attr, - const void *ns); + const struct ns_common *ns); void netdev_class_remove_file_ns(const struct class_attribute *class_attr, - const void *ns); + const struct ns_common *ns); extern const struct kobj_ns_type_operations net_ns_type_operations; diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 99b775f3ff46..468259fb6049 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -396,13 +396,13 @@ struct sysfs_ops { #ifdef CONFIG_SYSFS -int __must_check sysfs_create_dir_ns(struct kobject *kobj, const void *ns); +int __must_check sysfs_create_dir_ns(struct kobject *kobj, const struct ns_common *ns); void sysfs_remove_dir(struct kobject *kobj); int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, - const void *new_ns); + const struct ns_common *new_ns); int __must_check sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, - const void *new_ns); + const struct ns_common *new_ns); int __must_check sysfs_create_mount_point(struct kobject *parent_kobj, const char *name); void sysfs_remove_mount_point(struct kobject *parent_kobj, @@ -410,7 +410,7 @@ void sysfs_remove_mount_point(struct kobject *parent_kobj, int __must_check sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns); + const struct ns_common *ns); int __must_check sysfs_create_files(struct kobject *kobj, const struct attribute * const *attr); int __must_check sysfs_chmod_file(struct kobject *kobj, @@ -419,7 +419,7 @@ struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, const struct attribute *attr); void sysfs_unbreak_active_protection(struct kernfs_node *kn); void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns); + const struct ns_common *ns); bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr); void sysfs_remove_files(struct kobject *kobj, const struct attribute * const *attr); @@ -437,7 +437,7 @@ void sysfs_remove_link(struct kobject *kobj, const char *name); int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *target, const char *old_name, const char *new_name, - const void *new_ns); + const struct ns_common *new_ns); void sysfs_delete_link(struct kobject *dir, struct kobject *targ, const char *name); @@ -502,7 +502,7 @@ ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj, #else /* CONFIG_SYSFS */ -static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) +static inline int sysfs_create_dir_ns(struct kobject *kobj, const struct ns_common *ns) { return 0; } @@ -512,14 +512,14 @@ static inline void sysfs_remove_dir(struct kobject *kobj) } static inline int sysfs_rename_dir_ns(struct kobject *kobj, - const char *new_name, const void *new_ns) + const char *new_name, const struct ns_common *new_ns) { return 0; } static inline int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, - const void *new_ns) + const struct ns_common *new_ns) { return 0; } @@ -537,7 +537,7 @@ static inline void sysfs_remove_mount_point(struct kobject *parent_kobj, static inline int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns) + const struct ns_common *ns) { return 0; } @@ -567,7 +567,7 @@ static inline void sysfs_unbreak_active_protection(struct kernfs_node *kn) static inline void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns) + const struct ns_common *ns) { } @@ -612,7 +612,7 @@ static inline void sysfs_remove_link(struct kobject *kobj, const char *name) static inline int sysfs_rename_link_ns(struct kobject *k, struct kobject *t, const char *old_name, - const char *new_name, const void *ns) + const char *new_name, const struct ns_common *ns) { return 0; } diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index d7bec49ee9ea..80de5e98a66d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -264,14 +264,14 @@ void ipx_unregister_sysctl(void); #define ipx_unregister_sysctl() #endif -#ifdef CONFIG_NET_NS -void __put_net(struct net *net); - static inline struct net *to_net_ns(struct ns_common *ns) { return container_of(ns, struct net, ns); } +#ifdef CONFIG_NET_NS +void __put_net(struct net *net); + /* Try using get_net_track() instead */ static inline struct net *get_net(struct net *net) { @@ -309,7 +309,7 @@ static inline int check_net(const struct net *net) return ns_ref_read(net) != 0; } -void net_drop_ns(void *); +void net_drop_ns(struct ns_common *); void net_passive_dec(struct net *net); #else diff --git a/lib/kobject.c b/lib/kobject.c index cfdb2c3f20a2..9c9ff0f5175f 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -27,7 +27,7 @@ * and thus @kobj should have a namespace tag associated with it. Returns * %NULL otherwise. */ -const void *kobject_namespace(const struct kobject *kobj) +const struct ns_common *kobject_namespace(const struct kobject *kobj) { const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj); @@ -1083,9 +1083,9 @@ bool kobj_ns_current_may_mount(enum kobj_ns_type type) return may_mount; } -void *kobj_ns_grab_current(enum kobj_ns_type type) +struct ns_common *kobj_ns_grab_current(enum kobj_ns_type type) { - void *ns = NULL; + struct ns_common *ns = NULL; spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) @@ -1096,7 +1096,7 @@ void *kobj_ns_grab_current(enum kobj_ns_type type) } EXPORT_SYMBOL_GPL(kobj_ns_grab_current); -void kobj_ns_drop(enum kobj_ns_type type, void *ns) +void kobj_ns_drop(enum kobj_ns_type type, struct ns_common *ns) { spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 871941c9830c..ddbc4d7482d2 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -238,7 +238,7 @@ static int kobj_usermode_filter(struct kobject *kobj) ops = kobj_ns_ops(kobj); if (ops) { - const void *init_ns, *ns; + const struct ns_common *init_ns, *ns; ns = kobj->ktype->namespace(kobj); init_ns = ops->initial_ns(); @@ -388,7 +388,7 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, #ifdef CONFIG_NET const struct kobj_ns_type_operations *ops; - const struct net *net = NULL; + const struct ns_common *ns = NULL; ops = kobj_ns_ops(kobj); if (!ops && kobj->kset) { @@ -404,14 +404,17 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, */ if (ops && ops->netlink_ns && kobj->ktype->namespace) if (ops->type == KOBJ_NS_TYPE_NET) - net = kobj->ktype->namespace(kobj); + ns = kobj->ktype->namespace(kobj); - if (!net) + if (!ns) ret = uevent_net_broadcast_untagged(env, action_string, devpath); - else + else { + const struct net *net = container_of(ns, struct net, ns); + ret = uevent_net_broadcast_tagged(net->uevent_sock->sk, env, action_string, devpath); + } #endif return ret; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 07624b682b08..b9740a397f55 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1181,24 +1181,24 @@ static void rx_queue_release(struct kobject *kobj) netdev_put(queue->dev, &queue->dev_tracker); } -static const void *rx_queue_namespace(const struct kobject *kobj) +static const struct ns_common *rx_queue_namespace(const struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); struct device *dev = &queue->dev->dev; - const void *ns = NULL; if (dev->class && dev->class->namespace) - ns = dev->class->namespace(dev); + return dev->class->namespace(dev); - return ns; + return NULL; } static void rx_queue_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { - const struct net *net = rx_queue_namespace(kobj); + const struct ns_common *ns = rx_queue_namespace(kobj); - net_ns_get_ownership(net, uid, gid); + net_ns_get_ownership(ns ? container_of(ns, struct net, ns) : NULL, + uid, gid); } static const struct kobj_type rx_queue_ktype = { @@ -1931,24 +1931,24 @@ static void netdev_queue_release(struct kobject *kobj) netdev_put(queue->dev, &queue->dev_tracker); } -static const void *netdev_queue_namespace(const struct kobject *kobj) +static const struct ns_common *netdev_queue_namespace(const struct kobject *kobj) { struct netdev_queue *queue = to_netdev_queue(kobj); struct device *dev = &queue->dev->dev; - const void *ns = NULL; if (dev->class && dev->class->namespace) - ns = dev->class->namespace(dev); + return dev->class->namespace(dev); - return ns; + return NULL; } static void netdev_queue_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { - const struct net *net = netdev_queue_namespace(kobj); + const struct ns_common *ns = netdev_queue_namespace(kobj); - net_ns_get_ownership(net, uid, gid); + net_ns_get_ownership(ns ? container_of(ns, struct net, ns) : NULL, + uid, gid); } static const struct kobj_type netdev_queue_ktype = { @@ -2185,24 +2185,24 @@ static bool net_current_may_mount(void) return ns_capable(net->user_ns, CAP_SYS_ADMIN); } -static void *net_grab_current_ns(void) +static struct ns_common *net_grab_current_ns(void) { - struct net *ns = current->nsproxy->net_ns; + struct net *net = current->nsproxy->net_ns; #ifdef CONFIG_NET_NS - if (ns) - refcount_inc(&ns->passive); + if (net) + refcount_inc(&net->passive); #endif - return ns; + return net ? to_ns_common(net) : NULL; } -static const void *net_initial_ns(void) +static const struct ns_common *net_initial_ns(void) { - return &init_net; + return to_ns_common(&init_net); } -static const void *net_netlink_ns(struct sock *sk) +static const struct ns_common *net_netlink_ns(struct sock *sk) { - return sock_net(sk); + return to_ns_common(sock_net(sk)); } const struct kobj_ns_type_operations net_ns_type_operations = { @@ -2252,11 +2252,11 @@ static void netdev_release(struct device *d) kvfree(dev); } -static const void *net_namespace(const struct device *d) +static const struct ns_common *net_namespace(const struct device *d) { const struct net_device *dev = to_net_dev(d); - return dev_net(dev); + return to_ns_common(dev_net(dev)); } static void net_get_ownership(const struct device *d, kuid_t *uid, kgid_t *gid) @@ -2402,14 +2402,14 @@ int netdev_change_owner(struct net_device *ndev, const struct net *net_old, } int netdev_class_create_file_ns(const struct class_attribute *class_attr, - const void *ns) + const struct ns_common *ns) { return class_create_file_ns(&net_class, class_attr, ns); } EXPORT_SYMBOL(netdev_class_create_file_ns); void netdev_class_remove_file_ns(const struct class_attribute *class_attr, - const void *ns) + const struct ns_common *ns) { class_remove_file_ns(&net_class, class_attr, ns); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 1057d16d5dd2..24aa10a1d0ea 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -540,12 +540,10 @@ void net_passive_dec(struct net *net) } } -void net_drop_ns(void *p) +void net_drop_ns(struct ns_common *ns) { - struct net *net = (struct net *)p; - - if (net) - net_passive_dec(net); + if (ns) + net_passive_dec(to_net_ns(ns)); } struct net *copy_net_ns(u64 flags, diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index af8fac9cedd4..a90480f80154 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "sysfs.h" @@ -553,20 +554,22 @@ static void rpc_sysfs_xprt_release(struct kobject *kobj) kfree(xprt); } -static const void *rpc_sysfs_client_namespace(const struct kobject *kobj) +static const struct ns_common *rpc_sysfs_client_namespace(const struct kobject *kobj) { - return container_of(kobj, struct rpc_sysfs_client, kobject)->net; + return to_ns_common(container_of(kobj, struct rpc_sysfs_client, + kobject)->net); } -static const void *rpc_sysfs_xprt_switch_namespace(const struct kobject *kobj) +static const struct ns_common *rpc_sysfs_xprt_switch_namespace(const struct kobject *kobj) { - return container_of(kobj, struct rpc_sysfs_xprt_switch, kobject)->net; + return to_ns_common(container_of(kobj, struct rpc_sysfs_xprt_switch, + kobject)->net); } -static const void *rpc_sysfs_xprt_namespace(const struct kobject *kobj) +static const struct ns_common *rpc_sysfs_xprt_namespace(const struct kobject *kobj) { - return container_of(kobj, struct rpc_sysfs_xprt, - kobject)->xprt->xprt_net; + return to_ns_common(container_of(kobj, struct rpc_sysfs_xprt, + kobject)->xprt->xprt_net); } static struct kobj_attribute rpc_sysfs_clnt_version = __ATTR(rpc_version, diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 2e0ea69b9604..0b9abe70d39d 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -154,11 +154,11 @@ static SIMPLE_DEV_PM_OPS(wiphy_pm_ops, wiphy_suspend, wiphy_resume); #define WIPHY_PM_OPS NULL #endif -static const void *wiphy_namespace(const struct device *d) +static const struct ns_common *wiphy_namespace(const struct device *d) { struct wiphy *wiphy = container_of(d, struct wiphy, dev); - return wiphy_net(wiphy); + return to_ns_common(wiphy_net(wiphy)); } struct class ieee80211_class = { -- cgit v1.2.3 From d6e152d905bdb1f32f9d99775e2f453350399a6a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Apr 2026 10:54:17 +0200 Subject: clockevents: Prevent timer interrupt starvation Calvin reported an odd NMI watchdog lockup which claims that the CPU locked up in user space. He provided a reproducer, which sets up a timerfd based timer and then rearms it in a loop with an absolute expiry time of 1ns. As the expiry time is in the past, the timer ends up as the first expiring timer in the per CPU hrtimer base and the clockevent device is programmed with the minimum delta value. If the machine is fast enough, this ends up in a endless loop of programming the delta value to the minimum value defined by the clock event device, before the timer interrupt can fire, which starves the interrupt and consequently triggers the lockup detector because the hrtimer callback of the lockup mechanism is never invoked. As a first step to prevent this, avoid reprogramming the clock event device when: - a forced minimum delta event is pending - the new expiry delta is less then or equal to the minimum delta Thanks to Calvin for providing the reproducer and to Borislav for testing and providing data from his Zen5 machine. The problem is not limited to Zen5, but depending on the underlying clock event device (e.g. TSC deadline timer on Intel) and the CPU speed not necessarily observable. This change serves only as the last resort and further changes will be made to prevent this scenario earlier in the call chain as far as possible. [ tglx: Updated to restore the old behaviour vs. !force and delta <= 0 and fixed up the tick-broadcast handlers as pointed out by Borislav ] Fixes: d316c57ff6bf ("[PATCH] clockevents: add core functionality") Reported-by: Calvin Owens Signed-off-by: Thomas Gleixner Tested-by: Calvin Owens Tested-by: Borislav Petkov Link: https://lore.kernel.org/lkml/acMe-QZUel-bBYUh@mozart.vkv.me/ Link: https://patch.msgid.link/20260407083247.562657657@kernel.org --- include/linux/clockchips.h | 2 ++ kernel/time/clockevents.c | 27 +++++++++++++++++++-------- kernel/time/hrtimer.c | 1 + kernel/time/tick-broadcast.c | 8 +++++++- kernel/time/tick-common.c | 1 + kernel/time/tick-sched.c | 1 + 6 files changed, 31 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index b0df28ddd394..50cdc9da8d32 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -80,6 +80,7 @@ enum clock_event_state { * @shift: nanoseconds to cycles divisor (power of two) * @state_use_accessors:current state of the device, assigned by the core code * @features: features + * @next_event_forced: True if the last programming was a forced event * @retries: number of forced programming retries * @set_state_periodic: switch state to periodic * @set_state_oneshot: switch state to oneshot @@ -108,6 +109,7 @@ struct clock_event_device { u32 shift; enum clock_event_state state_use_accessors; unsigned int features; + unsigned int next_event_forced; unsigned long retries; int (*set_state_periodic)(struct clock_event_device *); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index eaae1ce9f060..38570998a19b 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -172,6 +172,7 @@ void clockevents_shutdown(struct clock_event_device *dev) { clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN); dev->next_event = KTIME_MAX; + dev->next_event_forced = 0; } /** @@ -305,7 +306,6 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, { unsigned long long clc; int64_t delta; - int rc; if (WARN_ON_ONCE(expires < 0)) return -ETIME; @@ -324,16 +324,27 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, return dev->set_next_ktime(expires, dev); delta = ktime_to_ns(ktime_sub(expires, ktime_get())); - if (delta <= 0) - return force ? clockevents_program_min_delta(dev) : -ETIME; - delta = min(delta, (int64_t) dev->max_delta_ns); - delta = max(delta, (int64_t) dev->min_delta_ns); + /* Required for tick_periodic() during early boot */ + if (delta <= 0 && !force) + return -ETIME; + + if (delta > (int64_t)dev->min_delta_ns) { + delta = min(delta, (int64_t) dev->max_delta_ns); + clc = ((unsigned long long) delta * dev->mult) >> dev->shift; + if (!dev->set_next_event((unsigned long) clc, dev)) + return 0; + } - clc = ((unsigned long long) delta * dev->mult) >> dev->shift; - rc = dev->set_next_event((unsigned long) clc, dev); + if (dev->next_event_forced) + return 0; - return (rc && force) ? clockevents_program_min_delta(dev) : rc; + if (dev->set_next_event(dev->min_delta_ticks, dev)) { + if (!force || clockevents_program_min_delta(dev)) + return -ETIME; + } + dev->next_event_forced = 1; + return 0; } /* diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 860af7a58428..1e37142fe52f 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1888,6 +1888,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; dev->next_event = KTIME_MAX; + dev->next_event_forced = 0; raw_spin_lock_irqsave(&cpu_base->lock, flags); entry_time = now = hrtimer_update_base(cpu_base); diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f63c65881364..7e57fa31ee26 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -76,8 +76,10 @@ const struct clock_event_device *tick_get_wakeup_device(int cpu) */ static void tick_broadcast_start_periodic(struct clock_event_device *bc) { - if (bc) + if (bc) { + bc->next_event_forced = 0; tick_setup_periodic(bc, 1); + } } /* @@ -403,6 +405,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) bool bc_local; raw_spin_lock(&tick_broadcast_lock); + tick_broadcast_device.evtdev->next_event_forced = 0; /* Handle spurious interrupts gracefully */ if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) { @@ -696,6 +699,7 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) raw_spin_lock(&tick_broadcast_lock); dev->next_event = KTIME_MAX; + tick_broadcast_device.evtdev->next_event_forced = 0; next_event = KTIME_MAX; cpumask_clear(tmpmask); now = ktime_get(); @@ -1063,6 +1067,7 @@ static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bc->event_handler = tick_handle_oneshot_broadcast; + bc->next_event_forced = 0; bc->next_event = KTIME_MAX; /* @@ -1175,6 +1180,7 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu) } /* This moves the broadcast assignment to this CPU: */ + bc->next_event_forced = 0; clockevents_program_event(bc, bc->next_event, 1); } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index d305d8521896..6a9198a4279b 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -110,6 +110,7 @@ void tick_handle_periodic(struct clock_event_device *dev) int cpu = smp_processor_id(); ktime_t next = dev->next_event; + dev->next_event_forced = 0; tick_periodic(cpu); /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 36449f0010a4..d1f27df1e60e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1513,6 +1513,7 @@ static void tick_nohz_lowres_handler(struct clock_event_device *dev) struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); dev->next_event = KTIME_MAX; + dev->next_event_forced = 0; if (likely(tick_nohz_handler(&ts->sched_timer) == HRTIMER_RESTART)) tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); -- cgit v1.2.3 From 1f95fdef685ee76393981de062e6b26210d88a9c Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Tue, 14 Apr 2026 09:54:41 +0800 Subject: ASoC: tas2781: Add tas5832 support TAS5832 is in same family with TAS5827/28/30. Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20260414015441.2439-2-baojun.xu@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781.h | 1 + sound/soc/codecs/tas2781-i2c.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index e847cf51878c..95296bb4a33a 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -131,6 +131,7 @@ enum audio_device { TAS5827, TAS5828, TAS5830, + TAS5832, TAS_OTHERS, }; diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 8af30f4d68da..a78a8f9b9833 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -119,6 +119,7 @@ static const struct i2c_device_id tasdevice_id[] = { { "tas5827", TAS5827 }, { "tas5828", TAS5828 }, { "tas5830", TAS5830 }, + { "tas5832", TAS5832 }, {} }; @@ -142,6 +143,7 @@ static const struct of_device_id tasdevice_of_match[] = { { .compatible = "ti,tas5827", .data = &tasdevice_id[TAS5827] }, { .compatible = "ti,tas5828", .data = &tasdevice_id[TAS5828] }, { .compatible = "ti,tas5830", .data = &tasdevice_id[TAS5830] }, + { .compatible = "ti,tas5832", .data = &tasdevice_id[TAS5832] }, {}, }; MODULE_DEVICE_TABLE(of, tasdevice_of_match); @@ -1744,6 +1746,7 @@ out: case TAS5827: case TAS5828: case TAS5830: + case TAS5832: /* If DSP FW fail, DSP kcontrol won't be created. */ tasdevice_dsp_remove(tas_priv); } @@ -1915,6 +1918,7 @@ static int tasdevice_codec_probe(struct snd_soc_component *codec) case TAS5827: case TAS5828: case TAS5830: + case TAS5832: p = (struct snd_kcontrol_new *)tas5825_snd_controls; size = ARRAY_SIZE(tas5825_snd_controls); break; @@ -2101,6 +2105,7 @@ static const struct acpi_device_id tasdevice_acpi_match[] = { { "TXNW5827", (kernel_ulong_t)&tasdevice_id[TAS5827] }, { "TXNW5828", (kernel_ulong_t)&tasdevice_id[TAS5828] }, { "TXNW5830", (kernel_ulong_t)&tasdevice_id[TAS5830] }, + { "TXNW5832", (kernel_ulong_t)&tasdevice_id[TAS5832] }, {}, }; -- cgit v1.2.3