From db06ae7ce9fdd3076d81588e80a2a41c3ce82765 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Fri, 1 Mar 2024 11:46:10 +0800 Subject: scsi: ufs: core: Add config_scsi_dev vops comment Add config_scsi_dev vops comment. Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20240301034610.24928-1-peter.wang@mediatek.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index cb2afcebbdf5..a35e12f8e68b 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -328,6 +328,7 @@ struct ufs_pwr_mode_info { * @op_runtime_config: called to config Operation and runtime regs Pointers * @get_outstanding_cqs: called to get outstanding completion queues * @config_esi: called to config Event Specific Interrupt + * @config_scsi_dev: called to configure SCSI device parameters */ struct ufs_hba_variant_ops { const char *name; -- cgit v1.2.3 From f6e922365faf4cd576bd1cf3e64b58c8a32e1856 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 18 Mar 2024 09:54:27 -0700 Subject: xsk: Don't assume metadata is always requested in TX completion `compl->tx_timestam != NULL` means that the user has explicitly requested the metadata via XDP_TX_METADATA+XDP_TX_METADATA_TIMESTAMP. Fixes: 48eb03dd2630 ("xsk: Add TX timestamp and TX checksum offload support") Reported-by: Daniele Salvatore Albano Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Tested-by: Daniele Salvatore Albano Link: https://lore.kernel.org/bpf/20240318165427.1403313-1-sdf@google.com --- include/net/xdp_sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 3cb4dc9bd70e..3d54de168a6d 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -188,6 +188,8 @@ static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, { if (!compl) return; + if (!compl->tx_timestamp) + return; *compl->tx_timestamp = ops->tmo_fill_timestamp(priv); } -- cgit v1.2.3 From 02545bc57512b7660625e454e60e3fb0d07f660d Mon Sep 17 00:00:00 2001 From: Brent Lu Date: Mon, 27 Nov 2023 14:06:56 +0200 Subject: ALSA: hda: intel-nhlt: add intel_nhlt_ssp_device_type() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper function intel_nhlt_ssp_device_type() to detect the type of specific SSP port. The result is nhlt_device_type enum type which could be NHLT_DEVICE_BT or NHLT_DEVICE_I2S. Signed-off-by: Brent Lu Reviewed-by: Péter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Signed-off-by: Peter Ujfalusi Signed-off-by: Takashi Iwai Message-ID: <20231127120657.19764-2-peter.ujfalusi@linux.intel.com> --- include/sound/intel-nhlt.h | 10 ++++++++++ sound/hda/intel-nhlt.c | 26 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'include') diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h index 53470d6a28d6..24dbe16684ae 100644 --- a/include/sound/intel-nhlt.h +++ b/include/sound/intel-nhlt.h @@ -143,6 +143,9 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, u32 bus_id, u8 link_type, u8 vbps, u8 bps, u8 num_ch, u32 rate, u8 dir, u8 dev_type); +int intel_nhlt_ssp_device_type(struct device *dev, struct nhlt_acpi_table *nhlt, + u8 virtual_bus_id); + #else static inline struct nhlt_acpi_table *intel_nhlt_init(struct device *dev) @@ -184,6 +187,13 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, return NULL; } +static inline int intel_nhlt_ssp_device_type(struct device *dev, + struct nhlt_acpi_table *nhlt, + u8 virtual_bus_id) +{ + return -EINVAL; +} + #endif #endif diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c index 696a958d93e9..088cff799e0b 100644 --- a/sound/hda/intel-nhlt.c +++ b/sound/hda/intel-nhlt.c @@ -343,3 +343,29 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, return NULL; } EXPORT_SYMBOL(intel_nhlt_get_endpoint_blob); + +int intel_nhlt_ssp_device_type(struct device *dev, struct nhlt_acpi_table *nhlt, + u8 virtual_bus_id) +{ + struct nhlt_endpoint *epnt; + int i; + + if (!nhlt) + return -EINVAL; + + epnt = (struct nhlt_endpoint *)nhlt->desc; + for (i = 0; i < nhlt->endpoint_count; i++) { + /* for SSP link the virtual bus id is the SSP port number */ + if (epnt->linktype == NHLT_LINK_SSP && + epnt->virtual_bus_id == virtual_bus_id) { + dev_dbg(dev, "SSP%d: dev_type=%d\n", virtual_bus_id, + epnt->device_type); + return epnt->device_type; + } + + epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length); + } + + return -EINVAL; +} +EXPORT_SYMBOL(intel_nhlt_ssp_device_type); -- cgit v1.2.3 From 203a6763ab699da0568fd2b76303d03bb121abd4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 13 Mar 2024 16:32:27 -0700 Subject: Revert "crypto: pkcs7 - remove sha1 support" This reverts commit 16ab7cb5825fc3425c16ad2c6e53d827f382d7c6 because it broke iwd. iwd uses the KEYCTL_PKEY_* UAPIs via its dependency libell, and apparently it is relying on SHA-1 signature support. These UAPIs are fairly obscure, and their documentation does not mention which algorithms they support. iwd really should be using a properly supported userspace crypto library instead. Regardless, since something broke we have to revert the change. It may be possible that some parts of this commit can be reinstated without breaking iwd (e.g. probably the removal of MODULE_SIG_SHA1), but for now this just does a full revert to get things working again. Reported-by: Karel Balej Closes: https://lore.kernel.org/r/CZSHRUIJ4RKL.34T4EASV5DNJM@matfyz.cz Cc: Dimitri John Ledkov Signed-off-by: Eric Biggers Tested-by: Karel Balej Signed-off-by: Herbert Xu --- crypto/asymmetric_keys/mscode_parser.c | 3 ++ crypto/asymmetric_keys/pkcs7_parser.c | 4 ++ crypto/asymmetric_keys/public_key.c | 3 +- crypto/asymmetric_keys/signature.c | 2 +- crypto/asymmetric_keys/x509_cert_parser.c | 8 ++++ crypto/testmgr.h | 80 +++++++++++++++++++++++++++++++ include/linux/oid_registry.h | 4 ++ kernel/module/Kconfig | 5 ++ 8 files changed, 107 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/crypto/asymmetric_keys/mscode_parser.c b/crypto/asymmetric_keys/mscode_parser.c index 05402ef8964e..8aecbe4637f3 100644 --- a/crypto/asymmetric_keys/mscode_parser.c +++ b/crypto/asymmetric_keys/mscode_parser.c @@ -75,6 +75,9 @@ int mscode_note_digest_algo(void *context, size_t hdrlen, oid = look_up_OID(value, vlen); switch (oid) { + case OID_sha1: + ctx->digest_algo = "sha1"; + break; case OID_sha256: ctx->digest_algo = "sha256"; break; diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c index 5b08c50722d0..231ad7b3789d 100644 --- a/crypto/asymmetric_keys/pkcs7_parser.c +++ b/crypto/asymmetric_keys/pkcs7_parser.c @@ -227,6 +227,9 @@ int pkcs7_sig_note_digest_algo(void *context, size_t hdrlen, struct pkcs7_parse_context *ctx = context; switch (ctx->last_oid) { + case OID_sha1: + ctx->sinfo->sig->hash_algo = "sha1"; + break; case OID_sha256: ctx->sinfo->sig->hash_algo = "sha256"; break; @@ -278,6 +281,7 @@ int pkcs7_sig_note_pkey_algo(void *context, size_t hdrlen, ctx->sinfo->sig->pkey_algo = "rsa"; ctx->sinfo->sig->encoding = "pkcs1"; break; + case OID_id_ecdsa_with_sha1: case OID_id_ecdsa_with_sha224: case OID_id_ecdsa_with_sha256: case OID_id_ecdsa_with_sha384: diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index e5f22691febd..e314fd57e6f8 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -115,7 +115,8 @@ software_key_determine_akcipher(const struct public_key *pkey, */ if (!hash_algo) return -EINVAL; - if (strcmp(hash_algo, "sha224") != 0 && + if (strcmp(hash_algo, "sha1") != 0 && + strcmp(hash_algo, "sha224") != 0 && strcmp(hash_algo, "sha256") != 0 && strcmp(hash_algo, "sha384") != 0 && strcmp(hash_algo, "sha512") != 0 && diff --git a/crypto/asymmetric_keys/signature.c b/crypto/asymmetric_keys/signature.c index 398983be77e8..2deff81f8af5 100644 --- a/crypto/asymmetric_keys/signature.c +++ b/crypto/asymmetric_keys/signature.c @@ -115,7 +115,7 @@ EXPORT_SYMBOL_GPL(decrypt_blob); * Sign the specified data blob using the private key specified by params->key. * The signature is wrapped in an encoding if params->encoding is specified * (eg. "pkcs1"). If the encoding needs to know the digest type, this can be - * passed through params->hash_algo (eg. "sha512"). + * passed through params->hash_algo (eg. "sha1"). * * Returns the length of the data placed in the signature buffer or an error. */ diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index 487204d39426..bb0bffa271b5 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -198,6 +198,10 @@ int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag, default: return -ENOPKG; /* Unsupported combination */ + case OID_sha1WithRSAEncryption: + ctx->cert->sig->hash_algo = "sha1"; + goto rsa_pkcs1; + case OID_sha256WithRSAEncryption: ctx->cert->sig->hash_algo = "sha256"; goto rsa_pkcs1; @@ -214,6 +218,10 @@ int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag, ctx->cert->sig->hash_algo = "sha224"; goto rsa_pkcs1; + case OID_id_ecdsa_with_sha1: + ctx->cert->sig->hash_algo = "sha1"; + goto ecdsa; + case OID_id_rsassa_pkcs1_v1_5_with_sha3_256: ctx->cert->sig->hash_algo = "sha3-256"; goto rsa_pkcs1; diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 986f331a5fc2..12e1c892f366 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -653,6 +653,30 @@ static const struct akcipher_testvec rsa_tv_template[] = { static const struct akcipher_testvec ecdsa_nist_p192_tv_template[] = { { .key = + "\x04\xf7\x46\xf8\x2f\x15\xf6\x22\x8e\xd7\x57\x4f\xcc\xe7\xbb\xc1" + "\xd4\x09\x73\xcf\xea\xd0\x15\x07\x3d\xa5\x8a\x8a\x95\x43\xe4\x68" + "\xea\xc6\x25\xc1\xc1\x01\x25\x4c\x7e\xc3\x3c\xa6\x04\x0a\xe7\x08" + "\x98", + .key_len = 49, + .params = + "\x30\x13\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x08\x2a\x86\x48" + "\xce\x3d\x03\x01\x01", + .param_len = 21, + .m = + "\xcd\xb9\xd2\x1c\xb7\x6f\xcd\x44\xb3\xfd\x63\xea\xa3\x66\x7f\xae" + "\x63\x85\xe7\x82", + .m_size = 20, + .algo = OID_id_ecdsa_with_sha1, + .c = + "\x30\x35\x02\x19\x00\xba\xe5\x93\x83\x6e\xb6\x3b\x63\xa0\x27\x91" + "\xc6\xf6\x7f\xc3\x09\xad\x59\xad\x88\x27\xd6\x92\x6b\x02\x18\x10" + "\x68\x01\x9d\xba\xce\x83\x08\xef\x95\x52\x7b\xa0\x0f\xe4\x18\x86" + "\x80\x6f\xa5\x79\x77\xda\xd0", + .c_size = 55, + .public_key_vec = true, + .siggen_sigver_test = true, + }, { + .key = "\x04\xb6\x4b\xb1\xd1\xac\xba\x24\x8f\x65\xb2\x60\x00\x90\xbf\xbd" "\x78\x05\x73\xe9\x79\x1d\x6f\x7c\x0b\xd2\xc3\x93\xa7\x28\xe1\x75" "\xf7\xd5\x95\x1d\x28\x10\xc0\x75\x50\x5c\x1a\x4f\x3f\x8f\xa5\xee" @@ -756,6 +780,32 @@ static const struct akcipher_testvec ecdsa_nist_p192_tv_template[] = { static const struct akcipher_testvec ecdsa_nist_p256_tv_template[] = { { .key = + "\x04\xb9\x7b\xbb\xd7\x17\x64\xd2\x7e\xfc\x81\x5d\x87\x06\x83\x41" + "\x22\xd6\x9a\xaa\x87\x17\xec\x4f\x63\x55\x2f\x94\xba\xdd\x83\xe9" + "\x34\x4b\xf3\xe9\x91\x13\x50\xb6\xcb\xca\x62\x08\xe7\x3b\x09\xdc" + "\xc3\x63\x4b\x2d\xb9\x73\x53\xe4\x45\xe6\x7c\xad\xe7\x6b\xb0\xe8" + "\xaf", + .key_len = 65, + .params = + "\x30\x13\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x08\x2a\x86\x48" + "\xce\x3d\x03\x01\x07", + .param_len = 21, + .m = + "\xc2\x2b\x5f\x91\x78\x34\x26\x09\x42\x8d\x6f\x51\xb2\xc5\xaf\x4c" + "\x0b\xde\x6a\x42", + .m_size = 20, + .algo = OID_id_ecdsa_with_sha1, + .c = + "\x30\x46\x02\x21\x00\xf9\x25\xce\x9f\x3a\xa6\x35\x81\xcf\xd4\xe7" + "\xb7\xf0\x82\x56\x41\xf7\xd4\xad\x8d\x94\x5a\x69\x89\xee\xca\x6a" + "\x52\x0e\x48\x4d\xcc\x02\x21\x00\xd7\xe4\xef\x52\x66\xd3\x5b\x9d" + "\x8a\xfa\x54\x93\x29\xa7\x70\x86\xf1\x03\x03\xf3\x3b\xe2\x73\xf7" + "\xfb\x9d\x8b\xde\xd4\x8d\x6f\xad", + .c_size = 72, + .public_key_vec = true, + .siggen_sigver_test = true, + }, { + .key = "\x04\x8b\x6d\xc0\x33\x8e\x2d\x8b\x67\xf5\xeb\xc4\x7f\xa0\xf5\xd9" "\x7b\x03\xa5\x78\x9a\xb5\xea\x14\xe4\x23\xd0\xaf\xd7\x0e\x2e\xa0" "\xc9\x8b\xdb\x95\xf8\xb3\xaf\xac\x00\x2c\x2c\x1f\x7a\xfd\x95\x88" @@ -866,6 +916,36 @@ static const struct akcipher_testvec ecdsa_nist_p256_tv_template[] = { static const struct akcipher_testvec ecdsa_nist_p384_tv_template[] = { { + .key = /* secp384r1(sha1) */ + "\x04\x89\x25\xf3\x97\x88\xcb\xb0\x78\xc5\x72\x9a\x14\x6e\x7a\xb1" + "\x5a\xa5\x24\xf1\x95\x06\x9e\x28\xfb\xc4\xb9\xbe\x5a\x0d\xd9\x9f" + "\xf3\xd1\x4d\x2d\x07\x99\xbd\xda\xa7\x66\xec\xbb\xea\xba\x79\x42" + "\xc9\x34\x89\x6a\xe7\x0b\xc3\xf2\xfe\x32\x30\xbe\xba\xf9\xdf\x7e" + "\x4b\x6a\x07\x8e\x26\x66\x3f\x1d\xec\xa2\x57\x91\x51\xdd\x17\x0e" + "\x0b\x25\xd6\x80\x5c\x3b\xe6\x1a\x98\x48\x91\x45\x7a\x73\xb0\xc3" + "\xf1", + .key_len = 97, + .params = + "\x30\x10\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x05\x2b\x81\x04" + "\x00\x22", + .param_len = 18, + .m = + "\x12\x55\x28\xf0\x77\xd5\xb6\x21\x71\x32\x48\xcd\x28\xa8\x25\x22" + "\x3a\x69\xc1\x93", + .m_size = 20, + .algo = OID_id_ecdsa_with_sha1, + .c = + "\x30\x66\x02\x31\x00\xf5\x0f\x24\x4c\x07\x93\x6f\x21\x57\x55\x07" + "\x20\x43\x30\xde\xa0\x8d\x26\x8e\xae\x63\x3f\xbc\x20\x3a\xc6\xf1" + "\x32\x3c\xce\x70\x2b\x78\xf1\x4c\x26\xe6\x5b\x86\xcf\xec\x7c\x7e" + "\xd0\x87\xd7\xd7\x6e\x02\x31\x00\xcd\xbb\x7e\x81\x5d\x8f\x63\xc0" + "\x5f\x63\xb1\xbe\x5e\x4c\x0e\xa1\xdf\x28\x8c\x1b\xfa\xf9\x95\x88" + "\x74\xa0\x0f\xbf\xaf\xc3\x36\x76\x4a\xa1\x59\xf1\x1c\xa4\x58\x26" + "\x79\x12\x2a\xb7\xc5\x15\x92\xc5", + .c_size = 104, + .public_key_vec = true, + .siggen_sigver_test = true, + }, { .key = /* secp384r1(sha224) */ "\x04\x69\x6c\xcf\x62\xee\xd0\x0d\xe5\xb5\x2f\x70\x54\xcf\x26\xa0" "\xd9\x98\x8d\x92\x2a\xab\x9b\x11\xcb\x48\x18\xa1\xa9\x0d\xd5\x18" diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 3921fbed0b28..51421fdbb0ba 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -17,10 +17,12 @@ * build_OID_registry.pl to generate the data for look_up_OID(). */ enum OID { + OID_id_dsa_with_sha1, /* 1.2.840.10030.4.3 */ OID_id_dsa, /* 1.2.840.10040.4.1 */ OID_id_ecPublicKey, /* 1.2.840.10045.2.1 */ OID_id_prime192v1, /* 1.2.840.10045.3.1.1 */ OID_id_prime256v1, /* 1.2.840.10045.3.1.7 */ + OID_id_ecdsa_with_sha1, /* 1.2.840.10045.4.1 */ OID_id_ecdsa_with_sha224, /* 1.2.840.10045.4.3.1 */ OID_id_ecdsa_with_sha256, /* 1.2.840.10045.4.3.2 */ OID_id_ecdsa_with_sha384, /* 1.2.840.10045.4.3.3 */ @@ -28,6 +30,7 @@ enum OID { /* PKCS#1 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-1(1)} */ OID_rsaEncryption, /* 1.2.840.113549.1.1.1 */ + OID_sha1WithRSAEncryption, /* 1.2.840.113549.1.1.5 */ OID_sha256WithRSAEncryption, /* 1.2.840.113549.1.1.11 */ OID_sha384WithRSAEncryption, /* 1.2.840.113549.1.1.12 */ OID_sha512WithRSAEncryption, /* 1.2.840.113549.1.1.13 */ @@ -64,6 +67,7 @@ enum OID { OID_PKU2U, /* 1.3.5.1.5.2.7 */ OID_Scram, /* 1.3.6.1.5.5.14 */ OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */ + OID_sha1, /* 1.3.14.3.2.26 */ OID_id_ansip384r1, /* 1.3.132.0.34 */ OID_sha256, /* 2.16.840.1.101.3.4.2.1 */ OID_sha384, /* 2.16.840.1.101.3.4.2.2 */ diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index 0ea1b2970a23..28db5b7589eb 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -236,6 +236,10 @@ choice possible to load a signed module containing the algorithm to check the signature on that module. +config MODULE_SIG_SHA1 + bool "Sign modules with SHA-1" + select CRYPTO_SHA1 + config MODULE_SIG_SHA256 bool "Sign modules with SHA-256" select CRYPTO_SHA256 @@ -265,6 +269,7 @@ endchoice config MODULE_SIG_HASH string depends on MODULE_SIG || IMA_APPRAISE_MODSIG + default "sha1" if MODULE_SIG_SHA1 default "sha256" if MODULE_SIG_SHA256 default "sha384" if MODULE_SIG_SHA384 default "sha512" if MODULE_SIG_SHA512 -- cgit v1.2.3 From be23b2d7c3b7c8bf57b1cf0bf890bd65df9d0186 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Mar 2024 11:09:51 +0100 Subject: wifi: cfg80211: add a flag to disable wireless extensions Wireless extensions are already disabled if MLO is enabled, given that we cannot support MLO there with all the hard- coded assumptions about BSSID etc. However, the WiFi7 ecosystem is still stabilizing, and some devices may need MLO disabled while that happens. In that case, we might end up with a device that supports wext (but not MLO) in one kernel, and then breaks wext in the future (by enabling MLO), which is not desirable. Add a flag to let such drivers/devices disable wext even if MLO isn't yet enabled. Cc: stable@vger.kernel.org Link: https://msgid.link/20240314110951.b50f1dc4ec21.I656ddd8178eedb49dc5c6c0e70f8ce5807afb54f@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ net/wireless/wext-core.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2e2be4fd2bb6..1e09329acc42 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4991,6 +4991,7 @@ struct cfg80211_ops { * set this flag to update channels on beacon hints. * @WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY: support connection to non-primary link * of an NSTR mobile AP MLD. + * @WIPHY_FLAG_DISABLE_WEXT: disable wireless extensions for this device */ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), @@ -5002,6 +5003,7 @@ enum wiphy_flags { WIPHY_FLAG_4ADDR_STATION = BIT(6), WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), WIPHY_FLAG_IBSS_RSN = BIT(8), + WIPHY_FLAG_DISABLE_WEXT = BIT(9), WIPHY_FLAG_MESH_AUTH = BIT(10), WIPHY_FLAG_SUPPORTS_EXT_KCK_32 = BIT(11), WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY = BIT(12), diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index a161c64d1765..838ad6541a17 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -4,6 +4,7 @@ * Authors : Jean Tourrilhes - HPL - * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. * Copyright 2009 Johannes Berg + * Copyright (C) 2024 Intel Corporation * * (As all part of the Linux kernel, this file is GPL) */ @@ -662,7 +663,8 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev) dev->ieee80211_ptr->wiphy->wext && dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) { wireless_warn_cfg80211_wext(); - if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO) + if (dev->ieee80211_ptr->wiphy->flags & (WIPHY_FLAG_SUPPORTS_MLO | + WIPHY_FLAG_DISABLE_WEXT)) return NULL; return dev->ieee80211_ptr->wiphy->wext->get_wireless_stats(dev); } @@ -704,7 +706,8 @@ static iw_handler get_handler(struct net_device *dev, unsigned int cmd) #ifdef CONFIG_CFG80211_WEXT if (dev->ieee80211_ptr && dev->ieee80211_ptr->wiphy) { wireless_warn_cfg80211_wext(); - if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO) + if (dev->ieee80211_ptr->wiphy->flags & (WIPHY_FLAG_SUPPORTS_MLO | + WIPHY_FLAG_DISABLE_WEXT)) return NULL; handlers = dev->ieee80211_ptr->wiphy->wext; } -- cgit v1.2.3 From f9eeb6bb13fb5d7af1ea5b74a10b1f8ead962540 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 21 Mar 2024 15:08:13 +0200 Subject: ALSA: hda: Add pplcllpl/u members to hdac_ext_stream The pplcllpl/u can be used to save the Link Connection Linear Link Position register value to be used for compensation of the LLP register value in case the counter is not reset (after pause/resume or stop/start without closing the stream). The LLP can be used along with PPHCLDP to calculate delay caused by the DSP processing for HDA links. Cc: stable@vger.kernel.org # 6.8 Signed-off-by: Peter Ujfalusi Reviewed-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Link: https://msgid.link/r/20240321130814.4412-17-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- include/sound/hdaudio_ext.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index a8bebac1e4b2..957295364a5e 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -56,6 +56,9 @@ struct hdac_ext_stream { u32 pphcldpl; u32 pphcldpu; + u32 pplcllpl; + u32 pplcllpu; + bool decoupled:1; bool link_locked:1; bool link_prepared; -- cgit v1.2.3 From 0c76106cb97548810214def8ee22700bbbb90543 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 19 Mar 2024 16:12:09 +0900 Subject: scsi: sd: Fix TCG OPAL unlock on system resume Commit 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management") introduced the manage_system_start_stop scsi_device flag to allow libata to indicate to the SCSI disk driver that nothing should be done when resuming a disk on system resume. This change turned the execution of sd_resume() into a no-op for ATA devices on system resume. While this solved deadlock issues during device resume, this change also wrongly removed the execution of opal_unlock_from_suspend(). As a result, devices with TCG OPAL locking enabled remain locked and inaccessible after a system resume from sleep. To fix this issue, introduce the SCSI driver resume method and implement it with the sd_resume() function calling opal_unlock_from_suspend(). The former sd_resume() function is renamed to sd_resume_common() and modified to call the new sd_resume() function. For non-ATA devices, this result in no functional changes. In order for libata to explicitly execute sd_resume() when a device is resumed during system restart, the function scsi_resume_device() is introduced. libata calls this function from the revalidation work executed on devie resume, a state that is indicated with the new device flag ATA_DFLAG_RESUMING. Doing so, locked TCG OPAL enabled devices are unlocked on resume, allowing normal operation. Fixes: 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management") Link: https://bugzilla.kernel.org/show_bug.cgi?id=218538 Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20240319071209.1179257-1-dlemoal@kernel.org Signed-off-by: Martin K. Petersen --- drivers/ata/libata-eh.c | 5 ++++- drivers/ata/libata-scsi.c | 9 +++++++++ drivers/scsi/scsi_scan.c | 34 ++++++++++++++++++++++++++++++++++ drivers/scsi/sd.c | 23 +++++++++++++++++++---- include/linux/libata.h | 1 + include/scsi/scsi_driver.h | 1 + include/scsi/scsi_host.h | 1 + 7 files changed, 69 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index b0d6e69c4a5b..214b935c2ced 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -712,8 +712,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) ehc->saved_ncq_enabled |= 1 << devno; /* If we are resuming, wake up the device */ - if (ap->pflags & ATA_PFLAG_RESUMING) + if (ap->pflags & ATA_PFLAG_RESUMING) { + dev->flags |= ATA_DFLAG_RESUMING; ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE; + } } } @@ -3169,6 +3171,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, return 0; err: + dev->flags &= ~ATA_DFLAG_RESUMING; *r_failed_dev = dev; return rc; } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 0a0f483124c3..2f4c58837641 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4730,6 +4730,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) struct ata_link *link; struct ata_device *dev; unsigned long flags; + bool do_resume; int ret = 0; mutex_lock(&ap->scsi_scan_mutex); @@ -4751,7 +4752,15 @@ void ata_scsi_dev_rescan(struct work_struct *work) if (scsi_device_get(sdev)) continue; + do_resume = dev->flags & ATA_DFLAG_RESUMING; + spin_unlock_irqrestore(ap->lock, flags); + if (do_resume) { + ret = scsi_resume_device(sdev); + if (ret == -EWOULDBLOCK) + goto unlock; + dev->flags &= ~ATA_DFLAG_RESUMING; + } ret = scsi_rescan_device(sdev); scsi_device_put(sdev); spin_lock_irqsave(ap->lock, flags); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 8d06475de17a..ffd7e7e72933 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1642,6 +1642,40 @@ int scsi_add_device(struct Scsi_Host *host, uint channel, } EXPORT_SYMBOL(scsi_add_device); +int scsi_resume_device(struct scsi_device *sdev) +{ + struct device *dev = &sdev->sdev_gendev; + int ret = 0; + + device_lock(dev); + + /* + * Bail out if the device or its queue are not running. Otherwise, + * the rescan may block waiting for commands to be executed, with us + * holding the device lock. This can result in a potential deadlock + * in the power management core code when system resume is on-going. + */ + if (sdev->sdev_state != SDEV_RUNNING || + blk_queue_pm_only(sdev->request_queue)) { + ret = -EWOULDBLOCK; + goto unlock; + } + + if (dev->driver && try_module_get(dev->driver->owner)) { + struct scsi_driver *drv = to_scsi_driver(dev->driver); + + if (drv->resume) + ret = drv->resume(dev); + module_put(dev->driver->owner); + } + +unlock: + device_unlock(dev); + + return ret; +} +EXPORT_SYMBOL(scsi_resume_device); + int scsi_rescan_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index ccff8f2e2e75..3cf898670290 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -4108,7 +4108,21 @@ static int sd_suspend_runtime(struct device *dev) return sd_suspend_common(dev, true); } -static int sd_resume(struct device *dev, bool runtime) +static int sd_resume(struct device *dev) +{ + struct scsi_disk *sdkp = dev_get_drvdata(dev); + + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + + if (opal_unlock_from_suspend(sdkp->opal_dev)) { + sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); + return -EIO; + } + + return 0; +} + +static int sd_resume_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret; @@ -4124,7 +4138,7 @@ static int sd_resume(struct device *dev, bool runtime) sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); if (!ret) { - opal_unlock_from_suspend(sdkp->opal_dev); + sd_resume(dev); sdkp->suspended = false; } @@ -4143,7 +4157,7 @@ static int sd_resume_system(struct device *dev) return 0; } - return sd_resume(dev, false); + return sd_resume_common(dev, false); } static int sd_resume_runtime(struct device *dev) @@ -4170,7 +4184,7 @@ static int sd_resume_runtime(struct device *dev) "Failed to clear sense data\n"); } - return sd_resume(dev, true); + return sd_resume_common(dev, true); } static const struct dev_pm_ops sd_pm_ops = { @@ -4193,6 +4207,7 @@ static struct scsi_driver sd_template = { .pm = &sd_pm_ops, }, .rescan = sd_rescan, + .resume = sd_resume, .init_command = sd_init_command, .uninit_command = sd_uninit_command, .done = sd_done, diff --git a/include/linux/libata.h b/include/linux/libata.h index 26d68115afb8..324d792e7c78 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -107,6 +107,7 @@ enum { ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */ ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */ + ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */ ATA_DFLAG_DETACH = (1 << 24), ATA_DFLAG_DETACHED = (1 << 25), ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 4ce1988b2ba0..f40915d2ecee 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -12,6 +12,7 @@ struct request; struct scsi_driver { struct device_driver gendrv; + int (*resume)(struct device *); void (*rescan)(struct device *); blk_status_t (*init_command)(struct scsi_cmnd *); void (*uninit_command)(struct scsi_cmnd *); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index b259d42a1e1a..129001f600fc 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -767,6 +767,7 @@ scsi_template_proc_dir(const struct scsi_host_template *sht); #define scsi_template_proc_dir(sht) NULL #endif extern void scsi_scan_host(struct Scsi_Host *); +extern int scsi_resume_device(struct scsi_device *sdev); extern int scsi_rescan_device(struct scsi_device *sdev); extern void scsi_remove_host(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *); -- cgit v1.2.3 From 429846b4b6ce9853e0d803a2357bb2e55083adf0 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Sat, 23 Mar 2024 17:41:55 +0900 Subject: scsi: mpi3mr: Avoid memcpy field-spanning write WARNING When the "storcli2 show" command is executed for eHBA-9600, mpi3mr driver prints this WARNING message: memcpy: detected field-spanning write (size 128) of single field "bsg_reply_buf->reply_buf" at drivers/scsi/mpi3mr/mpi3mr_app.c:1658 (size 1) WARNING: CPU: 0 PID: 12760 at drivers/scsi/mpi3mr/mpi3mr_app.c:1658 mpi3mr_bsg_request+0x6b12/0x7f10 [mpi3mr] The cause of the WARN is 128 bytes memcpy to the 1 byte size array "__u8 replay_buf[1]" in the struct mpi3mr_bsg_in_reply_buf. The array is intended to be a flexible length array, so the WARN is a false positive. To suppress the WARN, remove the constant number '1' from the array declaration and clarify that it has flexible length. Also, adjust the memory allocation size to match the change. Suggested-by: Sathya Prakash Veerichetty Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20240323084155.166835-1-shinichiro.kawasaki@wdc.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_app.c | 2 +- include/uapi/scsi/scsi_bsg_mpi3mr.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 0380996b5ad2..55d590b91947 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -1644,7 +1644,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) if ((mpirep_offset != 0xFF) && drv_bufs[mpirep_offset].bsg_buf_len) { drv_buf_iter = &drv_bufs[mpirep_offset]; - drv_buf_iter->kern_buf_len = (sizeof(*bsg_reply_buf) - 1 + + drv_buf_iter->kern_buf_len = (sizeof(*bsg_reply_buf) + mrioc->reply_sz); bsg_reply_buf = kzalloc(drv_buf_iter->kern_buf_len, GFP_KERNEL); diff --git a/include/uapi/scsi/scsi_bsg_mpi3mr.h b/include/uapi/scsi/scsi_bsg_mpi3mr.h index c72ce387286a..30a5c1a59376 100644 --- a/include/uapi/scsi/scsi_bsg_mpi3mr.h +++ b/include/uapi/scsi/scsi_bsg_mpi3mr.h @@ -382,7 +382,7 @@ struct mpi3mr_bsg_in_reply_buf { __u8 mpi_reply_type; __u8 rsvd1; __u16 rsvd2; - __u8 reply_buf[1]; + __u8 reply_buf[]; }; /** -- cgit v1.2.3 From c2ddeb29612f7ca84ed10c6d4f3ac99705135447 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Mar 2024 13:58:08 +0100 Subject: genirq: Introduce IRQF_COND_ONESHOT and use it in pinctrl-amd There is a problem when a driver requests a shared interrupt line to run a threaded handler on it without IRQF_ONESHOT set if that flag has been set already for the IRQ in question by somebody else. Namely, the request fails which usually leads to a probe failure even though the driver might have worked just fine with IRQF_ONESHOT, but it does not want to use it by default. Currently, the only way to handle this is to try to request the IRQ without IRQF_ONESHOT, but with IRQF_PROBE_SHARED set and if this fails, try again with IRQF_ONESHOT set. However, this is a bit cumbersome and not very clean. When commit 7a36b901a6eb ("ACPI: OSL: Use a threaded interrupt handler for SCI") switched the ACPI subsystem over to using a threaded interrupt handler for the SCI, it had to use IRQF_ONESHOT for it because that's required due to the way the SCI handler works (it needs to walk all of the enabled GPEs before the interrupt line can be unmasked). The SCI interrupt line is not shared with other users very often due to the SCI handling overhead, but on sone systems it is shared and when the other user of it attempts to install a threaded handler, a flags mismatch related to IRQF_ONESHOT may occur. As it turned out, that happened to the pinctrl-amd driver and so commit 4451e8e8415e ("pinctrl: amd: Add IRQF_ONESHOT to the interrupt request") attempted to address the issue by adding IRQF_ONESHOT to the interrupt flags in that driver, but this is now causing an IRQF_ONESHOT-related mismatch to occur on another system which cannot boot as a result of it. Clearly, pinctrl-amd can work with IRQF_ONESHOT if need be, but it should not set that flag by default, so it needs a way to indicate that to the interrupt subsystem. To that end, introdcuce a new interrupt flag, IRQF_COND_ONESHOT, which will only have effect when the IRQ line is shared and IRQF_ONESHOT has been set for it already, in which case it will be promoted to the latter. This is sufficient for drivers sharing the interrupt line with the SCI as it is requested by the ACPI subsystem before any drivers are probed, so they will always see IRQF_ONESHOT set for the interrupt in question. Fixes: 4451e8e8415e ("pinctrl: amd: Add IRQF_ONESHOT to the interrupt request") Reported-by: Francisco Ayala Le Brun Signed-off-by: Rafael J. Wysocki Signed-off-by: Thomas Gleixner Reviewed-by: Linus Walleij Cc: 6.8+ # 6.8+ Closes: https://lore.kernel.org/lkml/CAN-StX1HqWqi+YW=t+V52-38Mfp5fAz7YHx4aH-CQjgyNiKx3g@mail.gmail.com/ Link: https://lore.kernel.org/r/12417336.O9o76ZdvQC@kreacher --- drivers/pinctrl/pinctrl-amd.c | 2 +- include/linux/interrupt.h | 3 +++ kernel/irq/manage.c | 9 +++++++-- 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 49f89b70dcec..7f66ec73199a 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -1159,7 +1159,7 @@ static int amd_gpio_probe(struct platform_device *pdev) } ret = devm_request_irq(&pdev->dev, gpio_dev->irq, amd_gpio_irq_handler, - IRQF_SHARED | IRQF_ONESHOT, KBUILD_MODNAME, gpio_dev); + IRQF_SHARED | IRQF_COND_ONESHOT, KBUILD_MODNAME, gpio_dev); if (ret) goto out2; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 76121c2bb4f8..5c9bdd3ffccc 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -67,6 +67,8 @@ * later. * IRQF_NO_DEBUG - Exclude from runnaway detection for IPI and similar handlers, * depends on IRQF_PERCPU. + * IRQF_COND_ONESHOT - Agree to do IRQF_ONESHOT if already set for a shared + * interrupt. */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -82,6 +84,7 @@ #define IRQF_COND_SUSPEND 0x00040000 #define IRQF_NO_AUTOEN 0x00080000 #define IRQF_NO_DEBUG 0x00100000 +#define IRQF_COND_ONESHOT 0x00200000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ad3eaf2ab959..bf9ae8a8686f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1643,8 +1643,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!((old->flags & new->flags) & IRQF_SHARED) || - (oldtype != (new->flags & IRQF_TRIGGER_MASK)) || - ((old->flags ^ new->flags) & IRQF_ONESHOT)) + (oldtype != (new->flags & IRQF_TRIGGER_MASK))) + goto mismatch; + + if ((old->flags & IRQF_ONESHOT) && + (new->flags & IRQF_COND_ONESHOT)) + new->flags |= IRQF_ONESHOT; + else if ((old->flags ^ new->flags) & IRQF_ONESHOT) goto mismatch; /* All handlers must agree on per-cpuness */ -- cgit v1.2.3 From 151c9c724d05d5b0dd8acd3e11cb69ef1f2dbada Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Mar 2024 13:57:32 +0000 Subject: tcp: properly terminate timers for kernel sockets We had various syzbot reports about tcp timers firing after the corresponding netns has been dismantled. Fortunately Josef Bacik could trigger the issue more often, and could test a patch I wrote two years ago. When TCP sockets are closed, we call inet_csk_clear_xmit_timers() to 'stop' the timers. inet_csk_clear_xmit_timers() can be called from any context, including when socket lock is held. This is the reason it uses sk_stop_timer(), aka del_timer(). This means that ongoing timers might finish much later. For user sockets, this is fine because each running timer holds a reference on the socket, and the user socket holds a reference on the netns. For kernel sockets, we risk that the netns is freed before timer can complete, because kernel sockets do not hold reference on the netns. This patch adds inet_csk_clear_xmit_timers_sync() function that using sk_stop_timer_sync() to make sure all timers are terminated before the kernel socket is released. Modules using kernel sockets close them in their netns exit() handler. Also add sock_not_owned_by_me() helper to get LOCKDEP support : inet_csk_clear_xmit_timers_sync() must not be called while socket lock is held. It is very possible we can revert in the future commit 3a58f13a881e ("net: rds: acquire refcount on TCP sockets") which attempted to solve the issue in rds only. (net/smc/af_smc.c and net/mptcp/subflow.c have similar code) We probably can remove the check_net() tests from tcp_out_of_resources() and __tcp_close() in the future. Reported-by: Josef Bacik Closes: https://lore.kernel.org/netdev/20240314210740.GA2823176@perftesting/ Fixes: 26abe14379f8 ("net: Modify sk_alloc to not reference count the netns of kernel sockets.") Fixes: 8a68173691f0 ("net: sk_clone_lock() should only do get_net() if the parent is not a kernel socket") Link: https://lore.kernel.org/bpf/CANn89i+484ffqb93aQm1N-tjxxvb3WDKX0EbD7318RwRgsatjw@mail.gmail.com/ Signed-off-by: Eric Dumazet Tested-by: Josef Bacik Cc: Tetsuo Handa Link: https://lore.kernel.org/r/20240322135732.1535772-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 1 + include/net/sock.h | 7 +++++++ net/ipv4/inet_connection_sock.c | 14 ++++++++++++++ net/ipv4/tcp.c | 2 ++ 4 files changed, 24 insertions(+) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 9ab4bf704e86..ccf171f7eb60 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -175,6 +175,7 @@ void inet_csk_init_xmit_timers(struct sock *sk, void (*delack_handler)(struct timer_list *), void (*keepalive_handler)(struct timer_list *)); void inet_csk_clear_xmit_timers(struct sock *sk); +void inet_csk_clear_xmit_timers_sync(struct sock *sk); static inline void inet_csk_schedule_ack(struct sock *sk) { diff --git a/include/net/sock.h b/include/net/sock.h index b5e00702acc1..f57bfd8a2ad2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1759,6 +1759,13 @@ static inline void sock_owned_by_me(const struct sock *sk) #endif } +static inline void sock_not_owned_by_me(const struct sock *sk) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks); +#endif +} + static inline bool sock_owned_by_user(const struct sock *sk) { sock_owned_by_me(sk); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7d8090f109ef..c038e28e2f1e 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -771,6 +771,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk) } EXPORT_SYMBOL(inet_csk_clear_xmit_timers); +void inet_csk_clear_xmit_timers_sync(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + /* ongoing timer handlers need to acquire socket lock. */ + sock_not_owned_by_me(sk); + + icsk->icsk_pending = icsk->icsk_ack.pending = 0; + + sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer_sync(sk, &icsk->icsk_delack_timer); + sk_stop_timer_sync(sk, &sk->sk_timer); +} + void inet_csk_delete_keepalive_timer(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d20b62d52171..e767721b3a58 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2931,6 +2931,8 @@ void tcp_close(struct sock *sk, long timeout) lock_sock(sk); __tcp_close(sk, timeout); release_sock(sk); + if (!sk->sk_net_refcnt) + inet_csk_clear_xmit_timers_sync(sk); sock_put(sk); } EXPORT_SYMBOL(tcp_close); -- cgit v1.2.3 From e89c928bedd77d181edc2df01cb6672184775140 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 5 Mar 2024 18:48:39 +0000 Subject: KVM: arm64: Fix host-programmed guest events in nVHE Programming PMU events in the host that count during guest execution is a feature supported by perf, e.g. perf stat -e cpu_cycles:G ./lkvm run While this works for VHE, the guest/host event bitmaps are not carried through to the hypervisor in the nVHE configuration. Make kvm_pmu_update_vcpu_events() conditional on whether or not _hardware_ supports PMUv3 rather than if the vCPU as vPMU enabled. Cc: stable@vger.kernel.org Fixes: 84d751a019a9 ("KVM: arm64: Pass pmu events to hyp via vcpu") Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240305184840.636212-3-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- include/kvm/arm_pmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index eb4c369a79eb..35d4ca4f6122 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -86,7 +86,7 @@ void kvm_vcpu_pmu_resync_el0(void); */ #define kvm_pmu_update_vcpu_events(vcpu) \ do { \ - if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \ + if (!has_vhe() && kvm_arm_support_pmu_v3()) \ vcpu->arch.pmu.events = *kvm_get_pmu_events(); \ } while (0) -- cgit v1.2.3 From 52464f59a361a3ba49d6eabc4f65d5c0b9d1de39 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 25 Mar 2024 17:00:57 +0000 Subject: gpiolib: Add stubs for GPIO lookup functions The gpio_device_find_by_() functions do not have stubs which means that if they are referenced from code with an optiona dependency on gpiolib then the code will fail to link. Add stubs for lookups via fwnode and label. I have not added a stub for plain gpio_device_find() since it seems harder to see a use case for that which does not depend on gpiolib. With the addition of the GPIO reset controller (which lacks a gpiolib dependency) to the arm64 defconfig this is causing build breaks for arm64 virtconfig in -next: aarch64-linux-gnu-ld: drivers/reset/core.o: in function `__reset_add_reset_gpio_lookup': /build/stage/linux/drivers/reset/core.c:861:(.text+0xccc): undefined reference to `gpio_device_find_by_fwnode' Signed-off-by: Mark Brown Reviewed-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index dc75f802e284..f8617eaf08ba 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -646,8 +646,6 @@ int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, struct gpio_device *gpio_device_find(const void *data, int (*match)(struct gpio_chip *gc, const void *data)); -struct gpio_device *gpio_device_find_by_label(const char *label); -struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); @@ -814,6 +812,9 @@ struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc); int gpio_device_get_base(struct gpio_device *gdev); const char *gpio_device_get_label(struct gpio_device *gdev); +struct gpio_device *gpio_device_find_by_label(const char *label); +struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); + #else /* CONFIG_GPIOLIB */ #include @@ -843,6 +844,18 @@ static inline const char *gpio_device_get_label(struct gpio_device *gdev) return NULL; } +static inline struct gpio_device *gpio_device_find_by_label(const char *label) +{ + WARN_ON(1); + return NULL; +} + +static inline struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode) +{ + WARN_ON(1); + return NULL; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { -- cgit v1.2.3 From 9cecde80aae0fb0aa44425575d5aca71bc646d89 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Mar 2024 14:08:21 +0000 Subject: mm: increase folio batch size On a 104 thread, 2 socket Skylake system, Intel report a 4.7% performance reduction with will-it-scale page_fault2. This was due to reducing the size of the batch from 32 to 15. Increasing the folio batch size from 15 to 31 gives a performance increase of 12.5% relative to the original, or 17.2% relative to the reduced performance commit. The penalty of this commit is an additional 128 bytes of stack usage. Six folio_batches are also allocated from percpu memory in cpu_fbatches so that will be an additional 768 bytes of percpu memory (per CPU). Tim Chen originally submitted a patch like this in 2020: https://lore.kernel.org/linux-mm/d1cc9f12a8ad6c2a52cb600d93b06b064f2bbc57.1593205965.git.tim.c.chen@linux.intel.com/ Link: https://lkml.kernel.org/r/20240315140823.2478146-1-willy@infradead.org Fixes: 99fbb6bfc16f ("mm: make folios_put() the basis of release_pages()") Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Yujie Liu Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202403151058.7048f6a8-oliver.sang@intel.com Signed-off-by: Andrew Morton --- include/linux/pagevec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index fcc06c300a72..5d3a0cccc6bf 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -11,8 +11,8 @@ #include -/* 15 pointers + header align the folio_batch structure to a power of two */ -#define PAGEVEC_SIZE 15 +/* 31 pointers + header align the folio_batch structure to a power of two */ +#define PAGEVEC_SIZE 31 struct folio; -- cgit v1.2.3 From d5aad4c2ca057e760a92a9a7d65bd38d72963f27 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Mon, 26 Feb 2024 17:35:41 -0800 Subject: prctl: generalize PR_SET_MDWE support check to be per-arch Patch series "ARM: prctl: Reject PR_SET_MDWE where not supported". I noticed after a recent kernel update that my ARM926 system started segfaulting on any execve() after calling prctl(PR_SET_MDWE). After some investigation it appears that ARMv5 is incapable of providing the appropriate protections for MDWE, since any readable memory is also implicitly executable. The prctl_set_mdwe() function already had some special-case logic added disabling it on PARISC (commit 793838138c15, "prctl: Disable prctl(PR_SET_MDWE) on parisc"); this patch series (1) generalizes that check to use an arch_*() function, and (2) adds a corresponding override for ARM to disable MDWE on pre-ARMv6 CPUs. With the series applied, prctl(PR_SET_MDWE) is rejected on ARMv5 and subsequent execve() calls (as well as mmap(PROT_READ|PROT_WRITE)) can succeed instead of unconditionally failing; on ARMv6 the prctl works as it did previously. [0] https://lore.kernel.org/all/2023112456-linked-nape-bf19@gregkh/ This patch (of 2): There exist systems other than PARISC where MDWE may not be feasible to support; rather than cluttering up the generic code with additional arch-specific logic let's add a generic function for checking MDWE support and allow each arch to override it as needed. Link: https://lkml.kernel.org/r/20240227013546.15769-4-zev@bewilderbeest.net Link: https://lkml.kernel.org/r/20240227013546.15769-5-zev@bewilderbeest.net Signed-off-by: Zev Weiss Acked-by: Helge Deller [parisc] Cc: Borislav Petkov Cc: David Hildenbrand Cc: Florent Revest Cc: "James E.J. Bottomley" Cc: Josh Triplett Cc: Kees Cook Cc: Miguel Ojeda Cc: Mike Rapoport (IBM) Cc: Oleg Nesterov Cc: Ondrej Mosnacek Cc: Rick Edgecombe Cc: Russell King (Oracle) Cc: Sam James Cc: Stefan Roesch Cc: Yang Shi Cc: Yin Fengwei Cc: [6.3+] Signed-off-by: Andrew Morton --- arch/parisc/include/asm/mman.h | 14 ++++++++++++++ include/linux/mman.h | 8 ++++++++ kernel/sys.c | 7 +++++-- 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 arch/parisc/include/asm/mman.h (limited to 'include') diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h new file mode 100644 index 000000000000..47c5a1991d10 --- /dev/null +++ b/arch/parisc/include/asm/mman.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include + +/* PARISC cannot allow mdwe as it needs writable stacks */ +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return false; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported + +#endif /* __ASM_MMAN_H__ */ diff --git a/include/linux/mman.h b/include/linux/mman.h index dc7048824be8..bcb201ab7a41 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -162,6 +162,14 @@ calc_vm_flag_bits(unsigned long flags) unsigned long vm_commit_limit(void); +#ifndef arch_memory_deny_write_exec_supported +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return true; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported +#endif + /* * Denies creating a writable executable mapping or gaining executable permissions. * diff --git a/kernel/sys.c b/kernel/sys.c index f8e543f1e38a..8bb106a56b3a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2408,8 +2408,11 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3, if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN)) return -EINVAL; - /* PARISC cannot allow mdwe as it needs writable stacks */ - if (IS_ENABLED(CONFIG_PARISC)) + /* + * EOPNOTSUPP might be more appropriate here in principle, but + * existing userspace depends on EINVAL specifically. + */ + if (!arch_memory_deny_write_exec_supported()) return -EINVAL; current_bits = get_current_mdwe(); -- cgit v1.2.3 From 0462c56c290a99a7f03e817ae5b843116dfb575c Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Mon, 25 Mar 2024 16:21:25 +0100 Subject: driver core: Introduce device_link_wait_removal() The commit 80dd33cf72d1 ("drivers: base: Fix device link removal") introduces a workqueue to release the consumer and supplier devices used in the devlink. In the job queued, devices are release and in turn, when all the references to these devices are dropped, the release function of the device itself is called. Nothing is present to provide some synchronisation with this workqueue in order to ensure that all ongoing releasing operations are done and so, some other operations can be started safely. For instance, in the following sequence: 1) of_platform_depopulate() 2) of_overlay_remove() During the step 1, devices are released and related devlinks are removed (jobs pushed in the workqueue). During the step 2, OF nodes are destroyed but, without any synchronisation with devlink removal jobs, of_overlay_remove() can raise warnings related to missing of_node_put(): ERROR: memory leak, expected refcount 1 instead of 2 Indeed, the missing of_node_put() call is going to be done, too late, from the workqueue job execution. Introduce device_link_wait_removal() to offer a way to synchronize operations waiting for the end of devlink removals (i.e. end of workqueue jobs). Also, as a flushing operation is done on the workqueue, the workqueue used is moved from a system-wide workqueue to a local one. Cc: stable@vger.kernel.org Signed-off-by: Herve Codina Tested-by: Luca Ceresoli Reviewed-by: Nuno Sa Reviewed-by: Saravana Kannan Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240325152140.198219-2-herve.codina@bootlin.com Signed-off-by: Rob Herring --- drivers/base/core.c | 26 +++++++++++++++++++++++--- include/linux/device.h | 1 + 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/base/core.c b/drivers/base/core.c index b93f3c5716ae..5f4e03336e68 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void); static void __fw_devlink_link_to_consumers(struct device *dev); static bool fw_devlink_drv_reg_done; static bool fw_devlink_best_effort; +static struct workqueue_struct *device_link_wq; /** * __fwnode_link_add - Create a link between two fwnode_handles. @@ -533,12 +534,26 @@ static void devlink_dev_release(struct device *dev) /* * It may take a while to complete this work because of the SRCU * synchronization in device_link_release_fn() and if the consumer or - * supplier devices get deleted when it runs, so put it into the "long" - * workqueue. + * supplier devices get deleted when it runs, so put it into the + * dedicated workqueue. */ - queue_work(system_long_wq, &link->rm_work); + queue_work(device_link_wq, &link->rm_work); } +/** + * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate + */ +void device_link_wait_removal(void) +{ + /* + * devlink removal jobs are queued in the dedicated work queue. + * To be sure that all removal jobs are terminated, ensure that any + * scheduled work has run to completion. + */ + flush_workqueue(device_link_wq); +} +EXPORT_SYMBOL_GPL(device_link_wait_removal); + static struct class devlink_class = { .name = "devlink", .dev_groups = devlink_groups, @@ -4164,9 +4179,14 @@ int __init devices_init(void) sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); if (!sysfs_dev_char_kobj) goto char_kobj_err; + device_link_wq = alloc_workqueue("device_link_wq", 0, 0); + if (!device_link_wq) + goto wq_err; return 0; + wq_err: + kobject_put(sysfs_dev_char_kobj); char_kobj_err: kobject_put(sysfs_dev_block_kobj); block_kobj_err: diff --git a/include/linux/device.h b/include/linux/device.h index 97c4b046c09d..b9f5464f44ed 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1247,6 +1247,7 @@ void device_link_del(struct device_link *link); void device_link_remove(void *consumer, struct device *supplier); void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); +void device_link_wait_removal(void); /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ -- cgit v1.2.3 From ddd65e19c60140673ea9f7249af0a672f1820623 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 23 Mar 2024 17:11:19 +0100 Subject: block: handle BLK_OPEN_RESTRICT_WRITES correctly Last kernel release we introduce CONFIG_BLK_DEV_WRITE_MOUNTED. By default this option is set. When it is set the long-standing behavior of being able to write to mounted block devices is enabled. But in order to guard against unintended corruption by writing to the block device buffer cache CONFIG_BLK_DEV_WRITE_MOUNTED can be turned off. In that case it isn't possible to write to mounted block devices anymore. A filesystem may open its block devices with BLK_OPEN_RESTRICT_WRITES which disallows concurrent BLK_OPEN_WRITE access. When we still had the bdev handle around we could recognize BLK_OPEN_RESTRICT_WRITES because the mode was passed around. Since we managed to get rid of the bdev handle we changed that logic to recognize BLK_OPEN_RESTRICT_WRITES based on whether the file was opened writable and writes to that block device are blocked. That logic doesn't work because we do allow BLK_OPEN_RESTRICT_WRITES to be specified without BLK_OPEN_WRITE. Fix the detection logic and use an FMODE_* bit. We could've also abused O_EXCL as an indicator that BLK_OPEN_RESTRICT_WRITES has been requested. For userspace open paths O_EXCL will never be retained but for internal opens where we open files that are never installed into a file descriptor table this is fine. But it would be a gamble that this doesn't cause bugs. Note that BLK_OPEN_RESTRICT_WRITES is an internal only flag that cannot directly be raised by userspace. It is implicitly raised during mounting. Passes xftests and blktests with CONFIG_BLK_DEV_WRITE_MOUNTED set and unset. Link: https://lore.kernel.org/r/ZfyyEwu9Uq5Pgb94@casper.infradead.org Link: https://lore.kernel.org/r/20240323-zielbereich-mittragen-6fdf14876c3e@brauner Fixes: 321de651fa56 ("block: don't rely on BLK_OPEN_RESTRICT_WRITES when yielding write access") Reviewed-by: Yu Kuai Reviewed-by: Jan Kara Reported-by: Matthew Wilcox Signed-off-by: Christian Brauner --- block/bdev.c | 14 +++++++------- include/linux/fs.h | 2 ++ 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/block/bdev.c b/block/bdev.c index 7a5f611c3d2e..ab9cae0e05f1 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -821,13 +821,11 @@ static void bdev_yield_write_access(struct file *bdev_file) return; bdev = file_bdev(bdev_file); - /* Yield exclusive or shared write access. */ - if (bdev_file->f_mode & FMODE_WRITE) { - if (bdev_writes_blocked(bdev)) - bdev_unblock_writes(bdev); - else - bdev->bd_writers--; - } + + if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED) + bdev_unblock_writes(bdev); + else if (bdev_file->f_mode & FMODE_WRITE) + bdev->bd_writers--; } /** @@ -907,6 +905,8 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; if (bdev_nowait(bdev)) bdev_file->f_mode |= FMODE_NOWAIT; + if (mode & BLK_OPEN_RESTRICT_WRITES) + bdev_file->f_mode |= FMODE_WRITE_RESTRICTED; bdev_file->f_mapping = bdev->bd_inode->i_mapping; bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping); bdev_file->private_data = holder; diff --git a/include/linux/fs.h b/include/linux/fs.h index 00fc429b0af0..8dfd53b52744 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -121,6 +121,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_PWRITE ((__force fmode_t)0x10) /* File is opened for execution with sys_execve / sys_uselib */ #define FMODE_EXEC ((__force fmode_t)0x20) +/* File writes are restricted (block device specific) */ +#define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40) /* 32bit hashes as llseek() offset (for directories) */ #define FMODE_32BITHASH ((__force fmode_t)0x200) /* 64bit hashes as llseek() offset (for directories) */ -- cgit v1.2.3 From ea2c09283b44d1a3732a195a9b257d56779c8863 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Mon, 25 Mar 2024 09:25:05 +0100 Subject: net: wan: framer: Add missing static inline qualifiers Compilation with CONFIG_GENERIC_FRAMER disabled lead to the following warnings: framer.h:184:16: warning: no previous prototype for function 'framer_get' [-Wmissing-prototypes] 184 | struct framer *framer_get(struct device *dev, const char *con_id) framer.h:184:1: note: declare 'static' if the function is not intended to be used outside of this translation unit 184 | struct framer *framer_get(struct device *dev, const char *con_id) framer.h:189:6: warning: no previous prototype for function 'framer_put' [-Wmissing-prototypes] 189 | void framer_put(struct device *dev, struct framer *framer) framer.h:189:1: note: declare 'static' if the function is not intended to be used outside of this translation unit 189 | void framer_put(struct device *dev, struct framer *framer) Add missing 'static inline' qualifiers for these functions. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202403241110.hfJqeJRu-lkp@intel.com/ Fixes: 82c944d05b1a ("net: wan: Add framer framework support") Cc: stable@vger.kernel.org Signed-off-by: Herve Codina Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/framer/framer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/framer/framer.h b/include/linux/framer/framer.h index 9a9b88962c29..2b85fe9e7f9a 100644 --- a/include/linux/framer/framer.h +++ b/include/linux/framer/framer.h @@ -181,12 +181,12 @@ static inline int framer_notifier_unregister(struct framer *framer, return -ENOSYS; } -struct framer *framer_get(struct device *dev, const char *con_id) +static inline struct framer *framer_get(struct device *dev, const char *con_id) { return ERR_PTR(-ENOSYS); } -void framer_put(struct device *dev, struct framer *framer) +static inline void framer_put(struct device *dev, struct framer *framer) { } -- cgit v1.2.3 From 22650a99821dda3d05f1c334ea90330b4982de56 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 26 Mar 2024 13:47:22 +0100 Subject: fs,block: yield devices early Currently a device is only really released once the umount returns to userspace due to how file closing works. That ultimately could cause an old umount assumption to be violated that concurrent umount and mount don't fail. So an exclusively held device with a temporary holder should be yielded before the filesystem is gone. Add a helper that allows callers to do that. This also allows us to remove the two holder ops that Linus wasn't excited about. Link: https://lore.kernel.org/r/20240326-vfs-bdev-end_holder-v1-1-20af85202918@kernel.org Fixes: f3a608827d1f ("bdev: open block device as files") # mainline only Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Suggested-by: Linus Torvalds Signed-off-by: Christian Brauner --- block/bdev.c | 64 +++++++++++++++++++++++++++++++++++------ drivers/mtd/devices/block2mtd.c | 2 +- fs/bcachefs/super-io.c | 2 +- fs/cramfs/inode.c | 2 +- fs/ext4/super.c | 8 +++--- fs/f2fs/super.c | 2 +- fs/jfs/jfs_logmgr.c | 4 +-- fs/reiserfs/journal.c | 2 +- fs/romfs/super.c | 2 +- fs/super.c | 24 ++-------------- fs/xfs/xfs_buf.c | 2 +- fs/xfs/xfs_super.c | 6 ++-- include/linux/blkdev.h | 11 +------ 13 files changed, 76 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/block/bdev.c b/block/bdev.c index a1946a902df3..b8e32d933a63 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -583,9 +583,6 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder, mutex_unlock(&bdev->bd_holder_lock); bd_clear_claiming(whole, holder); mutex_unlock(&bdev_lock); - - if (hops && hops->get_holder) - hops->get_holder(holder); } /** @@ -608,7 +605,6 @@ EXPORT_SYMBOL(bd_abort_claiming); static void bd_end_claim(struct block_device *bdev, void *holder) { struct block_device *whole = bdev_whole(bdev); - const struct blk_holder_ops *hops = bdev->bd_holder_ops; bool unblock = false; /* @@ -631,9 +627,6 @@ static void bd_end_claim(struct block_device *bdev, void *holder) whole->bd_holder = NULL; mutex_unlock(&bdev_lock); - if (hops && hops->put_holder) - hops->put_holder(holder); - /* * If this was the last claim, remove holder link and unblock evpoll if * it was a write holder. @@ -813,6 +806,11 @@ static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode) bdev->bd_writers++; } +static inline bool bdev_unclaimed(const struct file *bdev_file) +{ + return bdev_file->private_data == BDEV_I(bdev_file->f_mapping->host); +} + static void bdev_yield_write_access(struct file *bdev_file) { struct block_device *bdev; @@ -820,6 +818,9 @@ static void bdev_yield_write_access(struct file *bdev_file) if (bdev_allow_write_mounted) return; + if (bdev_unclaimed(bdev_file)) + return; + bdev = file_bdev(bdev_file); if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED) @@ -1012,6 +1013,20 @@ struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, } EXPORT_SYMBOL(bdev_file_open_by_path); +static inline void bd_yield_claim(struct file *bdev_file) +{ + struct block_device *bdev = file_bdev(bdev_file); + void *holder = bdev_file->private_data; + + lockdep_assert_held(&bdev->bd_disk->open_mutex); + + if (WARN_ON_ONCE(IS_ERR_OR_NULL(holder))) + return; + + if (!bdev_unclaimed(bdev_file)) + bd_end_claim(bdev, holder); +} + void bdev_release(struct file *bdev_file) { struct block_device *bdev = file_bdev(bdev_file); @@ -1036,7 +1051,7 @@ void bdev_release(struct file *bdev_file) bdev_yield_write_access(bdev_file); if (holder) - bd_end_claim(bdev, holder); + bd_yield_claim(bdev_file); /* * Trigger event checking and tell drivers to flush MEDIA_CHANGE @@ -1056,6 +1071,39 @@ put_no_open: blkdev_put_no_open(bdev); } +/** + * bdev_fput - yield claim to the block device and put the file + * @bdev_file: open block device + * + * Yield claim on the block device and put the file. Ensure that the + * block device can be reclaimed before the file is closed which is a + * deferred operation. + */ +void bdev_fput(struct file *bdev_file) +{ + if (WARN_ON_ONCE(bdev_file->f_op != &def_blk_fops)) + return; + + if (bdev_file->private_data) { + struct block_device *bdev = file_bdev(bdev_file); + struct gendisk *disk = bdev->bd_disk; + + mutex_lock(&disk->open_mutex); + bdev_yield_write_access(bdev_file); + bd_yield_claim(bdev_file); + /* + * Tell release we already gave up our hold on the + * device and if write restrictions are available that + * we already gave up write access to the device. + */ + bdev_file->private_data = BDEV_I(bdev_file->f_mapping->host); + mutex_unlock(&disk->open_mutex); + } + + fput(bdev_file); +} +EXPORT_SYMBOL(bdev_fput); + /** * lookup_bdev() - Look up a struct block_device by name. * @pathname: Name of the block device in the filesystem. diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index 97a00ec9a4d4..caacdc0a3819 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -209,7 +209,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev) if (dev->bdev_file) { invalidate_mapping_pages(dev->bdev_file->f_mapping, 0, -1); - fput(dev->bdev_file); + bdev_fput(dev->bdev_file); } kfree(dev); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index ad28e370b640..cb7b4de11a49 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -143,7 +143,7 @@ void bch2_free_super(struct bch_sb_handle *sb) { kfree(sb->bio); if (!IS_ERR_OR_NULL(sb->s_bdev_file)) - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); kfree(sb->holder); kfree(sb->sb_name); diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 39e75131fd5a..9901057a15ba 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb) sb->s_mtd = NULL; } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) { sync_blockdev(sb->s_bdev); - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); } kfree(sbi); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cfb8449c731f..044135796f2b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5668,7 +5668,7 @@ failed_mount: brelse(sbi->s_sbh); if (sbi->s_journal_bdev_file) { invalidate_bdev(file_bdev(sbi->s_journal_bdev_file)); - fput(sbi->s_journal_bdev_file); + bdev_fput(sbi->s_journal_bdev_file); } out_fail: invalidate_bdev(sb->s_bdev); @@ -5913,7 +5913,7 @@ static struct file *ext4_get_journal_blkdev(struct super_block *sb, out_bh: brelse(bh); out_bdev: - fput(bdev_file); + bdev_fput(bdev_file); return ERR_PTR(errno); } @@ -5952,7 +5952,7 @@ static journal_t *ext4_open_dev_journal(struct super_block *sb, out_journal: jbd2_journal_destroy(journal); out_bdev: - fput(bdev_file); + bdev_fput(bdev_file); return ERR_PTR(errno); } @@ -7327,7 +7327,7 @@ static void ext4_kill_sb(struct super_block *sb) kill_block_super(sb); if (bdev_file) - fput(bdev_file); + bdev_fput(bdev_file); } static struct file_system_type ext4_fs_type = { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a6867f26f141..a4bc26dfdb1a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1558,7 +1558,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) for (i = 0; i < sbi->s_ndevs; i++) { if (i > 0) - fput(FDEV(i).bdev_file); + bdev_fput(FDEV(i).bdev_file); #ifdef CONFIG_BLK_DEV_ZONED kvfree(FDEV(i).blkz_seq); #endif diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 73389c68e251..9609349e92e5 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1141,7 +1141,7 @@ journal_found: lbmLogShutdown(log); close: /* close external log device */ - fput(bdev_file); + bdev_fput(bdev_file); free: /* free log descriptor */ mutex_unlock(&jfs_log_mutex); @@ -1485,7 +1485,7 @@ int lmLogClose(struct super_block *sb) bdev_file = log->bdev_file; rc = lmLogShutdown(log); - fput(bdev_file); + bdev_fput(bdev_file); kfree(log); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 6474529c4253..e539ccd39e1e 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2589,7 +2589,7 @@ static void journal_list_init(struct super_block *sb) static void release_journal_dev(struct reiserfs_journal *journal) { if (journal->j_bdev_file) { - fput(journal->j_bdev_file); + bdev_fput(journal->j_bdev_file); journal->j_bdev_file = NULL; } } diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 2be227532f39..2cbb92462074 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb) #ifdef CONFIG_ROMFS_ON_BLOCK if (sb->s_bdev) { sync_blockdev(sb->s_bdev); - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); } #endif } diff --git a/fs/super.c b/fs/super.c index 71d9779c42b1..69ce6c600968 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1515,29 +1515,11 @@ static int fs_bdev_thaw(struct block_device *bdev) return error; } -static void fs_bdev_super_get(void *data) -{ - struct super_block *sb = data; - - spin_lock(&sb_lock); - sb->s_count++; - spin_unlock(&sb_lock); -} - -static void fs_bdev_super_put(void *data) -{ - struct super_block *sb = data; - - put_super(sb); -} - const struct blk_holder_ops fs_holder_ops = { .mark_dead = fs_bdev_mark_dead, .sync = fs_bdev_sync, .freeze = fs_bdev_freeze, .thaw = fs_bdev_thaw, - .get_holder = fs_bdev_super_get, - .put_holder = fs_bdev_super_put, }; EXPORT_SYMBOL_GPL(fs_holder_ops); @@ -1562,7 +1544,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, * writable from userspace even for a read-only block device. */ if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) { - fput(bdev_file); + bdev_fput(bdev_file); return -EACCES; } @@ -1573,7 +1555,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, if (atomic_read(&bdev->bd_fsfreeze_count) > 0) { if (fc) warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); - fput(bdev_file); + bdev_fput(bdev_file); return -EBUSY; } spin_lock(&sb_lock); @@ -1693,7 +1675,7 @@ void kill_block_super(struct super_block *sb) generic_shutdown_super(sb); if (bdev) { sync_blockdev(bdev); - fput(sb->s_bdev_file); + bdev_fput(sb->s_bdev_file); } } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 1a18c381127e..f0fa02264eda 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2030,7 +2030,7 @@ xfs_free_buftarg( fs_put_dax(btp->bt_daxdev, btp->bt_mount); /* the main block device is closed by kill_block_super */ if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev) - fput(btp->bt_bdev_file); + bdev_fput(btp->bt_bdev_file); kfree(btp); } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c21f10ab0f5d..bce020374c5e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -485,7 +485,7 @@ xfs_open_devices( mp->m_logdev_targp = mp->m_ddev_targp; /* Handle won't be used, drop it */ if (logdev_file) - fput(logdev_file); + bdev_fput(logdev_file); } return 0; @@ -497,10 +497,10 @@ xfs_open_devices( xfs_free_buftarg(mp->m_ddev_targp); out_close_rtdev: if (rtdev_file) - fput(rtdev_file); + bdev_fput(rtdev_file); out_close_logdev: if (logdev_file) - fput(logdev_file); + bdev_fput(logdev_file); return error; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c3e8f7cf96be..172c91879999 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1505,16 +1505,6 @@ struct blk_holder_ops { * Thaw the file system mounted on the block device. */ int (*thaw)(struct block_device *bdev); - - /* - * If needed, get a reference to the holder. - */ - void (*get_holder)(void *holder); - - /* - * Release the holder. - */ - void (*put_holder)(void *holder); }; /* @@ -1585,6 +1575,7 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev) int bdev_freeze(struct block_device *bdev); int bdev_thaw(struct block_device *bdev); +void bdev_fput(struct file *bdev_file); struct io_comp_batch { struct request *req_list; -- cgit v1.2.3 From 0cac183b98d8a8c692c98e8dba37df15a9e9210d Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Fri, 1 Mar 2024 12:25:17 -0500 Subject: drm/amdkfd: range check cp bad op exception interrupts Due to a CP interrupt bug, bad packet garbage exception codes are raised. Do a range check so that the debugger and runtime do not receive garbage codes. Update the user api to guard exception code type checking as well. Signed-off-by: Jonathan Kim Tested-by: Jesse Zhang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 3 ++- include/uapi/linux/kfd_ioctl.h | 17 ++++++++++++++--- 4 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 9a06c6fb6605..40a21be6c07c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -339,7 +339,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, break; } kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23); - } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { + } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) { kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index 7e2859736a55..fe2ad0c0de95 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -328,7 +328,8 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, /* CP */ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) kfd_signal_event_interrupt(pasid, context_id0, 32); - else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) + else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_CTXID0_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 91dd5e045b51..c4c6a29052ac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -388,7 +388,8 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, break; } kfd_signal_event_interrupt(pasid, sq_int_data, 24); - } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { + } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) { kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)), diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 9ce46edc62a5..2040a470ddb4 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -913,14 +913,25 @@ enum kfd_dbg_trap_exception_code { KFD_EC_MASK(EC_DEVICE_NEW)) #define KFD_EC_MASK_PROCESS (KFD_EC_MASK(EC_PROCESS_RUNTIME) | \ KFD_EC_MASK(EC_PROCESS_DEVICE_REMOVE)) +#define KFD_EC_MASK_PACKET (KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_RESERVED) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED)) /* Checks for exception code types for KFD search */ +#define KFD_DBG_EC_IS_VALID(ecode) (ecode > EC_NONE && ecode < EC_MAX) #define KFD_DBG_EC_TYPE_IS_QUEUE(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE)) + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE)) #define KFD_DBG_EC_TYPE_IS_DEVICE(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE)) + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE)) #define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \ - (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS)) + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS)) +#define KFD_DBG_EC_TYPE_IS_PACKET(ecode) \ + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PACKET)) /* Runtime enable states */ -- cgit v1.2.3 From 0316e4b04e0156633df7474bae8e8b0791ce028f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 23 Mar 2024 18:06:15 +0900 Subject: export.h: remove include/asm-generic/export.h Commit 3a6dd5f614a1 ("riscv: remove unneeded #include ") removed the last use of include/asm-generic/export.h. This deprecated header can go away. Signed-off-by: Masahiro Yamada --- include/asm-generic/export.h | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 include/asm-generic/export.h (limited to 'include') diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h deleted file mode 100644 index 570cd4da7210..000000000000 --- a/include/asm-generic/export.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef __ASM_GENERIC_EXPORT_H -#define __ASM_GENERIC_EXPORT_H - -/* - * and are deprecated. - * Please include directly. - */ -#include - -#endif -- cgit v1.2.3 From 18685451fc4e546fc0e718580d32df3c0e5c8272 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Mar 2024 11:18:41 +0100 Subject: inet: inet_defrag: prevent sk release while still in use ip_local_out() and other functions can pass skb->sk as function argument. If the skb is a fragment and reassembly happens before such function call returns, the sk must not be released. This affects skb fragments reassembled via netfilter or similar modules, e.g. openvswitch or ct_act.c, when run as part of tx pipeline. Eric Dumazet made an initial analysis of this bug. Quoting Eric: Calling ip_defrag() in output path is also implying skb_orphan(), which is buggy because output path relies on sk not disappearing. A relevant old patch about the issue was : 8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()") [..] net/ipv4/ip_output.c depends on skb->sk being set, and probably to an inet socket, not an arbitrary one. If we orphan the packet in ipvlan, then downstream things like FQ packet scheduler will not work properly. We need to change ip_defrag() to only use skb_orphan() when really needed, ie whenever frag_list is going to be used. Eric suggested to stash sk in fragment queue and made an initial patch. However there is a problem with this: If skb is refragmented again right after, ip_do_fragment() will copy head->sk to the new fragments, and sets up destructor to sock_wfree. IOW, we have no choice but to fix up sk_wmem accouting to reflect the fully reassembled skb, else wmem will underflow. This change moves the orphan down into the core, to last possible moment. As ip_defrag_offset is aliased with sk_buff->sk member, we must move the offset into the FRAG_CB, else skb->sk gets clobbered. This allows to delay the orphaning long enough to learn if the skb has to be queued or if the skb is completing the reasm queue. In the former case, things work as before, skb is orphaned. This is safe because skb gets queued/stolen and won't continue past reasm engine. In the latter case, we will steal the skb->sk reference, reattach it to the head skb, and fix up wmem accouting when inet_frag inflates truesize. Fixes: 7026b1ddb6b8 ("netfilter: Pass socket pointer down through okfn().") Diagnosed-by: Eric Dumazet Reported-by: xingwei lee Reported-by: yue sun Reported-by: syzbot+e5167d7144a62715044c@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240326101845.30836-1-fw@strlen.de Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 7 +--- net/ipv4/inet_fragment.c | 70 +++++++++++++++++++++++++++------ net/ipv4/ip_fragment.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- 4 files changed, 60 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0c7c67b3a87b..9d24aec064e8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -753,8 +753,6 @@ typedef unsigned char *sk_buff_data_t; * @list: queue head * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by - * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in - * fragmentation management * @dev: Device we arrived on/are leaving by * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here @@ -875,10 +873,7 @@ struct sk_buff { struct llist_node ll_node; }; - union { - struct sock *sk; - int ip_defrag_offset; - }; + struct sock *sk; union { ktime_t tstamp; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 7072fc0783ef..c88c9034d630 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -24,6 +24,8 @@ #include #include +#include "../core/sock_destructor.h" + /* Use skb->cb to track consecutive/adjacent fragments coming at * the end of the queue. Nodes in the rb-tree queue will * contain "runs" of one or more adjacent fragments. @@ -39,6 +41,7 @@ struct ipfrag_skb_cb { }; struct sk_buff *next_frag; int frag_run_len; + int ip_defrag_offset; }; #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) @@ -396,12 +399,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, */ if (!last) fragrun_create(q, skb); /* First fragment. */ - else if (last->ip_defrag_offset + last->len < end) { + else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { /* This is the common case: skb goes to the end. */ /* Detect and discard overlaps. */ - if (offset < last->ip_defrag_offset + last->len) + if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) return IPFRAG_OVERLAP; - if (offset == last->ip_defrag_offset + last->len) + if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) fragrun_append_to_last(q, skb); else fragrun_create(q, skb); @@ -418,13 +421,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, parent = *rbn; curr = rb_to_skb(parent); - curr_run_end = curr->ip_defrag_offset + + curr_run_end = FRAG_CB(curr)->ip_defrag_offset + FRAG_CB(curr)->frag_run_len; - if (end <= curr->ip_defrag_offset) + if (end <= FRAG_CB(curr)->ip_defrag_offset) rbn = &parent->rb_left; else if (offset >= curr_run_end) rbn = &parent->rb_right; - else if (offset >= curr->ip_defrag_offset && + else if (offset >= FRAG_CB(curr)->ip_defrag_offset && end <= curr_run_end) return IPFRAG_DUP; else @@ -438,7 +441,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, rb_insert_color(&skb->rbnode, &q->rb_fragments); } - skb->ip_defrag_offset = offset; + FRAG_CB(skb)->ip_defrag_offset = offset; return IPFRAG_OK; } @@ -448,13 +451,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent) { struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); - struct sk_buff **nextp; + void (*destructor)(struct sk_buff *); + unsigned int orig_truesize = 0; + struct sk_buff **nextp = NULL; + struct sock *sk = skb->sk; int delta; + if (sk && is_skb_wmem(skb)) { + /* TX: skb->sk might have been passed as argument to + * dst->output and must remain valid until tx completes. + * + * Move sk to reassembled skb and fix up wmem accounting. + */ + orig_truesize = skb->truesize; + destructor = skb->destructor; + } + if (head != skb) { fp = skb_clone(skb, GFP_ATOMIC); - if (!fp) - return NULL; + if (!fp) { + head = skb; + goto out_restore_sk; + } FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; if (RB_EMPTY_NODE(&skb->rbnode)) FRAG_CB(parent)->next_frag = fp; @@ -463,6 +481,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, &q->rb_fragments); if (q->fragments_tail == skb) q->fragments_tail = fp; + + if (orig_truesize) { + /* prevent skb_morph from releasing sk */ + skb->sk = NULL; + skb->destructor = NULL; + } skb_morph(skb, head); FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; rb_replace_node(&head->rbnode, &skb->rbnode, @@ -470,13 +494,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, consume_skb(head); head = skb; } - WARN_ON(head->ip_defrag_offset != 0); + WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); delta = -head->truesize; /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) - return NULL; + goto out_restore_sk; delta += head->truesize; if (delta) @@ -492,7 +516,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, clone = alloc_skb(0, GFP_ATOMIC); if (!clone) - return NULL; + goto out_restore_sk; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); for (i = 0; i < skb_shinfo(head)->nr_frags; i++) @@ -509,6 +533,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, nextp = &skb_shinfo(head)->frag_list; } +out_restore_sk: + if (orig_truesize) { + int ts_delta = head->truesize - orig_truesize; + + /* if this reassembled skb is fragmented later, + * fraglist skbs will get skb->sk assigned from head->sk, + * and each frag skb will be released via sock_wfree. + * + * Update sk_wmem_alloc. + */ + head->sk = sk; + head->destructor = destructor; + refcount_add(ts_delta, &sk->sk_wmem_alloc); + } + return nextp; } EXPORT_SYMBOL(inet_frag_reasm_prepare); @@ -516,6 +555,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce) { + struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; + const unsigned int head_truesize = head->truesize; struct sk_buff **nextp = reasm_data; struct rb_node *rbn; struct sk_buff *fp; @@ -579,6 +620,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, head->prev = NULL; head->tstamp = q->stamp; head->mono_delivery_time = q->mono_delivery_time; + + if (sk) + refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(inet_frag_reasm_finish); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a4941f53b523..fb947d1613fe 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -384,6 +384,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -487,7 +488,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) struct ipq *qp; __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); - skb_orphan(skb); /* Lookup (or create) queue header */ qp = ip_find(net, ip_hdr(skb), user, vif); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 1a51a44571c3..d0dcbaca1994 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -294,6 +294,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -469,7 +470,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); if (fq == NULL) { -- cgit v1.2.3 From 1a80dbcb2dbaf6e4c216e62e30fa7d3daa8001ce Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 27 Mar 2024 22:24:26 -0700 Subject: bpf: support deferring bpf_link dealloc to after RCU grace period BPF link for some program types is passed as a "context" which can be used by those BPF programs to look up additional information. E.g., for multi-kprobes and multi-uprobes, link is used to fetch BPF cookie values. Because of this runtime dependency, when bpf_link refcnt drops to zero there could still be active BPF programs running accessing link data. This patch adds generic support to defer bpf_link dealloc callback to after RCU GP, if requested. This is done by exposing two different deallocation callbacks, one synchronous and one deferred. If deferred one is provided, bpf_link_free() will schedule dealloc_deferred() callback to happen after RCU GP. BPF is using two flavors of RCU: "classic" non-sleepable one and RCU tasks trace one. The latter is used when sleepable BPF programs are used. bpf_link_free() accommodates that by checking underlying BPF program's sleepable flag, and goes either through normal RCU GP only for non-sleepable, or through RCU tasks trace GP *and* then normal RCU GP (taking into account rcu_trace_implies_rcu_gp() optimization), if BPF program is sleepable. We use this for multi-kprobe and multi-uprobe links, which dereference link during program run. We also preventively switch raw_tp link to use deferred dealloc callback, as upcoming changes in bpf-next tree expose raw_tp link data (specifically, cookie value) to BPF program at runtime as well. Fixes: 0dcac2725406 ("bpf: Add multi kprobe link") Fixes: 89ae89f53d20 ("bpf: Add multi uprobe link") Reported-by: syzbot+981935d9485a560bfbcb@syzkaller.appspotmail.com Reported-by: syzbot+2cb5a6c573e98db598cc@syzkaller.appspotmail.com Reported-by: syzbot+62d8b26793e8a2bd0516@syzkaller.appspotmail.com Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20240328052426.3042617-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 16 +++++++++++++++- kernel/bpf/syscall.c | 35 ++++++++++++++++++++++++++++++++--- kernel/trace/bpf_trace.c | 4 ++-- 3 files changed, 49 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4f20f62f9d63..890e152d553e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1574,12 +1574,26 @@ struct bpf_link { enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; - struct work_struct work; + /* rcu is used before freeing, work can be used to schedule that + * RCU-based freeing before that, so they never overlap + */ + union { + struct rcu_head rcu; + struct work_struct work; + }; }; struct bpf_link_ops { void (*release)(struct bpf_link *link); + /* deallocate link resources callback, called without RCU grace period + * waiting + */ void (*dealloc)(struct bpf_link *link); + /* deallocate link resources callback, called after RCU grace period; + * if underlying BPF program is sleepable we go through tasks trace + * RCU GP and then "classic" RCU GP + */ + void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link); int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ae2ff73bde7e..c287925471f6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3024,17 +3024,46 @@ void bpf_link_inc(struct bpf_link *link) atomic64_inc(&link->refcnt); } +static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) +{ + struct bpf_link *link = container_of(rcu, struct bpf_link, rcu); + + /* free bpf_link and its containing memory */ + link->ops->dealloc_deferred(link); +} + +static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) +{ + if (rcu_trace_implies_rcu_gp()) + bpf_link_defer_dealloc_rcu_gp(rcu); + else + call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp); +} + /* bpf_link_free is guaranteed to be called from process context */ static void bpf_link_free(struct bpf_link *link) { + bool sleepable = false; + bpf_link_free_id(link->id); if (link->prog) { + sleepable = link->prog->sleepable; /* detach BPF program, clean up used resources */ link->ops->release(link); bpf_prog_put(link->prog); } - /* free bpf_link and its containing memory */ - link->ops->dealloc(link); + if (link->ops->dealloc_deferred) { + /* schedule BPF link deallocation; if underlying BPF program + * is sleepable, we need to first wait for RCU tasks trace + * sync, then go through "classic" RCU grace period + */ + if (sleepable) + call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); + else + call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); + } + if (link->ops->dealloc) + link->ops->dealloc(link); } static void bpf_link_put_deferred(struct work_struct *work) @@ -3544,7 +3573,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, static const struct bpf_link_ops bpf_raw_tp_link_lops = { .release = bpf_raw_tp_link_release, - .dealloc = bpf_raw_tp_link_dealloc, + .dealloc_deferred = bpf_raw_tp_link_dealloc, .show_fdinfo = bpf_raw_tp_link_show_fdinfo, .fill_link_info = bpf_raw_tp_link_fill_link_info, }; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0b73fe5f7206..9dc605f08a23 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2728,7 +2728,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, static const struct bpf_link_ops bpf_kprobe_multi_link_lops = { .release = bpf_kprobe_multi_link_release, - .dealloc = bpf_kprobe_multi_link_dealloc, + .dealloc_deferred = bpf_kprobe_multi_link_dealloc, .fill_link_info = bpf_kprobe_multi_link_fill_link_info, }; @@ -3242,7 +3242,7 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link, static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { .release = bpf_uprobe_multi_link_release, - .dealloc = bpf_uprobe_multi_link_dealloc, + .dealloc_deferred = bpf_uprobe_multi_link_dealloc, .fill_link_info = bpf_uprobe_multi_link_fill_link_info, }; -- cgit v1.2.3 From 831ec5e3538e989c7995137b5c5c661991a09504 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Thu, 28 Mar 2024 23:47:37 +0100 Subject: ASoC: tas2781: mark dvc_tlv with __maybe_unused Since we put dvc_tlv static variable to a header file it's copied to each module that includes the header. But not all of them are actually used it. Fix this W=1 build warning: include/sound/tas2781-tlv.h:18:35: warning: 'dvc_tlv' defined but not used [-Wunused-const-variable=] Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202403290354.v0StnRpc-lkp@intel.com/ Fixes: ae065d0ce9e3 ("ALSA: hda/tas2781: remove digital gain kcontrol") Signed-off-by: Gergo Koteles Message-ID: <0e461545a2a6e9b6152985143e50526322e5f76b.1711665731.git.soyer@irl.hu> Signed-off-by: Takashi Iwai --- include/sound/tas2781-tlv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h index 4038dd421150..1dc59005d241 100644 --- a/include/sound/tas2781-tlv.h +++ b/include/sound/tas2781-tlv.h @@ -15,7 +15,7 @@ #ifndef __TAS2781_TLV_H__ #define __TAS2781_TLV_H__ -static const DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0); static const DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); #endif -- cgit v1.2.3 From 3d010c8031e39f5fa1e8b13ada77e0321091011f Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 26 Mar 2024 12:33:58 +0100 Subject: udp: do not accept non-tunnel GSO skbs landing in a tunnel When rx-udp-gro-forwarding is enabled UDP packets might be GROed when being forwarded. If such packets might land in a tunnel this can cause various issues and udp_gro_receive makes sure this isn't the case by looking for a matching socket. This is performed in udp4/6_gro_lookup_skb but only in the current netns. This is an issue with tunneled packets when the endpoint is in another netns. In such cases the packets will be GROed at the UDP level, which leads to various issues later on. The same thing can happen with rx-gro-list. We saw this with geneve packets being GROed at the UDP level. In such case gso_size is set; later the packet goes through the geneve rx path, the geneve header is pulled, the offset are adjusted and frag_list skbs are not adjusted with regard to geneve. When those skbs hit skb_fragment, it will misbehave. Different outcomes are possible depending on what the GROed skbs look like; from corrupted packets to kernel crashes. One example is a BUG_ON[1] triggered in skb_segment while processing the frag_list. Because gso_size is wrong (geneve header was pulled) skb_segment thinks there is "geneve header size" of data in frag_list, although it's in fact the next packet. The BUG_ON itself has nothing to do with the issue. This is only one of the potential issues. Looking up for a matching socket in udp_gro_receive is fragile: the lookup could be extended to all netns (not speaking about performances) but nothing prevents those packets from being modified in between and we could still not find a matching socket. It's OK to keep the current logic there as it should cover most cases but we also need to make sure we handle tunnel packets being GROed too early. This is done by extending the checks in udp_unexpected_gso: GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits and landing in a tunnel must be segmented. [1] kernel BUG at net/core/skbuff.c:4408! RIP: 0010:skb_segment+0xd2a/0xf70 __udp_gso_segment+0xaa/0x560 Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Fixes: 36707061d6ba ("udp: allow forwarding of plain (non-fraglisted) UDP GRO packets") Signed-off-by: Antoine Tenart Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/udp.h | 28 ++++++++++++++++++++++++++++ net/ipv4/udp.c | 7 +++++++ net/ipv4/udp_offload.c | 6 ++++-- net/ipv6/udp.c | 2 +- 4 files changed, 40 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index 3748e82b627b..17539d089666 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -150,6 +150,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, } } +DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); +#if IS_ENABLED(CONFIG_IPV6) +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +#endif + +static inline bool udp_encap_needed(void) +{ + if (static_branch_unlikely(&udp_encap_needed_key)) + return true; + +#if IS_ENABLED(CONFIG_IPV6) + if (static_branch_unlikely(&udpv6_encap_needed_key)) + return true; +#endif + + return false; +} + static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { if (!skb_is_gso(skb)) @@ -163,6 +181,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; + /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still + * land in a tunnel as the socket check in udp_gro_receive cannot be + * foolproof. + */ + if (udp_encap_needed() && + READ_ONCE(udp_sk(sk)->encap_rcv) && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) + return true; + return false; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 661d0e0d273f..c02bf011d4a6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -582,6 +582,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk, } DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); +EXPORT_SYMBOL(udp_encap_needed_key); + +#if IS_ENABLED(CONFIG_IPV6) +DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +EXPORT_SYMBOL(udpv6_encap_needed_key); +#endif + void udp_encap_enable(void) { static_branch_inc(&udp_encap_needed_key); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b9880743765c..e9719afe91cf 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -551,8 +551,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; - /* we can do L4 aggregation only if the packet can't land in a tunnel - * otherwise we could corrupt the inner stream + /* We can do L4 aggregation only if the packet can't land in a tunnel + * otherwise we could corrupt the inner stream. Detecting such packets + * cannot be foolproof and the aggregation might still happen in some + * cases. Such packets should be caught in udp_unexpected_gso later. */ NAPI_GRO_CB(skb)->is_flist = 0; if (!sk || !udp_sk(sk)->gro_receive) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7c1e6469d091..8b1dd7f51249 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -447,7 +447,7 @@ csum_copy_err: goto try_again; } -DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void) { static_branch_inc(&udpv6_encap_needed_key); -- cgit v1.2.3 From 39646f29b100566451d37abc4cc8cdd583756dfe Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 20 Mar 2024 08:55:53 +0100 Subject: Bluetooth: add quirk for broken address properties Some Bluetooth controllers lack persistent storage for the device address and instead one can be provided by the boot firmware using the 'local-bd-address' devicetree property. The Bluetooth devicetree bindings clearly states that the address should be specified in little-endian order, but due to a long-standing bug in the Qualcomm driver which reversed the address some boot firmware has been providing the address in big-endian order instead. Add a new quirk that can be set on platforms with broken firmware and use it to reverse the address when parsing the property so that the underlying driver bug can be fixed. Fixes: 5c0a1001c8be ("Bluetooth: hci_qca: Add helper to set device address") Cc: stable@vger.kernel.org # 5.1 Reviewed-by: Douglas Anderson Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 9 +++++++++ net/bluetooth/hci_sync.c | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 8701ca5f31ee..5c12761cbc0e 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -176,6 +176,15 @@ enum { */ HCI_QUIRK_USE_BDADDR_PROPERTY, + /* When this quirk is set, the Bluetooth Device Address provided by + * the 'local-bd-address' fwnode property is incorrectly specified in + * big-endian order. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BDADDR_PROPERTY_BROKEN, + /* When this quirk is set, the duplicate filtering during * scanning is based on Bluetooth devices addresses. To allow * RSSI based updates, restart scanning if needed. diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index f6b662369322..639090b9f4b8 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -3416,7 +3416,10 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev) if (ret < 0 || !bacmp(&ba, BDADDR_ANY)) return; - bacpy(&hdev->public_addr, &ba); + if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks)) + baswap(&hdev->public_addr, &ba); + else + bacpy(&hdev->public_addr, &ba); } struct hci_init_stage { -- cgit v1.2.3 From aa7cbefe65e455178c33eca308349e687d262ea7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 31 Mar 2024 10:26:47 -0700 Subject: time/timecounter: Fix inline documentation Fix kernel-doc warnings, text punctuation, and a kernel-doc marker (change '%' to '&' to indicate a struct): timecounter.h:72: warning: No description found for return value of 'cyclecounter_cyc2ns' timecounter.h:85: warning: Function parameter or member 'tc' not described in 'timecounter_adjtime' timecounter.h:111: warning: No description found for return value of 'timecounter_read' timecounter.h:128: warning: No description found for return value of 'timecounter_cyc2time' Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20240331172652.14086-2-rdunlap@infradead.org --- include/linux/timecounter.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index c6540ceea143..0982d1d52b24 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -22,7 +22,7 @@ * * @read: returns the current cycle value * @mask: bitmask for two's complement - * subtraction of non 64 bit counters, + * subtraction of non-64-bit counters, * see CYCLECOUNTER_MASK() helper macro * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) @@ -35,7 +35,7 @@ struct cyclecounter { }; /** - * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds + * struct timecounter - layer above a &struct cyclecounter which counts nanoseconds * Contains the state needed by timecounter_read() to detect * cycle counter wrap around. Initialize with * timecounter_init(). Also used to convert cycle counts into the @@ -66,6 +66,8 @@ struct timecounter { * @cycles: Cycles * @mask: bit mask for maintaining the 'frac' field * @frac: pointer to storage for the fractional nanoseconds. + * + * Returns: cycle counter cycles converted to nanoseconds */ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, u64 cycles, u64 mask, u64 *frac) @@ -79,6 +81,7 @@ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, /** * timecounter_adjtime - Shifts the time of the clock. + * @tc: The &struct timecounter to adjust * @delta: Desired change in nanoseconds. */ static inline void timecounter_adjtime(struct timecounter *tc, s64 delta) @@ -107,6 +110,8 @@ extern void timecounter_init(struct timecounter *tc, * * In other words, keeps track of time since the same epoch as * the function which generated the initial time stamp. + * + * Returns: nanoseconds since the initial time stamp */ extern u64 timecounter_read(struct timecounter *tc); @@ -123,6 +128,8 @@ extern u64 timecounter_read(struct timecounter *tc); * * This allows conversion of cycle counter values which were generated * in the past. + * + * Returns: cycle counter converted to nanoseconds since the initial time stamp */ extern u64 timecounter_cyc2time(const struct timecounter *tc, u64 cycle_tstamp); -- cgit v1.2.3 From 76f788ee4a7d9f826738a034f9d2ee0bc4cd291b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 31 Mar 2024 10:26:48 -0700 Subject: time/timekeeping: Fix kernel-doc warnings and typos Fix punctuation, spellos, and kernel-doc warnings: timekeeping.h:79: warning: No description found for return value of 'ktime_get_real' timekeeping.h:95: warning: No description found for return value of 'ktime_get_boottime' timekeeping.h:108: warning: No description found for return value of 'ktime_get_clocktai' timekeeping.h:149: warning: Function parameter or struct member 'mono' not described in 'ktime_mono_to_real' timekeeping.h:149: warning: No description found for return value of 'ktime_mono_to_real' timekeeping.h:255: warning: Function parameter or struct member 'cs_id' not described in 'system_time_snapshot' Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20240331172652.14086-3-rdunlap@infradead.org --- include/linux/timekeeping.h | 49 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7e50cbd97f86..0ea7823b7f31 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -22,14 +22,14 @@ extern int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz); /* - * ktime_get() family: read the current time in a multitude of ways, + * ktime_get() family - read the current time in a multitude of ways. * * The default time reference is CLOCK_MONOTONIC, starting at * boot time but not counting the time spent in suspend. * For other references, use the functions with "real", "clocktai", * "boottime" and "raw" suffixes. * - * To get the time in a different format, use the ones wit + * To get the time in a different format, use the ones with * "ns", "ts64" and "seconds" suffix. * * See Documentation/core-api/timekeeping.rst for more details. @@ -74,6 +74,8 @@ extern u32 ktime_get_resolution_ns(void); /** * ktime_get_real - get the real (wall-) time in ktime_t format + * + * Returns: real (wall) time in ktime_t format */ static inline ktime_t ktime_get_real(void) { @@ -86,10 +88,12 @@ static inline ktime_t ktime_get_coarse_real(void) } /** - * ktime_get_boottime - Returns monotonic time since boot in ktime_t format + * ktime_get_boottime - Get monotonic time since boot in ktime_t format * * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the * time spent in suspend. + * + * Returns: monotonic time since boot in ktime_t format */ static inline ktime_t ktime_get_boottime(void) { @@ -102,7 +106,9 @@ static inline ktime_t ktime_get_coarse_boottime(void) } /** - * ktime_get_clocktai - Returns the TAI time of day in ktime_t format + * ktime_get_clocktai - Get the TAI time of day in ktime_t format + * + * Returns: the TAI time of day in ktime_t format */ static inline ktime_t ktime_get_clocktai(void) { @@ -144,32 +150,60 @@ static inline u64 ktime_get_coarse_clocktai_ns(void) /** * ktime_mono_to_real - Convert monotonic time to clock realtime + * @mono: monotonic time to convert + * + * Returns: time converted to realtime clock */ static inline ktime_t ktime_mono_to_real(ktime_t mono) { return ktime_mono_to_any(mono, TK_OFFS_REAL); } +/** + * ktime_get_ns - Get the current time in nanoseconds + * + * Returns: current time converted to nanoseconds + */ static inline u64 ktime_get_ns(void) { return ktime_to_ns(ktime_get()); } +/** + * ktime_get_real_ns - Get the current real/wall time in nanoseconds + * + * Returns: current real time converted to nanoseconds + */ static inline u64 ktime_get_real_ns(void) { return ktime_to_ns(ktime_get_real()); } +/** + * ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds + * + * Returns: current boottime converted to nanoseconds + */ static inline u64 ktime_get_boottime_ns(void) { return ktime_to_ns(ktime_get_boottime()); } +/** + * ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds + * + * Returns: current TAI time converted to nanoseconds + */ static inline u64 ktime_get_clocktai_ns(void) { return ktime_to_ns(ktime_get_clocktai()); } +/** + * ktime_get_raw_ns - Get the raw monotonic time in nanoseconds + * + * Returns: current raw monotonic time converted to nanoseconds + */ static inline u64 ktime_get_raw_ns(void) { return ktime_to_ns(ktime_get_raw()); @@ -224,8 +258,8 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); -/* - * struct ktime_timestanps - Simultaneous mono/boot/real timestamps +/** + * struct ktime_timestamps - Simultaneous mono/boot/real timestamps * @mono: Monotonic timestamp * @boot: Boottime timestamp * @real: Realtime timestamp @@ -242,7 +276,8 @@ struct ktime_timestamps { * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time * @raw: Monotonic raw system time - * @clock_was_set_seq: The sequence number of clock was set events + * @cs_id: Clocksource ID + * @clock_was_set_seq: The sequence number of clock-was-set events * @cs_was_changed_seq: The sequence number of clocksource change events */ struct system_time_snapshot { -- cgit v1.2.3 From b87752ef5cc15b0bae04583d599e873d92dc0618 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 31 Mar 2024 10:26:49 -0700 Subject: timers: Fix kernel-doc format and add Return values Fix kernel-doc format and warnings: timer.h:26: warning: Cannot understand * @TIMER_DEFERRABLE: A deferrable timer will work normally when the on line 26 - I thought it was a doc line timer.h:146: warning: No description found for return value of 'timer_pending' timer.h:180: warning: No description found for return value of 'del_timer_sync' timer.h:193: warning: No description found for return value of 'del_timer' Signed-off-by: Randy Dunlap Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20240331172652.14086-4-rdunlap@infradead.org --- include/linux/timer.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 14a633ba61d6..e67ecd1cbc97 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -22,7 +22,7 @@ #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) #endif -/** +/* * @TIMER_DEFERRABLE: A deferrable timer will work normally when the * system is busy, but will not cause a CPU to come out of idle just * to service it; instead, the timer will be serviced when the CPU @@ -140,7 +140,7 @@ static inline void destroy_timer_on_stack(struct timer_list *timer) { } * or not. Callers must ensure serialization wrt. other operations done * to this timer, eg. interrupt contexts, or other CPUs on SMP. * - * return value: 1 if the timer is pending, 0 if not. + * Returns: 1 if the timer is pending, 0 if not. */ static inline int timer_pending(const struct timer_list * timer) { @@ -175,6 +175,10 @@ extern int timer_shutdown(struct timer_list *timer); * See timer_delete_sync() for detailed explanation. * * Do not use in new code. Use timer_delete_sync() instead. + * + * Returns: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ static inline int del_timer_sync(struct timer_list *timer) { @@ -188,6 +192,10 @@ static inline int del_timer_sync(struct timer_list *timer) * See timer_delete() for detailed explanation. * * Do not use in new code. Use timer_delete() instead. + * + * Returns: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ static inline int del_timer(struct timer_list *timer) { -- cgit v1.2.3 From 09ab7eff38202159271534d2f5ad45526168f2a5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 14 Mar 2024 10:45:07 -0600 Subject: io_uring/kbuf: get rid of lower BGID lists Just rely on the xarray for any kind of bgid. This simplifies things, and it really doesn't bring us much, if anything. Cc: stable@vger.kernel.org # v6.4+ Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 1 - io_uring/io_uring.c | 2 -- io_uring/kbuf.c | 70 +++++------------------------------------- 3 files changed, 8 insertions(+), 65 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index e24893625085..05df0e399d7c 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -294,7 +294,6 @@ struct io_ring_ctx { struct io_submit_state submit_state; - struct io_buffer_list *io_bl; struct xarray io_bl_xa; struct io_hash_table cancel_table_locked; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index d1defb99b89e..bc730f59265f 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -351,7 +351,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) err: kfree(ctx->cancel_table.hbs); kfree(ctx->cancel_table_locked.hbs); - kfree(ctx->io_bl); xa_destroy(&ctx->io_bl_xa); kfree(ctx); return NULL; @@ -2932,7 +2931,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) io_napi_free(ctx); kfree(ctx->cancel_table.hbs); kfree(ctx->cancel_table_locked.hbs); - kfree(ctx->io_bl); xa_destroy(&ctx->io_bl_xa); kfree(ctx); } diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 693c26da4ee1..8bf0121f00af 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -17,8 +17,6 @@ #define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf)) -#define BGID_ARRAY 64 - /* BIDs are addressed by a 16-bit field in a CQE */ #define MAX_BIDS_PER_BGID (1 << 16) @@ -40,13 +38,9 @@ struct io_buf_free { int inuse; }; -static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, - struct io_buffer_list *bl, - unsigned int bgid) +static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, + unsigned int bgid) { - if (bl && bgid < BGID_ARRAY) - return &bl[bgid]; - return xa_load(&ctx->io_bl_xa, bgid); } @@ -55,7 +49,7 @@ static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, { lockdep_assert_held(&ctx->uring_lock); - return __io_buffer_get_list(ctx, ctx->io_bl, bgid); + return __io_buffer_get_list(ctx, bgid); } static int io_buffer_add_list(struct io_ring_ctx *ctx, @@ -68,10 +62,6 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx, */ bl->bgid = bgid; smp_store_release(&bl->is_ready, 1); - - if (bgid < BGID_ARRAY) - return 0; - return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); } @@ -208,24 +198,6 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len, return ret; } -static __cold int io_init_bl_list(struct io_ring_ctx *ctx) -{ - struct io_buffer_list *bl; - int i; - - bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL); - if (!bl) - return -ENOMEM; - - for (i = 0; i < BGID_ARRAY; i++) { - INIT_LIST_HEAD(&bl[i].buf_list); - bl[i].bgid = i; - } - - smp_store_release(&ctx->io_bl, bl); - return 0; -} - /* * Mark the given mapped range as free for reuse */ @@ -300,13 +272,6 @@ void io_destroy_buffers(struct io_ring_ctx *ctx) struct list_head *item, *tmp; struct io_buffer *buf; unsigned long index; - int i; - - for (i = 0; i < BGID_ARRAY; i++) { - if (!ctx->io_bl) - break; - __io_remove_buffers(ctx, &ctx->io_bl[i], -1U); - } xa_for_each(&ctx->io_bl_xa, index, bl) { xa_erase(&ctx->io_bl_xa, bl->bgid); @@ -489,12 +454,6 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) io_ring_submit_lock(ctx, issue_flags); - if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) { - ret = io_init_bl_list(ctx); - if (ret) - goto err; - } - bl = io_buffer_get_list(ctx, p->bgid); if (unlikely(!bl)) { bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT); @@ -507,14 +466,9 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) if (ret) { /* * Doesn't need rcu free as it was never visible, but - * let's keep it consistent throughout. Also can't - * be a lower indexed array group, as adding one - * where lookup failed cannot happen. + * let's keep it consistent throughout. */ - if (p->bgid >= BGID_ARRAY) - kfree_rcu(bl, rcu); - else - WARN_ON_ONCE(1); + kfree_rcu(bl, rcu); goto err; } } @@ -679,12 +633,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (reg.ring_entries >= 65536) return -EINVAL; - if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) { - int ret = io_init_bl_list(ctx); - if (ret) - return ret; - } - bl = io_buffer_get_list(ctx, reg.bgid); if (bl) { /* if mapped buffer ring OR classic exists, don't allow */ @@ -734,10 +682,8 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) return -EINVAL; __io_remove_buffers(ctx, bl, -1U); - if (bl->bgid >= BGID_ARRAY) { - xa_erase(&ctx->io_bl_xa, bl->bgid); - kfree_rcu(bl, rcu); - } + xa_erase(&ctx->io_bl_xa, bl->bgid); + kfree_rcu(bl, rcu); return 0; } @@ -771,7 +717,7 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) { struct io_buffer_list *bl; - bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid); + bl = __io_buffer_get_list(ctx, bgid); if (!bl || !bl->is_mmap) return NULL; -- cgit v1.2.3 From cffaefd15a8f423cdee5d8eac15d267bc92de314 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 20 Mar 2024 19:02:15 +0100 Subject: vdso: Use CONFIG_PAGE_SHIFT in vdso/datapage.h Both the vdso rework and the CONFIG_PAGE_SHIFT changes were merged during the v6.9 merge window, so it is now possible to use CONFIG_PAGE_SHIFT instead of including asm/page.h in the vdso. This avoids the workaround for arm64 - commit 8b3843ae3634 ("vdso/datapage: Quick fix - use asm/page-def.h for ARM64") and addresses a build warning for powerpc64: In file included from :4: In file included from /home/arnd/arm-soc/arm-soc/lib/vdso/gettimeofday.c:5: In file included from ../include/vdso/datapage.h:25: arch/powerpc/include/asm/page.h:230:9: error: result of comparison of constant 13835058055282163712 with expression of type 'unsigned long' is always true [-Werror,-Wtautological-constant-out-of-range-compare] 230 | return __pa(kaddr) >> PAGE_SHIFT; | ^~~~~~~~~~~ arch/powerpc/include/asm/page.h:217:37: note: expanded from macro '__pa' 217 | VIRTUAL_WARN_ON((unsigned long)(x) < PAGE_OFFSET); \ | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~ arch/powerpc/include/asm/page.h:202:73: note: expanded from macro 'VIRTUAL_WARN_ON' 202 | #define VIRTUAL_WARN_ON(x) WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && (x)) | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~ arch/powerpc/include/asm/bug.h:88:25: note: expanded from macro 'WARN_ON' 88 | int __ret_warn_on = !!(x); \ | ^ Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Acked-by: Michael Ellerman (powerpc) Link: https://lore.kernel.org/r/20240320180228.136371-1-arnd@kernel.org --- arch/powerpc/include/asm/vdso/gettimeofday.h | 3 +-- include/vdso/datapage.h | 8 +------- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index f0a4cf01e85c..78302f6c2580 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -4,7 +4,6 @@ #ifndef __ASSEMBLY__ -#include #include #include #include @@ -95,7 +94,7 @@ const struct vdso_data *__arch_get_vdso_data(void); static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { - return (void *)vd + PAGE_SIZE; + return (void *)vd + (1U << CONFIG_PAGE_SHIFT); } #endif diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 5d5c0b8efff2..c71ddb6d4691 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -19,12 +19,6 @@ #include #include -#ifdef CONFIG_ARM64 -#include -#else -#include -#endif - #ifdef CONFIG_ARCH_HAS_VDSO_DATA #include #else @@ -132,7 +126,7 @@ extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden */ union vdso_data_store { struct vdso_data data[CS_BASES]; - u8 page[PAGE_SIZE]; + u8 page[1U << CONFIG_PAGE_SHIFT]; }; /* -- cgit v1.2.3 From c0de6ab920aafb56feab56058e46b688e694a246 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Tue, 2 Apr 2024 12:48:36 -0700 Subject: net: mana: Fix Rx DMA datasize and skb_over_panic mana_get_rxbuf_cfg() aligns the RX buffer's DMA datasize to be multiple of 64. So a packet slightly bigger than mtu+14, say 1536, can be received and cause skb_over_panic. Sample dmesg: [ 5325.237162] skbuff: skb_over_panic: text:ffffffffc043277a len:1536 put:1536 head:ff1100018b517000 data:ff1100018b517100 tail:0x700 end:0x6ea dev: [ 5325.243689] ------------[ cut here ]------------ [ 5325.245748] kernel BUG at net/core/skbuff.c:192! [ 5325.247838] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 5325.258374] RIP: 0010:skb_panic+0x4f/0x60 [ 5325.302941] Call Trace: [ 5325.304389] [ 5325.315794] ? skb_panic+0x4f/0x60 [ 5325.317457] ? asm_exc_invalid_op+0x1f/0x30 [ 5325.319490] ? skb_panic+0x4f/0x60 [ 5325.321161] skb_put+0x4e/0x50 [ 5325.322670] mana_poll+0x6fa/0xb50 [mana] [ 5325.324578] __napi_poll+0x33/0x1e0 [ 5325.326328] net_rx_action+0x12e/0x280 As discussed internally, this alignment is not necessary. To fix this bug, remove it from the code. So oversized packets will be marked as CQE_RX_TRUNCATED by NIC, and dropped. Cc: stable@vger.kernel.org Fixes: 2fbbd712baf1 ("net: mana: Enable RX path to handle various MTU sizes") Signed-off-by: Haiyang Zhang Reviewed-by: Dexuan Cui Link: https://lore.kernel.org/r/1712087316-20886-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/mana_en.c | 2 +- include/net/mana/mana.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 59287c6e6cee..d8af5e7e15b4 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -601,7 +601,7 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; - *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN); + *datasize = mtu + ETH_HLEN; } static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 76147feb0d10..4eeedf14711b 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -39,7 +39,6 @@ enum TRI_STATE { #define COMP_ENTRY_SIZE 64 #define RX_BUFFERS_PER_QUEUE 512 -#define MANA_RX_DATA_ALIGN 64 #define MAX_SEND_BUFFERS_PER_QUEUE 256 -- cgit v1.2.3 From bc6f707fc0feec72acc2f49c312eb31d257363a3 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 27 Mar 2024 16:43:16 +0100 Subject: x86/cc: Add cc_platform_set/_clear() helpers Add functionality to set and/or clear different attributes of the machine as a confidential computing platform. Add the first one too: whether the machine is running as a host for SEV-SNP guests. Fixes: 216d106c7ff7 ("x86/sev: Add SEV-SNP host initialization support") Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Tested-by: Srikanth Aithal Link: https://lore.kernel.org/r/20240327154317.29909-5-bp@alien8.de --- arch/x86/coco/core.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/cc_platform.h | 12 +++++++++++ 2 files changed, 64 insertions(+) (limited to 'include') diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index ddd4efdc79d6..b31ef2424d19 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -20,6 +20,11 @@ enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; u64 cc_mask __ro_after_init; +static struct cc_attr_flags { + __u64 host_sev_snp : 1, + __resv : 63; +} cc_flags; + static bool noinstr intel_cc_platform_has(enum cc_attr attr) { switch (attr) { @@ -93,6 +98,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr) case CC_ATTR_GUEST_SEV_SNP: return sev_status & MSR_AMD64_SEV_SNP_ENABLED; + case CC_ATTR_HOST_SEV_SNP: + return cc_flags.host_sev_snp; + default: return false; } @@ -153,6 +161,50 @@ u64 cc_mkdec(u64 val) } EXPORT_SYMBOL_GPL(cc_mkdec); +static void amd_cc_platform_clear(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_HOST_SEV_SNP: + cc_flags.host_sev_snp = 0; + break; + default: + break; + } +} + +void cc_platform_clear(enum cc_attr attr) +{ + switch (cc_vendor) { + case CC_VENDOR_AMD: + amd_cc_platform_clear(attr); + break; + default: + break; + } +} + +static void amd_cc_platform_set(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_HOST_SEV_SNP: + cc_flags.host_sev_snp = 1; + break; + default: + break; + } +} + +void cc_platform_set(enum cc_attr attr) +{ + switch (cc_vendor) { + case CC_VENDOR_AMD: + amd_cc_platform_set(attr); + break; + default: + break; + } +} + __init void cc_random_init(void) { /* diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index cb0d6cd1c12f..60693a145894 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -90,6 +90,14 @@ enum cc_attr { * Examples include TDX Guest. */ CC_ATTR_HOTPLUG_DISABLED, + + /** + * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host. + * + * The host kernel is running with the necessary features + * enabled to run SEV-SNP guests. + */ + CC_ATTR_HOST_SEV_SNP, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM @@ -107,10 +115,14 @@ enum cc_attr { * * FALSE - Specified Confidential Computing attribute is not active */ bool cc_platform_has(enum cc_attr attr); +void cc_platform_set(enum cc_attr attr); +void cc_platform_clear(enum cc_attr attr); #else /* !CONFIG_ARCH_HAS_CC_PLATFORM */ static inline bool cc_platform_has(enum cc_attr attr) { return false; } +static inline void cc_platform_set(enum cc_attr attr) { } +static inline void cc_platform_clear(enum cc_attr attr) { } #endif /* CONFIG_ARCH_HAS_CC_PLATFORM */ -- cgit v1.2.3 From 8130b05c559d1aa83d0c8971b422ba0da18ef24a Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Thu, 4 Apr 2024 12:42:00 +0200 Subject: PM: EM: fix wrong utilization estimation in em_cpu_energy() Commit 1b600da51073 ("PM: EM: Optimize em_cpu_energy() and remove division") has added back map_util_perf() in em_cpu_energy() computation which has been removed with the rework of scheduler/cpufreq interface. This is wrong because sugov_effective_cpu_perf() already takes care of mapping the utilization to a performance level. Fixes: 1b600da51073 ("PM: EM: Optimize em_cpu_energy() and remove division") Signed-off-by: Vincent Guittot Reviewed-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 770755df852f..70cd7258cd29 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -245,7 +245,6 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * max utilization to the allowed CPU capacity before calculating * effective performance. */ - max_util = map_util_perf(max_util); max_util = min(max_util, allowed_cpu_cap); /* -- cgit v1.2.3 From 65291dcfcf8936e1b23cfd7718fdfde7cfaf7706 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 26 Mar 2024 15:32:08 +0100 Subject: mm/secretmem: fix GUP-fast succeeding on secretmem folios folio_is_secretmem() currently relies on secretmem folios being LRU folios, to save some cycles. However, folios might reside in a folio batch without the LRU flag set, or temporarily have their LRU flag cleared. Consequently, the LRU flag is unreliable for this purpose. In particular, this is the case when secretmem_fault() allocates a fresh page and calls filemap_add_folio()->folio_add_lru(). The folio might be added to the per-cpu folio batch and won't get the LRU flag set until the batch was drained using e.g., lru_add_drain(). Consequently, folio_is_secretmem() might not detect secretmem folios and GUP-fast can succeed in grabbing a secretmem folio, crashing the kernel when we would later try reading/writing to the folio, because the folio has been unmapped from the directmap. Fix it by removing that unreliable check. Link: https://lkml.kernel.org/r/20240326143210.291116-2-david@redhat.com Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas") Signed-off-by: David Hildenbrand Reported-by: xingwei lee Reported-by: yue sun Closes: https://lore.kernel.org/lkml/CABOYnLyevJeravW=QrH0JUPYEcDN160aZFb7kwndm-J2rmz0HQ@mail.gmail.com/ Debugged-by: Miklos Szeredi Tested-by: Miklos Szeredi Reviewed-by: Mike Rapoport (IBM) Cc: Lorenzo Stoakes Cc: Signed-off-by: Andrew Morton --- include/linux/secretmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h index 35f3a4a8ceb1..acf7e1a3f3de 100644 --- a/include/linux/secretmem.h +++ b/include/linux/secretmem.h @@ -13,10 +13,10 @@ static inline bool folio_is_secretmem(struct folio *folio) /* * Using folio_mapping() is quite slow because of the actual call * instruction. - * We know that secretmem pages are not compound and LRU so we can + * We know that secretmem pages are not compound, so we can * save a couple of cycles here. */ - if (folio_test_large(folio) || !folio_test_lru(folio)) + if (folio_test_large(folio)) return false; mapping = (struct address_space *) -- cgit v1.2.3 From a6c1d9cb9a68bfa4512248419c4f4d880d19fe90 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 1 Apr 2024 17:14:58 -0700 Subject: stackdepot: rename pool_index to pool_index_plus_1 Commit 3ee34eabac2a ("lib/stackdepot: fix first entry having a 0-handle") changed the meaning of the pool_index field to mean "the pool index plus 1". This made the code accessing this field less self-documenting, as well as causing debuggers such as drgn to not be able to easily remain compatible with both old and new kernels, because they typically do that by testing for presence of the new field. Because stackdepot is a debugging tool, we should make sure that it is debugger friendly. Therefore, give the field a different name to improve readability as well as enabling debugger backwards compatibility. This is needed in 6.9, which would otherwise become an odd release with the new semantics and old name so debuggers wouldn't recognize the new semantics there. Fixes: 3ee34eabac2a ("lib/stackdepot: fix first entry having a 0-handle") Link: https://lkml.kernel.org/r/20240402001500.53533-1-pcc@google.com Link: https://linux-review.googlesource.com/id/Ib3e70c36c1d230dd0a118dc22649b33e768b9f88 Signed-off-by: Peter Collingbourne Acked-by: Vlastimil Babka Reviewed-by: Alexander Potapenko Acked-by: Marco Elver Acked-by: Oscar Salvador Cc: Andrey Konovalov Cc: Michal Hocko Cc: Omar Sandoval Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 7 +++---- lib/stackdepot.c | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 3c6caa5abc7c..e9ec32fb97d4 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -44,10 +44,9 @@ typedef u32 depot_stack_handle_t; union handle_parts { depot_stack_handle_t handle; struct { - /* pool_index is offset by 1 */ - u32 pool_index : DEPOT_POOL_INDEX_BITS; - u32 offset : DEPOT_OFFSET_BITS; - u32 extra : STACK_DEPOT_EXTRA_BITS; + u32 pool_index_plus_1 : DEPOT_POOL_INDEX_BITS; + u32 offset : DEPOT_OFFSET_BITS; + u32 extra : STACK_DEPOT_EXTRA_BITS; }; }; diff --git a/lib/stackdepot.c b/lib/stackdepot.c index af6cc19a2003..68c97387aa54 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -330,7 +330,7 @@ static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size) stack = current_pool + pool_offset; /* Pre-initialize handle once. */ - stack->handle.pool_index = pool_index + 1; + stack->handle.pool_index_plus_1 = pool_index + 1; stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN; stack->handle.extra = 0; INIT_LIST_HEAD(&stack->hash_list); @@ -441,7 +441,7 @@ static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) const int pools_num_cached = READ_ONCE(pools_num); union handle_parts parts = { .handle = handle }; void *pool; - u32 pool_index = parts.pool_index - 1; + u32 pool_index = parts.pool_index_plus_1 - 1; size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; -- cgit v1.2.3 From 9d50f95bc0d5df56f2591b950a251d90bffad094 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 16 Apr 2024 11:09:01 +0100 Subject: gpio: swnode: Add ability to specify native chip selects for SPI SPI devices can specify a cs-gpios property to enumerate their chip selects. Under device tree, a zero entry in this property can be used to specify that a particular chip select is using the SPI controllers native chip select, for example: cs-gpios = <&gpio1 0 0>, <0>; Here, the second chip select is native. However, when using swnodes there is currently no way to specify a native chip select. The proposal here is to register a swnode_gpio_undefined software node, that can be specified to allow the indication of a native chip select. For example: static const struct software_node_ref_args device_cs_refs[] = { { .node = &device_gpiochip_swnode, .nargs = 2, .args = { 0, GPIO_ACTIVE_LOW }, }, { .node = &swnode_gpio_undefined, .nargs = 0, }, }; Register the swnode as the gpiolib is initialised and check in swnode_get_gpio_device() if the returned node matches swnode_gpio_undefined and return -ENOENT, which matches the behaviour of the device tree system when it encounters a 0 phandle. Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20240416100904.3738093-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/gpio/Kconfig | 9 +++++++++ drivers/gpio/gpiolib-swnode.c | 44 +++++++++++++++++++++++++++++++++++++++++++ include/linux/gpio/property.h | 4 ++++ 3 files changed, 57 insertions(+) (limited to 'include') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index b50d0b470849..00b5c007a2bb 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -103,6 +103,15 @@ config GPIO_REGMAP select REGMAP tristate +config GPIO_SWNODE_UNDEFINED + bool + help + This adds a special place holder for software nodes to contain an + undefined GPIO reference, this is primarily used by SPI to allow a + list of GPIO chip selects to mark a certain chip select as being + controlled the SPI device's internal chip select mechanism and not + a GPIO. + # put drivers in the right section, in alphabetical order # This symbol is selected by both I2C and SPI expanders diff --git a/drivers/gpio/gpiolib-swnode.c b/drivers/gpio/gpiolib-swnode.c index fa52bdb1a29a..cec1ab878af8 100644 --- a/drivers/gpio/gpiolib-swnode.c +++ b/drivers/gpio/gpiolib-swnode.c @@ -4,8 +4,13 @@ * * Copyright 2022 Google LLC */ + +#define pr_fmt(fmt) "gpiolib: swnode: " fmt + #include #include +#include +#include #include #include #include @@ -17,6 +22,8 @@ #include "gpiolib.h" #include "gpiolib-swnode.h" +#define GPIOLIB_SWNODE_UNDEFINED_NAME "swnode-gpio-undefined" + static void swnode_format_propname(const char *con_id, char *propname, size_t max_size) { @@ -40,6 +47,14 @@ static struct gpio_device *swnode_get_gpio_device(struct fwnode_handle *fwnode) if (!gdev_node || !gdev_node->name) return ERR_PTR(-EINVAL); + /* + * Check for a special node that identifies undefined GPIOs, this is + * primarily used as a key for internal chip selects in SPI bindings. + */ + if (IS_ENABLED(CONFIG_GPIO_SWNODE_UNDEFINED) && + !strcmp(gdev_node->name, GPIOLIB_SWNODE_UNDEFINED_NAME)) + return ERR_PTR(-ENOENT); + gdev = gpio_device_find_by_label(gdev_node->name); return gdev ?: ERR_PTR(-EPROBE_DEFER); } @@ -121,3 +136,32 @@ int swnode_gpio_count(const struct fwnode_handle *fwnode, const char *con_id) return count ?: -ENOENT; } + +#if IS_ENABLED(CONFIG_GPIO_SWNODE_UNDEFINED) +/* + * A special node that identifies undefined GPIOs, this is primarily used as + * a key for internal chip selects in SPI bindings. + */ +const struct software_node swnode_gpio_undefined = { + .name = GPIOLIB_SWNODE_UNDEFINED_NAME, +}; +EXPORT_SYMBOL_NS_GPL(swnode_gpio_undefined, GPIO_SWNODE); + +static int __init swnode_gpio_init(void) +{ + int ret; + + ret = software_node_register(&swnode_gpio_undefined); + if (ret < 0) + pr_err("failed to register swnode: %d\n", ret); + + return ret; +} +subsys_initcall(swnode_gpio_init); + +static void __exit swnode_gpio_cleanup(void) +{ + software_node_unregister(&swnode_gpio_undefined); +} +__exitcall(swnode_gpio_cleanup); +#endif diff --git a/include/linux/gpio/property.h b/include/linux/gpio/property.h index 6c75c8bd44a0..832a60c2e0b9 100644 --- a/include/linux/gpio/property.h +++ b/include/linux/gpio/property.h @@ -5,7 +5,11 @@ #include /* for GPIO_* flags */ #include +struct software_node; + #define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \ PROPERTY_ENTRY_REF(_name_, _chip_node_, _idx_, _flags_) +extern const struct software_node swnode_gpio_undefined; + #endif /* __LINUX_GPIO_PROPERTY_H */ -- cgit v1.2.3